traject_horizon 0.11.0 → 0.11.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,6 +5,7 @@ require 'traject/indexer/settings'
5
5
  require 'traject/horizon_bib_auth_merge'
6
6
 
7
7
  require 'marc'
8
+ require 'marc/marc4j' # for marc4j jars
8
9
 
9
10
  module Traject
10
11
  #
@@ -133,7 +134,8 @@ module Traject
133
134
  def require_jars!
134
135
  Traject::Util.jruby_ensure_init!("Traject::HorizonReader")
135
136
 
136
- Traject::Util.require_marc4j_jars(settings)
137
+ # ask marc-marc4j gem to load the marc4j jars
138
+ MARC::MARC4J.new(:jardir => settings['marc4j_reader.jar_dir'])
137
139
 
138
140
  # For some reason we seem to need to java_import it, and use
139
141
  # a string like this. can't just refer to it by full
@@ -228,7 +230,7 @@ module Traject
228
230
 
229
231
  # Turn Horizon's weird escaping into UTF8: <U+nnnn> where nnnn is a hex unicode
230
232
  # codepoint, turn it UTF8 for that codepoint
231
- if settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.destination_encoding"] == "UTF8"
233
+ if false && settings["horizon.codepoint_translate"].to_s == "true" && settings["horizon.destination_encoding"] == "UTF8"
232
234
  text.gsub!(/\<U\+([0-9A-F]{4})\>/) do
233
235
  [$1.hex].pack("U")
234
236
  end
@@ -314,7 +316,10 @@ module Traject
314
316
  text = rs.getBytes("longtext") || rs.getBytes("text")
315
317
 
316
318
  if tag == "000"
317
- record.leader = String.from_java_bytes text
319
+ # Horizon puts a \x1E marc field terminator on the end of the
320
+ # leader in the db too, but that's not really part of it.
321
+ record.leader = String.from_java_bytes(text).chomp("\x1E")
322
+
318
323
  fix_leader!(record.leader)
319
324
  elsif tag != "001"
320
325
  # we add an 001 ourselves with bib id in another part of code.
@@ -567,6 +572,14 @@ module Traject
567
572
  if settings['horizon.destination_encoding'] == "UTF8"
568
573
  leader[9] = 'a'
569
574
  end
575
+
576
+ # Do not understand why this voodoo that should be a no-op is necessary,
577
+ # but get a mysterious and hard to isolate/reproduce encoding
578
+ # bug without it, but not with it. Think it may be the same
579
+ # bug as this:
580
+ # https://github.com/jruby/jruby/issues/886
581
+ leader.force_encoding(leader.encoding)
582
+
570
583
  end
571
584
 
572
585
  def include_some_holdings?
@@ -1,3 +1,3 @@
1
1
  module TrajectHorizon
2
- VERSION = "0.11.0"
2
+ VERSION = "0.11.1"
3
3
  end
@@ -19,6 +19,8 @@ Gem::Specification.new do |spec|
19
19
 
20
20
  spec.add_dependency "traject"
21
21
 
22
+ spec.add_dependency "marc-marc4j" # for marc4j jar files
23
+
22
24
  spec.add_development_dependency "bundler", "~> 1.3"
23
25
  spec.add_development_dependency "rake"
24
26
  spec.add_development_dependency "minitest"
metadata CHANGED
@@ -2,14 +2,14 @@
2
2
  name: traject_horizon
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.11.0
5
+ version: 0.11.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jonathan Rochkind
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-09-12 00:00:00.000000000 Z
12
+ date: 2013-09-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: traject
@@ -27,6 +27,22 @@ dependencies:
27
27
  none: false
28
28
  prerelease: false
29
29
  type: :runtime
30
+ - !ruby/object:Gem::Dependency
31
+ name: marc-marc4j
32
+ version_requirements: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - '>='
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ none: false
38
+ requirement: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - '>='
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ none: false
44
+ prerelease: false
45
+ type: :runtime
30
46
  - !ruby/object:Gem::Dependency
31
47
  name: bundler
32
48
  version_requirements: !ruby/object:Gem::Requirement