traject_horizon 0.11.1 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -128,6 +128,8 @@ module Traject
128
128
  @settings = Traject::Indexer::Settings.new( self.class.default_settings).merge(settings)
129
129
 
130
130
  require_jars!
131
+
132
+ logger.info(" #{self.class.name} reading records from #{settings["horizon.jdbc_url"]}")
131
133
  end
132
134
 
133
135
  # Requires marc4j and jtds, and java_import's some classes.
@@ -210,6 +212,18 @@ module Traject
210
212
  sql += " AND " + clauses.join(" AND ") + " "
211
213
  end
212
214
 
215
+ # without the order by, rows USUALLY come back in order anyway,
216
+ # but sometimes they don't -- when they don't, it can cause one real
217
+ # record to be split up into multiple partial output records, which
218
+ # can overwrite each other in the solr index.
219
+ #
220
+ # So we sort -- which seems to make query results come back somewhat
221
+ # slower, but SEEMS to be manageable. Ideally we might include 'tagord'
222
+ # in the sort too, but that seems to make performance even worse,
223
+ # we're willing to risk tags not being reassembled in exactly the
224
+ # right order, usually they are anyway, and it doesn't usually matter anyway.
225
+ sql+= " ORDER BY b.bib# " # ", tagord" would be even better, but slower.
226
+
213
227
  pstmt = conn.prepareStatement(sql);
214
228
 
215
229
  # this may be what's necessary to keep the driver from fetching
@@ -315,6 +329,7 @@ module Traject
315
329
  authtext = rs.getBytes("xref_longtext") || rs.getBytes("xref_text")
316
330
  text = rs.getBytes("longtext") || rs.getBytes("text")
317
331
 
332
+
318
333
  if tag == "000"
319
334
  # Horizon puts a \x1E marc field terminator on the end of the
320
335
  # leader in the db too, but that's not really part of it.
@@ -323,7 +338,8 @@ module Traject
323
338
  fix_leader!(record.leader)
324
339
  elsif tag != "001"
325
340
  # we add an 001 ourselves with bib id in another part of code.
326
- record.append build_marc_field!(error_handler, tag, indicators, text, authtext)
341
+ field = build_marc_field!(error_handler, tag, indicators, text, authtext)
342
+ record.append field unless field.nil?
327
343
  end
328
344
  end
329
345
 
@@ -332,6 +348,7 @@ module Traject
332
348
 
333
349
  # yield last batch
334
350
  enhance_batch!(extra_connection, record_batch)
351
+
335
352
  record_batch.each do |r|
336
353
  yield r
337
354
  end
@@ -374,6 +391,7 @@ module Traject
374
391
  # Other args are objects fetched from Horizon db via JDBC --
375
392
  # text and authtext must be byte arrays.
376
393
  def build_marc_field!(error_handler, tag, indicators, text, authtext)
394
+
377
395
  # convert text and authtext from java bytes to a ruby
378
396
  # binary string.
379
397
  if text
@@ -561,6 +579,7 @@ module Traject
561
579
 
562
580
  # Mutate string passed in to fix leader bytes for marc21
563
581
  def fix_leader!(leader)
582
+
564
583
  if leader.length < 24
565
584
  # pad it to 24 bytes, leader is supposed to be 24 bytes
566
585
  leader.replace( leader.ljust(24, ' ') )
@@ -573,12 +592,26 @@ module Traject
573
592
  leader[9] = 'a'
574
593
  end
575
594
 
576
- # Do not understand why this voodoo that should be a no-op is neccesary,
577
- # but get a mysterious and hard to isolate/reproduce encoding
578
- # bug without it, but not with it. Think it may be the same
579
- # but as this:
580
- # https://github.com/jruby/jruby/issues/886
581
- leader.force_encoding(leader.encoding)
595
+ # leader should only have ascii chars in it; invalid non-ascii
596
+ # chars can cause ruby encoding problems down the line.
597
+ # additionally, a force_encoding may be necessary to
598
+ # deal with apparent weird hard to isolate jruby bug prob same one
599
+ # as at https://github.com/jruby/jruby/issues/886
600
+ leader.force_encoding('ascii')
601
+
602
+ unless leader.valid_encoding?
603
+ # replace any non-ascii chars with a space.
604
+
605
+ # Can't access leader.chars when it's not a valid encoding
606
+ # without a weird index out of bounds exception, think it's
607
+ # https://github.com/jruby/jruby/issues/886
608
+ # Grr.
609
+
610
+ #leader.replace( leader.chars.collect { |c| c.valid_encoding? ? c : ' ' }.join('') )
611
+ leader.replace(leader.split('').collect { |c| c.valid_encoding? ? c : ' ' }.join(''))
612
+ end
613
+
614
+
582
615
 
583
616
  end
584
617
 
@@ -1,3 +1,3 @@
1
1
  module TrajectHorizon
2
- VERSION = "0.11.1"
2
+ VERSION = "0.11.2"
3
3
  end
metadata CHANGED
@@ -1,97 +1,97 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject_horizon
3
3
  version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.11.1
4
+ version: 0.11.2
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jonathan Rochkind
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-09-16 00:00:00.000000000 Z
12
+ date: 2013-09-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: traject
16
- version_requirements: !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
17
18
  requirements:
18
- - - '>='
19
+ - - ! '>='
19
20
  - !ruby/object:Gem::Version
20
21
  version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
21
25
  none: false
22
- requirement: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - '>='
27
+ - - ! '>='
25
28
  - !ruby/object:Gem::Version
26
29
  version: '0'
27
- none: false
28
- prerelease: false
29
- type: :runtime
30
30
  - !ruby/object:Gem::Dependency
31
31
  name: marc-marc4j
32
- version_requirements: !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
33
34
  requirements:
34
- - - '>='
35
+ - - ! '>='
35
36
  - !ruby/object:Gem::Version
36
37
  version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
37
41
  none: false
38
- requirement: !ruby/object:Gem::Requirement
39
42
  requirements:
40
- - - '>='
43
+ - - ! '>='
41
44
  - !ruby/object:Gem::Version
42
45
  version: '0'
43
- none: false
44
- prerelease: false
45
- type: :runtime
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: bundler
48
- version_requirements: !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
49
50
  requirements:
50
51
  - - ~>
51
52
  - !ruby/object:Gem::Version
52
53
  version: '1.3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
53
57
  none: false
54
- requirement: !ruby/object:Gem::Requirement
55
58
  requirements:
56
59
  - - ~>
57
60
  - !ruby/object:Gem::Version
58
61
  version: '1.3'
59
- none: false
60
- prerelease: false
61
- type: :development
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: rake
64
- version_requirements: !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
65
66
  requirements:
66
- - - '>='
67
+ - - ! '>='
67
68
  - !ruby/object:Gem::Version
68
69
  version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
69
73
  none: false
70
- requirement: !ruby/object:Gem::Requirement
71
74
  requirements:
72
- - - '>='
75
+ - - ! '>='
73
76
  - !ruby/object:Gem::Version
74
77
  version: '0'
75
- none: false
76
- prerelease: false
77
- type: :development
78
78
  - !ruby/object:Gem::Dependency
79
79
  name: minitest
80
- version_requirements: !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
81
82
  requirements:
82
- - - '>='
83
+ - - ! '>='
83
84
  - !ruby/object:Gem::Version
84
85
  version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
85
89
  none: false
86
- requirement: !ruby/object:Gem::Requirement
87
90
  requirements:
88
- - - '>='
91
+ - - ! '>='
89
92
  - !ruby/object:Gem::Version
90
93
  version: '0'
91
- none: false
92
- prerelease: false
93
- type: :development
94
- description:
94
+ description:
95
95
  email:
96
96
  - jonathan@dnil.net
97
97
  executables: []
@@ -115,26 +115,26 @@ files:
115
115
  homepage: http://github.com/jrochkind/traject_horizon
116
116
  licenses:
117
117
  - MIT
118
- post_install_message:
118
+ post_install_message:
119
119
  rdoc_options: []
120
120
  require_paths:
121
121
  - lib
122
122
  required_ruby_version: !ruby/object:Gem::Requirement
123
+ none: false
123
124
  requirements:
124
- - - '>='
125
+ - - ! '>='
125
126
  - !ruby/object:Gem::Version
126
127
  version: '0'
127
- none: false
128
128
  required_rubygems_version: !ruby/object:Gem::Requirement
129
+ none: false
129
130
  requirements:
130
- - - '>='
131
+ - - ! '>='
131
132
  - !ruby/object:Gem::Version
132
133
  version: '0'
133
- none: false
134
134
  requirements: []
135
- rubyforge_project:
136
- rubygems_version: 1.8.24
137
- signing_key:
135
+ rubyforge_project:
136
+ rubygems_version: 1.8.23
137
+ signing_key:
138
138
  specification_version: 3
139
139
  summary: Horizon ILS MARC Exporter, a plugin for the traject tool
140
140
  test_files: