traject_horizon 0.11.1 → 0.11.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -128,6 +128,8 @@ module Traject
128
128
  @settings = Traject::Indexer::Settings.new( self.class.default_settings).merge(settings)
129
129
 
130
130
  require_jars!
131
+
132
+ logger.info(" #{self.class.name} reading records from #{settings["horizon.jdbc_url"]}")
131
133
  end
132
134
 
133
135
  # Requires marc4j and jtds, and java_import's some classes.
@@ -210,6 +212,18 @@ module Traject
210
212
  sql += " AND " + clauses.join(" AND ") + " "
211
213
  end
212
214
 
215
+ # without the order by, rows USUALLY come back in order anyway,
216
+ # but sometimes they don't -- when they don't, it can cause one real
217
+ # record to be split up into multiple partial output records, which
218
+ # can overwrite each other in the solr index.
219
+ #
220
+ # So we sort -- which seems to make query results come back somewhat
221
+ # slower, but SEEMS to be manageable. Ideally we might include 'tagord'
222
+ # in the sort too, but that seems to make performance even worse,
223
+ # we're willing to risk tags not being reassembled in exactly the
224
+ # right order, usually they are anyway, and it doesn't usually matter anyway.
225
+ sql+= " ORDER BY b.bib# " # ", tagord" would be even better, but slower.
226
+
213
227
  pstmt = conn.prepareStatement(sql);
214
228
 
215
229
  # this may be what's necessary to keep the driver from fetching
@@ -315,6 +329,7 @@ module Traject
315
329
  authtext = rs.getBytes("xref_longtext") || rs.getBytes("xref_text")
316
330
  text = rs.getBytes("longtext") || rs.getBytes("text")
317
331
 
332
+
318
333
  if tag == "000"
319
334
  # Horizon puts a \x1E marc field terminator on the end of the
320
335
  # leader in the db too, but that's not really part of it.
@@ -323,7 +338,8 @@ module Traject
323
338
  fix_leader!(record.leader)
324
339
  elsif tag != "001"
325
340
  # we add an 001 ourselves with bib id in another part of code.
326
- record.append build_marc_field!(error_handler, tag, indicators, text, authtext)
341
+ field = build_marc_field!(error_handler, tag, indicators, text, authtext)
342
+ record.append field unless field.nil?
327
343
  end
328
344
  end
329
345
 
@@ -332,6 +348,7 @@ module Traject
332
348
 
333
349
  # yield last batch
334
350
  enhance_batch!(extra_connection, record_batch)
351
+
335
352
  record_batch.each do |r|
336
353
  yield r
337
354
  end
@@ -374,6 +391,7 @@ module Traject
374
391
  # Other args are objects fetched from Horizon db via JDBC --
375
392
  # text and authtext must be byte arrays.
376
393
  def build_marc_field!(error_handler, tag, indicators, text, authtext)
394
+
377
395
  # convert text and authtext from java bytes to a ruby
378
396
  # binary string.
379
397
  if text
@@ -561,6 +579,7 @@ module Traject
561
579
 
562
580
  # Mutate string passed in to fix leader bytes for marc21
563
581
  def fix_leader!(leader)
582
+
564
583
  if leader.length < 24
565
584
  # pad it to 24 bytes, leader is supposed to be 24 bytes
566
585
  leader.replace( leader.ljust(24, ' ') )
@@ -573,12 +592,26 @@ module Traject
573
592
  leader[9] = 'a'
574
593
  end
575
594
 
576
- # Do not understand why this voodoo that should be a no-op is neccesary,
577
- # but get a mysterious and hard to isolate/reproduce encoding
578
- # bug without it, but not with it. Think it may be the same
579
- # but as this:
580
- # https://github.com/jruby/jruby/issues/886
581
- leader.force_encoding(leader.encoding)
595
+ # leader should only have ascii chars in it; invalid non-ascii
596
+ # chars can cause ruby encoding problems down the line.
597
+ # additionally, a force_encoding may be necessary to
598
+ # deal with apparent weird hard to isolate jruby bug prob same one
599
+ # as at https://github.com/jruby/jruby/issues/886
600
+ leader.force_encoding('ascii')
601
+
602
+ unless leader.valid_encoding?
603
+ # replace any non-ascii chars with a space.
604
+
605
+ # Can't access leader.chars when it's not a valid encoding
606
+ # without a weird index out of bounds exception, think it's
607
+ # https://github.com/jruby/jruby/issues/886
608
+ # Grr.
609
+
610
+ #leader.replace( leader.chars.collect { |c| c.valid_encoding? ? c : ' ' }.join('') )
611
+ leader.replace(leader.split('').collect { |c| c.valid_encoding? ? c : ' ' }.join(''))
612
+ end
613
+
614
+
582
615
 
583
616
  end
584
617
 
@@ -1,3 +1,3 @@
1
1
  module TrajectHorizon
2
- VERSION = "0.11.1"
2
+ VERSION = "0.11.2"
3
3
  end
metadata CHANGED
@@ -1,97 +1,97 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject_horizon
3
3
  version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.11.1
4
+ version: 0.11.2
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jonathan Rochkind
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-09-16 00:00:00.000000000 Z
12
+ date: 2013-09-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: traject
16
- version_requirements: !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
17
18
  requirements:
18
- - - '>='
19
+ - - ! '>='
19
20
  - !ruby/object:Gem::Version
20
21
  version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
21
25
  none: false
22
- requirement: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - '>='
27
+ - - ! '>='
25
28
  - !ruby/object:Gem::Version
26
29
  version: '0'
27
- none: false
28
- prerelease: false
29
- type: :runtime
30
30
  - !ruby/object:Gem::Dependency
31
31
  name: marc-marc4j
32
- version_requirements: !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
33
34
  requirements:
34
- - - '>='
35
+ - - ! '>='
35
36
  - !ruby/object:Gem::Version
36
37
  version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
37
41
  none: false
38
- requirement: !ruby/object:Gem::Requirement
39
42
  requirements:
40
- - - '>='
43
+ - - ! '>='
41
44
  - !ruby/object:Gem::Version
42
45
  version: '0'
43
- none: false
44
- prerelease: false
45
- type: :runtime
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: bundler
48
- version_requirements: !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
49
50
  requirements:
50
51
  - - ~>
51
52
  - !ruby/object:Gem::Version
52
53
  version: '1.3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
53
57
  none: false
54
- requirement: !ruby/object:Gem::Requirement
55
58
  requirements:
56
59
  - - ~>
57
60
  - !ruby/object:Gem::Version
58
61
  version: '1.3'
59
- none: false
60
- prerelease: false
61
- type: :development
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: rake
64
- version_requirements: !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
65
66
  requirements:
66
- - - '>='
67
+ - - ! '>='
67
68
  - !ruby/object:Gem::Version
68
69
  version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
69
73
  none: false
70
- requirement: !ruby/object:Gem::Requirement
71
74
  requirements:
72
- - - '>='
75
+ - - ! '>='
73
76
  - !ruby/object:Gem::Version
74
77
  version: '0'
75
- none: false
76
- prerelease: false
77
- type: :development
78
78
  - !ruby/object:Gem::Dependency
79
79
  name: minitest
80
- version_requirements: !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
81
82
  requirements:
82
- - - '>='
83
+ - - ! '>='
83
84
  - !ruby/object:Gem::Version
84
85
  version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
85
89
  none: false
86
- requirement: !ruby/object:Gem::Requirement
87
90
  requirements:
88
- - - '>='
91
+ - - ! '>='
89
92
  - !ruby/object:Gem::Version
90
93
  version: '0'
91
- none: false
92
- prerelease: false
93
- type: :development
94
- description:
94
+ description:
95
95
  email:
96
96
  - jonathan@dnil.net
97
97
  executables: []
@@ -115,26 +115,26 @@ files:
115
115
  homepage: http://github.com/jrochkind/traject_horizon
116
116
  licenses:
117
117
  - MIT
118
- post_install_message:
118
+ post_install_message:
119
119
  rdoc_options: []
120
120
  require_paths:
121
121
  - lib
122
122
  required_ruby_version: !ruby/object:Gem::Requirement
123
+ none: false
123
124
  requirements:
124
- - - '>='
125
+ - - ! '>='
125
126
  - !ruby/object:Gem::Version
126
127
  version: '0'
127
- none: false
128
128
  required_rubygems_version: !ruby/object:Gem::Requirement
129
+ none: false
129
130
  requirements:
130
- - - '>='
131
+ - - ! '>='
131
132
  - !ruby/object:Gem::Version
132
133
  version: '0'
133
- none: false
134
134
  requirements: []
135
- rubyforge_project:
136
- rubygems_version: 1.8.24
137
- signing_key:
135
+ rubyforge_project:
136
+ rubygems_version: 1.8.23
137
+ signing_key:
138
138
  specification_version: 3
139
139
  summary: Horizon ILS MARC Exporter, a plugin for the traject tool
140
140
  test_files: