traject_horizon 0.11.1 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/traject/horizon_reader.rb +40 -7
- data/lib/traject_horizon/version.rb +1 -1
- metadata +46 -46
@@ -128,6 +128,8 @@ module Traject
|
|
128
128
|
@settings = Traject::Indexer::Settings.new( self.class.default_settings).merge(settings)
|
129
129
|
|
130
130
|
require_jars!
|
131
|
+
|
132
|
+
logger.info(" #{self.class.name} reading records from #{settings["horizon.jdbc_url"]}")
|
131
133
|
end
|
132
134
|
|
133
135
|
# Requires marc4j and jtds, and java_import's some classes.
|
@@ -210,6 +212,18 @@ module Traject
|
|
210
212
|
sql += " AND " + clauses.join(" AND ") + " "
|
211
213
|
end
|
212
214
|
|
215
|
+
# without the order by, rows USUALLY come back in order anyway,
|
216
|
+
# but sometimes they don't -- when they don't, it can cause one real
|
217
|
+
# record to be split up into multiple partial output records, which
|
218
|
+
# can overwrite each other in the solr index.
|
219
|
+
#
|
220
|
+
# So we sort -- which seems to make query results come back somewhat
|
221
|
+
# slower, but SEEMS to be manageable. Ideally we might include 'tagord'
|
222
|
+
# in the sort too, but that seems to make performance even worse,
|
223
|
+
# we're willing to risk tags not being reassembled in exactly the
|
224
|
+
# right order, usually they are anyway, and it doesn't usually matter anyway.
|
225
|
+
sql+= " ORDER BY b.bib# " # ", tagord" would be even better, but slower.
|
226
|
+
|
213
227
|
pstmt = conn.prepareStatement(sql);
|
214
228
|
|
215
229
|
# this may be what's necessary to keep the driver from fetching
|
@@ -315,6 +329,7 @@ module Traject
|
|
315
329
|
authtext = rs.getBytes("xref_longtext") || rs.getBytes("xref_text")
|
316
330
|
text = rs.getBytes("longtext") || rs.getBytes("text")
|
317
331
|
|
332
|
+
|
318
333
|
if tag == "000"
|
319
334
|
# Horizon puts a \x1E marc field terminator on the end of the
|
320
335
|
# leader in the db too, but that's not really part of it.
|
@@ -323,7 +338,8 @@ module Traject
|
|
323
338
|
fix_leader!(record.leader)
|
324
339
|
elsif tag != "001"
|
325
340
|
# we add an 001 ourselves with bib id in another part of code.
|
326
|
-
|
341
|
+
field = build_marc_field!(error_handler, tag, indicators, text, authtext)
|
342
|
+
record.append field unless field.nil?
|
327
343
|
end
|
328
344
|
end
|
329
345
|
|
@@ -332,6 +348,7 @@ module Traject
|
|
332
348
|
|
333
349
|
# yield last batch
|
334
350
|
enhance_batch!(extra_connection, record_batch)
|
351
|
+
|
335
352
|
record_batch.each do |r|
|
336
353
|
yield r
|
337
354
|
end
|
@@ -374,6 +391,7 @@ module Traject
|
|
374
391
|
# Other args are objects fetched from Horizon db via JDBC --
|
375
392
|
# text and authtext must be byte arrays.
|
376
393
|
def build_marc_field!(error_handler, tag, indicators, text, authtext)
|
394
|
+
|
377
395
|
# convert text and authtext from java bytes to a ruby
|
378
396
|
# binary string.
|
379
397
|
if text
|
@@ -561,6 +579,7 @@ module Traject
|
|
561
579
|
|
562
580
|
# Mutate string passed in to fix leader bytes for marc21
|
563
581
|
def fix_leader!(leader)
|
582
|
+
|
564
583
|
if leader.length < 24
|
565
584
|
# pad it to 24 bytes, leader is supposed to be 24 bytes
|
566
585
|
leader.replace( leader.ljust(24, ' ') )
|
@@ -573,12 +592,26 @@ module Traject
|
|
573
592
|
leader[9] = 'a'
|
574
593
|
end
|
575
594
|
|
576
|
-
#
|
577
|
-
#
|
578
|
-
#
|
579
|
-
#
|
580
|
-
# https://github.com/jruby/jruby/issues/886
|
581
|
-
leader.force_encoding(
|
595
|
+
# leader should only have ascii chars in it; invalid non-ascii
|
596
|
+
# chars can cause ruby encoding problems down the line.
|
597
|
+
# additionally, a force_encoding may be necessary to
|
598
|
+
# deal with an apparent, hard-to-isolate jruby bug, probably the same one
|
599
|
+
# as at https://github.com/jruby/jruby/issues/886
|
600
|
+
leader.force_encoding('ascii')
|
601
|
+
|
602
|
+
unless leader.valid_encoding?
|
603
|
+
# replace any non-ascii chars with a space.
|
604
|
+
|
605
|
+
# Can't access leader.chars when it's not a valid encoding
|
606
|
+
# without a weird index out of bounds exception, think it's
|
607
|
+
# https://github.com/jruby/jruby/issues/886
|
608
|
+
# Grr.
|
609
|
+
|
610
|
+
#leader.replace( leader.chars.collect { |c| c.valid_encoding? ? c : ' ' }.join('') )
|
611
|
+
leader.replace(leader.split('').collect { |c| c.valid_encoding? ? c : ' ' }.join(''))
|
612
|
+
end
|
613
|
+
|
614
|
+
|
582
615
|
|
583
616
|
end
|
584
617
|
|
metadata
CHANGED
@@ -1,97 +1,97 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject_horizon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
4
|
+
version: 0.11.2
|
5
|
+
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jonathan Rochkind
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-09-
|
12
|
+
date: 2013-09-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: traject
|
16
|
-
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
17
18
|
requirements:
|
18
|
-
- - '>='
|
19
|
+
- - ! '>='
|
19
20
|
- !ruby/object:Gem::Version
|
20
21
|
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
25
|
none: false
|
22
|
-
requirement: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
|
-
- - '>='
|
27
|
+
- - ! '>='
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '0'
|
27
|
-
none: false
|
28
|
-
prerelease: false
|
29
|
-
type: :runtime
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: marc-marc4j
|
32
|
-
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
33
34
|
requirements:
|
34
|
-
- - '>='
|
35
|
+
- - ! '>='
|
35
36
|
- !ruby/object:Gem::Version
|
36
37
|
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
41
|
none: false
|
38
|
-
requirement: !ruby/object:Gem::Requirement
|
39
42
|
requirements:
|
40
|
-
- - '>='
|
43
|
+
- - ! '>='
|
41
44
|
- !ruby/object:Gem::Version
|
42
45
|
version: '0'
|
43
|
-
none: false
|
44
|
-
prerelease: false
|
45
|
-
type: :runtime
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: bundler
|
48
|
-
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
49
50
|
requirements:
|
50
51
|
- - ~>
|
51
52
|
- !ruby/object:Gem::Version
|
52
53
|
version: '1.3'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
57
|
none: false
|
54
|
-
requirement: !ruby/object:Gem::Requirement
|
55
58
|
requirements:
|
56
59
|
- - ~>
|
57
60
|
- !ruby/object:Gem::Version
|
58
61
|
version: '1.3'
|
59
|
-
none: false
|
60
|
-
prerelease: false
|
61
|
-
type: :development
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: rake
|
64
|
-
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
65
66
|
requirements:
|
66
|
-
- - '>='
|
67
|
+
- - ! '>='
|
67
68
|
- !ruby/object:Gem::Version
|
68
69
|
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
69
73
|
none: false
|
70
|
-
requirement: !ruby/object:Gem::Requirement
|
71
74
|
requirements:
|
72
|
-
- - '>='
|
75
|
+
- - ! '>='
|
73
76
|
- !ruby/object:Gem::Version
|
74
77
|
version: '0'
|
75
|
-
none: false
|
76
|
-
prerelease: false
|
77
|
-
type: :development
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
79
|
name: minitest
|
80
|
-
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
81
82
|
requirements:
|
82
|
-
- - '>='
|
83
|
+
- - ! '>='
|
83
84
|
- !ruby/object:Gem::Version
|
84
85
|
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
89
|
none: false
|
86
|
-
requirement: !ruby/object:Gem::Requirement
|
87
90
|
requirements:
|
88
|
-
- - '>='
|
91
|
+
- - ! '>='
|
89
92
|
- !ruby/object:Gem::Version
|
90
93
|
version: '0'
|
91
|
-
|
92
|
-
prerelease: false
|
93
|
-
type: :development
|
94
|
-
description:
|
94
|
+
description:
|
95
95
|
email:
|
96
96
|
- jonathan@dnil.net
|
97
97
|
executables: []
|
@@ -115,26 +115,26 @@ files:
|
|
115
115
|
homepage: http://github.com/jrochkind/traject_horizon
|
116
116
|
licenses:
|
117
117
|
- MIT
|
118
|
-
post_install_message:
|
118
|
+
post_install_message:
|
119
119
|
rdoc_options: []
|
120
120
|
require_paths:
|
121
121
|
- lib
|
122
122
|
required_ruby_version: !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
123
124
|
requirements:
|
124
|
-
- - '>='
|
125
|
+
- - ! '>='
|
125
126
|
- !ruby/object:Gem::Version
|
126
127
|
version: '0'
|
127
|
-
none: false
|
128
128
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
129
130
|
requirements:
|
130
|
-
- - '>='
|
131
|
+
- - ! '>='
|
131
132
|
- !ruby/object:Gem::Version
|
132
133
|
version: '0'
|
133
|
-
none: false
|
134
134
|
requirements: []
|
135
|
-
rubyforge_project:
|
136
|
-
rubygems_version: 1.8.
|
137
|
-
signing_key:
|
135
|
+
rubyforge_project:
|
136
|
+
rubygems_version: 1.8.23
|
137
|
+
signing_key:
|
138
138
|
specification_version: 3
|
139
139
|
summary: Horizon ILS MARC Exporter, a plugin for the traject tool
|
140
140
|
test_files:
|