traject_horizon 0.11.1 → 0.11.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/traject/horizon_reader.rb +40 -7
- data/lib/traject_horizon/version.rb +1 -1
- metadata +46 -46
@@ -128,6 +128,8 @@ module Traject
|
|
128
128
|
@settings = Traject::Indexer::Settings.new( self.class.default_settings).merge(settings)
|
129
129
|
|
130
130
|
require_jars!
|
131
|
+
|
132
|
+
logger.info(" #{self.class.name} reading records from #{settings["horizon.jdbc_url"]}")
|
131
133
|
end
|
132
134
|
|
133
135
|
# Requires marc4j and jtds, and java_import's some classes.
|
@@ -210,6 +212,18 @@ module Traject
|
|
210
212
|
sql += " AND " + clauses.join(" AND ") + " "
|
211
213
|
end
|
212
214
|
|
215
|
+
# without the order by, rows USUALLY come back in order anyway,
|
216
|
+
# but sometimes they don't -- when they don't, it can cause one real
|
217
|
+
# record to be split up into multiple partial output records, which
|
218
|
+
# can overwrite each other in the solr index.
|
219
|
+
#
|
220
|
+
# So we sort -- which seems to make query results come back somewhat
|
221
|
+
# slower, but SEEMS to be manageable. Ideally we might include 'tagord'
|
222
|
+
# in the sort too, but that seems to make performance even worse,
|
223
|
+
# we're willing to risk tags not being reassembled in exactly the
|
224
|
+
# right order, usually they are anyway, and it doesn't usually matter anyway.
|
225
|
+
sql+= " ORDER BY b.bib# " # ", tagord" would be even better, but slower.
|
226
|
+
|
213
227
|
pstmt = conn.prepareStatement(sql);
|
214
228
|
|
215
229
|
# this may be what's necessary to keep the driver from fetching
|
@@ -315,6 +329,7 @@ module Traject
|
|
315
329
|
authtext = rs.getBytes("xref_longtext") || rs.getBytes("xref_text")
|
316
330
|
text = rs.getBytes("longtext") || rs.getBytes("text")
|
317
331
|
|
332
|
+
|
318
333
|
if tag == "000"
|
319
334
|
# Horizon puts a \x1E marc field terminator on the end of the
|
320
335
|
# leader in the db too, but that's not really part of it.
|
@@ -323,7 +338,8 @@ module Traject
|
|
323
338
|
fix_leader!(record.leader)
|
324
339
|
elsif tag != "001"
|
325
340
|
# we add an 001 ourselves with bib id in another part of code.
|
326
|
-
|
341
|
+
field = build_marc_field!(error_handler, tag, indicators, text, authtext)
|
342
|
+
record.append field unless field.nil?
|
327
343
|
end
|
328
344
|
end
|
329
345
|
|
@@ -332,6 +348,7 @@ module Traject
|
|
332
348
|
|
333
349
|
# yield last batch
|
334
350
|
enhance_batch!(extra_connection, record_batch)
|
351
|
+
|
335
352
|
record_batch.each do |r|
|
336
353
|
yield r
|
337
354
|
end
|
@@ -374,6 +391,7 @@ module Traject
|
|
374
391
|
# Other args are objects fetched from Horizon db via JDBC --
|
375
392
|
# text and authtext must be byte arrays.
|
376
393
|
def build_marc_field!(error_handler, tag, indicators, text, authtext)
|
394
|
+
|
377
395
|
# convert text and authtext from java bytes to a ruby
|
378
396
|
# binary string.
|
379
397
|
if text
|
@@ -561,6 +579,7 @@ module Traject
|
|
561
579
|
|
562
580
|
# Mutate string passed in to fix leader bytes for marc21
|
563
581
|
def fix_leader!(leader)
|
582
|
+
|
564
583
|
if leader.length < 24
|
565
584
|
# pad it to 24 bytes, leader is supposed to be 24 bytes
|
566
585
|
leader.replace( leader.ljust(24, ' ') )
|
@@ -573,12 +592,26 @@ module Traject
|
|
573
592
|
leader[9] = 'a'
|
574
593
|
end
|
575
594
|
|
576
|
-
#
|
577
|
-
#
|
578
|
-
#
|
579
|
-
#
|
580
|
-
# https://github.com/jruby/jruby/issues/886
|
581
|
-
leader.force_encoding(
|
595
|
+
# leader should only have ascii chars in it; invalid non-ascii
|
596
|
+
# chars can cause ruby encoding problems down the line.
|
597
|
+
# additionally, a force_encoding may be necessary to
|
598
|
+
# deal with apparent weird hard to isolate jruby bug prob same one
|
599
|
+
# as at https://github.com/jruby/jruby/issues/886
|
600
|
+
leader.force_encoding('ascii')
|
601
|
+
|
602
|
+
unless leader.valid_encoding?
|
603
|
+
# replace any non-ascii chars with a space.
|
604
|
+
|
605
|
+
# Can't access leader.chars when it's not a valid encoding
|
606
|
+
# without a weird index out of bounds exception, think it's
|
607
|
+
# https://github.com/jruby/jruby/issues/886
|
608
|
+
# Grr.
|
609
|
+
|
610
|
+
#leader.replace( leader.chars.collect { |c| c.valid_encoding? ? c : ' ' }.join('') )
|
611
|
+
leader.replace(leader.split('').collect { |c| c.valid_encoding? ? c : ' ' }.join(''))
|
612
|
+
end
|
613
|
+
|
614
|
+
|
582
615
|
|
583
616
|
end
|
584
617
|
|
metadata
CHANGED
@@ -1,97 +1,97 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject_horizon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
4
|
+
version: 0.11.2
|
5
|
+
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jonathan Rochkind
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-09-
|
12
|
+
date: 2013-09-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: traject
|
16
|
-
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
17
18
|
requirements:
|
18
|
-
- - '>='
|
19
|
+
- - ! '>='
|
19
20
|
- !ruby/object:Gem::Version
|
20
21
|
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
25
|
none: false
|
22
|
-
requirement: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
|
-
- - '>='
|
27
|
+
- - ! '>='
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '0'
|
27
|
-
none: false
|
28
|
-
prerelease: false
|
29
|
-
type: :runtime
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: marc-marc4j
|
32
|
-
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
33
34
|
requirements:
|
34
|
-
- - '>='
|
35
|
+
- - ! '>='
|
35
36
|
- !ruby/object:Gem::Version
|
36
37
|
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
41
|
none: false
|
38
|
-
requirement: !ruby/object:Gem::Requirement
|
39
42
|
requirements:
|
40
|
-
- - '>='
|
43
|
+
- - ! '>='
|
41
44
|
- !ruby/object:Gem::Version
|
42
45
|
version: '0'
|
43
|
-
none: false
|
44
|
-
prerelease: false
|
45
|
-
type: :runtime
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: bundler
|
48
|
-
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
49
50
|
requirements:
|
50
51
|
- - ~>
|
51
52
|
- !ruby/object:Gem::Version
|
52
53
|
version: '1.3'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
57
|
none: false
|
54
|
-
requirement: !ruby/object:Gem::Requirement
|
55
58
|
requirements:
|
56
59
|
- - ~>
|
57
60
|
- !ruby/object:Gem::Version
|
58
61
|
version: '1.3'
|
59
|
-
none: false
|
60
|
-
prerelease: false
|
61
|
-
type: :development
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: rake
|
64
|
-
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
65
66
|
requirements:
|
66
|
-
- - '>='
|
67
|
+
- - ! '>='
|
67
68
|
- !ruby/object:Gem::Version
|
68
69
|
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
69
73
|
none: false
|
70
|
-
requirement: !ruby/object:Gem::Requirement
|
71
74
|
requirements:
|
72
|
-
- - '>='
|
75
|
+
- - ! '>='
|
73
76
|
- !ruby/object:Gem::Version
|
74
77
|
version: '0'
|
75
|
-
none: false
|
76
|
-
prerelease: false
|
77
|
-
type: :development
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
79
|
name: minitest
|
80
|
-
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
81
82
|
requirements:
|
82
|
-
- - '>='
|
83
|
+
- - ! '>='
|
83
84
|
- !ruby/object:Gem::Version
|
84
85
|
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
89
|
none: false
|
86
|
-
requirement: !ruby/object:Gem::Requirement
|
87
90
|
requirements:
|
88
|
-
- - '>='
|
91
|
+
- - ! '>='
|
89
92
|
- !ruby/object:Gem::Version
|
90
93
|
version: '0'
|
91
|
-
|
92
|
-
prerelease: false
|
93
|
-
type: :development
|
94
|
-
description:
|
94
|
+
description:
|
95
95
|
email:
|
96
96
|
- jonathan@dnil.net
|
97
97
|
executables: []
|
@@ -115,26 +115,26 @@ files:
|
|
115
115
|
homepage: http://github.com/jrochkind/traject_horizon
|
116
116
|
licenses:
|
117
117
|
- MIT
|
118
|
-
post_install_message:
|
118
|
+
post_install_message:
|
119
119
|
rdoc_options: []
|
120
120
|
require_paths:
|
121
121
|
- lib
|
122
122
|
required_ruby_version: !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
123
124
|
requirements:
|
124
|
-
- - '>='
|
125
|
+
- - ! '>='
|
125
126
|
- !ruby/object:Gem::Version
|
126
127
|
version: '0'
|
127
|
-
none: false
|
128
128
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
129
130
|
requirements:
|
130
|
-
- - '>='
|
131
|
+
- - ! '>='
|
131
132
|
- !ruby/object:Gem::Version
|
132
133
|
version: '0'
|
133
|
-
none: false
|
134
134
|
requirements: []
|
135
|
-
rubyforge_project:
|
136
|
-
rubygems_version: 1.8.
|
137
|
-
signing_key:
|
135
|
+
rubyforge_project:
|
136
|
+
rubygems_version: 1.8.23
|
137
|
+
signing_key:
|
138
138
|
specification_version: 3
|
139
139
|
summary: Horizon ILS MARC Exporter, a plugin for the traject tool
|
140
140
|
test_files:
|