traject 1.1.0 → 2.0.0.rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +20 -0
  3. data/README.md +85 -73
  4. data/doc/batch_execution.md +2 -6
  5. data/doc/other_commands.md +3 -5
  6. data/doc/settings.md +27 -38
  7. data/lib/traject/command_line.rb +1 -1
  8. data/lib/traject/csv_writer.rb +34 -0
  9. data/lib/traject/delimited_writer.rb +110 -0
  10. data/lib/traject/indexer.rb +29 -11
  11. data/lib/traject/indexer/settings.rb +39 -13
  12. data/lib/traject/line_writer.rb +10 -6
  13. data/lib/traject/marc_reader.rb +2 -1
  14. data/lib/traject/solr_json_writer.rb +277 -0
  15. data/lib/traject/thread_pool.rb +38 -48
  16. data/lib/traject/translation_map.rb +3 -0
  17. data/lib/traject/util.rb +13 -51
  18. data/lib/traject/version.rb +1 -1
  19. data/lib/translation_maps/marc_geographic.yaml +2 -2
  20. data/test/delimited_writer_test.rb +104 -0
  21. data/test/indexer/read_write_test.rb +0 -22
  22. data/test/indexer/settings_test.rb +24 -0
  23. data/test/solr_json_writer_test.rb +248 -0
  24. data/test/test_helper.rb +5 -3
  25. data/test/test_support/demo_config.rb +0 -5
  26. data/test/translation_map_test.rb +9 -0
  27. data/traject.gemspec +18 -5
  28. metadata +77 -87
  29. data/lib/traject/marc4j_reader.rb +0 -153
  30. data/lib/traject/solrj_writer.rb +0 -351
  31. data/test/marc4j_reader_test.rb +0 -136
  32. data/test/solrj_writer_test.rb +0 -209
  33. data/vendor/solrj/README +0 -8
  34. data/vendor/solrj/build.xml +0 -39
  35. data/vendor/solrj/ivy.xml +0 -16
  36. data/vendor/solrj/lib/commons-codec-1.7.jar +0 -0
  37. data/vendor/solrj/lib/commons-io-2.1.jar +0 -0
  38. data/vendor/solrj/lib/httpclient-4.2.3.jar +0 -0
  39. data/vendor/solrj/lib/httpcore-4.2.2.jar +0 -0
  40. data/vendor/solrj/lib/httpmime-4.2.3.jar +0 -0
  41. data/vendor/solrj/lib/jcl-over-slf4j-1.6.6.jar +0 -0
  42. data/vendor/solrj/lib/jul-to-slf4j-1.6.6.jar +0 -0
  43. data/vendor/solrj/lib/log4j-1.2.16.jar +0 -0
  44. data/vendor/solrj/lib/noggit-0.5.jar +0 -0
  45. data/vendor/solrj/lib/slf4j-api-1.6.6.jar +0 -0
  46. data/vendor/solrj/lib/slf4j-log4j12-1.6.6.jar +0 -0
  47. data/vendor/solrj/lib/solr-solrj-4.3.1-javadoc.jar +0 -0
  48. data/vendor/solrj/lib/solr-solrj-4.3.1-sources.jar +0 -0
  49. data/vendor/solrj/lib/solr-solrj-4.3.1.jar +0 -0
  50. data/vendor/solrj/lib/wstx-asl-3.2.7.jar +0 -0
  51. data/vendor/solrj/lib/zookeeper-3.4.5.jar +0 -0
@@ -47,9 +47,11 @@ def empty_record
47
47
  rec
48
48
  end
49
49
 
50
- # pretends to be a SolrJ HTTPServer-like thing, just kind of mocks it up
50
+ # pretends to be a Solr HTTPServer-like thing, just kind of mocks it up
51
51
  # and records what happens and simulates errors in some cases.
52
52
  class MockSolrServer
53
+ class Exception < RuntimeError;end
54
+
53
55
  attr_accessor :things_added, :url, :committed, :parser, :shutted_down
54
56
 
55
57
  def initialize(url)
@@ -61,12 +63,12 @@ class MockSolrServer
61
63
  def add(thing)
62
64
  @add_mutex.synchronize do # easy peasy threadsafety for our mock
63
65
  if @url == "http://no.such.place"
64
- raise org.apache.solr.client.solrj.SolrServerException.new("mock bad uri", java.io.IOException.new)
66
+ raise MockSolrServer::Exception.new("mock bad uri")
65
67
  end
66
68
 
67
69
  # simulate a multiple id error please
68
70
  if [thing].flatten.find {|doc| doc.getField("id").getValueCount() != 1}
69
- raise org.apache.solr.client.solrj.SolrServerException.new("mock non-1 size of 'id'")
71
+ raise MockSolrServer::Exception.new("mock non-1 size of 'id'")
70
72
  else
71
73
  things_added << thing
72
74
  end
@@ -21,11 +21,6 @@ extend Traject::Macros::MarcFormats
21
21
  # config files as you like, `traject -c one.rb -c two.rb -c etc.rb`
22
22
  settings do
23
23
  provide "solr.url", "http://solr.somewhere.edu:8983/solr/corename"
24
-
25
- # Only if you need to connect to a Solr 1.x:
26
- provide "solrj_writer.parser_class_name", "XMLResponseParser"
27
-
28
- provide "solrj_writer.commit_on_close", true
29
24
  end
30
25
 
31
26
  # Extract first 001, then supply code block to add "bib_" prefix to it
@@ -104,6 +104,15 @@ describe "TranslationMap" do
104
104
  assert_equal "output_value", map["input_value"]
105
105
  end
106
106
 
107
+ it "can be initialized with another map" do
108
+ map = Traject::TranslationMap.new({"alpha" => "one", "beta" => nil}, :default => "DEFAULT")
109
+
110
+ new_map = Traject::TranslationMap.new(map)
111
+
112
+ assert_equal map.to_hash, new_map.to_hash
113
+ assert_equal map.default, new_map.default
114
+ end
115
+
107
116
  it "respects __default__ literal" do
108
117
  map = Traject::TranslationMap.new("default_literal")
109
118
 
@@ -20,12 +20,25 @@ Gem::Specification.new do |spec|
20
20
  spec.extra_rdoc_files = spec.files.grep(%r{^doc/})
21
21
 
22
22
 
23
- spec.add_dependency "marc", ">= 0.8.0"
24
- spec.add_dependency "marc-marc4j", ">=0.1.1" # use and convert marc4j
25
- spec.add_dependency "hashie", ">= 2.0.5", "< 2.1" # used for Indexer#settings
23
+ spec.add_dependency "concurrent-ruby", ">= 0.8.0"
24
+ spec.add_dependency "marc", "~> 1.0"
25
+
26
+ spec.add_dependency "hashie", "~> 3.1" # used for Indexer#settings
26
27
  spec.add_dependency "slop", ">= 3.4.5", "< 4.0" # command line parsing
27
- spec.add_dependency "yell" # logging
28
- spec.add_dependency "dot-properties", ">= 0.1.1" # reading java style .properties
28
+ spec.add_dependency "yell" # logging
29
+ spec.add_dependency "dot-properties", ">= 0.1.1" # reading java style .properties
30
+ spec.add_dependency "httpclient", "~> 2.5"
31
+
32
+ # If we're building the package under JRuby, add in the
33
+ # jruby-only gems and specify the platform.
34
+
35
+ if defined? JRUBY_VERSION
36
+ spec.platform = 'java'
37
+ spec.add_dependency "traject-marc4j_reader", "~> 1.0"
38
+ else
39
+ spec.platform = "ruby"
40
+ end
41
+
29
42
 
30
43
  spec.add_development_dependency "bundler", "~> 1.3"
31
44
  spec.add_development_dependency "rake"
metadata CHANGED
@@ -1,155 +1,163 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 2.0.0.rc.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Rochkind
8
8
  - Bill Dueber
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-04-07 00:00:00.000000000 Z
12
+ date: 2015-02-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
+ name: concurrent-ruby
15
16
  requirement: !ruby/object:Gem::Requirement
16
17
  requirements:
17
- - - '>='
18
+ - - ">="
18
19
  - !ruby/object:Gem::Version
19
20
  version: 0.8.0
20
- name: marc
21
- prerelease: false
22
21
  type: :runtime
22
+ prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - '>='
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: 0.8.0
28
28
  - !ruby/object:Gem::Dependency
29
+ name: marc
29
30
  requirement: !ruby/object:Gem::Requirement
30
31
  requirements:
31
- - - '>='
32
+ - - "~>"
32
33
  - !ruby/object:Gem::Version
33
- version: 0.1.1
34
- name: marc-marc4j
35
- prerelease: false
34
+ version: '1.0'
36
35
  type: :runtime
36
+ prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - '>='
39
+ - - "~>"
40
40
  - !ruby/object:Gem::Version
41
- version: 0.1.1
41
+ version: '1.0'
42
42
  - !ruby/object:Gem::Dependency
43
+ name: hashie
43
44
  requirement: !ruby/object:Gem::Requirement
44
45
  requirements:
45
- - - '>='
46
+ - - "~>"
46
47
  - !ruby/object:Gem::Version
47
- version: 2.0.5
48
- - - <
49
- - !ruby/object:Gem::Version
50
- version: '2.1'
51
- name: hashie
52
- prerelease: false
48
+ version: '3.1'
53
49
  type: :runtime
50
+ prerelease: false
54
51
  version_requirements: !ruby/object:Gem::Requirement
55
52
  requirements:
56
- - - '>='
57
- - !ruby/object:Gem::Version
58
- version: 2.0.5
59
- - - <
53
+ - - "~>"
60
54
  - !ruby/object:Gem::Version
61
- version: '2.1'
55
+ version: '3.1'
62
56
  - !ruby/object:Gem::Dependency
57
+ name: slop
63
58
  requirement: !ruby/object:Gem::Requirement
64
59
  requirements:
65
- - - '>='
60
+ - - ">="
66
61
  - !ruby/object:Gem::Version
67
62
  version: 3.4.5
68
- - - <
63
+ - - "<"
69
64
  - !ruby/object:Gem::Version
70
65
  version: '4.0'
71
- name: slop
72
- prerelease: false
73
66
  type: :runtime
67
+ prerelease: false
74
68
  version_requirements: !ruby/object:Gem::Requirement
75
69
  requirements:
76
- - - '>='
70
+ - - ">="
77
71
  - !ruby/object:Gem::Version
78
72
  version: 3.4.5
79
- - - <
73
+ - - "<"
80
74
  - !ruby/object:Gem::Version
81
75
  version: '4.0'
82
76
  - !ruby/object:Gem::Dependency
77
+ name: yell
83
78
  requirement: !ruby/object:Gem::Requirement
84
79
  requirements:
85
- - - '>='
80
+ - - ">="
86
81
  - !ruby/object:Gem::Version
87
82
  version: '0'
88
- name: yell
89
- prerelease: false
90
83
  type: :runtime
84
+ prerelease: false
91
85
  version_requirements: !ruby/object:Gem::Requirement
92
86
  requirements:
93
- - - '>='
87
+ - - ">="
94
88
  - !ruby/object:Gem::Version
95
89
  version: '0'
96
90
  - !ruby/object:Gem::Dependency
91
+ name: dot-properties
97
92
  requirement: !ruby/object:Gem::Requirement
98
93
  requirements:
99
- - - '>='
94
+ - - ">="
100
95
  - !ruby/object:Gem::Version
101
96
  version: 0.1.1
102
- name: dot-properties
103
- prerelease: false
104
97
  type: :runtime
98
+ prerelease: false
105
99
  version_requirements: !ruby/object:Gem::Requirement
106
100
  requirements:
107
- - - '>='
101
+ - - ">="
108
102
  - !ruby/object:Gem::Version
109
103
  version: 0.1.1
110
104
  - !ruby/object:Gem::Dependency
105
+ name: httpclient
111
106
  requirement: !ruby/object:Gem::Requirement
112
107
  requirements:
113
- - - ~>
108
+ - - "~>"
114
109
  - !ruby/object:Gem::Version
115
- version: '1.3'
116
- name: bundler
110
+ version: '2.5'
111
+ type: :runtime
117
112
  prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.5'
118
+ - !ruby/object:Gem::Dependency
119
+ name: bundler
120
+ requirement: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '1.3'
118
125
  type: :development
126
+ prerelease: false
119
127
  version_requirements: !ruby/object:Gem::Requirement
120
128
  requirements:
121
- - - ~>
129
+ - - "~>"
122
130
  - !ruby/object:Gem::Version
123
131
  version: '1.3'
124
132
  - !ruby/object:Gem::Dependency
133
+ name: rake
125
134
  requirement: !ruby/object:Gem::Requirement
126
135
  requirements:
127
- - - '>='
136
+ - - ">="
128
137
  - !ruby/object:Gem::Version
129
138
  version: '0'
130
- name: rake
131
- prerelease: false
132
139
  type: :development
140
+ prerelease: false
133
141
  version_requirements: !ruby/object:Gem::Requirement
134
142
  requirements:
135
- - - '>='
143
+ - - ">="
136
144
  - !ruby/object:Gem::Version
137
145
  version: '0'
138
146
  - !ruby/object:Gem::Dependency
147
+ name: minitest
139
148
  requirement: !ruby/object:Gem::Requirement
140
149
  requirements:
141
- - - '>='
150
+ - - ">="
142
151
  - !ruby/object:Gem::Version
143
152
  version: '0'
144
- name: minitest
145
- prerelease: false
146
153
  type: :development
154
+ prerelease: false
147
155
  version_requirements: !ruby/object:Gem::Requirement
148
156
  requirements:
149
- - - '>='
157
+ - - ">="
150
158
  - !ruby/object:Gem::Version
151
159
  version: '0'
152
- description:
160
+ description:
153
161
  email:
154
162
  - none@nowhere.org
155
163
  executables:
@@ -162,9 +170,9 @@ extra_rdoc_files:
162
170
  - doc/other_commands.md
163
171
  - doc/settings.md
164
172
  files:
165
- - .gitignore
166
- - .travis.yml
167
- - .yardopts
173
+ - ".gitignore"
174
+ - ".travis.yml"
175
+ - ".yardopts"
168
176
  - Gemfile
169
177
  - LICENSE.txt
170
178
  - README.md
@@ -179,7 +187,9 @@ files:
179
187
  - lib/tasks/load_maps.rake
180
188
  - lib/traject.rb
181
189
  - lib/traject/command_line.rb
190
+ - lib/traject/csv_writer.rb
182
191
  - lib/traject/debug_writer.rb
192
+ - lib/traject/delimited_writer.rb
183
193
  - lib/traject/indexer.rb
184
194
  - lib/traject/indexer/settings.rb
185
195
  - lib/traject/json_writer.rb
@@ -188,14 +198,13 @@ files:
188
198
  - lib/traject/macros/marc21.rb
189
199
  - lib/traject/macros/marc21_semantics.rb
190
200
  - lib/traject/macros/marc_format_classifier.rb
191
- - lib/traject/marc4j_reader.rb
192
201
  - lib/traject/marc_extractor.rb
193
202
  - lib/traject/marc_reader.rb
194
203
  - lib/traject/mock_reader.rb
195
204
  - lib/traject/ndj_reader.rb
196
205
  - lib/traject/null_writer.rb
197
206
  - lib/traject/qualified_const_get.rb
198
- - lib/traject/solrj_writer.rb
207
+ - lib/traject/solr_json_writer.rb
199
208
  - lib/traject/thread_pool.rb
200
209
  - lib/traject/translation_map.rb
201
210
  - lib/traject/util.rb
@@ -208,6 +217,7 @@ files:
208
217
  - lib/translation_maps/marc_instruments.yaml
209
218
  - lib/translation_maps/marc_languages.yaml
210
219
  - test/debug_writer_test.rb
220
+ - test/delimited_writer_test.rb
211
221
  - test/indexer/each_record_test.rb
212
222
  - test/indexer/macros_marc21_semantics_test.rb
213
223
  - test/indexer/macros_marc21_test.rb
@@ -216,11 +226,10 @@ files:
216
226
  - test/indexer/read_write_test.rb
217
227
  - test/indexer/settings_test.rb
218
228
  - test/indexer/to_field_test.rb
219
- - test/marc4j_reader_test.rb
220
229
  - test/marc_extractor_test.rb
221
230
  - test/marc_format_classifier_test.rb
222
231
  - test/marc_reader_test.rb
223
- - test/solrj_writer_test.rb
232
+ - test/solr_json_writer_test.rb
224
233
  - test/test_helper.rb
225
234
  - test/test_support/245_no_ab.marc
226
235
  - test/test_support/880_with_no_6.utf8.marc
@@ -263,51 +272,33 @@ files:
263
272
  - test/translation_maps/translate_array_test.yaml
264
273
  - test/translation_maps/yaml_map.yaml
265
274
  - traject.gemspec
266
- - vendor/solrj/README
267
- - vendor/solrj/build.xml
268
- - vendor/solrj/ivy.xml
269
- - vendor/solrj/lib/commons-codec-1.7.jar
270
- - vendor/solrj/lib/commons-io-2.1.jar
271
- - vendor/solrj/lib/httpclient-4.2.3.jar
272
- - vendor/solrj/lib/httpcore-4.2.2.jar
273
- - vendor/solrj/lib/httpmime-4.2.3.jar
274
- - vendor/solrj/lib/jcl-over-slf4j-1.6.6.jar
275
- - vendor/solrj/lib/jul-to-slf4j-1.6.6.jar
276
- - vendor/solrj/lib/log4j-1.2.16.jar
277
- - vendor/solrj/lib/noggit-0.5.jar
278
- - vendor/solrj/lib/slf4j-api-1.6.6.jar
279
- - vendor/solrj/lib/slf4j-log4j12-1.6.6.jar
280
- - vendor/solrj/lib/solr-solrj-4.3.1-javadoc.jar
281
- - vendor/solrj/lib/solr-solrj-4.3.1-sources.jar
282
- - vendor/solrj/lib/solr-solrj-4.3.1.jar
283
- - vendor/solrj/lib/wstx-asl-3.2.7.jar
284
- - vendor/solrj/lib/zookeeper-3.4.5.jar
285
275
  homepage: http://github.com/traject-project/traject
286
276
  licenses:
287
277
  - MIT
288
278
  metadata: {}
289
- post_install_message:
279
+ post_install_message:
290
280
  rdoc_options: []
291
281
  require_paths:
292
282
  - lib
293
283
  required_ruby_version: !ruby/object:Gem::Requirement
294
284
  requirements:
295
- - - '>='
285
+ - - ">="
296
286
  - !ruby/object:Gem::Version
297
287
  version: '0'
298
288
  required_rubygems_version: !ruby/object:Gem::Requirement
299
289
  requirements:
300
- - - '>='
290
+ - - ">"
301
291
  - !ruby/object:Gem::Version
302
- version: '0'
292
+ version: 1.3.1
303
293
  requirements: []
304
- rubyforge_project:
305
- rubygems_version: 2.1.9
306
- signing_key:
294
+ rubyforge_project:
295
+ rubygems_version: 2.4.5
296
+ signing_key:
307
297
  specification_version: 4
308
298
  summary: Index MARC to Solr; or generally process source records to hash-like structures
309
299
  test_files:
310
300
  - test/debug_writer_test.rb
301
+ - test/delimited_writer_test.rb
311
302
  - test/indexer/each_record_test.rb
312
303
  - test/indexer/macros_marc21_semantics_test.rb
313
304
  - test/indexer/macros_marc21_test.rb
@@ -316,11 +307,10 @@ test_files:
316
307
  - test/indexer/read_write_test.rb
317
308
  - test/indexer/settings_test.rb
318
309
  - test/indexer/to_field_test.rb
319
- - test/marc4j_reader_test.rb
320
310
  - test/marc_extractor_test.rb
321
311
  - test/marc_format_classifier_test.rb
322
312
  - test/marc_reader_test.rb
323
- - test/solrj_writer_test.rb
313
+ - test/solr_json_writer_test.rb
324
314
  - test/test_helper.rb
325
315
  - test/test_support/245_no_ab.marc
326
316
  - test/test_support/880_with_no_6.utf8.marc
@@ -1,153 +0,0 @@
1
- require 'traject'
2
- require 'marc'
3
- require 'marc/marc4j'
4
-
5
- # `Traject::Marc4JReader` uses the marc4j java package to parse the MARC records
6
- # into standard ruby-marc MARC::Record objects. This reader may be faster than
7
- # Traject::MarcReader, especially for XML.
8
- #
9
- # Marc4JReader can read MARC ISO 2709 ("binary") or MARCXML. We use the Marc4J MarcPermissiveStreamReader
10
- # for reading binary, but sometimes in non-permissive mode, according to settings. We use the Marc4j MarcXmlReader
11
- # for reading xml. The actual code for dealing with Marc4J is in the separate
12
- # [marc-marc4j gem](https://github.com/billdueber/ruby-marc-marc4j).
13
- #
14
- # See also the pure ruby Traject::MarcReader as an alternative, if you need to read
15
- # marc-in-json, or if you don't need binary Marc8 support, it may in some cases
16
- # be faster.
17
- #
18
- # ## Settings
19
- #
20
- # * marc_source.type: serialization type. default 'binary', also 'xml' (TODO: json/marc-in-json)
21
- #
22
- # * marc4j_reader.permissive: default true, false to turn off permissive reading. Used as
23
- # value to 'permissive' arg of MarcPermissiveStreamReader constructor.
24
- # Only used for 'binary'
25
- #
26
- # * marc_source.encoding: Only used for 'binary', otherwise always UTF-8.
27
- # String of the values MarcPermissiveStreamReader accepts:
28
- # * BESTGUESS (default: not entirely clear what Marc4J does with this)
29
- # * ISO-8859-1 (also accepted: ISO8859_1)
30
- # * UTF-8
31
- # * MARC-8 (also accepted: MARC8)
32
- # Default 'BESTGUESS', but HIGHLY recommend setting
33
- # to avoid some Marc4J unpredictability, Marc4J "BESTGUESS" can be unpredictable
34
- # in a variety of ways.
35
- # (will ALWAYS be transcoded to UTF-8 on the way out. We insist.)
36
- #
37
- # * marc4j_reader.jar_dir: Path to a directory containing Marc4J jar file to use. All .jar's in dir will
38
- # be loaded. If unset, uses marc4j.jar bundled with traject.
39
- #
40
- # * marc4j_reader.keep_marc4j: Keeps the original marc4j record accessible from
41
- # the eventual ruby-marc record via record#original_marc4j. Intended for
42
- # those that have legacy java code for which a marc4j object is needed. .
43
- #
44
- #
45
- # ## Example
46
- #
47
- # In a configuration file:
48
- #
49
- # require 'traject/marc4j_reader
50
- # settings do
51
- # provide "reader_class_name", "Traject::Marc4JReader"
52
- #
53
- # #for MarcXML:
54
- # # provide "marc_source.type", "xml"
55
- #
56
- # # Or instead for binary:
57
- # provide "marc4j_reader.permissive", true
58
- # provide "marc_source.encoding", "MARC8"
59
- # end
60
- class Traject::Marc4JReader
61
- include Enumerable
62
-
63
- attr_reader :settings, :input_stream
64
-
65
- def initialize(input_stream, settings)
66
- @settings = Traject::Indexer::Settings.new settings
67
- @input_stream = input_stream
68
-
69
- if @settings['marc4j_reader.keep_marc4j'] &&
70
- ! (MARC::Record.instance_methods.include?(:original_marc4j) &&
71
- MARC::Record.instance_methods.include?(:"original_marc4j="))
72
- MARC::Record.class_eval('attr_accessor :original_marc4j')
73
- end
74
-
75
- # Creating a converter will do the following:
76
- # - nothing, if it detects that the marc4j jar is already loaded
77
- # - load all the .jar files in settings['marc4j_reader.jar_dir'] if set
78
- # - load the marc4j jar file bundled with MARC::MARC4J otherwise
79
-
80
- @converter = MARC::MARC4J.new(:jardir => settings['marc4j_reader.jar_dir'], :logger => logger)
81
-
82
- # Convenience
83
- java_import org.marc4j.MarcPermissiveStreamReader
84
- java_import org.marc4j.MarcXmlReader
85
-
86
- end
87
-
88
-
89
- def internal_reader
90
- @internal_reader ||= create_marc_reader!
91
- end
92
-
93
- def input_type
94
- # maybe later add some guessing somehow
95
- settings["marc_source.type"]
96
- end
97
-
98
- def specified_source_encoding
99
- #settings["marc4j_reader.source_encoding"]
100
- enc = settings["marc_source.encoding"]
101
-
102
- # one is standard for ruby and we want to support,
103
- # the other is used by Marc4J and we have to pass it to Marc4J
104
- enc = "ISO8859_1" if enc == "ISO-8859-1"
105
-
106
- # default
107
- enc = "BESTGUESS" if enc.nil? || enc.empty?
108
-
109
- return enc
110
- end
111
-
112
- def create_marc_reader!
113
- case input_type
114
- when "binary"
115
- permissive = settings["marc4j_reader.permissive"].to_s == "true"
116
-
117
- # #to_inputstream turns our ruby IO into a Java InputStream
118
- # third arg means 'convert to UTF-8, yes'
119
- MarcPermissiveStreamReader.new(input_stream.to_inputstream, permissive, true, specified_source_encoding)
120
- when "xml"
121
- MarcXmlReader.new(input_stream.to_inputstream)
122
- else
123
- raise IllegalArgument.new("Unrecgonized marc_source.type: #{input_type}")
124
- end
125
- end
126
-
127
- def each
128
- while (internal_reader.hasNext)
129
- begin
130
- marc4j = internal_reader.next
131
- rubymarc = @converter.marc4j_to_rubymarc(marc4j)
132
- if @settings['marc4j_reader.keep_marc4j']
133
- rubymarc.original_marc4j = marc4j
134
- end
135
- rescue Exception =>e
136
- msg = "MARC4JReader: Error reading MARC, fatal, re-raising"
137
- if marc4j
138
- msg += "\n 001 id: #{marc4j.getControlNumber}"
139
- end
140
- msg += "\n #{Traject::Util.exception_to_log_message(e)}"
141
- logger.fatal msg
142
- raise e
143
- end
144
-
145
- yield rubymarc
146
- end
147
- end
148
-
149
- def logger
150
- @logger ||= (settings[:logger] || Yell.new(STDERR, :level => "gt.fatal")) # null logger)
151
- end
152
-
153
- end