traject-marc4j_reader 1.0.2-java → 1.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +27 -0
- data/README.md +20 -7
- data/lib/traject/marc4j_reader.rb +16 -8
- data/lib/traject/marc4j_reader/version.rb +1 -1
- metadata +32 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 731f7a7338beafc60639283c73a4ac0f0ced73cc
|
4
|
+
data.tar.gz: b9c6733b4a14aead04cbbba9221dc7edd56ac294
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4fe9c0eb1f72492777b96778259a1a74e58a0e2633d36b8be0527da51c723209377a0542259629628081ad4695d8280d1e747b2eb9a151fa41a2c4932fac14a
|
7
|
+
data.tar.gz: e1ad15787f3e51ac71e58c83ea074b904979f35f3337989e643344361c14b318539f78923cf29b6b2cba0fa33153df6f23a519532f949739e75e322e9c3c52f7
|
data/CHANGES.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Adds a new setting to allow finer-grained control over which `marc4j` reader
|
2
|
+
is used to process binary MARC.
|
3
|
+
|
4
|
+
`traject` uses a broadly permissive set of defaults to read binary MARC
|
5
|
+
records, which may not always be what you want. The `permissive` setting sets
|
6
|
+
the flag of the same name on the
|
7
|
+
['permissive'
|
8
|
+
reader class](https://github.com/marc4j/marc4j/blob/master/src/org/marc4j/MarcPermissiveStreamReader.java#L164) provided by marc4j (which, at the time of writing, controls how
|
9
|
+
that reader guesses the encoding of input records).
|
10
|
+
|
11
|
+
## Use the "strict" `org.marc4j.MarcStreamReader` class to read MARC21
|
12
|
+
|
13
|
+
In situations where you want stricter record processing -- or in case your records can't be processed by the permissive stream reader (paradoxically, `MarcStreamReader` is more forgiving of certain non-standard MARC, e.g. uppercase subfields), you can specify that `traject` should use the `org.marc4j.MarcStreamReader` class:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
settings do
|
17
|
+
provide 'marc4j_reader.class', 'MarcStreamReader'
|
18
|
+
end
|
19
|
+
```
|
20
|
+
|
21
|
+
## A note about `permissive`
|
22
|
+
|
23
|
+
The `marc4j_reader.permissive` setting, which previously existed, is passed
|
24
|
+
through to the constructor of the `MarcPermissiveStreamReader` class, and does
|
25
|
+
not affect which class is used to read MARC21 input. If you set both this parameter and the `marc4j_reader.class` parameter, the `permissive` setting will be ignored.
|
26
|
+
|
27
|
+
|
data/README.md
CHANGED
@@ -26,17 +26,28 @@ of the workload in a traject run, you'll almost certainly see performance gains.
|
|
26
26
|
|
27
27
|
## Installation
|
28
28
|
|
29
|
-
|
29
|
+
Traject prior to 3.0 included this as a dependency on JRuby, and defaulted to using it.
|
30
30
|
|
31
|
-
|
31
|
+
In Traject 3.0+, you need to manually add this gem and configure traject to use it.
|
32
32
|
|
33
|
-
|
33
|
+
If you are using bundler and a `Gemfile`, add `gem "traject-marc4j_reader", "~> 1.0"` to your `Gemfile`. Otherwise, just `gem install traject-marc4j_reader`.
|
34
34
|
|
35
|
-
|
35
|
+
Then, in your traject config file:
|
36
36
|
|
37
|
-
|
37
|
+
# Instead of require in config file, you could use the `-r` traject
|
38
|
+
# command-line option.
|
39
|
+
require 'traject/marc4j_reader'
|
38
40
|
|
39
|
-
|
41
|
+
settings do
|
42
|
+
provide "reader_class_name", "Traject::Marc4JReader"
|
43
|
+
|
44
|
+
# Recommend marc4j_reader.permissive true unless you have reason not to.
|
45
|
+
# true was default provided by core traject gem in Traject pre-3.0, but isn't
|
46
|
+
# anymore in traject 3.0 -- so set to true explicitly to maintain behavior
|
47
|
+
#
|
48
|
+
# Only relevant for binary MARC source data.
|
49
|
+
provide "marc4j_reader.permissive", true
|
50
|
+
end
|
40
51
|
|
41
52
|
## Traject::Marc4jReader settings
|
42
53
|
|
@@ -49,7 +60,7 @@ so output will always reflect that conversion.
|
|
49
60
|
* `marc4j.jar_dir`: Path to a directory containing Marc4J jar file to use. All .jar's in dir will
|
50
61
|
be loaded. If unset, uses marc4j.jar bundled with traject.
|
51
62
|
|
52
|
-
* `marc4j_reader.permissive`: Used by Marc4JReader only when marc.source_type is 'binary', boolean, argument to the underlying MarcPermissiveStreamReader. Default true.
|
63
|
+
* `marc4j_reader.permissive`: Used by Marc4JReader only when marc.source_type is 'binary', boolean, argument to the underlying MarcPermissiveStreamReader. Default false, but recommend true for most uses.
|
53
64
|
|
54
65
|
* `marc4j_reader.source_encoding`: Used by Marc4JReader only when marc.source_type is 'binary', encoding strings accepted
|
55
66
|
by marc4j MarcPermissiveStreamReader. Default "BESTGUESS", also "UTF-8", "MARC"
|
@@ -57,6 +68,8 @@ so output will always reflect that conversion.
|
|
57
68
|
* `marc4j_reader.keep_marc4j`: After translating the marc4j record into a normal ruby-marc object,
|
58
69
|
provides access to the former via `record#original_marc4j`.
|
59
70
|
|
71
|
+
* `marc4j_reader.class`: Set to e.g. 'MarcStreamReader' to use that stricter Marc4J reader class, instead of the default Marc4J `MarcPermissiveStreamReader`.
|
72
|
+
|
60
73
|
|
61
74
|
## Sample use
|
62
75
|
|
@@ -19,9 +19,9 @@ require 'marc/marc4j'
|
|
19
19
|
#
|
20
20
|
# * marc_source.type: serialization type. default 'binary', also 'xml' (TODO: json/marc-in-json)
|
21
21
|
#
|
22
|
-
# * marc4j_reader.permissive:
|
22
|
+
# * marc4j_reader.permissive: Used as
|
23
23
|
# value to 'permissive' arg of MarcPermissiveStreamReader constructor.
|
24
|
-
# Only used for 'binary'
|
24
|
+
# Only used for 'binary'. Default false, but recommend true for most uses.
|
25
25
|
#
|
26
26
|
# * marc_source.encoding: Only used for 'binary', otherwise always UTF-8.
|
27
27
|
# String of the values MarcPermissiveStreamReader accepts:
|
@@ -81,6 +81,7 @@ class Traject::Marc4JReader
|
|
81
81
|
|
82
82
|
# Convenience
|
83
83
|
java_import org.marc4j.MarcPermissiveStreamReader
|
84
|
+
java_import org.marc4j.MarcStreamReader
|
84
85
|
java_import org.marc4j.MarcXmlReader
|
85
86
|
|
86
87
|
end
|
@@ -112,19 +113,26 @@ class Traject::Marc4JReader
|
|
112
113
|
def create_marc_reader!
|
113
114
|
case input_type
|
114
115
|
when "binary"
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
116
|
+
the_stream = input_stream.to_inputstream
|
117
|
+
if settings['marc4j_reader.class'] == 'MarcStreamReader'
|
118
|
+
MarcStreamReader.new(the_stream, specified_source_encoding)
|
119
|
+
else
|
120
|
+
permissive = settings["marc4j_reader.permissive"].to_s == "true"
|
121
|
+
|
122
|
+
# #to_inputstream turns our ruby IO into a Java InputStream
|
123
|
+
# third arg means 'convert to UTF-8, yes'
|
124
|
+
MarcPermissiveStreamReader.new(the_stream, permissive, true, specified_source_encoding)
|
125
|
+
end
|
120
126
|
when "xml"
|
121
127
|
MarcXmlReader.new(input_stream.to_inputstream)
|
122
128
|
else
|
123
|
-
raise
|
129
|
+
raise ArgumentError.new("Unrecgonized marc_source.type: #{input_type}")
|
124
130
|
end
|
125
131
|
end
|
126
132
|
|
127
133
|
def each
|
134
|
+
return to_enum(:each) unless block_given?
|
135
|
+
|
128
136
|
while (internal_reader.hasNext)
|
129
137
|
begin
|
130
138
|
marc4j = internal_reader.next
|
metadata
CHANGED
@@ -1,83 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject-marc4j_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Bill Dueber
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-10-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
+
name: marc
|
14
15
|
requirement: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
|
-
- - ~>
|
17
|
+
- - "~>"
|
17
18
|
- !ruby/object:Gem::Version
|
18
19
|
version: '1.0'
|
19
|
-
name: marc
|
20
|
-
prerelease: false
|
21
20
|
type: :runtime
|
21
|
+
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
+
name: marc-marc4j
|
28
29
|
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- - ~>
|
31
|
+
- - "~>"
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '1.0'
|
33
|
-
name: marc-marc4j
|
34
|
-
prerelease: false
|
35
34
|
type: :runtime
|
35
|
+
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - ~>
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
42
43
|
requirement: !ruby/object:Gem::Requirement
|
43
44
|
requirements:
|
44
|
-
- - ~>
|
45
|
+
- - "~>"
|
45
46
|
- !ruby/object:Gem::Version
|
46
47
|
version: '1.6'
|
47
|
-
name: bundler
|
48
|
-
prerelease: false
|
49
48
|
type: :development
|
49
|
+
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - ~>
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '1.6'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
56
57
|
requirement: !ruby/object:Gem::Requirement
|
57
58
|
requirements:
|
58
|
-
- -
|
59
|
+
- - ">="
|
59
60
|
- !ruby/object:Gem::Version
|
60
61
|
version: '0'
|
61
|
-
name: rake
|
62
|
-
prerelease: false
|
63
62
|
type: :development
|
63
|
+
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
+
name: minitest
|
70
71
|
requirement: !ruby/object:Gem::Requirement
|
71
72
|
requirements:
|
72
|
-
- -
|
73
|
+
- - ">="
|
73
74
|
- !ruby/object:Gem::Version
|
74
75
|
version: '0'
|
75
|
-
name: minitest
|
76
|
-
prerelease: false
|
77
76
|
type: :development
|
77
|
+
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
description: 'Allows jruby users to leverage marc-marc4j to use marc4j as a reader
|
@@ -88,8 +88,9 @@ executables: []
|
|
88
88
|
extensions: []
|
89
89
|
extra_rdoc_files: []
|
90
90
|
files:
|
91
|
-
- .gitignore
|
92
|
-
- .travis.yml
|
91
|
+
- ".gitignore"
|
92
|
+
- ".travis.yml"
|
93
|
+
- CHANGES.md
|
93
94
|
- Gemfile
|
94
95
|
- LICENSE.txt
|
95
96
|
- README.md
|
@@ -110,24 +111,24 @@ homepage: ''
|
|
110
111
|
licenses:
|
111
112
|
- MIT
|
112
113
|
metadata: {}
|
113
|
-
post_install_message:
|
114
|
+
post_install_message:
|
114
115
|
rdoc_options: []
|
115
116
|
require_paths:
|
116
117
|
- lib
|
117
118
|
required_ruby_version: !ruby/object:Gem::Requirement
|
118
119
|
requirements:
|
119
|
-
- -
|
120
|
+
- - ">="
|
120
121
|
- !ruby/object:Gem::Version
|
121
122
|
version: '0'
|
122
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
124
|
requirements:
|
124
|
-
- -
|
125
|
+
- - ">="
|
125
126
|
- !ruby/object:Gem::Version
|
126
127
|
version: '0'
|
127
128
|
requirements: []
|
128
|
-
rubyforge_project:
|
129
|
-
rubygems_version: 2.
|
130
|
-
signing_key:
|
129
|
+
rubyforge_project:
|
130
|
+
rubygems_version: 2.5.2.3
|
131
|
+
signing_key:
|
131
132
|
specification_version: 4
|
132
133
|
summary: Use marc4j (java) library under traject
|
133
134
|
test_files:
|
@@ -140,4 +141,3 @@ test_files:
|
|
140
141
|
- test/test_support/test_data.utf8.marc.xml
|
141
142
|
- test/test_support/test_data.utf8.mrc
|
142
143
|
- test/test_traject_marc4j_reader.rb
|
143
|
-
has_rdoc:
|