traject 1.0.0.beta.7 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -8
- data/doc/batch_execution.md +5 -1
- data/doc/settings.md +4 -2
- data/lib/traject/indexer.rb +1 -1
- data/lib/traject/indexer/settings.rb +4 -4
- data/lib/traject/marc4j_reader.rb +25 -10
- data/lib/traject/marc_reader.rb +19 -6
- data/lib/traject/version.rb +1 -1
- data/test/marc4j_reader_test.rb +50 -2
- data/test/marc_reader_test.rb +65 -10
- data/test/test_support/bad_utf_byte.utf8.marc +1 -0
- data/test/test_support/escaped_character_reference.marc8.marc +1 -0
- data/traject.gemspec +1 -1
- metadata +11 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31243d453f43fbc8f8634c2511340d8fb96c606f
|
4
|
+
data.tar.gz: a843a583f235920931304fc6baa7ff75ce91dfab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a0e6f3c695a5fe497f4cadd14fba7a2a10a3ea1ff40e42a6625949227d1eb3f995dc4d0ecb228f5e98c95f737df1c1e0e8089b59e17fcb0fedca8de5208b535
|
7
|
+
data.tar.gz: 3598e91ed10b039c4382d56ddc69939713425c4f059eb27adfddfdc272e25f2c52e8f900a31b94ceda6576d716c2155188dc03d4697156314e71aadf99a92471
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Might be used to index MARC data for a Solr-based discovery product like [Blackl
|
|
6
6
|
Traject might also be generalized to a set of tools for getting structured data from a source, and transforming it to a hash-like object to send to a destination.
|
7
7
|
|
8
8
|
|
9
|
-
**Traject is
|
9
|
+
**Traject is stable, mature software, that is already being used in production by its authors.**
|
10
10
|
|
11
11
|
[](http://badge.fury.io/rb/traject)
|
12
12
|
[](https://travis-ci.org/traject-project/traject)
|
@@ -89,13 +89,11 @@ settings do
|
|
89
89
|
# various others...
|
90
90
|
provide "solrj_writer.commit_on_close", "true"
|
91
91
|
|
92
|
-
# By default, we use the Traject::
|
93
|
-
#
|
94
|
-
#
|
95
|
-
#
|
96
|
-
|
97
|
-
# as to encoding when reading binary, you may want to tell it instead
|
98
|
-
provide "marc4j_reader.source_encoding", "MARC8" # or UTF-8 or ISO8859_1
|
92
|
+
# By default, we use the Traject::MarcReader
|
93
|
+
# One alternative is the Marc4JReader, using Marc4J.
|
94
|
+
# provide "reader_class_name", "Traject::Marc4JReader"
|
95
|
+
# If we're reading binary MARC, it's best to tell it the encoding.
|
96
|
+
provide "marc_source.encoding", "MARC-8" # or 'UTF-8' or 'ISO-8859-1' or whatever.
|
99
97
|
end
|
100
98
|
~~~
|
101
99
|
|
data/doc/batch_execution.md
CHANGED
@@ -224,7 +224,7 @@ object you configure yourself however you like:
|
|
224
224
|
~~~ruby
|
225
225
|
# inside a traject configuration file
|
226
226
|
|
227
|
-
logger = Yell.new do |l|
|
227
|
+
self.logger = Yell.new do |l|
|
228
228
|
l.level = 'gte.info' # will only pass :info and above to the adapters
|
229
229
|
|
230
230
|
l.adapter :datefile, 'production.log', level: 'lte.warn' # anything lower or equal to :warn
|
@@ -232,6 +232,10 @@ object you configure yourself however you like:
|
|
232
232
|
end
|
233
233
|
~~~
|
234
234
|
|
235
|
+
**note** it's important to use `self.logger =`, or due to
|
236
|
+
ruby idiosyncrasies you'll just be setting a local variable, not the Indexer's
|
237
|
+
logger attribute.
|
238
|
+
|
235
239
|
See [yell](https://github.com/rudionrails/yell) docs for more, you can
|
236
240
|
do whatever you can make yell, just write ruby.
|
237
241
|
|
data/doc/settings.md
CHANGED
@@ -47,8 +47,10 @@ settings are applied first of all. It's recommended you use `provide`.
|
|
47
47
|
* `log.level`: Log this level and above. Default 'info', set to eg 'debug' to get potentially more logging info,
|
48
48
|
or 'error' to get less. https://github.com/rudionrails/yell/wiki/101-setting-the-log-level
|
49
49
|
|
50
|
-
* `log.batch_size`: If set to a number N (or string representation), will output a progress line to
|
51
|
-
log
|
50
|
+
* `log.batch_size`: If set to a number N (or string representation), will output a progress line to
|
51
|
+
log. (by default as INFO, but see log.batch_size.severity)
|
52
|
+
|
53
|
+
* `log.batch_size.severity`: If `log.batch_size` is set, what logger severity level to log to. Default "INFO", set to "DEBUG" etc if desired.
|
52
54
|
|
53
55
|
* `marc_source.type`: default 'binary'. Can also set to 'xml' or (not yet implemented todo) 'json'. Command line shortcut `-t`
|
54
56
|
|
data/lib/traject/indexer.rb
CHANGED
@@ -332,7 +332,7 @@ class Traject::Indexer
|
|
332
332
|
if log_batch_size && (count % log_batch_size == 0)
|
333
333
|
batch_rps = log_batch_size / (Time.now - batch_start_time)
|
334
334
|
overall_rps = count / (Time.now - start_time)
|
335
|
-
logger.
|
335
|
+
logger.send(settings["log.batch_size.severity"].downcase.to_sym, "Traject::Indexer#process, read #{count} records at id:#{id_string(record)}; #{'%.0f' % batch_rps}/s this batch, #{'%.0f' % overall_rps}/s overall")
|
336
336
|
batch_start_time = Time.now
|
337
337
|
end
|
338
338
|
|
@@ -62,14 +62,14 @@ class Traject::Indexer
|
|
62
62
|
|
63
63
|
def self.defaults
|
64
64
|
@@defaults ||= {
|
65
|
-
"reader_class_name" => "Traject::
|
65
|
+
"reader_class_name" => "Traject::MarcReader",
|
66
66
|
"writer_class_name" => "Traject::SolrJWriter",
|
67
|
-
"marc_source.type" => "binary",
|
67
|
+
"marc_source.type" => "binary",
|
68
68
|
"marc4j_reader.permissive" => true,
|
69
|
-
"marc4j_reader.source_encoding" => "BESTGUESS",
|
70
69
|
"solrj_writer.batch_size" => 200,
|
71
70
|
"solrj_writer.thread_pool" => 1,
|
72
|
-
"processing_thread_pool" => 3
|
71
|
+
"processing_thread_pool" => 3,
|
72
|
+
"log.batch_size.severity" => "info"
|
73
73
|
}
|
74
74
|
end
|
75
75
|
|
@@ -3,9 +3,8 @@ require 'marc'
|
|
3
3
|
require 'marc/marc4j'
|
4
4
|
|
5
5
|
# `Traject::Marc4JReader` uses the marc4j java package to parse the MARC records
|
6
|
-
# into standard ruby-marc MARC::Record objects. This reader
|
7
|
-
# Traject::MarcReader, especially for XML
|
8
|
-
# encoded records and transcoding to UTF8.
|
6
|
+
# into standard ruby-marc MARC::Record objects. This reader may be faster than
|
7
|
+
# Traject::MarcReader, especially for XML.
|
9
8
|
#
|
10
9
|
# Marc4JReader can read MARC ISO 2709 ("binary") or MARCXML. We use the Marc4J MarcPermissiveStreamReader
|
11
10
|
# for reading binary, but sometimes in non-permissive mode, according to settings. We use the Marc4j MarcXmlReader
|
@@ -24,13 +23,15 @@ require 'marc/marc4j'
|
|
24
23
|
# value to 'permissive' arg of MarcPermissiveStreamReader constructor.
|
25
24
|
# Only used for 'binary'
|
26
25
|
#
|
27
|
-
# *
|
26
|
+
# * marc_source.encoding: Only used for 'binary', otherwise always UTF-8.
|
28
27
|
# String of the values MarcPermissiveStreamReader accepts:
|
29
|
-
# * BESTGUESS (
|
30
|
-
# * ISO8859_1
|
28
|
+
# * BESTGUESS (default: not entirely clear what Marc4J does with this)
|
29
|
+
# * ISO-8859-1 (also accepted: ISO8859_1)
|
31
30
|
# * UTF-8
|
32
|
-
# * MARC8
|
33
|
-
# Default 'BESTGUESS', but
|
31
|
+
# * MARC-8 (also accepted: MARC8)
|
32
|
+
# Default 'BESTGUESS', but HIGHLY recommend setting
|
33
|
+
# to avoid some Marc4J unpredictability, Marc4J "BESTGUESS" can be unpredictable
|
34
|
+
# in a variety of ways.
|
34
35
|
# (will ALWAYS be transcoded to UTF-8 on the way out. We insist.)
|
35
36
|
#
|
36
37
|
# * marc4j_reader.jar_dir: Path to a directory containing Marc4J jar file to use. All .jar's in dir will
|
@@ -54,7 +55,7 @@ require 'marc/marc4j'
|
|
54
55
|
#
|
55
56
|
# # Or instead for binary:
|
56
57
|
# provide "marc4j_reader.permissive", true
|
57
|
-
# provide "
|
58
|
+
# provide "marc_source.encoding", "MARC8"
|
58
59
|
# end
|
59
60
|
class Traject::Marc4JReader
|
60
61
|
include Enumerable
|
@@ -94,6 +95,20 @@ class Traject::Marc4JReader
|
|
94
95
|
settings["marc_source.type"]
|
95
96
|
end
|
96
97
|
|
98
|
+
def specified_source_encoding
|
99
|
+
#settings["marc4j_reader.source_encoding"]
|
100
|
+
enc = settings["marc_source.encoding"]
|
101
|
+
|
102
|
+
# one is standard for ruby and we want to support,
|
103
|
+
# the other is used by Marc4J and we have to pass it to Marc4J
|
104
|
+
enc = "ISO8859_1" if enc == "ISO-8859-1"
|
105
|
+
|
106
|
+
# default
|
107
|
+
enc = "BESTGUESS" if enc.nil? || enc.empty?
|
108
|
+
|
109
|
+
return enc
|
110
|
+
end
|
111
|
+
|
97
112
|
def create_marc_reader!
|
98
113
|
case input_type
|
99
114
|
when "binary"
|
@@ -101,7 +116,7 @@ class Traject::Marc4JReader
|
|
101
116
|
|
102
117
|
# #to_inputstream turns our ruby IO into a Java InputStream
|
103
118
|
# third arg means 'convert to UTF-8, yes'
|
104
|
-
MarcPermissiveStreamReader.new(input_stream.to_inputstream, permissive, true,
|
119
|
+
MarcPermissiveStreamReader.new(input_stream.to_inputstream, permissive, true, specified_source_encoding)
|
105
120
|
when "xml"
|
106
121
|
MarcXmlReader.new(input_stream.to_inputstream)
|
107
122
|
else
|
data/lib/traject/marc_reader.rb
CHANGED
@@ -4,22 +4,33 @@ require 'traject/ndj_reader'
|
|
4
4
|
# `Traject::MarcReader` uses pure ruby marc gem to parse MARC records. It
|
5
5
|
# can read MARC ISO 2709 ('binary'), MARC-XML, and Marc-in-json (newline-delimited-json).
|
6
6
|
#
|
7
|
-
#
|
8
|
-
#
|
7
|
+
# Marc4JReader is an alternative to this class, powered by Marc4J. You may be interested
|
8
|
+
# in comparing for performance, under your particular use case.
|
9
9
|
#
|
10
10
|
# By default assumes binary MARC encoding, please set marc_source.type setting
|
11
|
-
# for XML or json.
|
11
|
+
# for XML or json. If binary, please set marc_source.encoding with char encoding.
|
12
12
|
#
|
13
13
|
# ## Settings
|
14
14
|
|
15
15
|
# * "marc_source.type": serialization type. default 'binary'
|
16
|
-
# * "binary". standard ISO 2709 "binary" MARC format
|
17
|
-
#
|
16
|
+
# * "binary". standard ISO 2709 "binary" MARC format,
|
17
|
+
# will use ruby-marc MARC::Reader (Note, if you are using
|
18
|
+
# type 'binary', you probably want to also set 'marc_source.encoding')
|
19
|
+
# * "xml", MarcXML, will use ruby-marc MARC::XMLReader
|
18
20
|
# * "json" The "marc-in-json" format, encoded as newline-separated
|
19
21
|
# json. (synonym 'ndj'). A simplistic newline-separated json, with no comments
|
20
22
|
# allowed, and no unescaped internal newlines allowed in the json
|
21
23
|
# objects -- we just read line by line, and assume each line is a
|
22
24
|
# marc-in-json. http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/
|
25
|
+
# will use Traject::NDJReader which uses MARC::Record.new_from_hash.
|
26
|
+
# * "marc_source.encoding": Only used for marc_source.type 'binary', character encoding
|
27
|
+
# of the source marc records. Can be any
|
28
|
+
# encoding recognized by ruby, OR 'MARC-8'. For 'MARC-8', content will
|
29
|
+
# be transcoded (by ruby-marc) to UTF-8 in internal MARC::Record Strings.
|
30
|
+
# Default nil, meaning let MARC::Reader use its default, which will
|
31
|
+
# probably be Encoding.default_internal, which will probably be UTF-8.
|
32
|
+
# Right now Traject::MarcReader is hard-coded to transcode to UTF-8 as
|
33
|
+
# an internal encoding.
|
23
34
|
# * "marc_reader.xml_parser": For XML type, which XML parser to tell Marc::Reader
|
24
35
|
# to use. Anything recognized by [Marc::Reader :parser
|
25
36
|
# argument](http://rdoc.info/github/ruby-marc/ruby-marc/MARC/XMLReader).
|
@@ -62,7 +73,9 @@ class Traject::MarcReader
|
|
62
73
|
when 'json'
|
63
74
|
Traject::NDJReader.new(self.input_stream, settings)
|
64
75
|
else
|
65
|
-
|
76
|
+
args = { :invalid => :replace }
|
77
|
+
args[:external_encoding] = settings["marc_source.encoding"]
|
78
|
+
MARC::Reader.new(self.input_stream, args)
|
66
79
|
end
|
67
80
|
end
|
68
81
|
return @internal_reader
|
data/lib/traject/version.rb
CHANGED
data/test/marc4j_reader_test.rb
CHANGED
@@ -20,7 +20,7 @@ describe "Marc4JReader" do
|
|
20
20
|
first = array.first
|
21
21
|
|
22
22
|
assert_kind_of MARC::Record, first
|
23
|
-
|
23
|
+
assert_equal first['245']['a'].encoding.name, "UTF-8"
|
24
24
|
end
|
25
25
|
|
26
26
|
it "can skip a bad subfield code" do
|
@@ -37,7 +37,7 @@ describe "Marc4JReader" do
|
|
37
37
|
|
38
38
|
it "reads Marc binary in Marc8 encoding" do
|
39
39
|
file = File.new(support_file_path("one-marc8.mrc"))
|
40
|
-
settings = Traject::Indexer::Settings.new("
|
40
|
+
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "MARC8")
|
41
41
|
reader = Traject::Marc4JReader.new(file, settings)
|
42
42
|
|
43
43
|
array = reader.to_a
|
@@ -84,5 +84,53 @@ describe "Marc4JReader" do
|
|
84
84
|
assert_kind_of Java::org.marc4j.marc.impl::RecordImpl, record.original_marc4j
|
85
85
|
end
|
86
86
|
|
87
|
+
it "replaces unicode character reference in Marc8 transcode" do
|
88
|
+
file = File.new(support_file_path "escaped_character_reference.marc8.marc")
|
89
|
+
# due to marc4j idiosyncrasies, this test will NOT pass with default source_encoding
|
90
|
+
# of "BESTGUESS", it only works if you explicitly set to MARC8. Doh.
|
91
|
+
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "MARC8") # binary type is default
|
92
|
+
record = Traject::Marc4JReader.new(file, settings).to_a.first
|
93
|
+
|
94
|
+
assert_equal "Rio de Janeiro escaped replacement char: \uFFFD .", record['260']['a']
|
95
|
+
end
|
96
|
+
|
97
|
+
describe "Marc4J Java Permissive Stream Reader" do
|
98
|
+
# needed for sanity check when our tests fail to see if Marc4J
|
99
|
+
# is not behaving how we think it should.
|
100
|
+
it "converts character references" do
|
101
|
+
file = File.new(support_file_path "escaped_character_reference.marc8.marc")
|
102
|
+
reader = MarcPermissiveStreamReader.new(file.to_inputstream, true, true, "MARC-8")
|
103
|
+
record = reader.next
|
104
|
+
|
105
|
+
field = record.getVariableField("260")
|
106
|
+
subfield = field.getSubfield('a'.ord)
|
107
|
+
value = subfield.getData
|
108
|
+
|
109
|
+
assert_equal "Rio de Janeiro escaped replacement char: \uFFFD .", value
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
it "replaces bad byte in UTF8 marc" do
|
114
|
+
skip "Marc4J needs fixing on it's end" # Marc4J won't do this in 'permissive' mode, gah.
|
115
|
+
|
116
|
+
# Note this only works because the marc file DOES correctly
|
117
|
+
# have leader byte 9 set to 'a' for UTF8, otherwise Marc4J can't do it.
|
118
|
+
file = File.new(support_file_path "bad_utf_byte.utf8.marc")
|
119
|
+
|
120
|
+
settings = Traject::Indexer::Settings.new() # binary UTF8 type is default
|
121
|
+
reader = Traject::Marc4JReader.new(file, settings)
|
122
|
+
|
123
|
+
record = reader.to_a.first
|
124
|
+
|
125
|
+
value = record['300']['a']
|
126
|
+
|
127
|
+
assert_equal value.encoding.name, "UTF-8"
|
128
|
+
assert value.valid_encoding?, "Has valid encoding"
|
129
|
+
assert_equal "This is a bad byte: '\uFFFD' and another: '\uFFFD'", record['300']['a']
|
130
|
+
end
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
|
87
135
|
|
88
136
|
end
|
data/test/marc_reader_test.rb
CHANGED
@@ -17,21 +17,74 @@ describe "Traject::MarcReader" do
|
|
17
17
|
assert_equal 30, array.length
|
18
18
|
end
|
19
19
|
|
20
|
-
it "reads Marc binary" do
|
21
|
-
file = File.new(support_file_path "test_data.utf8.mrc")
|
22
|
-
settings = Traject::Indexer::Settings.new() # binary type is default
|
23
|
-
reader = Traject::MarcReader.new(file, settings)
|
24
20
|
|
25
|
-
|
21
|
+
describe "MARC binary" do
|
22
|
+
it "reads" do
|
23
|
+
file = File.new(support_file_path "test_data.utf8.mrc")
|
24
|
+
settings = Traject::Indexer::Settings.new() # binary type is default
|
25
|
+
reader = Traject::MarcReader.new(file, settings)
|
26
26
|
|
27
|
-
|
27
|
+
array = reader.to_a
|
28
28
|
|
29
|
-
|
29
|
+
assert_equal 30, array.length
|
30
30
|
|
31
|
-
|
31
|
+
first = array.first
|
32
32
|
|
33
|
-
|
34
|
-
|
33
|
+
assert_kind_of MARC::Record, first
|
34
|
+
|
35
|
+
assert first['245']['a'].encoding.name, "UTF-8"
|
36
|
+
assert_equal "Fikr-i Ayāz /", first['245']['a']
|
37
|
+
end
|
38
|
+
|
39
|
+
it "reads Marc binary in Marc8 encoding, transcoding to UTF-8" do
|
40
|
+
file = File.new(support_file_path("one-marc8.mrc"))
|
41
|
+
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "MARC-8")
|
42
|
+
reader = Traject::MarcReader.new(file, settings)
|
43
|
+
|
44
|
+
array = reader.to_a
|
45
|
+
|
46
|
+
assert_length 1, array
|
47
|
+
|
48
|
+
|
49
|
+
assert_kind_of MARC::Record, array.first
|
50
|
+
a245a = array.first['245']['a']
|
51
|
+
|
52
|
+
assert a245a.encoding.name, "UTF-8"
|
53
|
+
assert a245a.valid_encoding?
|
54
|
+
assert_equal "Por uma outra globalização :", a245a
|
55
|
+
end
|
56
|
+
|
57
|
+
it "replaces unicode character reference in Marc8 transcode" do
|
58
|
+
file = File.new(support_file_path("escaped_character_reference.marc8.marc"))
|
59
|
+
|
60
|
+
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "MARC-8") # binary type is default
|
61
|
+
record = Traject::MarcReader.new(file, settings).to_a.first
|
62
|
+
|
63
|
+
assert_equal "Rio de Janeiro escaped replacement char: \uFFFD .", record['260']['a']
|
64
|
+
end
|
65
|
+
|
66
|
+
it "raises on unrecognized encoding for binary type" do
|
67
|
+
file = File.new(support_file_path "one-marc8.mrc")
|
68
|
+
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "ADFADFADF")
|
69
|
+
assert_raises(ArgumentError) do
|
70
|
+
record = Traject::MarcReader.new(file, settings).to_a.first
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
it "replaces bad byte in UTF8 marc binary" do
|
75
|
+
file = File.new(support_file_path "bad_utf_byte.utf8.marc")
|
76
|
+
|
77
|
+
settings = Traject::Indexer::Settings.new() # binary type is default
|
78
|
+
reader = Traject::MarcReader.new(file, settings)
|
79
|
+
|
80
|
+
record = reader.to_a.first
|
81
|
+
|
82
|
+
value = record['300']['a']
|
83
|
+
|
84
|
+
assert_equal value.encoding.name, "UTF-8"
|
85
|
+
assert value.valid_encoding?, "Has valid encoding"
|
86
|
+
assert_equal "This is a bad byte: '\uFFFD' and another: '\uFFFD'", value
|
87
|
+
end
|
35
88
|
end
|
36
89
|
|
37
90
|
it "reads JSON" do
|
@@ -52,4 +105,6 @@ describe "Traject::MarcReader" do
|
|
52
105
|
|
53
106
|
|
54
107
|
|
108
|
+
|
109
|
+
|
55
110
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
00083 a2200037 4500300004500000 aThis is a bad byte: '�' and another: '�'
|
@@ -0,0 +1 @@
|
|
1
|
+
00138cam 2200049Ia 45000010008000002600080000082196384 aRio de Janeiro escaped replacement char: � .bEditora Record,c2000.
|
data/traject.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.extra_rdoc_files = spec.files.grep(%r{^doc/})
|
21
21
|
|
22
22
|
|
23
|
-
spec.add_dependency "marc", ">= 0.
|
23
|
+
spec.add_dependency "marc", ">= 0.8.0"
|
24
24
|
spec.add_dependency "marc-marc4j", ">=0.1.1" # use and convert marc4j
|
25
25
|
spec.add_dependency "hashie", ">= 2.0.5", "< 2.1" # used for Indexer#settings
|
26
26
|
spec.add_dependency "slop", ">= 3.4.5", "< 4.0" # command line parsing
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Rochkind
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: marc
|
@@ -17,12 +17,12 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - '>='
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 0.
|
20
|
+
version: 0.8.0
|
21
21
|
requirement: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - '>='
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version: 0.
|
25
|
+
version: 0.8.0
|
26
26
|
prerelease: false
|
27
27
|
type: :runtime
|
28
28
|
- !ruby/object:Gem::Dependency
|
@@ -211,11 +211,13 @@ files:
|
|
211
211
|
- test/test_support/245_no_ab.marc
|
212
212
|
- test/test_support/880_with_no_6.utf8.marc
|
213
213
|
- test/test_support/bad_subfield_code.marc
|
214
|
+
- test/test_support/bad_utf_byte.utf8.marc
|
214
215
|
- test/test_support/date_resort_to_260.marc
|
215
216
|
- test/test_support/date_type_r_missing_date2.marc
|
216
217
|
- test/test_support/date_with_u.marc
|
217
218
|
- test/test_support/demo_config.rb
|
218
219
|
- test/test_support/emptyish_record.marc
|
220
|
+
- test/test_support/escaped_character_reference.marc8.marc
|
219
221
|
- test/test_support/george_eliot.marc
|
220
222
|
- test/test_support/hebrew880s.marc
|
221
223
|
- test/test_support/louis_armstrong.marc
|
@@ -281,12 +283,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
281
283
|
version: '0'
|
282
284
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
283
285
|
requirements:
|
284
|
-
- - '
|
286
|
+
- - '>='
|
285
287
|
- !ruby/object:Gem::Version
|
286
|
-
version:
|
288
|
+
version: '0'
|
287
289
|
requirements: []
|
288
290
|
rubyforge_project:
|
289
|
-
rubygems_version: 2.1.
|
291
|
+
rubygems_version: 2.1.11
|
290
292
|
signing_key:
|
291
293
|
specification_version: 4
|
292
294
|
summary: Index MARC to Solr; or generally process source records to hash-like structures
|
@@ -309,11 +311,13 @@ test_files:
|
|
309
311
|
- test/test_support/245_no_ab.marc
|
310
312
|
- test/test_support/880_with_no_6.utf8.marc
|
311
313
|
- test/test_support/bad_subfield_code.marc
|
314
|
+
- test/test_support/bad_utf_byte.utf8.marc
|
312
315
|
- test/test_support/date_resort_to_260.marc
|
313
316
|
- test/test_support/date_type_r_missing_date2.marc
|
314
317
|
- test/test_support/date_with_u.marc
|
315
318
|
- test/test_support/demo_config.rb
|
316
319
|
- test/test_support/emptyish_record.marc
|
320
|
+
- test/test_support/escaped_character_reference.marc8.marc
|
317
321
|
- test/test_support/george_eliot.marc
|
318
322
|
- test/test_support/hebrew880s.marc
|
319
323
|
- test/test_support/louis_armstrong.marc
|