traject 1.0.0.beta.7 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -8
- data/doc/batch_execution.md +5 -1
- data/doc/settings.md +4 -2
- data/lib/traject/indexer.rb +1 -1
- data/lib/traject/indexer/settings.rb +4 -4
- data/lib/traject/marc4j_reader.rb +25 -10
- data/lib/traject/marc_reader.rb +19 -6
- data/lib/traject/version.rb +1 -1
- data/test/marc4j_reader_test.rb +50 -2
- data/test/marc_reader_test.rb +65 -10
- data/test/test_support/bad_utf_byte.utf8.marc +1 -0
- data/test/test_support/escaped_character_reference.marc8.marc +1 -0
- data/traject.gemspec +1 -1
- metadata +11 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31243d453f43fbc8f8634c2511340d8fb96c606f
|
4
|
+
data.tar.gz: a843a583f235920931304fc6baa7ff75ce91dfab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a0e6f3c695a5fe497f4cadd14fba7a2a10a3ea1ff40e42a6625949227d1eb3f995dc4d0ecb228f5e98c95f737df1c1e0e8089b59e17fcb0fedca8de5208b535
|
7
|
+
data.tar.gz: 3598e91ed10b039c4382d56ddc69939713425c4f059eb27adfddfdc272e25f2c52e8f900a31b94ceda6576d716c2155188dc03d4697156314e71aadf99a92471
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Might be used to index MARC data for a Solr-based discovery product like [Blackl
|
|
6
6
|
Traject might also be generalized to a set of tools for getting structured data from a source, and transforming it to a hash-like object to send to a destination.
|
7
7
|
|
8
8
|
|
9
|
-
**Traject is
|
9
|
+
**Traject is stable, mature software that is already being used in production by its authors.**
|
10
10
|
|
11
11
|
[![Gem Version](https://badge.fury.io/rb/traject.png)](http://badge.fury.io/rb/traject)
|
12
12
|
[![Build Status](https://travis-ci.org/traject-project/traject.png)](https://travis-ci.org/traject-project/traject)
|
@@ -89,13 +89,11 @@ settings do
|
|
89
89
|
# various others...
|
90
90
|
provide "solrj_writer.commit_on_close", "true"
|
91
91
|
|
92
|
-
# By default, we use the Traject::
|
93
|
-
#
|
94
|
-
#
|
95
|
-
#
|
96
|
-
|
97
|
-
# as to encoding when reading binary, you may want to tell it instead
|
98
|
-
provide "marc4j_reader.source_encoding", "MARC8" # or UTF-8 or ISO8859_1
|
92
|
+
# By default, we use the Traject::MarcReader
|
93
|
+
# One alternative is the Marc4JReader, using Marc4J.
|
94
|
+
# provide "reader_class_name", "Traject::Marc4JReader"
|
95
|
+
# If we're reading binary MARC, it's best to tell it the encoding.
|
96
|
+
provide "marc4j_reader.source_encoding", "MARC-8" # or 'UTF-8' or 'ISO-8859-1' or whatever.
|
99
97
|
end
|
100
98
|
~~~
|
101
99
|
|
data/doc/batch_execution.md
CHANGED
@@ -224,7 +224,7 @@ object you configure yourself however you like:
|
|
224
224
|
~~~ruby
|
225
225
|
# inside a traject configuration file
|
226
226
|
|
227
|
-
logger = Yell.new do |l|
|
227
|
+
self.logger = Yell.new do |l|
|
228
228
|
l.level = 'gte.info' # will only pass :info and above to the adapters
|
229
229
|
|
230
230
|
l.adapter :datefile, 'production.log', level: 'lte.warn' # anything lower or equal to :warn
|
@@ -232,6 +232,10 @@ object you configure yourself however you like:
|
|
232
232
|
end
|
233
233
|
~~~
|
234
234
|
|
235
|
+
**note** it's important to use `self.logger =`, or due to
|
236
|
+
ruby idiosyncrasies you'll just be setting a local variable, not the Indexer's
|
237
|
+
logger attribute.
|
238
|
+
|
235
239
|
See [yell](https://github.com/rudionrails/yell) docs for more, you can
|
236
240
|
do whatever you can make yell, just write ruby.
|
237
241
|
|
data/doc/settings.md
CHANGED
@@ -47,8 +47,10 @@ settings are applied first of all. It's recommended you use `provide`.
|
|
47
47
|
* `log.level`: Log this level and above. Default 'info', set to eg 'debug' to get potentially more logging info,
|
48
48
|
or 'error' to get less. https://github.com/rudionrails/yell/wiki/101-setting-the-log-level
|
49
49
|
|
50
|
-
* `log.batch_size`: If set to a number N (or string representation), will output a progress line to
|
51
|
-
log
|
50
|
+
* `log.batch_size`: If set to a number N (or string representation), will output a progress line to
|
51
|
+
log. (by default as INFO, but see log.batch_size.severity)
|
52
|
+
|
53
|
+
* `log.batch_size.severity`: If `log.batch_size` is set, what logger severity level to log to. Default "INFO", set to "DEBUG" etc if desired.
|
52
54
|
|
53
55
|
* `marc_source.type`: default 'binary'. Can also set to 'xml' or (not yet implemented todo) 'json'. Command line shortcut `-t`
|
54
56
|
|
data/lib/traject/indexer.rb
CHANGED
@@ -332,7 +332,7 @@ class Traject::Indexer
|
|
332
332
|
if log_batch_size && (count % log_batch_size == 0)
|
333
333
|
batch_rps = log_batch_size / (Time.now - batch_start_time)
|
334
334
|
overall_rps = count / (Time.now - start_time)
|
335
|
-
logger.
|
335
|
+
logger.send(settings["log.batch_size.severity"].downcase.to_sym, "Traject::Indexer#process, read #{count} records at id:#{id_string(record)}; #{'%.0f' % batch_rps}/s this batch, #{'%.0f' % overall_rps}/s overall")
|
336
336
|
batch_start_time = Time.now
|
337
337
|
end
|
338
338
|
|
@@ -62,14 +62,14 @@ class Traject::Indexer
|
|
62
62
|
|
63
63
|
def self.defaults
|
64
64
|
@@defaults ||= {
|
65
|
-
"reader_class_name" => "Traject::
|
65
|
+
"reader_class_name" => "Traject::MarcReader",
|
66
66
|
"writer_class_name" => "Traject::SolrJWriter",
|
67
|
-
"marc_source.type" => "binary",
|
67
|
+
"marc_source.type" => "binary",
|
68
68
|
"marc4j_reader.permissive" => true,
|
69
|
-
"marc4j_reader.source_encoding" => "BESTGUESS",
|
70
69
|
"solrj_writer.batch_size" => 200,
|
71
70
|
"solrj_writer.thread_pool" => 1,
|
72
|
-
"processing_thread_pool" => 3
|
71
|
+
"processing_thread_pool" => 3,
|
72
|
+
"log.batch_size.severity" => "info"
|
73
73
|
}
|
74
74
|
end
|
75
75
|
|
@@ -3,9 +3,8 @@ require 'marc'
|
|
3
3
|
require 'marc/marc4j'
|
4
4
|
|
5
5
|
# `Traject::Marc4JReader` uses the marc4j java package to parse the MARC records
|
6
|
-
# into standard ruby-marc MARC::Record objects. This reader
|
7
|
-
# Traject::MarcReader, especially for XML
|
8
|
-
# encoded records and transcoding to UTF8.
|
6
|
+
# into standard ruby-marc MARC::Record objects. This reader may be faster than
|
7
|
+
# Traject::MarcReader, especially for XML.
|
9
8
|
#
|
10
9
|
# Marc4JReader can read MARC ISO 2709 ("binary") or MARCXML. We use the Marc4J MarcPermissiveStreamReader
|
11
10
|
# for reading binary, but sometimes in non-permissive mode, according to settings. We use the Marc4j MarcXmlReader
|
@@ -24,13 +23,15 @@ require 'marc/marc4j'
|
|
24
23
|
# value to 'permissive' arg of MarcPermissiveStreamReader constructor.
|
25
24
|
# Only used for 'binary'
|
26
25
|
#
|
27
|
-
# *
|
26
|
+
# * marc_source.encoding: Only used for 'binary', otherwise always UTF-8.
|
28
27
|
# String of the values MarcPermissiveStreamReader accepts:
|
29
|
-
# * BESTGUESS (
|
30
|
-
# * ISO8859_1
|
28
|
+
# * BESTGUESS (default: not entirely clear what Marc4J does with this)
|
29
|
+
# * ISO-8859-1 (also accepted: ISO8859_1)
|
31
30
|
# * UTF-8
|
32
|
-
# * MARC8
|
33
|
-
# Default 'BESTGUESS', but
|
31
|
+
# * MARC-8 (also accepted: MARC8)
|
32
|
+
# Default 'BESTGUESS', but HIGHLY recommend setting
|
33
|
+
# to avoid some Marc4J unpredictability, Marc4J "BESTGUESS" can be unpredictable
|
34
|
+
# in a variety of ways.
|
34
35
|
# (will ALWAYS be transcoded to UTF-8 on the way out. We insist.)
|
35
36
|
#
|
36
37
|
# * marc4j_reader.jar_dir: Path to a directory containing Marc4J jar file to use. All .jar's in dir will
|
@@ -54,7 +55,7 @@ require 'marc/marc4j'
|
|
54
55
|
#
|
55
56
|
# # Or instead for binary:
|
56
57
|
# provide "marc4j_reader.permissive", true
|
57
|
-
# provide "
|
58
|
+
# provide "marc_source.encoding", "MARC8"
|
58
59
|
# end
|
59
60
|
class Traject::Marc4JReader
|
60
61
|
include Enumerable
|
@@ -94,6 +95,20 @@ class Traject::Marc4JReader
|
|
94
95
|
settings["marc_source.type"]
|
95
96
|
end
|
96
97
|
|
98
|
+
def specified_source_encoding
|
99
|
+
#settings["marc4j_reader.source_encoding"]
|
100
|
+
enc = settings["marc_source.encoding"]
|
101
|
+
|
102
|
+
# one is standard for ruby and we want to support,
|
103
|
+
# the other is used by Marc4J and we have to pass it to Marc4J
|
104
|
+
enc = "ISO8859_1" if enc == "ISO-8859-1"
|
105
|
+
|
106
|
+
# default
|
107
|
+
enc = "BESTGUESS" if enc.nil? || enc.empty?
|
108
|
+
|
109
|
+
return enc
|
110
|
+
end
|
111
|
+
|
97
112
|
def create_marc_reader!
|
98
113
|
case input_type
|
99
114
|
when "binary"
|
@@ -101,7 +116,7 @@ class Traject::Marc4JReader
|
|
101
116
|
|
102
117
|
# #to_inputstream turns our ruby IO into a Java InputStream
|
103
118
|
# third arg means 'convert to UTF-8, yes'
|
104
|
-
MarcPermissiveStreamReader.new(input_stream.to_inputstream, permissive, true,
|
119
|
+
MarcPermissiveStreamReader.new(input_stream.to_inputstream, permissive, true, specified_source_encoding)
|
105
120
|
when "xml"
|
106
121
|
MarcXmlReader.new(input_stream.to_inputstream)
|
107
122
|
else
|
data/lib/traject/marc_reader.rb
CHANGED
@@ -4,22 +4,33 @@ require 'traject/ndj_reader'
|
|
4
4
|
# `Traject::MarcReader` uses pure ruby marc gem to parse MARC records. It
|
5
5
|
# can read MARC ISO 2709 ('binary'), MARC-XML, and Marc-in-json (newline-delimited-json).
|
6
6
|
#
|
7
|
-
#
|
8
|
-
#
|
7
|
+
# Marc4JReader is an alternative to this class, powered by Marc4J. You may be interested
|
8
|
+
# in comparing for performance, under your particular use case.
|
9
9
|
#
|
10
10
|
# By default assumes binary MARC encoding, please set marc_source.type setting
|
11
|
-
# for XML or json.
|
11
|
+
# for XML or json. If binary, please set marc_source.encoding with char encoding.
|
12
12
|
#
|
13
13
|
# ## Settings
|
14
14
|
|
15
15
|
# * "marc_source.type": serialization type. default 'binary'
|
16
|
-
# * "binary". standard ISO 2709 "binary" MARC format
|
17
|
-
#
|
16
|
+
# * "binary". standard ISO 2709 "binary" MARC format,
|
17
|
+
# will use ruby-marc MARC::Reader (Note, if you are using
|
18
|
+
# type 'binary', you probably want to also set 'marc_source.encoding')
|
19
|
+
# * "xml", MarcXML, will use ruby-marc MARC::XMLReader
|
18
20
|
# * "json" The "marc-in-json" format, encoded as newline-separated
|
19
21
|
# json. (synonym 'ndj'). A simplistic newline-separated json, with no comments
|
20
22
|
# allowed, and no unescaped internal newlines allowed in the json
|
21
23
|
# objects -- we just read line by line, and assume each line is a
|
22
24
|
# marc-in-json. http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/
|
25
|
+
# will use Traject::NDJReader which uses MARC::Record.new_from_hash.
|
26
|
+
# * "marc_source.encoding": Only used for marc_source.type 'binary', character encoding
|
27
|
+
# of the source marc records. Can be any
|
28
|
+
# encoding recognized by ruby, OR 'MARC-8'. For 'MARC-8', content will
|
29
|
+
# be transcoded (by ruby-marc) to UTF-8 in internal MARC::Record Strings.
|
30
|
+
# Default nil, meaning let MARC::Reader use its default, which will
|
31
|
+
# probably be Encoding.default_internal, which will probably be UTF-8.
|
32
|
+
# Right now Traject::MarcReader is hard-coded to transcode to UTF-8 as
|
33
|
+
# an internal encoding.
|
23
34
|
# * "marc_reader.xml_parser": For XML type, which XML parser to tell Marc::Reader
|
24
35
|
# to use. Anything recognized by [Marc::Reader :parser
|
25
36
|
# argument](http://rdoc.info/github/ruby-marc/ruby-marc/MARC/XMLReader).
|
@@ -62,7 +73,9 @@ class Traject::MarcReader
|
|
62
73
|
when 'json'
|
63
74
|
Traject::NDJReader.new(self.input_stream, settings)
|
64
75
|
else
|
65
|
-
|
76
|
+
args = { :invalid => :replace }
|
77
|
+
args[:external_encoding] = settings["marc_source.encoding"]
|
78
|
+
MARC::Reader.new(self.input_stream, args)
|
66
79
|
end
|
67
80
|
end
|
68
81
|
return @internal_reader
|
data/lib/traject/version.rb
CHANGED
data/test/marc4j_reader_test.rb
CHANGED
@@ -20,7 +20,7 @@ describe "Marc4JReader" do
|
|
20
20
|
first = array.first
|
21
21
|
|
22
22
|
assert_kind_of MARC::Record, first
|
23
|
-
|
23
|
+
assert_equal first['245']['a'].encoding.name, "UTF-8"
|
24
24
|
end
|
25
25
|
|
26
26
|
it "can skip a bad subfield code" do
|
@@ -37,7 +37,7 @@ describe "Marc4JReader" do
|
|
37
37
|
|
38
38
|
it "reads Marc binary in Marc8 encoding" do
|
39
39
|
file = File.new(support_file_path("one-marc8.mrc"))
|
40
|
-
settings = Traject::Indexer::Settings.new("
|
40
|
+
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "MARC8")
|
41
41
|
reader = Traject::Marc4JReader.new(file, settings)
|
42
42
|
|
43
43
|
array = reader.to_a
|
@@ -84,5 +84,53 @@ describe "Marc4JReader" do
|
|
84
84
|
assert_kind_of Java::org.marc4j.marc.impl::RecordImpl, record.original_marc4j
|
85
85
|
end
|
86
86
|
|
87
|
+
it "replaces unicode character reference in Marc8 transcode" do
|
88
|
+
file = File.new(support_file_path "escaped_character_reference.marc8.marc")
|
89
|
+
# due to marc4j idiosyncrasies, this test will NOT pass with default source_encoding
|
90
|
+
# of "BESTGUESS", it only works if you explicitly set to MARC8. Doh.
|
91
|
+
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "MARC8") # binary type is default
|
92
|
+
record = Traject::Marc4JReader.new(file, settings).to_a.first
|
93
|
+
|
94
|
+
assert_equal "Rio de Janeiro escaped replacement char: \uFFFD .", record['260']['a']
|
95
|
+
end
|
96
|
+
|
97
|
+
describe "Marc4J Java Permissive Stream Reader" do
|
98
|
+
# needed for sanity check when our tests fail to see if Marc4J
|
99
|
+
# is not behaving how we think it should.
|
100
|
+
it "converts character references" do
|
101
|
+
file = File.new(support_file_path "escaped_character_reference.marc8.marc")
|
102
|
+
reader = MarcPermissiveStreamReader.new(file.to_inputstream, true, true, "MARC-8")
|
103
|
+
record = reader.next
|
104
|
+
|
105
|
+
field = record.getVariableField("260")
|
106
|
+
subfield = field.getSubfield('a'.ord)
|
107
|
+
value = subfield.getData
|
108
|
+
|
109
|
+
assert_equal "Rio de Janeiro escaped replacement char: \uFFFD .", value
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
it "replaces bad byte in UTF8 marc" do
|
114
|
+
skip "Marc4J needs fixing on it's end" # Marc4J won't do this in 'permissive' mode, gah.
|
115
|
+
|
116
|
+
# Note this only works because the marc file DOES correctly
|
117
|
+
# have leader byte 9 set to 'a' for UTF8, otherwise Marc4J can't do it.
|
118
|
+
file = File.new(support_file_path "bad_utf_byte.utf8.marc")
|
119
|
+
|
120
|
+
settings = Traject::Indexer::Settings.new() # binary UTF8 type is default
|
121
|
+
reader = Traject::Marc4JReader.new(file, settings)
|
122
|
+
|
123
|
+
record = reader.to_a.first
|
124
|
+
|
125
|
+
value = record['300']['a']
|
126
|
+
|
127
|
+
assert_equal value.encoding.name, "UTF-8"
|
128
|
+
assert value.valid_encoding?, "Has valid encoding"
|
129
|
+
assert_equal "This is a bad byte: '\uFFFD' and another: '\uFFFD'", record['300']['a']
|
130
|
+
end
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
|
87
135
|
|
88
136
|
end
|
data/test/marc_reader_test.rb
CHANGED
@@ -17,21 +17,74 @@ describe "Traject::MarcReader" do
|
|
17
17
|
assert_equal 30, array.length
|
18
18
|
end
|
19
19
|
|
20
|
-
it "reads Marc binary" do
|
21
|
-
file = File.new(support_file_path "test_data.utf8.mrc")
|
22
|
-
settings = Traject::Indexer::Settings.new() # binary type is default
|
23
|
-
reader = Traject::MarcReader.new(file, settings)
|
24
20
|
|
25
|
-
|
21
|
+
describe "MARC binary" do
|
22
|
+
it "reads" do
|
23
|
+
file = File.new(support_file_path "test_data.utf8.mrc")
|
24
|
+
settings = Traject::Indexer::Settings.new() # binary type is default
|
25
|
+
reader = Traject::MarcReader.new(file, settings)
|
26
26
|
|
27
|
-
|
27
|
+
array = reader.to_a
|
28
28
|
|
29
|
-
|
29
|
+
assert_equal 30, array.length
|
30
30
|
|
31
|
-
|
31
|
+
first = array.first
|
32
32
|
|
33
|
-
|
34
|
-
|
33
|
+
assert_kind_of MARC::Record, first
|
34
|
+
|
35
|
+
assert first['245']['a'].encoding.name, "UTF-8"
|
36
|
+
assert_equal "Fikr-i Ayāz /", first['245']['a']
|
37
|
+
end
|
38
|
+
|
39
|
+
it "reads Marc binary in Marc8 encoding, transcoding to UTF-8" do
|
40
|
+
file = File.new(support_file_path("one-marc8.mrc"))
|
41
|
+
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "MARC-8")
|
42
|
+
reader = Traject::MarcReader.new(file, settings)
|
43
|
+
|
44
|
+
array = reader.to_a
|
45
|
+
|
46
|
+
assert_length 1, array
|
47
|
+
|
48
|
+
|
49
|
+
assert_kind_of MARC::Record, array.first
|
50
|
+
a245a = array.first['245']['a']
|
51
|
+
|
52
|
+
assert a245a.encoding.name, "UTF-8"
|
53
|
+
assert a245a.valid_encoding?
|
54
|
+
assert_equal "Por uma outra globalização :", a245a
|
55
|
+
end
|
56
|
+
|
57
|
+
it "replaces unicode character reference in Marc8 transcode" do
|
58
|
+
file = File.new(support_file_path("escaped_character_reference.marc8.marc"))
|
59
|
+
|
60
|
+
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "MARC-8") # binary type is default
|
61
|
+
record = Traject::MarcReader.new(file, settings).to_a.first
|
62
|
+
|
63
|
+
assert_equal "Rio de Janeiro escaped replacement char: \uFFFD .", record['260']['a']
|
64
|
+
end
|
65
|
+
|
66
|
+
it "raises on unrecognized encoding for binary type" do
|
67
|
+
file = File.new(support_file_path "one-marc8.mrc")
|
68
|
+
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "ADFADFADF")
|
69
|
+
assert_raises(ArgumentError) do
|
70
|
+
record = Traject::MarcReader.new(file, settings).to_a.first
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
it "replaces bad byte in UTF8 marc binary" do
|
75
|
+
file = File.new(support_file_path "bad_utf_byte.utf8.marc")
|
76
|
+
|
77
|
+
settings = Traject::Indexer::Settings.new() # binary type is default
|
78
|
+
reader = Traject::MarcReader.new(file, settings)
|
79
|
+
|
80
|
+
record = reader.to_a.first
|
81
|
+
|
82
|
+
value = record['300']['a']
|
83
|
+
|
84
|
+
assert_equal value.encoding.name, "UTF-8"
|
85
|
+
assert value.valid_encoding?, "Has valid encoding"
|
86
|
+
assert_equal "This is a bad byte: '\uFFFD' and another: '\uFFFD'", value
|
87
|
+
end
|
35
88
|
end
|
36
89
|
|
37
90
|
it "reads JSON" do
|
@@ -52,4 +105,6 @@ describe "Traject::MarcReader" do
|
|
52
105
|
|
53
106
|
|
54
107
|
|
108
|
+
|
109
|
+
|
55
110
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
00083 a2200037 4500300004500000 aThis is a bad byte: '�' and another: '�'
|
@@ -0,0 +1 @@
|
|
1
|
+
00138cam 2200049Ia 45000010008000002600080000082196384 aRio de Janeiro escaped replacement char: � .bEditora Record,c2000.
|
data/traject.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.extra_rdoc_files = spec.files.grep(%r{^doc/})
|
21
21
|
|
22
22
|
|
23
|
-
spec.add_dependency "marc", ">= 0.
|
23
|
+
spec.add_dependency "marc", ">= 0.8.0"
|
24
24
|
spec.add_dependency "marc-marc4j", ">=0.1.1" # use and convert marc4j
|
25
25
|
spec.add_dependency "hashie", ">= 2.0.5", "< 2.1" # used for Indexer#settings
|
26
26
|
spec.add_dependency "slop", ">= 3.4.5", "< 4.0" # command line parsing
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Rochkind
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: marc
|
@@ -17,12 +17,12 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - '>='
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 0.
|
20
|
+
version: 0.8.0
|
21
21
|
requirement: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - '>='
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version: 0.
|
25
|
+
version: 0.8.0
|
26
26
|
prerelease: false
|
27
27
|
type: :runtime
|
28
28
|
- !ruby/object:Gem::Dependency
|
@@ -211,11 +211,13 @@ files:
|
|
211
211
|
- test/test_support/245_no_ab.marc
|
212
212
|
- test/test_support/880_with_no_6.utf8.marc
|
213
213
|
- test/test_support/bad_subfield_code.marc
|
214
|
+
- test/test_support/bad_utf_byte.utf8.marc
|
214
215
|
- test/test_support/date_resort_to_260.marc
|
215
216
|
- test/test_support/date_type_r_missing_date2.marc
|
216
217
|
- test/test_support/date_with_u.marc
|
217
218
|
- test/test_support/demo_config.rb
|
218
219
|
- test/test_support/emptyish_record.marc
|
220
|
+
- test/test_support/escaped_character_reference.marc8.marc
|
219
221
|
- test/test_support/george_eliot.marc
|
220
222
|
- test/test_support/hebrew880s.marc
|
221
223
|
- test/test_support/louis_armstrong.marc
|
@@ -281,12 +283,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
281
283
|
version: '0'
|
282
284
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
283
285
|
requirements:
|
284
|
-
- - '
|
286
|
+
- - '>='
|
285
287
|
- !ruby/object:Gem::Version
|
286
|
-
version:
|
288
|
+
version: '0'
|
287
289
|
requirements: []
|
288
290
|
rubyforge_project:
|
289
|
-
rubygems_version: 2.1.
|
291
|
+
rubygems_version: 2.1.11
|
290
292
|
signing_key:
|
291
293
|
specification_version: 4
|
292
294
|
summary: Index MARC to Solr; or generally process source records to hash-like structures
|
@@ -309,11 +311,13 @@ test_files:
|
|
309
311
|
- test/test_support/245_no_ab.marc
|
310
312
|
- test/test_support/880_with_no_6.utf8.marc
|
311
313
|
- test/test_support/bad_subfield_code.marc
|
314
|
+
- test/test_support/bad_utf_byte.utf8.marc
|
312
315
|
- test/test_support/date_resort_to_260.marc
|
313
316
|
- test/test_support/date_type_r_missing_date2.marc
|
314
317
|
- test/test_support/date_with_u.marc
|
315
318
|
- test/test_support/demo_config.rb
|
316
319
|
- test/test_support/emptyish_record.marc
|
320
|
+
- test/test_support/escaped_character_reference.marc8.marc
|
317
321
|
- test/test_support/george_eliot.marc
|
318
322
|
- test/test_support/hebrew880s.marc
|
319
323
|
- test/test_support/louis_armstrong.marc
|