traject 0.0.2 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/README.md +85 -61
- data/Rakefile +5 -0
- data/bin/traject +31 -3
- data/doc/settings.md +74 -13
- data/lib/tasks/load_maps.rake +48 -0
- data/lib/traject/indexer/settings.rb +75 -0
- data/lib/traject/indexer.rb +255 -45
- data/lib/traject/json_writer.rb +4 -2
- data/lib/traject/macros/marc21.rb +18 -6
- data/lib/traject/macros/marc21_semantics.rb +405 -0
- data/lib/traject/macros/marc_format_classifier.rb +180 -0
- data/lib/traject/marc4j_reader.rb +160 -0
- data/lib/traject/marc_extractor.rb +33 -17
- data/lib/traject/marc_reader.rb +14 -11
- data/lib/traject/solrj_writer.rb +247 -9
- data/lib/traject/thread_pool.rb +154 -0
- data/lib/traject/translation_map.rb +46 -4
- data/lib/traject/util.rb +30 -0
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/lcc_top_level.yaml +26 -0
- data/lib/translation_maps/marc_genre_007.yaml +9 -0
- data/lib/translation_maps/marc_genre_leader.yaml +22 -0
- data/lib/translation_maps/marc_geographic.yaml +589 -0
- data/lib/translation_maps/marc_instruments.yaml +102 -0
- data/lib/translation_maps/marc_languages.yaml +490 -0
- data/test/indexer/each_record_test.rb +34 -0
- data/test/indexer/macros_marc21_semantics_test.rb +206 -0
- data/test/indexer/macros_marc21_test.rb +10 -1
- data/test/indexer/map_record_test.rb +78 -8
- data/test/indexer/read_write_test.rb +43 -10
- data/test/indexer/settings_test.rb +60 -4
- data/test/indexer/to_field_test.rb +39 -0
- data/test/marc4j_reader_test.rb +75 -0
- data/test/marc_extractor_test.rb +62 -0
- data/test/marc_format_classifier_test.rb +91 -0
- data/test/marc_reader_test.rb +12 -0
- data/test/solrj_writer_test.rb +146 -43
- data/test/test_helper.rb +50 -0
- data/test/test_support/245_no_ab.marc +1 -0
- data/test/test_support/880_with_no_6.utf8.marc +1 -0
- data/test/test_support/bad_subfield_code.marc +1 -0
- data/test/test_support/date_resort_to_260.marc +1 -0
- data/test/test_support/date_type_r_missing_date2.marc +1 -0
- data/test/test_support/date_with_u.marc +1 -0
- data/test/test_support/demo_config.rb +153 -0
- data/test/test_support/emptyish_record.marc +1 -0
- data/test/test_support/louis_armstrong.marc +1 -0
- data/test/test_support/manuscript_online_thesis.marc +1 -0
- data/test/test_support/microform_online_conference.marc +1 -0
- data/test/test_support/multi_era.marc +1 -0
- data/test/test_support/multi_geo.marc +1 -0
- data/test/test_support/musical_cage.marc +1 -0
- data/test/test_support/one-marc8.mrc +1 -0
- data/test/test_support/online_only.marc +1 -0
- data/test/test_support/packed_041a_lang.marc +1 -0
- data/test/test_support/the_business_ren.marc +1 -0
- data/test/translation_map_test.rb +8 -0
- data/test/translation_maps/properties_map.properties +5 -0
- data/traject.gemspec +1 -1
- data/vendor/marc4j/README.md +17 -0
- data/vendor/marc4j/lib/marc4j-2.5.1-beta.jar +0 -0
- metadata +81 -2
@@ -45,7 +45,16 @@ describe "Traject::Macros::Marc21" do
|
|
45
45
|
|
46
46
|
output = @indexer.map_record(@record)
|
47
47
|
|
48
|
-
|
48
|
+
assert_equal ["Manufacturing consent : the political economy of the mass media"], output["title"]
|
49
|
+
end
|
50
|
+
|
51
|
+
it "respects :default option" do
|
52
|
+
@indexer.instance_eval do
|
53
|
+
to_field "only_default", extract_marc("9999", :default => "DEFAULT VALUE")
|
54
|
+
end
|
55
|
+
output = @indexer.map_record(@record)
|
56
|
+
|
57
|
+
assert_equal ["DEFAULT VALUE"], output["only_default"]
|
49
58
|
end
|
50
59
|
|
51
60
|
it "Marc21::trim_punctuation class method" do
|
@@ -1,9 +1,9 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
|
-
describe "Traject::Indexer#map_record" do
|
3
|
+
describe "Traject::Indexer#map_record" do
|
4
4
|
before do
|
5
5
|
@indexer = Traject::Indexer.new
|
6
|
-
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
6
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
7
7
|
end
|
8
8
|
|
9
9
|
|
@@ -16,7 +16,7 @@ describe "Traject::Indexer#map_record" do
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
-
describe "#to_field" do
|
19
|
+
describe "#to_field" do
|
20
20
|
it "works with block" do
|
21
21
|
called = false
|
22
22
|
|
@@ -27,7 +27,7 @@ describe "Traject::Indexer#map_record" do
|
|
27
27
|
called = true # by the power of closure!
|
28
28
|
accumulator << "Some Title"
|
29
29
|
end
|
30
|
-
|
30
|
+
|
31
31
|
output = @indexer.map_record(@record)
|
32
32
|
|
33
33
|
assert called
|
@@ -46,8 +46,8 @@ describe "Traject::Indexer#map_record" do
|
|
46
46
|
accumulator << "Some Title"
|
47
47
|
end
|
48
48
|
|
49
|
-
@indexer.to_field("title", logic)
|
50
|
-
|
49
|
+
@indexer.to_field("title", logic)
|
50
|
+
|
51
51
|
output = @indexer.map_record(@record)
|
52
52
|
|
53
53
|
assert called
|
@@ -103,10 +103,11 @@ describe "Traject::Indexer#map_record" do
|
|
103
103
|
called = true
|
104
104
|
|
105
105
|
assert_kind_of Traject::Indexer::Context, context
|
106
|
-
|
106
|
+
|
107
107
|
assert_kind_of Hash, context.clipboard
|
108
108
|
assert_kind_of Hash, context.output_hash
|
109
109
|
|
110
|
+
assert_same @record, record
|
110
111
|
assert_same record, context.source_record
|
111
112
|
assert_same @indexer.settings, context.settings
|
112
113
|
end
|
@@ -116,5 +117,74 @@ describe "Traject::Indexer#map_record" do
|
|
116
117
|
assert called
|
117
118
|
end
|
118
119
|
end
|
119
|
-
|
120
|
+
|
121
|
+
describe "#each_record" do
|
122
|
+
it "is called with one-arg record" do
|
123
|
+
called = false
|
124
|
+
@indexer.each_record do |record|
|
125
|
+
called = true
|
126
|
+
assert_kind_of MARC::Record, record
|
127
|
+
end
|
128
|
+
@indexer.map_record(@record)
|
129
|
+
|
130
|
+
assert called, "each_record was called"
|
131
|
+
end
|
132
|
+
it "is called with two-arg record and context" do
|
133
|
+
called = false
|
134
|
+
@indexer.each_record do |record, context|
|
135
|
+
called = true
|
136
|
+
assert_kind_of MARC::Record, record
|
137
|
+
assert_kind_of Traject::Indexer::Context, context
|
138
|
+
end
|
139
|
+
@indexer.map_record(@record)
|
140
|
+
|
141
|
+
assert called, "each_record was called"
|
142
|
+
end
|
143
|
+
it "accepts lambda AND block" do
|
144
|
+
lambda_arg = lambda do |record, context|
|
145
|
+
context.output_hash["field"] ||= []
|
146
|
+
context.output_hash["field"] << "first"
|
147
|
+
end
|
148
|
+
|
149
|
+
@indexer.each_record(lambda_arg) do |record, context|
|
150
|
+
context.output_hash["field"] ||= []
|
151
|
+
context.output_hash["field"] << "second"
|
152
|
+
end
|
153
|
+
|
154
|
+
output = @indexer.map_record(@record)
|
155
|
+
|
156
|
+
assert_equal %w{first second}, output["field"]
|
157
|
+
end
|
158
|
+
it "is called in order with #to_field" do
|
159
|
+
@indexer.to_field("foo") {|record, accumulator| accumulator << "first"}
|
160
|
+
@indexer.each_record {|record, context| context.output_hash["foo"] << "second" }
|
161
|
+
@indexer.to_field("foo") {|record, accumulator| accumulator << "third"}
|
162
|
+
|
163
|
+
output = @indexer.map_record(@record)
|
164
|
+
|
165
|
+
assert_equal %w{first second third}, output["foo"]
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
describe "map_to_context!" do
|
170
|
+
before do
|
171
|
+
@context = Traject::Indexer::Context.new(:source_record => @record, :settings => @indexer.settings, :position => 10 )
|
172
|
+
end
|
173
|
+
it "passes context to indexing routines" do
|
174
|
+
called = false
|
175
|
+
@indexer.to_field("title") do |record, accumulator, context|
|
176
|
+
called = true
|
177
|
+
assert_kind_of Traject::Indexer::Context, context
|
178
|
+
assert_same @context, context
|
179
|
+
end
|
180
|
+
|
181
|
+
context = @indexer.map_to_context!(@context)
|
182
|
+
|
183
|
+
assert_same @context, context
|
184
|
+
|
185
|
+
assert called, "Called mapping routine"
|
186
|
+
end
|
187
|
+
|
188
|
+
end
|
189
|
+
|
120
190
|
end
|
@@ -4,7 +4,8 @@ require 'test_helper'
|
|
4
4
|
# in an array, just added to settings for easy access
|
5
5
|
memory_writer_class = Class.new do
|
6
6
|
def initialize(settings)
|
7
|
-
|
7
|
+
# store them in a class variable so we can test em later
|
8
|
+
@@last_writer_settings = @settings = settings
|
8
9
|
@settings["memory_writer.added"] = []
|
9
10
|
end
|
10
11
|
|
@@ -17,31 +18,63 @@ memory_writer_class = Class.new do
|
|
17
18
|
end
|
18
19
|
end
|
19
20
|
|
20
|
-
describe "Traject::Indexer#process" do
|
21
|
+
describe "Traject::Indexer#process" do
|
21
22
|
before do
|
22
|
-
|
23
|
+
# no threading for these tests
|
24
|
+
@indexer = Traject::Indexer.new("processing_thread_pool" => nil)
|
23
25
|
@indexer.writer_class = memory_writer_class
|
24
26
|
@file = File.open(support_file_path "test_data.utf8.mrc")
|
25
27
|
end
|
26
28
|
|
27
29
|
it "works" do
|
30
|
+
# oops, this times_called counter isn't thread-safe under multi-threading
|
31
|
+
# is why this fails sometimes.
|
32
|
+
# fixed to be single-threaded for these tests.
|
33
|
+
times_called = 0
|
28
34
|
@indexer.to_field("title") do |record, accumulator, context|
|
35
|
+
times_called += 1
|
29
36
|
accumulator << "ADDED TITLE"
|
30
37
|
assert_equal "title", context.field_name
|
38
|
+
|
39
|
+
assert_equal times_called, context.position
|
31
40
|
end
|
32
41
|
|
33
|
-
@indexer.process( @file )
|
42
|
+
return_value = @indexer.process( @file )
|
43
|
+
|
44
|
+
assert return_value, "Returns `true` on success"
|
34
45
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
assert_equal ["ADDED TITLE"], @indexer.settings["memory_writer.added"].first["title"]
|
46
|
+
# Grab the settings out of a class variable where we left em,
|
47
|
+
# as a convenient place to store outcomes so we can test em.
|
48
|
+
writer_settings = memory_writer_class.class_variable_get("@@last_writer_settings")
|
39
49
|
|
40
|
-
assert
|
50
|
+
assert writer_settings["memory_writer.added"]
|
51
|
+
assert_equal 30, writer_settings["memory_writer.added"].length
|
52
|
+
assert_kind_of Traject::Indexer::Context, writer_settings["memory_writer.added"].first
|
53
|
+
assert_equal ["ADDED TITLE"], writer_settings["memory_writer.added"].first.output_hash["title"]
|
41
54
|
|
55
|
+
# logger provided in settings
|
56
|
+
assert writer_settings["logger"]
|
57
|
+
|
58
|
+
assert writer_settings["memory_writer.closed"]
|
42
59
|
end
|
43
60
|
|
44
|
-
|
61
|
+
it "returns false if skipped records" do
|
62
|
+
@indexer = Traject::Indexer.new(
|
63
|
+
"solrj_writer.server_class_name" => "MockSolrServer",
|
64
|
+
"solr.url" => "http://example.org",
|
65
|
+
"writer_class_name" => "Traject::SolrJWriter"
|
66
|
+
)
|
67
|
+
@file = File.open(support_file_path "manufacturing_consent.marc")
|
68
|
+
|
69
|
+
|
70
|
+
@indexer.to_field("id") do |record, accumulator|
|
71
|
+
# intentionally make error
|
72
|
+
accumulator.concat ["one_id", "two_id"]
|
73
|
+
end
|
74
|
+
return_value = @indexer.process(@file)
|
75
|
+
|
76
|
+
assert ! return_value, "returns false on skipped record errors"
|
77
|
+
end
|
45
78
|
|
46
79
|
|
47
80
|
end
|
@@ -1,13 +1,31 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
|
-
describe "Traject::Indexer#settings" do
|
3
|
+
describe "Traject::Indexer#settings" do
|
4
4
|
before do
|
5
5
|
@indexer = Traject::Indexer.new
|
6
6
|
end
|
7
7
|
|
8
|
-
it "starts out
|
8
|
+
it "starts out a Hash, that can fill in it's defaults" do
|
9
9
|
assert_kind_of Hash, @indexer.settings
|
10
|
-
|
10
|
+
|
11
|
+
Traject::Indexer::Settings.defaults.each_pair do |key, value|
|
12
|
+
assert_equal value, @indexer.settings[key]
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
it "can fill_in_defaults!" do
|
17
|
+
@indexer.settings.fill_in_defaults!
|
18
|
+
|
19
|
+
assert_equal Traject::Indexer::Settings.defaults, @indexer.settings
|
20
|
+
end
|
21
|
+
|
22
|
+
it "doesn't overwrite with fill_in_defaults!" do
|
23
|
+
key = Traject::Indexer::Settings.defaults.keys.first
|
24
|
+
@indexer.settings[ key ] = "MINE KEEP IT"
|
25
|
+
|
26
|
+
@indexer.settings.fill_in_defaults!
|
27
|
+
|
28
|
+
assert_equal "MINE KEEP IT", @indexer.settings[key]
|
11
29
|
end
|
12
30
|
|
13
31
|
it "can take argument to set" do
|
@@ -42,7 +60,7 @@ describe "Traject::Indexer#settings" do
|
|
42
60
|
@indexer.settings("four" => "fourth")
|
43
61
|
|
44
62
|
{"one" => "original", "two" => "second", "three" => "third", "four" => "fourth"}.each_pair do |key, value|
|
45
|
-
assert_equal value, @indexer.settings[key]
|
63
|
+
assert_equal value, @indexer.settings[key]
|
46
64
|
end
|
47
65
|
end
|
48
66
|
|
@@ -60,6 +78,44 @@ describe "Traject::Indexer#settings" do
|
|
60
78
|
assert_equal "foo 4", @indexer.settings["foo"]
|
61
79
|
end
|
62
80
|
|
81
|
+
it "implements #provide as cautious setter" do
|
82
|
+
@indexer.settings[:a] = "original"
|
83
|
+
|
84
|
+
@indexer.settings do
|
85
|
+
provide :a, "new"
|
86
|
+
provide :b, "new"
|
87
|
+
end
|
88
|
+
|
89
|
+
assert_equal "original", @indexer.settings[:a]
|
90
|
+
assert_equal "new", @indexer.settings[:b]
|
91
|
+
end
|
92
|
+
|
93
|
+
it "has reverse_merge" do
|
94
|
+
settings = Traject::Indexer::Settings.new("a" => "original", "b" => "original")
|
95
|
+
|
96
|
+
new_settings = settings.reverse_merge(:a => "new", :c => "new")
|
97
|
+
|
98
|
+
assert_kind_of Traject::Indexer::Settings, new_settings
|
99
|
+
|
100
|
+
assert_equal "original", new_settings["a"]
|
101
|
+
assert_equal "original", new_settings["b"]
|
102
|
+
assert_equal "new", new_settings["c"]
|
103
|
+
end
|
104
|
+
|
105
|
+
it "has reverse_merge!" do
|
106
|
+
settings = Traject::Indexer::Settings.new("a" => "original", "b" => "original")
|
63
107
|
|
108
|
+
settings.reverse_merge!(:a => "new", :c => "new")
|
109
|
+
|
110
|
+
assert_kind_of Traject::Indexer::Settings, settings
|
111
|
+
|
112
|
+
assert_equal "original", settings["a"]
|
113
|
+
assert_equal "original", settings["b"]
|
114
|
+
assert_equal "new", settings["c"]
|
115
|
+
end
|
116
|
+
|
117
|
+
describe "defaults" do
|
118
|
+
|
119
|
+
end
|
64
120
|
|
65
121
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Traject::Indexer.to_field" do
|
4
|
+
before do
|
5
|
+
@indexer = Traject::Indexer.new
|
6
|
+
end
|
7
|
+
describe "checks it's arguments" do
|
8
|
+
it "rejects nil first arg" do
|
9
|
+
assert_raises(ArgumentError) { @indexer.to_field(nil) }
|
10
|
+
end
|
11
|
+
it "rejects empty string first arg" do
|
12
|
+
assert_raises(ArgumentError) {@indexer.to_field("")}
|
13
|
+
end
|
14
|
+
it "rejects one-arg lambda" do
|
15
|
+
assert_raises(ArgumentError) do
|
16
|
+
@indexer.to_field("foo") do |one_arg|
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
it "rejects four-arg lambda" do
|
22
|
+
assert_raises(ArgumentError) do
|
23
|
+
@indexer.to_field("foo") do |one_arg, two_arg, three_arg, four_arg|
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
it "accepts two arg lambda" do
|
28
|
+
@indexer.to_field("foo") do |one, two|
|
29
|
+
end
|
30
|
+
end
|
31
|
+
it "accepts three arg lambda" do
|
32
|
+
@indexer.to_field("foo") {|one, two, three| one }
|
33
|
+
end
|
34
|
+
it "accepts variable lambda" do
|
35
|
+
@indexer.to_field("foo") do |*variable|
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# Encoding: UTF-8
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
require 'traject'
|
6
|
+
require 'traject/indexer'
|
7
|
+
require 'traject/marc4j_reader'
|
8
|
+
|
9
|
+
require 'marc'
|
10
|
+
|
11
|
+
describe "Marc4JReader" do
|
12
|
+
it "reads Marc binary" do
|
13
|
+
file = File.new(support_file_path("test_data.utf8.mrc"))
|
14
|
+
settings = Traject::Indexer::Settings.new() # binary type is default
|
15
|
+
reader = Traject::Marc4JReader.new(file, settings)
|
16
|
+
|
17
|
+
array = reader.to_a
|
18
|
+
|
19
|
+
assert_equal 30, array.length
|
20
|
+
first = array.first
|
21
|
+
|
22
|
+
assert_kind_of MARC::Record, first
|
23
|
+
assert first['245']['a'].encoding.name, "UTF-8"
|
24
|
+
end
|
25
|
+
|
26
|
+
it "can skip a bad subfield code" do
|
27
|
+
file = File.new(support_file_path("bad_subfield_code.marc"))
|
28
|
+
settings = Traject::Indexer::Settings.new() # binary type is default
|
29
|
+
reader = Traject::Marc4JReader.new(file, settings)
|
30
|
+
|
31
|
+
array = reader.to_a
|
32
|
+
|
33
|
+
assert_equal 1, array.length
|
34
|
+
assert_kind_of MARC::Record, array.first
|
35
|
+
assert_length 2, array.first['260'].subfields
|
36
|
+
end
|
37
|
+
|
38
|
+
it "reads Marc binary in Marc8 encoding" do
|
39
|
+
file = File.new(support_file_path("one-marc8.mrc"))
|
40
|
+
settings = Traject::Indexer::Settings.new("marc4j_reader.source_encoding" => "MARC8")
|
41
|
+
reader = Traject::Marc4JReader.new(file, settings)
|
42
|
+
|
43
|
+
array = reader.to_a
|
44
|
+
|
45
|
+
assert_length 1, array
|
46
|
+
|
47
|
+
|
48
|
+
assert_kind_of MARC::Record, array.first
|
49
|
+
a245a = array.first['245']['a']
|
50
|
+
|
51
|
+
assert a245a.encoding.name, "UTF-8"
|
52
|
+
assert a245a.valid_encoding?
|
53
|
+
# marc4j converts to denormalized unicode, bah. Although
|
54
|
+
# it's legal, it probably looks weird as a string literal
|
55
|
+
# below, depending on your editor.
|
56
|
+
assert_equal "Por uma outra globalização :", a245a
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
it "reads XML" do
|
61
|
+
file = File.new(support_file_path "test_data.utf8.marc.xml")
|
62
|
+
settings = Traject::Indexer::Settings.new("marc_source.type" => "xml")
|
63
|
+
reader = Traject::Marc4JReader.new(file, settings)
|
64
|
+
|
65
|
+
array = reader.to_a
|
66
|
+
|
67
|
+
assert_equal 30, array.length
|
68
|
+
|
69
|
+
first = array.first
|
70
|
+
|
71
|
+
assert_kind_of MARC::Record, first
|
72
|
+
assert first['245']['a'].encoding.name, "UTF-8"
|
73
|
+
assert_equal "Fikr-i Ayāz /", first['245']['a']
|
74
|
+
end
|
75
|
+
end
|
data/test/marc_extractor_test.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require 'test_helper'
|
4
4
|
require 'traject/marc_extractor'
|
5
5
|
|
6
|
+
require 'marc'
|
6
7
|
|
7
8
|
describe "Traject::MarcExtractor" do
|
8
9
|
describe "#parse_marc_spec" do
|
@@ -161,6 +162,67 @@ describe "Traject::MarcExtractor" do
|
|
161
162
|
assert values.first.include?("Manufacturing consent"), "Extracted value includes title"
|
162
163
|
end
|
163
164
|
|
165
|
+
it "returns empty array if no matching tags" do
|
166
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, "999abc")
|
167
|
+
assert_equal [], values
|
168
|
+
|
169
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, "999")
|
170
|
+
assert_equal [], values
|
171
|
+
end
|
172
|
+
|
173
|
+
it "returns empty array if matching tag but no subfield" do
|
174
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, "245xyz")
|
175
|
+
assert_equal [], values
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
179
|
+
|
180
|
+
describe "with bad data" do
|
181
|
+
it "can ignore an 880 with no $6" do
|
182
|
+
@record = MARC::Reader.new(support_file_path "880_with_no_6.utf8.marc").to_a.first
|
183
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, "001")
|
184
|
+
assert_equal ["3468569"], values
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
describe "#each_matching_line" do
|
189
|
+
before do
|
190
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
191
|
+
@extractor = Traject::MarcExtractor.new(@record, "245abc")
|
192
|
+
end
|
193
|
+
it "yields two args" do
|
194
|
+
called = false
|
195
|
+
@extractor.each_matching_line do |field, spec|
|
196
|
+
called = true
|
197
|
+
assert_kind_of MARC::DataField, field
|
198
|
+
assert_kind_of Hash, spec
|
199
|
+
end
|
200
|
+
assert called, "calls block"
|
201
|
+
end
|
202
|
+
it "yields three args" do
|
203
|
+
called = false
|
204
|
+
@extractor.each_matching_line do |field, spec, extractor|
|
205
|
+
called = true
|
206
|
+
assert_kind_of MARC::DataField, field
|
207
|
+
assert_kind_of Hash, spec
|
208
|
+
assert_kind_of Traject::MarcExtractor, extractor
|
209
|
+
assert_same @extractor, extractor
|
210
|
+
end
|
211
|
+
assert called, "calls block"
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
215
|
+
describe "#collect_matching_lines" do
|
216
|
+
before do
|
217
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
218
|
+
@extractor = Traject::MarcExtractor.new(@record, "245abc")
|
219
|
+
end
|
220
|
+
it "collects with custom block" do
|
221
|
+
results = @extractor.collect_matching_lines do |field, spec, extractor|
|
222
|
+
extractor.collect_subfields(field, spec)
|
223
|
+
end
|
224
|
+
assert_equal ["Manufacturing consent : the political economy of the mass media / Edward S. Herman and Noam Chomsky ; with a new introduction by the authors."], results
|
225
|
+
end
|
164
226
|
end
|
165
227
|
|
166
228
|
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
require 'traject/macros/marc_format_classifier'
|
4
|
+
|
5
|
+
MarcFormatClassifier = Traject::Macros::MarcFormatClassifier
|
6
|
+
|
7
|
+
def classifier_for(filename)
|
8
|
+
record = MARC::Reader.new(support_file_path filename).to_a.first
|
9
|
+
return MarcFormatClassifier.new( record )
|
10
|
+
end
|
11
|
+
|
12
|
+
describe "MarcFormatClassifier" do
|
13
|
+
|
14
|
+
describe "genre" do
|
15
|
+
# We don't have the patience to test every case, just a sampling
|
16
|
+
it "says book" do
|
17
|
+
assert_equal ["Book"], classifier_for("manufacturing_consent.marc").genre
|
18
|
+
end
|
19
|
+
it "says Book for a weird one" do
|
20
|
+
assert_equal ["Book"], classifier_for("microform_online_conference.marc").genre
|
21
|
+
end
|
22
|
+
it "says Musical Recording" do
|
23
|
+
assert_equal ["Musical Recording"], classifier_for("musical_cage.marc").genre
|
24
|
+
end
|
25
|
+
it "says Journal" do
|
26
|
+
assert_equal ["Journal/Newspaper"], classifier_for("the_business_ren.marc").genre
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
describe "print?" do
|
32
|
+
it "says print when it is" do
|
33
|
+
assert classifier_for("manufacturing_consent.marc").print?
|
34
|
+
end
|
35
|
+
it "does not say print for online only" do
|
36
|
+
assert ! classifier_for("online_only.marc").print?
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
describe "online?" do
|
41
|
+
it "says online when it is" do
|
42
|
+
assert classifier_for("online_only.marc").online?
|
43
|
+
assert classifier_for("microform_online_conference.marc").online?
|
44
|
+
assert classifier_for("manuscript_online_thesis.marc").online?
|
45
|
+
end
|
46
|
+
it "does not say online for a print only" do
|
47
|
+
assert ! classifier_for("manufacturing_consent.marc").online?
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
describe "microform?" do
|
52
|
+
it "says microform when it is" do
|
53
|
+
assert classifier_for("microform_online_conference.marc").microform?
|
54
|
+
end
|
55
|
+
it "does not say microform when it ain't" do
|
56
|
+
assert ! classifier_for("manufacturing_consent.marc").microform?
|
57
|
+
assert ! classifier_for("online_only.marc").microform?
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
describe "conference?" do
|
62
|
+
it "says conference when it is" do
|
63
|
+
assert classifier_for("microform_online_conference.marc").proceeding?
|
64
|
+
end
|
65
|
+
it "does not say conference when it ain't" do
|
66
|
+
assert ! classifier_for("manufacturing_consent.marc").proceeding?
|
67
|
+
assert ! classifier_for("online_only.marc").proceeding?
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
describe "thesis?" do
|
72
|
+
it "says thesis when it is" do
|
73
|
+
assert classifier_for("manuscript_online_thesis.marc").thesis?
|
74
|
+
end
|
75
|
+
it "does not say thesis when it ain't" do
|
76
|
+
assert ! classifier_for("manufacturing_consent.marc").thesis?
|
77
|
+
assert ! classifier_for("online_only.marc").thesis?
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
describe "manuscript_archive?" do
|
82
|
+
it "says manuscript when it is" do
|
83
|
+
assert classifier_for("manuscript_online_thesis.marc").manuscript_archive?
|
84
|
+
end
|
85
|
+
it "does not say manuscript when it ain't" do
|
86
|
+
assert ! classifier_for("manufacturing_consent.marc").manuscript_archive?
|
87
|
+
assert ! classifier_for("online_only.marc").manuscript_archive?
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
data/test/marc_reader_test.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
+
# Encoding: UTF-8
|
2
|
+
|
1
3
|
require 'test_helper'
|
2
4
|
require 'traject/marc_reader'
|
5
|
+
require 'marc'
|
3
6
|
|
4
7
|
describe "Traject::MarcReader" do
|
5
8
|
|
@@ -22,8 +25,17 @@ describe "Traject::MarcReader" do
|
|
22
25
|
array = reader.to_a
|
23
26
|
|
24
27
|
assert_equal 30, array.length
|
28
|
+
|
29
|
+
first = array.first
|
30
|
+
|
31
|
+
assert_kind_of MARC::Record, first
|
32
|
+
|
33
|
+
assert first['245']['a'].encoding.name, "UTF-8"
|
34
|
+
assert_equal "Fikr-i Ayāz /", first['245']['a']
|
25
35
|
end
|
26
36
|
|
37
|
+
|
38
|
+
|
27
39
|
|
28
40
|
|
29
41
|
end
|