traject 2.0.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.travis.yml +27 -0
  4. data/.yardopts +3 -0
  5. data/Gemfile +12 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +461 -0
  8. data/Rakefile +21 -0
  9. data/bench/bench.rb +30 -0
  10. data/bin/traject +16 -0
  11. data/doc/batch_execution.md +243 -0
  12. data/doc/extending.md +190 -0
  13. data/doc/indexing_rules.md +265 -0
  14. data/doc/other_commands.md +47 -0
  15. data/doc/settings.md +101 -0
  16. data/lib/tasks/load_maps.rake +48 -0
  17. data/lib/traject.rb +11 -0
  18. data/lib/traject/command_line.rb +301 -0
  19. data/lib/traject/csv_writer.rb +34 -0
  20. data/lib/traject/debug_writer.rb +47 -0
  21. data/lib/traject/delimited_writer.rb +110 -0
  22. data/lib/traject/indexer.rb +613 -0
  23. data/lib/traject/indexer/settings.rb +110 -0
  24. data/lib/traject/json_writer.rb +51 -0
  25. data/lib/traject/line_writer.rb +63 -0
  26. data/lib/traject/macros/basic.rb +9 -0
  27. data/lib/traject/macros/marc21.rb +223 -0
  28. data/lib/traject/macros/marc21_semantics.rb +584 -0
  29. data/lib/traject/macros/marc_format_classifier.rb +197 -0
  30. data/lib/traject/marc_extractor.rb +410 -0
  31. data/lib/traject/marc_reader.rb +89 -0
  32. data/lib/traject/mock_reader.rb +97 -0
  33. data/lib/traject/ndj_reader.rb +40 -0
  34. data/lib/traject/null_writer.rb +22 -0
  35. data/lib/traject/qualified_const_get.rb +40 -0
  36. data/lib/traject/solr_json_writer.rb +277 -0
  37. data/lib/traject/thread_pool.rb +161 -0
  38. data/lib/traject/translation_map.rb +267 -0
  39. data/lib/traject/util.rb +52 -0
  40. data/lib/traject/version.rb +3 -0
  41. data/lib/traject/yaml_writer.rb +9 -0
  42. data/lib/translation_maps/lcc_top_level.yaml +26 -0
  43. data/lib/translation_maps/marc_genre_007.yaml +9 -0
  44. data/lib/translation_maps/marc_genre_leader.yaml +22 -0
  45. data/lib/translation_maps/marc_geographic.yaml +589 -0
  46. data/lib/translation_maps/marc_instruments.yaml +102 -0
  47. data/lib/translation_maps/marc_languages.yaml +490 -0
  48. data/test/debug_writer_test.rb +38 -0
  49. data/test/delimited_writer_test.rb +104 -0
  50. data/test/indexer/each_record_test.rb +59 -0
  51. data/test/indexer/macros_marc21_semantics_test.rb +391 -0
  52. data/test/indexer/macros_marc21_test.rb +190 -0
  53. data/test/indexer/macros_test.rb +40 -0
  54. data/test/indexer/map_record_test.rb +209 -0
  55. data/test/indexer/read_write_test.rb +101 -0
  56. data/test/indexer/settings_test.rb +152 -0
  57. data/test/indexer/to_field_test.rb +77 -0
  58. data/test/marc_extractor_test.rb +412 -0
  59. data/test/marc_format_classifier_test.rb +98 -0
  60. data/test/marc_reader_test.rb +110 -0
  61. data/test/solr_json_writer_test.rb +248 -0
  62. data/test/test_helper.rb +90 -0
  63. data/test/test_support/245_no_ab.marc +1 -0
  64. data/test/test_support/880_with_no_6.utf8.marc +1 -0
  65. data/test/test_support/bad_subfield_code.marc +1 -0
  66. data/test/test_support/bad_utf_byte.utf8.marc +1 -0
  67. data/test/test_support/date_resort_to_260.marc +1 -0
  68. data/test/test_support/date_type_r_missing_date2.marc +1 -0
  69. data/test/test_support/date_with_u.marc +1 -0
  70. data/test/test_support/demo_config.rb +155 -0
  71. data/test/test_support/emptyish_record.marc +1 -0
  72. data/test/test_support/escaped_character_reference.marc8.marc +1 -0
  73. data/test/test_support/george_eliot.marc +1 -0
  74. data/test/test_support/hebrew880s.marc +1 -0
  75. data/test/test_support/louis_armstrong.marc +1 -0
  76. data/test/test_support/manufacturing_consent.marc +1 -0
  77. data/test/test_support/manuscript_online_thesis.marc +1 -0
  78. data/test/test_support/microform_online_conference.marc +1 -0
  79. data/test/test_support/multi_era.marc +1 -0
  80. data/test/test_support/multi_geo.marc +1 -0
  81. data/test/test_support/musical_cage.marc +1 -0
  82. data/test/test_support/nature.marc +1 -0
  83. data/test/test_support/one-marc8.mrc +1 -0
  84. data/test/test_support/online_only.marc +1 -0
  85. data/test/test_support/packed_041a_lang.marc +1 -0
  86. data/test/test_support/test_data.utf8.json +30 -0
  87. data/test/test_support/test_data.utf8.marc.xml +2609 -0
  88. data/test/test_support/test_data.utf8.mrc +1 -0
  89. data/test/test_support/test_data.utf8.mrc.gz +0 -0
  90. data/test/test_support/the_business_ren.marc +1 -0
  91. data/test/translation_map_test.rb +225 -0
  92. data/test/translation_maps/bad_ruby.rb +8 -0
  93. data/test/translation_maps/bad_yaml.yaml +1 -0
  94. data/test/translation_maps/both_map.rb +1 -0
  95. data/test/translation_maps/both_map.yaml +1 -0
  96. data/test/translation_maps/default_literal.rb +10 -0
  97. data/test/translation_maps/default_passthrough.rb +10 -0
  98. data/test/translation_maps/marc_040a_translate_test.yaml +1 -0
  99. data/test/translation_maps/properties_map.properties +5 -0
  100. data/test/translation_maps/ruby_map.rb +10 -0
  101. data/test/translation_maps/translate_array_test.yaml +8 -0
  102. data/test/translation_maps/yaml_map.yaml +7 -0
  103. data/traject.gemspec +47 -0
  104. metadata +382 -0
@@ -0,0 +1,38 @@
1
+ require 'test_helper'
2
+ require 'stringio'
3
+
4
+ require 'traject/debug_writer'
5
+ require 'traject'
6
+ require 'marc'
7
+
8
+ describe 'Simple output' do # DebugWriter spec: maps one MARC record and checks the text written to a StringIO
9
+ before do # builds the record, a two-field indexer, and a writer bound to an in-memory stream
10
+ @record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first # first record in the fixture file
11
+ @indexer = Traject::Indexer.new
12
+ @indexer.instance_eval do
13
+ to_field "id", extract_marc("001", :first => true)
14
+ to_field "title", extract_marc("245ab")
15
+ end
16
+ @io = StringIO.new # in-memory sink so the example can inspect what the writer emitted
17
+ @writer = Traject::DebugWriter.new("output_stream" => @io)
18
+
19
+ @id = "2710183" # id and title values the assertion below expects for this fixture
20
+ @title = "Manufacturing consent : the political economy of the mass media /"
21
+ end
22
+
23
+ it "does a simple output" do
24
+ @writer.put Traject::Indexer::Context.new(:output_hash => @indexer.map_record(@record)) # wrap the mapped hash in a Context before handing it to the writer
25
+ expected = [
26
+ "#{@id} id #{@id}",
27
+ "#{@id} title #{@title}",
28
+ "\n"
29
+ ]
30
+ assert_equal expected.join("\n").gsub(/\s/, ''), @io.string.gsub(/\s/, '') # all whitespace is stripped from both sides, so only non-space content is compared
31
+ @writer.close
32
+
33
+ end
34
+
35
+ end
36
+
37
+
38
+
@@ -0,0 +1,104 @@
1
+ # Encoding: UTF-8
2
+
3
+ require 'test_helper'
4
+ require 'stringio'
5
+ require 'traject/delimited_writer'
6
+ require 'traject/csv_writer'
7
+
8
+ require 'csv'
9
+
10
+ describe "Delimited/CSV Writers" do
11
+
12
+ before do # shared fixtures: in-memory output stream, writer settings, and a stand-in context object
13
+ @out = StringIO.new
14
+ @settings = {'output_stream' => @out, 'delimited_writer.fields' => 'four,one,two'}
15
+ @context = Struct.new(:output_hash).new # anonymous struct exposing only #output_hash
16
+ @context.output_hash = {'one' => 'one', 'two' => %w[two1 two2], 'three' => 'three', 'four' => 'four'} # 'three' is not among the configured fields; 'two' is multi-valued
17
+ end
18
+
19
+ after do # close the StringIO after every example
20
+ @out.close
21
+ end
22
+
23
+ describe "Traject::DelimitedWriter" do
24
+
25
+ it "creates a dw with defaults" do
26
+ dw = Traject::DelimitedWriter.new(@settings)
27
+ dw.delimiter.must_equal "\t"
28
+ dw.internal_delimiter.must_equal '|'
29
+ dw.edelim.must_equal ' '
30
+ dw.eidelim.must_equal '\\|'
31
+ end
32
+
33
+ it "respects different delimiter" do
34
+ @settings['delimited_writer.delimiter'] = '^'
35
+ dw = Traject::DelimitedWriter.new(@settings)
36
+ dw.delimiter.must_equal '^'
37
+ dw.edelim.must_equal '\\^'
38
+ dw.internal_delimiter.must_equal '|'
39
+ end
40
+
41
+ it "outputs a header if asked to" do
42
+ dw = Traject::DelimitedWriter.new(@settings)
43
+ @out.string.chomp.must_equal %w[four one two].join("\t")
44
+ end
45
+
46
+ it "doesn't output a header if asked not to" do
47
+ @settings['delimited_writer.header'] = 'false'
48
+ dw = Traject::DelimitedWriter.new(@settings)
49
+ @out.string.must_be_empty
50
+ end
51
+
52
+ it "deals with multiple values" do
53
+ dw = Traject::DelimitedWriter.new(@settings)
54
+ dw.put @context
55
+ @out.string.split("\n").last.must_equal ['four', 'one', 'two1|two2'].join(dw.delimiter)
56
+ end
57
+
58
+ it "bails if delimited_writer.fields isn't set" do
59
+ @settings.delete 'delimited_writer.fields'
60
+ proc { Traject::DelimitedWriter.new(@settings) }.must_raise(ArgumentError)
61
+ end
62
+
63
+ end
64
+
65
+ describe "Traject::CSVWriter" do
66
+ it "unsets the delimiter" do
67
+ cw = Traject::CSVWriter.new(@settings)
68
+ cw.delimiter.must_be_nil
69
+ end
70
+
71
+ it "writes the header" do
72
+ cw = Traject::CSVWriter.new(@settings)
73
+ @out.string.chomp.must_equal 'four,one,two'
74
+ end
75
+
76
+ it "uses the internal delimiter" do
77
+ cw = Traject::CSVWriter.new(@settings)
78
+ cw.put @context
79
+ @out.string.split("\n").last.must_equal ['four', 'one', 'two1|two2'].join(',')
80
+ end
81
+
82
+ it "produces complex output" do
83
+ @context.output_hash = {
84
+ 'four' => ['Bill Clinton, Jr.', 'Jesse "the Body" Ventura'],
85
+ 'one' => 'Willard "Mitt" Romney',
86
+ 'two' => 'Dueber, Bill'
87
+ }
88
+ canonical = StringIO.new
89
+ csv = CSV.new(canonical)
90
+
91
+ csv_vals = [@context.output_hash['four'].join('|'), @context.output_hash['one'], @context.output_hash['two']]
92
+ csv << csv_vals
93
+ csv_output = canonical.string.chomp
94
+
95
+ cw = Traject::CSVWriter.new(@settings)
96
+ cw.put @context
97
+ traject_csvwriter_output = @out.string.split("\n").last.chomp
98
+
99
+ assert_equal(csv_output, traject_csvwriter_output)
100
+
101
+ end
102
+
103
+ end
104
+ end
@@ -0,0 +1,59 @@
1
+ require 'test_helper'
2
+
3
+ describe "Traject::Indexer#each_record" do # argument-validation examples for Indexer#each_record
4
+ before do
5
+ @indexer = Traject::Indexer.new
6
+ end
7
+
8
+ describe "checks arguments" do
9
+ it "rejects no-arg block" do
10
+ assert_raises(Traject::Indexer::ArityError) do
11
+ @indexer.each_record do
12
+ end
13
+ end
14
+ end
15
+ it "rejects three-arg block" do
16
+ assert_raises(Traject::Indexer::ArityError) do
17
+ @indexer.each_record do |one, two, three|
18
+ end
19
+ end
20
+ end
21
+ it "accepts one-arg block" do # passes as long as the call does not raise
22
+ @indexer.each_record do |record|
23
+ end
24
+ end
25
+ it "accepts two-arg block" do # passes as long as the call does not raise
26
+ @indexer.each_record do |record, context|
27
+ end
28
+ end
29
+ it "accepts variable arity block" do # passes as long as the call does not raise
30
+ @indexer.each_record do |*variable|
31
+ end
32
+ end
33
+
34
+ it "outputs error with source location" do
35
+ begin
36
+ @indexer.to_field('foo') {|one, two| }
37
+ @indexer.each_record {|one, two, three| } # bad arity
38
+ flunk("Should have rejected bad arity ")
39
+ rescue Traject::Indexer::ArityError => e
40
+ assert_match(/each_record at .*\/.*:\d+/, e.message) # message must name each_record followed by a path and line number
41
+ rescue => e # any other StandardError is the wrong failure; report what was actually raised
42
+ flunk("Should only fail with an ArityError, got #{e.class}: #{e.message}")
43
+ end
44
+ end
45
+
46
+ it "rejects each_record with a name (e.g., using a to_field syntax)" do
47
+ assert_raises(Traject::Indexer::NamingError) do
48
+ @indexer.each_record('bad_name') {|one, two| }
49
+ end
50
+ end
51
+
52
+ it "reject each_record with no arguments/blocks at all" do
53
+ assert_raises(ArgumentError) do
54
+ @indexer.each_record()
55
+ end
56
+ end
57
+
58
+ end
59
+ end
@@ -0,0 +1,391 @@
1
+ # Encoding: UTF-8
2
+
3
+ require 'test_helper'
4
+
5
+ require 'traject/indexer'
6
+ require 'traject/macros/marc21_semantics'
7
+
8
+ require 'json'
9
+ require 'marc/record'
10
+
11
+ # See also marc_extractor_test.rb for more detailed tests of MARC extraction;
12
+ # this is just a basic test to make sure our macros work when passing through to it
13
+ # and with their other options.
14
+ describe "Traject::Macros::Marc21Semantics" do
15
+ Marc21Semantics = Traject::Macros::Marc21Semantics # shortcut alias used by the examples in this file
16
+
17
+ before do # fresh indexer with the Marc21Semantics macros mixed in, plus a default fixture record
18
+ @indexer = Traject::Indexer.new
19
+ @indexer.extend Marc21Semantics
20
+
21
+ @record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first # first record in the fixture file; some examples replace it
22
+ end
23
+
24
+ it "oclcnum" do # the oclcnum macro on the default fixture record
25
+ @indexer.instance_eval do
26
+ to_field "oclcnum", oclcnum
27
+ end
28
+ output = @indexer.map_record(@record)
29
+
30
+ assert_equal %w{47971712}, output["oclcnum"]
31
+
32
+ assert_equal({}, @indexer.map_record(empty_record)) # empty_record is a helper not defined in this file (presumably from test_helper); it must map to no fields at all
33
+ end
34
+
35
+ it "deals with all prefixed OCLC nunbers" do
36
+ @record.append(MARC::DataField.new('035', ' ', ' ', ['a', '(OCoLC)ocm111111111']))
37
+ @record.append(MARC::DataField.new('035', ' ', ' ', ['a', '(OCoLC)222222222']))
38
+ @record.append(MARC::DataField.new('035', ' ', ' ', ['a', 'ocm333333333']))
39
+ @record.append(MARC::DataField.new('035', ' ', ' ', ['a', 'ocn444444444']))
40
+ @record.append(MARC::DataField.new('035', ' ', ' ', ['a', '(OCoLC)ocn555555555']))
41
+ @record.append(MARC::DataField.new('035', ' ', ' ', ['a', '(OCoLC)on666666666']))
42
+ @record.append(MARC::DataField.new('035', ' ', ' ', ['a', '777777777'])) # not OCLC number
43
+
44
+ @indexer.instance_eval do
45
+ to_field "oclcnum", oclcnum
46
+ end
47
+ output = @indexer.map_record(@record)
48
+
49
+ assert_equal %w{47971712 111111111 222222222 333333333 444444444 555555555 666666666}, output["oclcnum"]
50
+ end
51
+
52
+
53
+
54
+ it "#marc_series_facet" do # the marc_series_facet macro on the louis_armstrong fixture
55
+ @record = MARC::Reader.new(support_file_path "louis_armstrong.marc").to_a.first
56
+
57
+ @indexer.instance_eval do
58
+ to_field "series_facet", marc_series_facet
59
+ end
60
+ output = @indexer.map_record(@record)
61
+
62
+ # trims punctuation too
63
+ assert_equal ["Big bands"], output["series_facet"]
64
+ assert_equal({}, @indexer.map_record(empty_record)) # a record with no data must map to no fields at all
65
+
66
+ end
67
+
68
+ describe "marc_sortable_author" do # examples for the marc_sortable_author macro, mapped to "author_sort"
69
+ # these probably should be taking only certain subfields, but we're copying
70
+ # from SolrMarc that didn't do so either and nobody noticed, so not bothering for now.
71
+ before do
72
+ @indexer.instance_eval do
73
+ to_field "author_sort", marc_sortable_author
74
+ end
75
+ end
76
+ it "collates author and title" do
77
+ output = @indexer.map_record(@record)
78
+
79
+ assert_equal ["Herman, Edward S. Manufacturing consent the political economy of the mass media Edward S. Herman and Noam Chomsky ; with a new introduction by the authors"], output["author_sort"]
80
+ assert_equal [""], @indexer.map_record(empty_record)['author_sort'] # unlike most macros here, an empty record still yields a field holding one empty string
81
+
82
+ end
83
+ it "respects non-filing" do # the expected value below omits the leading "The" that the title_phrase examples show for this fixture
84
+ @record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first
85
+
86
+ output = @indexer.map_record(@record)
87
+
88
+ assert_equal ["Business renaissance quarterly [electronic resource]."], output["author_sort"]
89
+ assert_equal [""], @indexer.map_record(empty_record)['author_sort']
90
+
91
+ end
92
+ end
93
+
94
+ describe "marc_sortable_title" do # examples for the marc_sortable_title macro, mapped to "title_sort"
95
+ before do
96
+ @indexer.instance_eval { to_field "title_sort", marc_sortable_title }
97
+ end
98
+ it "works" do # uses the default manufacturing_consent record from the outer before block
99
+ output = @indexer.map_record(@record)
100
+ assert_equal ["Manufacturing consent : the political economy of the mass media"], output["title_sort"]
101
+ assert_equal({}, @indexer.map_record(empty_record)) # a record with no data must map to no fields at all
102
+
103
+ end
104
+ it "respects non-filing" do # expected value has no leading article
105
+ @record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first
106
+ output = @indexer.map_record(@record)
107
+
108
+ assert_equal ["Business renaissance quarterly"], output["title_sort"]
109
+ end
110
+ it "works with a record with no 245$ab" do
111
+ @record = MARC::Reader.new(support_file_path "245_no_ab.marc").to_a.first
112
+ output = @indexer.map_record(@record)
113
+ assert_equal ["Papers"], output["title_sort"]
114
+ end
115
+ end
116
+
117
+ describe "marc_languages" do # examples for the marc_languages macro, mapped to "languages"
118
+ before do
119
+ @indexer.instance_eval {to_field "languages", marc_languages() }
120
+ end
121
+
122
+ it "unpacks packed 041a and translates" do # expected values are language names, not codes
123
+ @record = MARC::Reader.new(support_file_path "packed_041a_lang.marc").to_a.first
124
+ output = @indexer.map_record(@record)
125
+
126
+ assert_equal ["English", "French", "German", "Italian", "Spanish", "Russian"], output["languages"]
127
+ assert_equal({}, @indexer.map_record(empty_record)) # a record with no data must map to no fields at all
128
+
129
+ end
130
+ end
131
+
132
+ describe "marc_instrumentation_humanized" do # examples for the marc_instrumentation_humanized macro, mapped to "instrumentation"
133
+ before do
134
+ @record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first # replaces the default record from the outer before block
135
+ @indexer.instance_eval {to_field "instrumentation", marc_instrumentation_humanized }
136
+ end
137
+
138
+ it "translates, de-duping" do # expected values are human-readable labels with no repeats
139
+ output = @indexer.map_record(@record)
140
+
141
+ assert_equal ["Larger ensemble, Unspecified", "Piano", "Soprano voice", "Tenor voice", "Violin", "Larger ensemble, Ethnic", "Guitar", "Voices, Unspecified"], output["instrumentation"]
142
+ assert_equal({}, @indexer.map_record(empty_record)) # a record with no data must map to no fields at all
143
+
144
+ end
145
+ end
146
+
147
+ describe "marc_instrument_codes_normalized" do # examples for the marc_instrument_codes_normalized macro, mapped to "instrument_codes"
148
+ before do
149
+ @record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first # replaces the default record from the outer before block
150
+ @indexer.instance_eval {to_field "instrument_codes", marc_instrument_codes_normalized }
151
+ end
152
+ it "normalizes, de-duping" do # expected list pairs each numbered code (e.g. "ka01") with its bare form ("ka")
153
+ output = @indexer.map_record(@record)
154
+
155
+ assert_equal ["on", "ka01", "ka", "va01", "va", "vd01", "vd", "sa01", "sa", "oy", "tb01", "tb", "vn12", "vn"],
156
+ output["instrument_codes"]
157
+ end
158
+ it "codes soloist 048$b" do # expected values carry a ".s" suffix alongside the plain codes
159
+ @record = MARC::Reader.new(support_file_path "louis_armstrong.marc").to_a.first
160
+ output = @indexer.map_record(@record)
161
+
162
+ assert_equal ["bb01", "bb01.s", "bb", "bb.s", "oe"], output["instrument_codes"]
163
+ assert_equal({}, @indexer.map_record(empty_record)) # a record with no data must map to no fields at all
164
+
165
+ end
166
+ end
167
+
168
+ describe "publication_date" do
169
+ # there are way too many edge cases for us to test em all, but we'll test some of em.
170
+
171
+ it "works when there's no date information" do
172
+ assert_equal nil, Marc21Semantics.publication_date(empty_record)
173
+ end
174
+
175
+ it "uses macro correctly with no date info" do
176
+ @indexer.instance_eval {to_field "date", marc_publication_date }
177
+ assert_equal({}, @indexer.map_record(empty_record))
178
+ end
179
+
180
+
181
+ it "pulls out 008 date_type s" do
182
+ @record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
183
+ assert_equal 2002, Marc21Semantics.publication_date(@record)
184
+
185
+ end
186
+ it "uses start date for date_type c continuing resource" do
187
+ @record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first
188
+ assert_equal 2006, Marc21Semantics.publication_date(@record)
189
+ end
190
+ it "returns nil when the records really got nothing" do
191
+ @record = MARC::Reader.new(support_file_path "emptyish_record.marc").to_a.first
192
+ assert_equal nil, Marc21Semantics.publication_date(@record)
193
+ end
194
+ it "estimates with a single 'u'" do
195
+ @record = MARC::Reader.new(support_file_path "date_with_u.marc").to_a.first
196
+ # was 184u as date1 on a continuing resource. For continuing resources,
197
+ # we take the first date. And need to deal with the u.
198
+ assert_equal 1845, Marc21Semantics.publication_date(@record)
199
+ end
200
+ it "resorts to 260c" do
201
+ @record = MARC::Reader.new(support_file_path "date_resort_to_260.marc").to_a.first
202
+ assert_equal 1980, Marc21Semantics.publication_date(@record)
203
+ end
204
+ it "works with date type r missing date2" do
205
+ @record = MARC::Reader.new(support_file_path "date_type_r_missing_date2.marc").to_a.first
206
+ assert_equal 1957, Marc21Semantics.publication_date(@record)
207
+ end
208
+
209
+ it "works correctly with date type 'q'" do
210
+ val = @record['008'].value
211
+ val[6] = 'q'
212
+ val[7..10] = '191u'
213
+ val[11..14] = '192u'
214
+ @record['008'].value = val
215
+
216
+ # Date should be date1 + date2 / 2 = (1910 + 1929) / 2 = 1919
217
+ estimate_tolerance = 30
218
+ assert_equal 1919, Marc21Semantics.publication_date(@record, estimate_tolerance)
219
+ end
220
+ end
221
+
222
+ describe "marc_lcc_to_broad_category" do # examples for the marc_lcc_to_broad_category macro, mapped to "discipline_facet"
223
+ before do
224
+ @indexer.instance_eval {to_field "discipline_facet", marc_lcc_to_broad_category }
225
+ end
226
+ it "maps a simple example" do
227
+ @record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
228
+ output = @indexer.map_record(@record)
229
+
230
+ assert_equal ["Language & Literature"], output["discipline_facet"]
231
+
232
+ end
233
+ it "maps to default" do # with no :default option given, the fallback value is "Unknown"
234
+ @record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first
235
+ output = @indexer.map_record(@record)
236
+ assert_equal ["Unknown"], output["discipline_facet"]
237
+ assert_equal(["Unknown"], @indexer.map_record(empty_record)['discipline_facet']) # the default also applies to an empty record
238
+ end
239
+
240
+ it "maps to nothing if none and no default" do # adds a second field configured with :default => nil
241
+ @indexer.instance_eval {to_field "discipline_no_default", marc_lcc_to_broad_category(:default => nil)}
242
+ @record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first
243
+ output = @indexer.map_record(@record)
244
+
245
+ assert_nil output["discipline_no_default"]
246
+
247
+ assert_nil @indexer.map_record(empty_record)["discipline_no_default"]
248
+
249
+ end
250
+
251
+ describe "LCC_REGEX" do
252
+ it "rejects a non-LCC" do
253
+ refute_match Traject::Macros::Marc21Semantics::LCC_REGEX, "Film no. A .N285"
254
+ end
255
+ end
256
+
257
+ end
258
+
259
+ describe "marc_geo_facet" do # examples for the marc_geo_facet macro, mapped to "geo_facet"
260
+ before do
261
+ @indexer.instance_eval {to_field "geo_facet", marc_geo_facet }
262
+ end
263
+ it "maps a complicated record" do
264
+ @record = MARC::Reader.new(support_file_path "multi_geo.marc").to_a.first
265
+ output = @indexer.map_record(@record)
266
+
267
+ assert_equal ["Europe", "Middle East", "Africa, North", "Agora (Athens, Greece)", "Rome (Italy)", "Italy"], output["geo_facet"]
268
+ assert_equal({}, @indexer.map_record(empty_record)) # a record with no data must map to no fields at all
269
+ end
270
+ it "maps nothing on a record with no geo" do # the field is absent from the output rather than present and empty
271
+ @record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
272
+ output = @indexer.map_record(@record)
273
+ assert_nil output["geo_facet"]
274
+ assert_equal({}, @indexer.map_record(empty_record))
275
+
276
+ end
277
+ end
278
+
279
+ describe "marc_era_facet" do # examples for the marc_era_facet macro, mapped to "era_facet"
280
+ before do
281
+ @indexer.instance_eval {to_field "era_facet", marc_era_facet}
282
+ end
283
+ it "maps a complicated record" do
284
+ @record = MARC::Reader.new(support_file_path "multi_era.marc").to_a.first
285
+ output = @indexer.map_record(@record)
286
+
287
+ assert_equal ["Early modern, 1500-1700", "17th century", "Great Britain: Puritan Revolution, 1642-1660", "Great Britain: Civil War, 1642-1649", "1642-1660"],
288
+ output["era_facet"]
289
+ assert_equal({}, @indexer.map_record(empty_record)) # a record with no data must map to no fields at all
290
+
291
+ end
292
+ end
293
+
294
+ describe "marc_lcsh_display" do
295
+ it "formats typical field" do
296
+ field = MARC::DataField.new('650', ' ', ' ', ['a', 'Psychoanalysis and literature'], ['z', 'England'], ['x', 'History'], ['y', '19th century.'])
297
+ str = Marc21Semantics.assemble_lcsh(field)
298
+
299
+ assert_equal "Psychoanalysis and literature — England — History — 19th century", str
300
+
301
+ end
302
+
303
+ it "ignores numeric subfields" do
304
+ field = MARC::DataField.new('650', ' ', ' ', ['a', 'Psychoanalysis and literature'], ['x', 'History'], ['0', '01234'], ['3', 'Some part'])
305
+ str = Marc21Semantics.assemble_lcsh(field)
306
+
307
+ assert_equal "Psychoanalysis and literature — History", str
308
+ end
309
+
310
+ it "doesn't put subdivision in wrong place" do
311
+ field = MARC::DataField.new('600', ' ', ' ', ['a', 'Eliot, George,'],['d', '1819-1880.'], ['t', 'Middlemarch'])
312
+ str = Marc21Semantics.assemble_lcsh(field)
313
+
314
+ assert_equal "Eliot, George, 1819-1880. Middlemarch", str
315
+ end
316
+
317
+ it "mixes non-subdivisions with subdivisions" do
318
+ field = MARC::DataField.new('600', ' ', ' ', ['a', 'Eliot, George,'],['d', '1819-1880.'], ['t', 'Middlemarch'], ['x', 'Criticism.'])
319
+ str = Marc21Semantics.assemble_lcsh(field)
320
+
321
+ assert_equal "Eliot, George, 1819-1880. Middlemarch — Criticism", str
322
+ end
323
+
324
+ it "returns nil for a field with no relevant subfields" do
325
+ field = MARC::DataField.new('650', ' ', ' ')
326
+ assert_nil Marc21Semantics.assemble_lcsh(field)
327
+ end
328
+
329
+ describe "marc_lcsh_formatted macro" do
330
+ it "smoke test" do
331
+ @record = MARC::Reader.new(support_file_path "george_eliot.marc").to_a.first
332
+ @indexer.instance_eval {to_field "lcsh", marc_lcsh_formatted}
333
+ output = @indexer.map_record(@record)
334
+
335
+ assert output["lcsh"].length > 0, "outputs data"
336
+ assert output["lcsh"].include?("Eliot, George, 1819-1880 — Characters"), "includes a string its supposed to"
337
+
338
+ assert_equal({}, @indexer.map_record(empty_record))
339
+
340
+ end
341
+ end
342
+ end
343
+
344
+ describe "extract_marc_filing_version" do # examples for the extract_marc_filing_version macro, mapped to "title_phrase"
345
+ before do
346
+ @record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first # replaces the default record from the outer before block
347
+ end
348
+
349
+ it "works as expected" do # expected value has no leading "The"
350
+ @indexer.instance_eval do
351
+ to_field 'title_phrase', extract_marc_filing_version('245ab')
352
+ end
353
+ output = @indexer.map_record(@record)
354
+ assert_equal ['Business renaissance quarterly'], output['title_phrase']
355
+ assert_equal({}, @indexer.map_record(empty_record)) # a record with no data must map to no fields at all
356
+
357
+ end
358
+
359
+ it "works with :include_original" do # original title comes first, then the filing version
360
+ @indexer.instance_eval do
361
+ to_field 'title_phrase', extract_marc_filing_version('245ab', :include_original=>true)
362
+ end
363
+ output = @indexer.map_record(@record)
364
+ assert_equal ['The Business renaissance quarterly', 'Business renaissance quarterly'], output['title_phrase']
365
+ assert_equal({}, @indexer.map_record(empty_record))
366
+ end
367
+
368
+ it "doesn't do anything if you don't include the first subfield" do # '245h' alone is returned unchanged
369
+ @indexer.instance_eval do
370
+ to_field 'title_phrase', extract_marc_filing_version('245h')
371
+ end
372
+ output = @indexer.map_record(@record)
373
+ assert_equal ['[electronic resource].'], output['title_phrase']
374
+ assert_equal({}, @indexer.map_record(empty_record))
375
+
376
+ end
377
+
378
+
379
+ it "dies if you pass it something else" do # the extra :uniq option must raise when the rule is defined, before any record is mapped
380
+ assert_raises(RuntimeError) do
381
+ @indexer.instance_eval do
382
+ to_field 'title_phrase', extract_marc_filing_version('245ab', :include_original=>true, :uniq => true)
383
+ end
384
+ end
385
+ end
386
+
387
+ end
388
+
389
+
390
+
391
+ end