traject 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +1 -0
- data/README.md +183 -191
- data/bench/bench.rb +1 -1
- data/doc/batch_execution.md +14 -0
- data/doc/extending.md +14 -12
- data/doc/indexing_rules.md +265 -0
- data/lib/traject/command_line.rb +12 -41
- data/lib/traject/debug_writer.rb +32 -13
- data/lib/traject/indexer.rb +101 -24
- data/lib/traject/indexer/settings.rb +18 -17
- data/lib/traject/json_writer.rb +32 -11
- data/lib/traject/line_writer.rb +6 -6
- data/lib/traject/macros/basic.rb +1 -1
- data/lib/traject/macros/marc21.rb +17 -13
- data/lib/traject/macros/marc21_semantics.rb +27 -25
- data/lib/traject/macros/marc_format_classifier.rb +39 -25
- data/lib/traject/marc4j_reader.rb +36 -22
- data/lib/traject/marc_extractor.rb +79 -75
- data/lib/traject/marc_reader.rb +33 -25
- data/lib/traject/mock_reader.rb +9 -10
- data/lib/traject/ndj_reader.rb +7 -7
- data/lib/traject/null_writer.rb +1 -1
- data/lib/traject/qualified_const_get.rb +12 -2
- data/lib/traject/solrj_writer.rb +61 -52
- data/lib/traject/thread_pool.rb +45 -45
- data/lib/traject/translation_map.rb +59 -27
- data/lib/traject/util.rb +3 -3
- data/lib/traject/version.rb +1 -1
- data/lib/traject/yaml_writer.rb +1 -1
- data/test/debug_writer_test.rb +7 -7
- data/test/indexer/each_record_test.rb +4 -4
- data/test/indexer/macros_marc21_semantics_test.rb +12 -12
- data/test/indexer/macros_marc21_test.rb +10 -10
- data/test/indexer/macros_test.rb +1 -1
- data/test/indexer/map_record_test.rb +6 -6
- data/test/indexer/read_write_test.rb +43 -4
- data/test/indexer/settings_test.rb +2 -2
- data/test/indexer/to_field_test.rb +8 -8
- data/test/marc4j_reader_test.rb +4 -4
- data/test/marc_extractor_test.rb +33 -25
- data/test/marc_format_classifier_test.rb +3 -3
- data/test/marc_reader_test.rb +2 -2
- data/test/test_helper.rb +3 -3
- data/test/test_support/demo_config.rb +52 -48
- data/test/translation_map_test.rb +22 -4
- data/test/translation_maps/bad_ruby.rb +2 -2
- data/test/translation_maps/both_map.rb +1 -1
- data/test/translation_maps/default_literal.rb +1 -1
- data/test/translation_maps/default_passthrough.rb +1 -1
- data/test/translation_maps/ruby_map.rb +1 -1
- metadata +7 -31
- data/doc/macros.md +0 -103
@@ -116,7 +116,7 @@ describe "Traject::Indexer#settings" do
|
|
116
116
|
|
117
117
|
describe "inspect" do
|
118
118
|
it "keeps keys ending in 'password' out of inspect" do
|
119
|
-
settings = Traject::Indexer::Settings.new("a" => "a",
|
119
|
+
settings = Traject::Indexer::Settings.new("a" => "a",
|
120
120
|
"password" => "password", "some_password" => "password",
|
121
121
|
"some.password" => "password")
|
122
122
|
|
@@ -125,4 +125,4 @@ describe "Traject::Indexer#settings" do
|
|
125
125
|
end
|
126
126
|
end
|
127
127
|
|
128
|
-
end
|
128
|
+
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
3
|
describe "Traject::Indexer.to_field" do
|
4
|
-
before do
|
4
|
+
before do
|
5
5
|
@indexer = Traject::Indexer.new
|
6
6
|
end
|
7
7
|
describe "checks it's arguments" do
|
@@ -14,7 +14,7 @@ describe "Traject::Indexer.to_field" do
|
|
14
14
|
it "rejects non-string first arg" do
|
15
15
|
assert_raises(Traject::Indexer::NamingError) {@indexer.to_field(:symbol)}
|
16
16
|
end
|
17
|
-
|
17
|
+
|
18
18
|
it "rejects one-arg lambda" do
|
19
19
|
assert_raises(Traject::Indexer::ArityError) do
|
20
20
|
@indexer.to_field("foo") do |one_arg|
|
@@ -22,7 +22,7 @@ describe "Traject::Indexer.to_field" do
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
it "rejects four-arg lambda" do
|
25
|
-
assert_raises(Traject::Indexer::ArityError) do
|
25
|
+
assert_raises(Traject::Indexer::ArityError) do
|
26
26
|
@indexer.to_field("foo") do |one_arg, two_arg, three_arg, four_arg|
|
27
27
|
end
|
28
28
|
end
|
@@ -39,7 +39,7 @@ describe "Traject::Indexer.to_field" do
|
|
39
39
|
end
|
40
40
|
end
|
41
41
|
end
|
42
|
-
|
42
|
+
|
43
43
|
it "outputs error with source location" do
|
44
44
|
begin
|
45
45
|
@indexer.to_field('foo') {|one, two| }
|
@@ -51,7 +51,7 @@ describe "Traject::Indexer.to_field" do
|
|
51
51
|
flunk("Should only fail with a NamingError")
|
52
52
|
end
|
53
53
|
end
|
54
|
-
|
54
|
+
|
55
55
|
# Just verifying this is how it works
|
56
56
|
it "doesn't allow you to just wholesale assignment to the accumulator" do
|
57
57
|
@indexer.to_field('foo') do |rec, acc|
|
@@ -60,7 +60,7 @@ describe "Traject::Indexer.to_field" do
|
|
60
60
|
output = @indexer.map_record('never looked at')
|
61
61
|
assert_equal nil, output['foo']
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
it "allows use of accumulator.replace" do
|
65
65
|
@indexer.to_field('foo') do |rec, acc|
|
66
66
|
acc.replace ['hello']
|
@@ -68,8 +68,8 @@ describe "Traject::Indexer.to_field" do
|
|
68
68
|
output = @indexer.map_record('never looked at')
|
69
69
|
assert_equal ['hello'], output['foo']
|
70
70
|
end
|
71
|
-
|
72
|
-
|
71
|
+
|
72
|
+
|
73
73
|
end
|
74
74
|
|
75
75
|
|
data/test/marc4j_reader_test.rb
CHANGED
@@ -75,7 +75,7 @@ describe "Marc4JReader" do
|
|
75
75
|
assert first['245']['a'].encoding.name, "UTF-8"
|
76
76
|
assert_equal "Fikr-i Ayāz /", first['245']['a']
|
77
77
|
end
|
78
|
-
|
78
|
+
|
79
79
|
it "keeps marc4j object when asked" do
|
80
80
|
file = File.new(support_file_path "test_data.utf8.marc.xml")
|
81
81
|
settings = Traject::Indexer::Settings.new("marc_source.type" => "xml", 'marc4j_reader.keep_marc4j' => true)
|
@@ -83,6 +83,6 @@ describe "Marc4JReader" do
|
|
83
83
|
assert_kind_of MARC::Record, record
|
84
84
|
assert_kind_of Java::org.marc4j.marc.impl::RecordImpl, record.original_marc4j
|
85
85
|
end
|
86
|
-
|
87
|
-
|
88
|
-
end
|
86
|
+
|
87
|
+
|
88
|
+
end
|
data/test/marc_extractor_test.rb
CHANGED
@@ -6,6 +6,14 @@ require 'traject/marc_extractor'
|
|
6
6
|
require 'marc'
|
7
7
|
|
8
8
|
describe "Traject::MarcExtractor" do
|
9
|
+
it "is frozen read-only" do
|
10
|
+
extractor = Traject::MarcExtractor.new("100abcde", :seperator => ";")
|
11
|
+
assert extractor.frozen?
|
12
|
+
assert extractor.spec_hash.frozen?
|
13
|
+
assert extractor.options.frozen?
|
14
|
+
end
|
15
|
+
|
16
|
+
|
9
17
|
describe "#parse_marc_spec" do
|
10
18
|
it "parses single spec with all elements" do
|
11
19
|
parsed = Traject::MarcExtractor.parse_string_spec("245|1*|abcg")
|
@@ -17,7 +25,7 @@ describe "Traject::MarcExtractor" do
|
|
17
25
|
|
18
26
|
assert_equal "1", spec.indicator1
|
19
27
|
assert_nil spec.indicator2
|
20
|
-
|
28
|
+
|
21
29
|
assert_kind_of Array, spec.subfields
|
22
30
|
end
|
23
31
|
|
@@ -44,7 +52,7 @@ describe "Traject::MarcExtractor" do
|
|
44
52
|
#700-*4bcd
|
45
53
|
assert spec700
|
46
54
|
assert_nil spec700.indicator1
|
47
|
-
assert_equal "4", spec700.indicator2
|
55
|
+
assert_equal "4", spec700.indicator2
|
48
56
|
assert_equal %w{b c d}, spec700.subfields
|
49
57
|
end
|
50
58
|
|
@@ -54,7 +62,7 @@ describe "Traject::MarcExtractor" do
|
|
54
62
|
assert_equal 5, parsed["005"].first.bytes
|
55
63
|
assert_equal 7..10, parsed["008"].first.bytes
|
56
64
|
end
|
57
|
-
|
65
|
+
|
58
66
|
it "allows arrays of specs" do
|
59
67
|
parsed = Traject::MarcExtractor.parse_string_spec %w(
|
60
68
|
245abcde
|
@@ -63,7 +71,7 @@ describe "Traject::MarcExtractor" do
|
|
63
71
|
)
|
64
72
|
assert_length 3, parsed
|
65
73
|
end
|
66
|
-
|
74
|
+
|
67
75
|
it "allows mixture of array and colon-delimited specs" do
|
68
76
|
parsed = Traject::MarcExtractor.parse_string_spec %w(
|
69
77
|
245abcde
|
@@ -73,8 +81,8 @@ describe "Traject::MarcExtractor" do
|
|
73
81
|
)
|
74
82
|
assert_length 6, parsed
|
75
83
|
end
|
76
|
-
|
77
|
-
|
84
|
+
|
85
|
+
|
78
86
|
end
|
79
87
|
|
80
88
|
# Mostly an internal method, not necessarily API, but
|
@@ -107,11 +115,11 @@ describe "Traject::MarcExtractor" do
|
|
107
115
|
end
|
108
116
|
it "does not find spec for 880 if disabled" do
|
109
117
|
@extractor = Traject::MarcExtractor.new("245", :alternate_script => false)
|
110
|
-
|
118
|
+
assert_equal [], @extractor.specs_covering_field(@a880_245)
|
111
119
|
end
|
112
120
|
it "finds only 880 if so configured" do
|
113
121
|
@extractor = Traject::MarcExtractor.new("245", :alternate_script => :only)
|
114
|
-
|
122
|
+
assert_equal [], @extractor.specs_covering_field(@a245)
|
115
123
|
assert_equal([Traject::MarcExtractor::Spec.new(:tag => "245")], @extractor.specs_covering_field(@a880_245))
|
116
124
|
end
|
117
125
|
end
|
@@ -294,7 +302,7 @@ describe "Traject::MarcExtractor" do
|
|
294
302
|
it "creates" do
|
295
303
|
extractor = Traject::MarcExtractor.cached("245abc", :separator => nil)
|
296
304
|
spec_hash = extractor.spec_hash
|
297
|
-
|
305
|
+
|
298
306
|
assert extractor.options[:separator].nil?, "extractor options[:separator] is nil"
|
299
307
|
assert_equal({"245"=>[Traject::MarcExtractor::Spec.new(:tag => "245", :subfields=>["a", "b", "c"])]}, spec_hash)
|
300
308
|
end
|
@@ -311,23 +319,23 @@ describe "Traject::MarcExtractor" do
|
|
311
319
|
before do
|
312
320
|
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
313
321
|
end
|
314
|
-
|
322
|
+
|
315
323
|
it "allows repated tags for a variable field" do
|
316
324
|
extractor = Traject::MarcExtractor.new("245a:245b")
|
317
325
|
values = extractor.extract(@record)
|
318
326
|
assert_equal ['Manufacturing consent :', 'the political economy of the mass media /'], values
|
319
327
|
end
|
320
|
-
|
328
|
+
|
321
329
|
it "allows repeated tags with indicators specs" do
|
322
330
|
extractor = Traject::MarcExtractor.new("245|1*|a:245|2*|b")
|
323
331
|
@record.append(MARC::DataField.new('245', '2', '0', ['a', 'Subfield A Value'], ['b', 'Subfield B Value']))
|
324
332
|
results = extractor.extract(@record)
|
325
333
|
assert_equal ['Manufacturing consent :', 'Subfield B Value'], results
|
326
334
|
end
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
335
|
+
|
336
|
+
|
337
|
+
|
338
|
+
|
331
339
|
it "provides multiple values for repeated subfields with single specified subfield" do
|
332
340
|
ex = Traject::MarcExtractor.new("245a")
|
333
341
|
f = @record.fields('245').first
|
@@ -345,7 +353,7 @@ describe "Traject::MarcExtractor" do
|
|
345
353
|
results = ex.extract(@record)
|
346
354
|
assert_equal ["#{title_a} #{title_a}"], results
|
347
355
|
end
|
348
|
-
|
356
|
+
|
349
357
|
it "provides single value for repeated subfields with multiple specified subfields" do
|
350
358
|
ex = Traject::MarcExtractor.new("245ab")
|
351
359
|
f = @record.fields('245').first
|
@@ -354,9 +362,9 @@ describe "Traject::MarcExtractor" do
|
|
354
362
|
f.append(MARC::Subfield.new('a', title_a))
|
355
363
|
results = ex.extract(@record)
|
356
364
|
assert_equal ["#{title_a} #{title_b} #{title_a}"], results
|
357
|
-
|
365
|
+
|
358
366
|
end
|
359
|
-
|
367
|
+
|
360
368
|
it "provides single value for repeated subfields with no specified subfield" do
|
361
369
|
ex = Traject::MarcExtractor.new("245")
|
362
370
|
f = @record.fields('245').first
|
@@ -365,10 +373,10 @@ describe "Traject::MarcExtractor" do
|
|
365
373
|
results = ex.extract(@record)
|
366
374
|
assert_equal 1, results.size
|
367
375
|
end
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
376
|
+
|
377
|
+
|
378
|
+
|
379
|
+
|
372
380
|
it "allows repeated tags for a control field" do
|
373
381
|
extractor = Traject::MarcExtractor.new("001[0-1]:001[0-3]")
|
374
382
|
values = extractor.extract(@record)
|
@@ -392,13 +400,13 @@ describe "Traject::MarcExtractor" do
|
|
392
400
|
describe "MarcExtractor::Spec" do
|
393
401
|
describe "==" do
|
394
402
|
it "equals when equal" do
|
395
|
-
assert_equal Traject::MarcExtractor::Spec.new(:subfields => %w{a b c}), Traject::MarcExtractor::Spec.new(:subfields => %w{a b c})
|
403
|
+
assert_equal Traject::MarcExtractor::Spec.new(:subfields => %w{a b c}), Traject::MarcExtractor::Spec.new(:subfields => %w{a b c})
|
396
404
|
end
|
397
405
|
it "does not equal when not" do
|
398
|
-
refute_equal Traject::MarcExtractor::Spec.new(:subfields => %w{a b c}), Traject::MarcExtractor::Spec.new(:subfields => %w{a b c}, :indicator2 => '1')
|
406
|
+
refute_equal Traject::MarcExtractor::Spec.new(:subfields => %w{a b c}), Traject::MarcExtractor::Spec.new(:subfields => %w{a b c}, :indicator2 => '1')
|
399
407
|
end
|
400
408
|
end
|
401
409
|
end
|
402
|
-
|
410
|
+
|
403
411
|
|
404
412
|
end
|
@@ -63,7 +63,7 @@ describe "MarcFormatClassifier" do
|
|
63
63
|
|
64
64
|
describe "conference?" do
|
65
65
|
it "says conference when it is" do
|
66
|
-
assert classifier_for("microform_online_conference.marc").proceeding?
|
66
|
+
assert classifier_for("microform_online_conference.marc").proceeding?
|
67
67
|
end
|
68
68
|
it "does not say conference when it ain't" do
|
69
69
|
assert ! classifier_for("manufacturing_consent.marc").proceeding?
|
@@ -89,6 +89,6 @@ describe "MarcFormatClassifier" do
|
|
89
89
|
assert ! classifier_for("manufacturing_consent.marc").manuscript_archive?
|
90
90
|
assert ! classifier_for("online_only.marc").manuscript_archive?
|
91
91
|
end
|
92
|
-
end
|
92
|
+
end
|
93
93
|
|
94
|
-
end
|
94
|
+
end
|
data/test/marc_reader_test.rb
CHANGED
data/test/test_helper.rb
CHANGED
@@ -11,7 +11,7 @@ require 'marc'
|
|
11
11
|
STDERR.sync = true
|
12
12
|
|
13
13
|
# Hacky way to turn off Indexer logging by default, say only
|
14
|
-
# log things higher than fatal, which is nothing.
|
14
|
+
# log things higher than fatal, which is nothing.
|
15
15
|
require 'traject/indexer/settings'
|
16
16
|
Traject::Indexer::Settings.defaults["log.level"] = "gt.fatal"
|
17
17
|
|
@@ -38,7 +38,7 @@ def assert_start_with(start_with, obj, msg = nil)
|
|
38
38
|
end
|
39
39
|
|
40
40
|
# pretends to be a SolrJ HTTPServer-like thing, just kind of mocks it up
|
41
|
-
# and records what happens and simulates errors in some cases.
|
41
|
+
# and records what happens and simulates errors in some cases.
|
42
42
|
class MockSolrServer
|
43
43
|
attr_accessor :things_added, :url, :committed, :parser, :shutted_down
|
44
44
|
|
@@ -75,4 +75,4 @@ class MockSolrServer
|
|
75
75
|
@shutted_down = true
|
76
76
|
end
|
77
77
|
|
78
|
-
end
|
78
|
+
end
|
@@ -1,40 +1,42 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
#
|
4
|
-
#
|
1
|
+
# A sample traject configuration, save as say `traject_config.rb`, then
|
2
|
+
# run `traject -c traject_config.rb marc_file.marc` to index to
|
3
|
+
# solr specified in config file, according to rules specified in
|
4
|
+
# config file
|
5
5
|
|
6
|
+
|
7
|
+
# To have access to various built-in logic
|
8
|
+
# for pulling things out of MARC21, like `marc_languages`
|
6
9
|
require 'traject/macros/marc21_semantics'
|
7
10
|
extend Traject::Macros::Marc21Semantics
|
8
11
|
|
12
|
+
# To have access to the traject marc format/carrier classifier
|
9
13
|
require 'traject/macros/marc_format_classifier'
|
10
14
|
extend Traject::Macros::MarcFormats
|
11
15
|
|
12
|
-
settings do
|
13
|
-
#provide "solr.url", "http://catsolrmaster.library.jhu.edu:8985/solr/master_prod"
|
14
16
|
|
17
|
+
# In this case for simplicity we provide all our settings, including
|
18
|
+
# solr connection details, in this one file. But you could choose
|
19
|
+
# to separate them into another config file; divide things between
|
20
|
+
# files however you like, you can call traject with as many
|
21
|
+
# config files as you like, `traject -c one.rb -c two.rb -c etc.rb`
|
22
|
+
settings do
|
15
23
|
provide "solr.url", "http://blacklight.mse.jhu.edu:8983/solr/prod"
|
24
|
+
|
25
|
+
# Only if you need to connect to a Solr 1.x:
|
16
26
|
provide "solrj_writer.parser_class_name", "XMLResponseParser"
|
17
27
|
|
18
28
|
provide "solrj_writer.commit_on_close", true
|
19
|
-
|
20
|
-
#require 'traject/marc4j_reader'
|
21
|
-
#store "reader_class_name", "Marc4JReader"
|
22
29
|
end
|
23
30
|
|
31
|
+
# Extract first 001, then supply code block to add "bib_" prefix to it
|
24
32
|
to_field "id", extract_marc("001", :first => true) do |marc_record, accumulator, context|
|
25
33
|
accumulator.collect! {|s| "bib_#{s}"}
|
26
|
-
|
27
|
-
# A way to intentionally add errors
|
28
|
-
#if context.position % 10 == 0
|
29
|
-
# intentionally add another one to error
|
30
|
-
# accumulator << "ANOTHER"
|
31
|
-
#end
|
32
|
-
|
33
34
|
end
|
34
35
|
|
36
|
+
# An exact literal string, always this string:
|
35
37
|
to_field "source", literal("traject_test_last")
|
36
38
|
|
37
|
-
to_field "marc_display", serialized_marc(:format => "binary", :binary_escape => false)
|
39
|
+
to_field "marc_display", serialized_marc(:format => "binary", :binary_escape => false, :allow_oversized => true)
|
38
40
|
|
39
41
|
to_field "text", extract_all_marc_values
|
40
42
|
|
@@ -56,9 +58,12 @@ to_field "title_t", extract_marc("245ak")
|
|
56
58
|
to_field "title1_t", extract_marc("245abk")
|
57
59
|
to_field "title2_t", extract_marc("245nps:130:240abcdefgklmnopqrs:210ab:222ab:242abcehnp:243abcdefgklmnopqrs:246abcdefgnp:247abcdefgnp")
|
58
60
|
to_field "title3_t", extract_marc("700gklmnoprst:710fgklmnopqrst:711fgklnpst:730abdefgklmnopqrst:740anp:505t:780abcrst:785abcrst:773abrst")
|
61
|
+
|
62
|
+
# Note we can mention the same field twice, these
|
63
|
+
# ones will be added on to what's already there. Some custom
|
64
|
+
# logic for extracting 505$t, but only from 505 field that
|
65
|
+
# also has $r -- we consider that more likely to be a titleish string
|
59
66
|
to_field "title3_t" do |record, accumulator|
|
60
|
-
# also add in 505$t only if the 505 has an $r -- we consider this likely to be
|
61
|
-
# a titleish string, if there's a 505$r
|
62
67
|
record.each_by_tag('505') do |field|
|
63
68
|
if field['r']
|
64
69
|
accumulator.concat field.subfields.collect {|sf| sf.value if sf.code == 't'}.compact
|
@@ -66,7 +71,7 @@ to_field "title3_t" do |record, accumulator|
|
|
66
71
|
end
|
67
72
|
end
|
68
73
|
|
69
|
-
to_field "title_display", extract_marc("245abk", :
|
74
|
+
to_field "title_display", extract_marc("245abk", :trim_punctuation => true, :first => true)
|
70
75
|
to_field "title_sort", marc_sortable_title
|
71
76
|
|
72
77
|
to_field "title_series_t", extract_marc("440a:490a:800abcdt:400abcd:810abcdt:410abcd:811acdeft:411acdef:830adfgklmnoprst:760ast:762ast")
|
@@ -84,7 +89,7 @@ to_field "author_facet", extract_marc("100abcdq:110abcdgnu:111acdenqu:700
|
|
84
89
|
to_field "subject_t", extract_marc("600:610:611:630:650:651avxyz:653aa:654abcvyz:655abcvxyz:690abcdxyz:691abxyz:692abxyz:693abxyz:656akvxyz:657avxyz:652axyz:658abcd")
|
85
90
|
|
86
91
|
to_field "subject_topic_facet", extract_marc("600abcdtq:610abt:610x:611abt:611x:630aa:630x:648a:648x:650aa:650x:651a:651x:691a:691x:653aa:654ab:656aa:690a:690x",
|
87
|
-
:
|
92
|
+
:trim_punctuation => true, ) do |record, accumulator|
|
88
93
|
#upcase first letter if needed, in MeSH sometimes inconsistently downcased
|
89
94
|
accumulator.collect! do |value|
|
90
95
|
value.gsub(/\A[a-z]/) do |m|
|
@@ -96,47 +101,46 @@ end
|
|
96
101
|
to_field "subject_geo_facet", marc_geo_facet
|
97
102
|
to_field "subject_era_facet", marc_era_facet
|
98
103
|
|
99
|
-
# not doing this at present.
|
104
|
+
# not doing this at present.
|
100
105
|
#to_field "subject_facet", extract_marc("600:610:611:630:650:651:655:690")
|
101
106
|
|
102
107
|
to_field "published_display", extract_marc("260a", :trim_punctuation => true)
|
103
108
|
|
104
109
|
to_field "pub_date", marc_publication_date
|
105
110
|
|
106
|
-
#
|
107
|
-
#
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
111
|
+
# An example of more complex ruby logic 'in line' in the config file--
|
112
|
+
# too much more complicated than this, and you'd probably want to extract
|
113
|
+
# it to an external routine to keep things tidy.
|
114
|
+
#
|
115
|
+
# Use traject's LCC to broad category routine, but then supply
|
116
|
+
# custom block to also use our local holdings 9xx info, and
|
117
|
+
# also classify sudoc-possessing records as 'Government Publication' discipline
|
112
118
|
to_field "discipline_facet", marc_lcc_to_broad_category(:default => nil) do |record, accumulator|
|
113
119
|
# add in our local call numbers
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
end
|
131
|
-
end.compact
|
132
|
-
)
|
120
|
+
Traject::MarcExtractor.cached("991:937").each_matching_line(record) do |field, spec, extractor|
|
121
|
+
# we output call type 'processor' in subfield 'f' of our holdings
|
122
|
+
# fields, that sort of maybe tells us if it's an LCC field.
|
123
|
+
# When the data is right, which it often isn't.
|
124
|
+
call_type = field['f']
|
125
|
+
if call_type == "sudoc"
|
126
|
+
# we choose to call it:
|
127
|
+
accumulator << "Government Publication"
|
128
|
+
elsif call_type.nil? || call_type == "lc" || field['a'] =~ Traject::Macros::Marc21Semantics::LCC_REGEX
|
129
|
+
# run it through the map
|
130
|
+
s = field['a']
|
131
|
+
s = s.slice(0, 1) if s
|
132
|
+
accumulator << Traject::TranslationMap.new("lcc_top_level")[s]
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
133
136
|
|
134
137
|
# If it's got an 086, we'll put it in "Government Publication", to be
|
135
138
|
# consistent with when we do that from a local SuDoc call #.
|
136
|
-
if
|
139
|
+
if Traject::MarcExtractor.cached("086a").extract(record).length > 0
|
137
140
|
accumulator << "Government Publication"
|
138
141
|
end
|
139
142
|
|
143
|
+
# uniq it in case we added the same thing twice with GovPub
|
140
144
|
accumulator.uniq!
|
141
145
|
|
142
146
|
if accumulator.empty?
|