traject 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +346 -0
- data/Rakefile +16 -0
- data/bin/traject +153 -0
- data/doc/macros.md +103 -0
- data/doc/settings.md +34 -0
- data/lib/traject.rb +10 -0
- data/lib/traject/indexer.rb +196 -0
- data/lib/traject/json_writer.rb +51 -0
- data/lib/traject/macros/basic.rb +9 -0
- data/lib/traject/macros/marc21.rb +145 -0
- data/lib/traject/marc_extractor.rb +206 -0
- data/lib/traject/marc_reader.rb +61 -0
- data/lib/traject/qualified_const_get.rb +30 -0
- data/lib/traject/solrj_writer.rb +120 -0
- data/lib/traject/translation_map.rb +184 -0
- data/lib/traject/version.rb +3 -0
- data/test/indexer/macros_marc21_test.rb +146 -0
- data/test/indexer/macros_test.rb +40 -0
- data/test/indexer/map_record_test.rb +120 -0
- data/test/indexer/read_write_test.rb +47 -0
- data/test/indexer/settings_test.rb +65 -0
- data/test/marc_extractor_test.rb +168 -0
- data/test/marc_reader_test.rb +29 -0
- data/test/solrj_writer_test.rb +106 -0
- data/test/test_helper.rb +28 -0
- data/test/test_support/hebrew880s.marc +1 -0
- data/test/test_support/manufacturing_consent.marc +1 -0
- data/test/test_support/test_data.utf8.marc.xml +2609 -0
- data/test/test_support/test_data.utf8.mrc +1 -0
- data/test/translation_map_test.rb +98 -0
- data/test/translation_maps/bad_ruby.rb +8 -0
- data/test/translation_maps/bad_yaml.yaml +1 -0
- data/test/translation_maps/both_map.rb +1 -0
- data/test/translation_maps/both_map.yaml +1 -0
- data/test/translation_maps/default_literal.rb +10 -0
- data/test/translation_maps/default_passthrough.rb +10 -0
- data/test/translation_maps/marc_040a_translate_test.yaml +1 -0
- data/test/translation_maps/ruby_map.rb +10 -0
- data/test/translation_maps/translate_array_test.yaml +8 -0
- data/test/translation_maps/yaml_map.yaml +7 -0
- data/traject.gemspec +30 -0
- data/vendor/solrj/README +8 -0
- data/vendor/solrj/build.xml +39 -0
- data/vendor/solrj/ivy.xml +16 -0
- data/vendor/solrj/lib/commons-codec-1.7.jar +0 -0
- data/vendor/solrj/lib/commons-io-2.1.jar +0 -0
- data/vendor/solrj/lib/httpclient-4.2.3.jar +0 -0
- data/vendor/solrj/lib/httpcore-4.2.2.jar +0 -0
- data/vendor/solrj/lib/httpmime-4.2.3.jar +0 -0
- data/vendor/solrj/lib/jcl-over-slf4j-1.6.6.jar +0 -0
- data/vendor/solrj/lib/jul-to-slf4j-1.6.6.jar +0 -0
- data/vendor/solrj/lib/log4j-1.2.16.jar +0 -0
- data/vendor/solrj/lib/noggit-0.5.jar +0 -0
- data/vendor/solrj/lib/slf4j-api-1.6.6.jar +0 -0
- data/vendor/solrj/lib/slf4j-log4j12-1.6.6.jar +0 -0
- data/vendor/solrj/lib/solr-solrj-4.3.1-javadoc.jar +0 -0
- data/vendor/solrj/lib/solr-solrj-4.3.1-sources.jar +0 -0
- data/vendor/solrj/lib/solr-solrj-4.3.1.jar +0 -0
- data/vendor/solrj/lib/wstx-asl-3.2.7.jar +0 -0
- data/vendor/solrj/lib/zookeeper-3.4.5.jar +0 -0
- metadata +264 -0
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
require 'traject/indexer'
|
4
|
+
require 'traject/macros/marc21'
|
5
|
+
|
6
|
+
require 'json'
|
7
|
+
require 'marc/record'
|
8
|
+
|
9
|
+
# See also marc_extractor_test.rb for more detailed tests of MARC extraction;
|
10
|
+
# this is just a basic test to make sure our macro passes through to the extractor,
|
11
|
+
# and that the macro's other options work.
|
12
|
+
describe "Traject::Macros::Marc21" do
|
13
|
+
Marc21 = Traject::Macros::Marc21 # shortcut
|
14
|
+
|
15
|
+
before do
|
16
|
+
@indexer = Traject::Indexer.new
|
17
|
+
@indexer.extend Traject::Macros::Marc21
|
18
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
19
|
+
end
|
20
|
+
|
21
|
+
describe "extract_marc" do
|
22
|
+
it "extracts marc" do
|
23
|
+
@indexer.instance_eval do
|
24
|
+
to_field "title", extract_marc("245ab")
|
25
|
+
end
|
26
|
+
|
27
|
+
output = @indexer.map_record(@record)
|
28
|
+
|
29
|
+
assert_equal ["Manufacturing consent : the political economy of the mass media /"], output["title"]
|
30
|
+
end
|
31
|
+
|
32
|
+
it "respects :first=>true option" do
|
33
|
+
@indexer.instance_eval do
|
34
|
+
to_field "other_id", extract_marc("035a", :first => true)
|
35
|
+
end
|
36
|
+
|
37
|
+
output = @indexer.map_record(@record)
|
38
|
+
|
39
|
+
assert_length 1, output["other_id"]
|
40
|
+
end
|
41
|
+
|
42
|
+
it "trims punctuation with :trim_punctuation => true" do
|
43
|
+
@indexer.instance_eval do
|
44
|
+
to_field "title", extract_marc("245ab", :trim_punctuation => true)
|
45
|
+
end
|
46
|
+
|
47
|
+
output = @indexer.map_record(@record)
|
48
|
+
|
49
|
+
assert ! output["title"].first.end_with?("/"), "does not end with /"
|
50
|
+
end
|
51
|
+
|
52
|
+
it "Marc21::trim_punctuation class method" do
|
53
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three")
|
54
|
+
|
55
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three,")
|
56
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three/")
|
57
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three;")
|
58
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three:")
|
59
|
+
assert_equal "one two three .", Marc21.trim_punctuation("one two three .")
|
60
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three.")
|
61
|
+
|
62
|
+
assert_equal "one two [three]", Marc21.trim_punctuation("one two [three]")
|
63
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three]")
|
64
|
+
assert_equal "one two three", Marc21.trim_punctuation("[one two three")
|
65
|
+
assert_equal "one two three", Marc21.trim_punctuation("[one two three]")
|
66
|
+
end
|
67
|
+
|
68
|
+
it "uses :translation_map" do
|
69
|
+
@indexer.instance_eval do
|
70
|
+
to_field "cataloging_agency", extract_marc("040a", :seperator => nil, :translation_map => "marc_040a_translate_test")
|
71
|
+
end
|
72
|
+
output = @indexer.map_record(@record)
|
73
|
+
|
74
|
+
assert_equal ["Library of Congress"], output["cataloging_agency"]
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
describe "serialized_marc" do
|
79
|
+
it "serializes xml" do
|
80
|
+
@indexer.instance_eval do
|
81
|
+
to_field "marc_record", serialized_marc(:format => "xml")
|
82
|
+
end
|
83
|
+
output = @indexer.map_record(@record)
|
84
|
+
|
85
|
+
assert_length 1, output["marc_record"]
|
86
|
+
assert_kind_of String, output["marc_record"].first
|
87
|
+
assert output["marc_record"].first.start_with?("<record xmlns='http://www.loc.gov/MARC21/slim'>"), "looks like serialized MarcXML"
|
88
|
+
end
|
89
|
+
|
90
|
+
it "serializes binary UUEncoded" do
|
91
|
+
@indexer.instance_eval do
|
92
|
+
to_field "marc_record", serialized_marc(:format => "binary")
|
93
|
+
end
|
94
|
+
output = @indexer.map_record(@record)
|
95
|
+
|
96
|
+
assert_length 1, output["marc_record"]
|
97
|
+
assert_kind_of String, output["marc_record"].first
|
98
|
+
|
99
|
+
decoded = Base64.decode64( output["marc_record"].first )
|
100
|
+
|
101
|
+
# just check the marc header for now
|
102
|
+
assert_start_with "02067cam a2200469", decoded
|
103
|
+
end
|
104
|
+
|
105
|
+
it "serializes binary raw" do
|
106
|
+
@indexer.instance_eval do
|
107
|
+
to_field "marc_record", serialized_marc(:format => "binary", :binary_escape => false)
|
108
|
+
end
|
109
|
+
output = @indexer.map_record(@record)
|
110
|
+
|
111
|
+
assert_length 1, output["marc_record"]
|
112
|
+
assert_kind_of String, output["marc_record"].first
|
113
|
+
|
114
|
+
# just check the marc header for now
|
115
|
+
assert_start_with "02067cam a2200469", output["marc_record"].first
|
116
|
+
end
|
117
|
+
|
118
|
+
it "serializes json" do
|
119
|
+
@indexer.instance_eval do
|
120
|
+
to_field "marc_record", serialized_marc(:format => "json")
|
121
|
+
end
|
122
|
+
output = @indexer.map_record(@record)
|
123
|
+
|
124
|
+
assert_length 1, output["marc_record"]
|
125
|
+
|
126
|
+
# okay, let's actually deserialize it, why not
|
127
|
+
|
128
|
+
hash = JSON.parse( output["marc_record"].first )
|
129
|
+
|
130
|
+
deserialized = MARC::Record.new_from_hash(hash)
|
131
|
+
|
132
|
+
assert_equal @record, deserialized
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
it "#extract_all_marc_values" do
|
137
|
+
@indexer.instance_eval do
|
138
|
+
to_field "text", extract_all_marc_values
|
139
|
+
end
|
140
|
+
output = @indexer.map_record(@record)
|
141
|
+
|
142
|
+
assert_length 13, output["text"]
|
143
|
+
end
|
144
|
+
|
145
|
+
|
146
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Indexer Macros:" do
|
4
|
+
before do
|
5
|
+
@indexer = Traject::Indexer.new
|
6
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
7
|
+
end
|
8
|
+
|
9
|
+
it "works with simple literal" do
|
10
|
+
@indexer.instance_eval do
|
11
|
+
extend Traject::Macros::Basic
|
12
|
+
|
13
|
+
to_field "source", literal("MY LIBRARY")
|
14
|
+
end
|
15
|
+
|
16
|
+
output = @indexer.map_record(@record)
|
17
|
+
|
18
|
+
assert_equal ["MY LIBRARY"], output["source"]
|
19
|
+
end
|
20
|
+
|
21
|
+
it "works with macro AND block" do
|
22
|
+
called = false
|
23
|
+
|
24
|
+
@indexer.instance_eval do
|
25
|
+
extend Traject::Macros::Basic
|
26
|
+
to_field "source", literal("MY LIBRARY") do |record, accumulator, context|
|
27
|
+
called = true
|
28
|
+
accumulator << "SECOND VALUE"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
output = @indexer.map_record(@record)
|
33
|
+
|
34
|
+
assert called
|
35
|
+
assert_equal ["MY LIBRARY", "SECOND VALUE"], output["source"]
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Traject::Indexer#map_record" do
|
4
|
+
before do
|
5
|
+
@indexer = Traject::Indexer.new
|
6
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
describe "with no indexing rules" do
|
11
|
+
it "returns empty hash" do
|
12
|
+
output = @indexer.map_record(@record)
|
13
|
+
|
14
|
+
assert_kind_of Hash, output
|
15
|
+
assert_empty output
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
describe "#to_field" do
|
20
|
+
it "works with block" do
|
21
|
+
called = false
|
22
|
+
|
23
|
+
@indexer.to_field("title") do |record, accumulator|
|
24
|
+
assert_kind_of MARC::Record, record
|
25
|
+
assert_kind_of Array, accumulator
|
26
|
+
|
27
|
+
called = true # by the power of closure!
|
28
|
+
accumulator << "Some Title"
|
29
|
+
end
|
30
|
+
|
31
|
+
output = @indexer.map_record(@record)
|
32
|
+
|
33
|
+
assert called
|
34
|
+
assert_kind_of Hash, output
|
35
|
+
assert_equal ["Some Title"], output["title"]
|
36
|
+
end
|
37
|
+
|
38
|
+
it "works with a lambda arg" do
|
39
|
+
called = false
|
40
|
+
|
41
|
+
logic = lambda do |record, accumulator|
|
42
|
+
assert_kind_of MARC::Record, record
|
43
|
+
assert_kind_of Array, accumulator
|
44
|
+
|
45
|
+
called = true # by the power of closure!
|
46
|
+
accumulator << "Some Title"
|
47
|
+
end
|
48
|
+
|
49
|
+
@indexer.to_field("title", logic)
|
50
|
+
|
51
|
+
output = @indexer.map_record(@record)
|
52
|
+
|
53
|
+
assert called
|
54
|
+
assert_kind_of Hash, output
|
55
|
+
assert_equal ["Some Title"], output["title"]
|
56
|
+
end
|
57
|
+
|
58
|
+
it "works with both lambda and Proc" do
|
59
|
+
block_called = false
|
60
|
+
|
61
|
+
lambda_arg = lambda do |record, accumulator|
|
62
|
+
accumulator << "Lambda-provided Value"
|
63
|
+
end
|
64
|
+
|
65
|
+
@indexer.to_field("title", lambda_arg) do |record, accumulator|
|
66
|
+
assert_includes accumulator, "Lambda-provided Value"
|
67
|
+
accumulator << "Block-provided Value"
|
68
|
+
|
69
|
+
block_called = true
|
70
|
+
end
|
71
|
+
|
72
|
+
output = @indexer.map_record(@record)
|
73
|
+
|
74
|
+
assert block_called
|
75
|
+
assert_includes output["title"], "Lambda-provided Value"
|
76
|
+
assert_includes output["title"], "Block-provided Value"
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
describe "multiple to_field blocks" do
|
81
|
+
it "get called in order" do
|
82
|
+
order = []
|
83
|
+
@indexer.to_field("title") do |rec, acc|
|
84
|
+
order << :first_one
|
85
|
+
acc << "First"
|
86
|
+
end
|
87
|
+
@indexer.to_field("title") do |rec, acc|
|
88
|
+
order << :second_one
|
89
|
+
acc << "Second"
|
90
|
+
end
|
91
|
+
|
92
|
+
output = @indexer.map_record(@record)
|
93
|
+
|
94
|
+
assert_equal [:first_one, :second_one], order
|
95
|
+
assert_equal ["First", "Second"], output["title"]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
describe "context argument" do
|
100
|
+
it "is third argument to block" do
|
101
|
+
called = false
|
102
|
+
@indexer.to_field("title") do |record, accumulator, context|
|
103
|
+
called = true
|
104
|
+
|
105
|
+
assert_kind_of Traject::Indexer::Context, context
|
106
|
+
|
107
|
+
assert_kind_of Hash, context.clipboard
|
108
|
+
assert_kind_of Hash, context.output_hash
|
109
|
+
|
110
|
+
assert_same record, context.source_record
|
111
|
+
assert_same @indexer.settings, context.settings
|
112
|
+
end
|
113
|
+
|
114
|
+
@indexer.map_record @record
|
115
|
+
|
116
|
+
assert called
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# A little Traject Writer that just keeps every hash it is given
|
4
|
+
# in an array stored in the settings hash, for easy access from the tests
|
5
|
+
memory_writer_class = Class.new do
|
6
|
+
def initialize(settings)
|
7
|
+
@settings = settings
|
8
|
+
@settings["memory_writer.added"] = []
|
9
|
+
end
|
10
|
+
|
11
|
+
def put(hash)
|
12
|
+
@settings["memory_writer.added"] << hash
|
13
|
+
end
|
14
|
+
|
15
|
+
def close
|
16
|
+
@settings["memory_writer.closed"] = true
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "Traject::Indexer#process" do
|
21
|
+
before do
|
22
|
+
@indexer = Traject::Indexer.new
|
23
|
+
@indexer.writer_class = memory_writer_class
|
24
|
+
@file = File.open(support_file_path "test_data.utf8.mrc")
|
25
|
+
end
|
26
|
+
|
27
|
+
it "works" do
|
28
|
+
@indexer.to_field("title") do |record, accumulator, context|
|
29
|
+
accumulator << "ADDED TITLE"
|
30
|
+
assert_equal "title", context.field_name
|
31
|
+
end
|
32
|
+
|
33
|
+
@indexer.process( @file )
|
34
|
+
|
35
|
+
assert @indexer.settings["memory_writer.added"]
|
36
|
+
assert_equal 30, @indexer.settings["memory_writer.added"].length
|
37
|
+
assert_kind_of Hash, @indexer.settings["memory_writer.added"].first
|
38
|
+
assert_equal ["ADDED TITLE"], @indexer.settings["memory_writer.added"].first["title"]
|
39
|
+
|
40
|
+
assert @indexer.settings["memory_writer.closed"]
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Traject::Indexer#settings" do
|
4
|
+
before do
|
5
|
+
@indexer = Traject::Indexer.new
|
6
|
+
end
|
7
|
+
|
8
|
+
it "starts out default hash" do
|
9
|
+
assert_kind_of Hash, @indexer.settings
|
10
|
+
assert_equal Traject::Indexer.default_settings, @indexer.settings
|
11
|
+
end
|
12
|
+
|
13
|
+
it "can take argument to set" do
|
14
|
+
@indexer.settings("foo" => "foo", "bar" => "bar")
|
15
|
+
|
16
|
+
assert_equal "foo", @indexer.settings["foo"]
|
17
|
+
assert_equal "bar", @indexer.settings["bar"]
|
18
|
+
end
|
19
|
+
|
20
|
+
it "has settings DSL to set" do
|
21
|
+
@indexer.instance_eval do
|
22
|
+
settings do
|
23
|
+
store "foo", "foo"
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
assert_equal "foo", @indexer.settings["foo"]
|
28
|
+
end
|
29
|
+
|
30
|
+
it "merges new values, not completely replaces" do
|
31
|
+
@indexer.settings("one" => "original", "two" => "original", "three" => "original", "four" => "original")
|
32
|
+
|
33
|
+
@indexer.settings do
|
34
|
+
store "two", "second"
|
35
|
+
store "three", "second"
|
36
|
+
end
|
37
|
+
|
38
|
+
@indexer.settings do
|
39
|
+
store "three", "third"
|
40
|
+
end
|
41
|
+
|
42
|
+
@indexer.settings("four" => "fourth")
|
43
|
+
|
44
|
+
{"one" => "original", "two" => "second", "three" => "third", "four" => "fourth"}.each_pair do |key, value|
|
45
|
+
assert_equal value, @indexer.settings[key]
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
it "is indifferent between string and symbol" do
|
50
|
+
@indexer.settings[:foo] = "foo 1"
|
51
|
+
@indexer.settings["foo"] = "foo 2"
|
52
|
+
|
53
|
+
assert_equal "foo 2", @indexer.settings[:foo]
|
54
|
+
|
55
|
+
@indexer.settings do
|
56
|
+
store "foo", "foo 3"
|
57
|
+
store :foo, "foo 4"
|
58
|
+
end
|
59
|
+
|
60
|
+
assert_equal "foo 4", @indexer.settings["foo"]
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
require 'traject/marc_extractor'
|
5
|
+
|
6
|
+
|
7
|
+
describe "Traject::MarcExtractor" do
|
8
|
+
describe "#parse_marc_spec" do
|
9
|
+
it "parses single spec with all elements" do
|
10
|
+
parsed = Traject::MarcExtractor.parse_string_spec("245|1*|abcg")
|
11
|
+
|
12
|
+
assert_kind_of Hash, parsed
|
13
|
+
assert_equal 1, parsed.keys.length
|
14
|
+
assert_kind_of Hash, parsed["245"]
|
15
|
+
|
16
|
+
assert_kind_of Array, parsed["245"][:indicators]
|
17
|
+
assert_equal 2, parsed["245"][:indicators].length
|
18
|
+
assert_equal "1", parsed["245"][:indicators][0]
|
19
|
+
assert_nil parsed["245"][:indicators][1]
|
20
|
+
|
21
|
+
assert_kind_of Array, parsed["245"][:subfields]
|
22
|
+
|
23
|
+
end
|
24
|
+
|
25
|
+
it "parses a mixed bag" do
|
26
|
+
parsed = Traject::MarcExtractor.parse_string_spec("245abcde:810:700|*4|bcd")
|
27
|
+
|
28
|
+
assert_length 3, parsed
|
29
|
+
|
30
|
+
#245abcde
|
31
|
+
assert parsed["245"]
|
32
|
+
assert_nil parsed["245"][:indicators]
|
33
|
+
assert_equal %w{a b c d e}, parsed["245"][:subfields]
|
34
|
+
|
35
|
+
#810
|
36
|
+
assert parsed["810"]
|
37
|
+
assert_nil parsed["810"][:indicators]
|
38
|
+
assert_nil parsed["810"][:subfields]
|
39
|
+
|
40
|
+
#700|*4|bcd
|
41
|
+
assert parsed["700"]
|
42
|
+
assert_equal [nil, "4"], parsed["700"][:indicators]
|
43
|
+
assert_equal %w{b c d}, parsed["700"][:subfields]
|
44
|
+
end
|
45
|
+
|
46
|
+
it "parses fixed field byte offsets" do
|
47
|
+
parsed = Traject::MarcExtractor.parse_string_spec("005[5]:008[7-10]")
|
48
|
+
|
49
|
+
assert_equal 5, parsed["005"][:bytes]
|
50
|
+
assert_equal 7..10, parsed["008"][:bytes]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
describe "#extract_by_spec" do
|
55
|
+
before do
|
56
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
57
|
+
end
|
58
|
+
|
59
|
+
describe "extracts a basic case" do
|
60
|
+
before do
|
61
|
+
parsed_spec = Traject::MarcExtractor.parse_string_spec("700abcdef:856|*2|:505|1*|:245ba")
|
62
|
+
@values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec)
|
63
|
+
end
|
64
|
+
|
65
|
+
it "returns an array" do
|
66
|
+
assert_kind_of Array, @values
|
67
|
+
end
|
68
|
+
|
69
|
+
it "handles no subfields given" do
|
70
|
+
a856s = @record.find_all {|f| f.tag == "856"}
|
71
|
+
assert a856s, "Record must have 856 fields for this test to work"
|
72
|
+
|
73
|
+
a856s.each do |field|
|
74
|
+
assert @values.include?( field.subfields.collect(&:value).join(" "))
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
it "does not have 505, due to non-matching indicators" do
|
79
|
+
assert ! @values.find {|s| s.include? "propaganda model"}
|
80
|
+
end
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
it "respects original record order, for both fields and subfields" do
|
85
|
+
expected = ["Manufacturing consent : the political economy of the mass media /",
|
86
|
+
"Chomsky, Noam.",
|
87
|
+
"Contributor biographical information http://www.loc.gov/catdir/bios/random051/2001050014.html",
|
88
|
+
"Publisher description http://www.loc.gov/catdir/description/random044/2001050014.html"]
|
89
|
+
assert_equal expected, @values
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
describe "extracts fixed fields" do
|
94
|
+
it ", complete" do
|
95
|
+
parsed_spec = Traject::MarcExtractor.parse_string_spec("001")
|
96
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec)
|
97
|
+
|
98
|
+
assert_equal ["2710183"], values
|
99
|
+
end
|
100
|
+
it ", single byte offset" do
|
101
|
+
parsed_spec = Traject::MarcExtractor.parse_string_spec("008[5]")
|
102
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec)
|
103
|
+
|
104
|
+
assert_equal ["1"], values
|
105
|
+
end
|
106
|
+
it ", byte range" do
|
107
|
+
parsed_spec = Traject::MarcExtractor.parse_string_spec("008[7-10]")
|
108
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec)
|
109
|
+
|
110
|
+
assert_equal ["2002"], values
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
describe "seperator argument" do
|
115
|
+
it "causes non-join when nil" do
|
116
|
+
parsed_spec = Traject::MarcExtractor.parse_string_spec("245")
|
117
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec, :seperator => nil)
|
118
|
+
|
119
|
+
assert_length 3, values
|
120
|
+
end
|
121
|
+
|
122
|
+
it "can be non-default" do
|
123
|
+
parsed_spec = Traject::MarcExtractor.parse_string_spec("245")
|
124
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec, :seperator => "!! ")
|
125
|
+
|
126
|
+
assert_length 1, values
|
127
|
+
assert_equal "Manufacturing consent :!! the political economy of the mass media /!! Edward S. Herman and Noam Chomsky ; with a new introduction by the authors.", values.first
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
describe "extracts alternate script" do
|
132
|
+
before do
|
133
|
+
@record = MARC::Reader.new(support_file_path "hebrew880s.marc").to_a.first
|
134
|
+
@parsed_spec = Traject::MarcExtractor.parse_string_spec("245b")
|
135
|
+
end
|
136
|
+
it "from default :include" do
|
137
|
+
|
138
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, @parsed_spec)
|
139
|
+
|
140
|
+
assert_length 2, values # both the original and the 880
|
141
|
+
assert_equal ["ben Marṭin Buber le-Aharon Daṿid Gordon /", "בין מרטין בובר לאהרן דוד גורדון /"], values
|
142
|
+
end
|
143
|
+
it "with :only" do
|
144
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, @parsed_spec, :alternate_script => :only)
|
145
|
+
|
146
|
+
assert_length 1, values
|
147
|
+
assert_equal ["בין מרטין בובר לאהרן דוד גורדון /"], values
|
148
|
+
end
|
149
|
+
it "with false" do
|
150
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, @parsed_spec, :alternate_script => false)
|
151
|
+
|
152
|
+
assert_length 1, values
|
153
|
+
assert_equal ["ben Marṭin Buber le-Aharon Daṿid Gordon /"], values
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
it "works with string second arg too" do
|
158
|
+
values = Traject::MarcExtractor.extract_by_spec(@record, "245abc")
|
159
|
+
|
160
|
+
assert_length 1, values
|
161
|
+
assert values.first.include?("Manufacturing consent"), "Extracted value includes title"
|
162
|
+
end
|
163
|
+
|
164
|
+
end
|
165
|
+
|
166
|
+
|
167
|
+
|
168
|
+
end
|