traject 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +346 -0
- data/Rakefile +16 -0
- data/bin/traject +153 -0
- data/doc/macros.md +103 -0
- data/doc/settings.md +34 -0
- data/lib/traject.rb +10 -0
- data/lib/traject/indexer.rb +196 -0
- data/lib/traject/json_writer.rb +51 -0
- data/lib/traject/macros/basic.rb +9 -0
- data/lib/traject/macros/marc21.rb +145 -0
- data/lib/traject/marc_extractor.rb +206 -0
- data/lib/traject/marc_reader.rb +61 -0
- data/lib/traject/qualified_const_get.rb +30 -0
- data/lib/traject/solrj_writer.rb +120 -0
- data/lib/traject/translation_map.rb +184 -0
- data/lib/traject/version.rb +3 -0
- data/test/indexer/macros_marc21_test.rb +146 -0
- data/test/indexer/macros_test.rb +40 -0
- data/test/indexer/map_record_test.rb +120 -0
- data/test/indexer/read_write_test.rb +47 -0
- data/test/indexer/settings_test.rb +65 -0
- data/test/marc_extractor_test.rb +168 -0
- data/test/marc_reader_test.rb +29 -0
- data/test/solrj_writer_test.rb +106 -0
- data/test/test_helper.rb +28 -0
- data/test/test_support/hebrew880s.marc +1 -0
- data/test/test_support/manufacturing_consent.marc +1 -0
- data/test/test_support/test_data.utf8.marc.xml +2609 -0
- data/test/test_support/test_data.utf8.mrc +1 -0
- data/test/translation_map_test.rb +98 -0
- data/test/translation_maps/bad_ruby.rb +8 -0
- data/test/translation_maps/bad_yaml.yaml +1 -0
- data/test/translation_maps/both_map.rb +1 -0
- data/test/translation_maps/both_map.yaml +1 -0
- data/test/translation_maps/default_literal.rb +10 -0
- data/test/translation_maps/default_passthrough.rb +10 -0
- data/test/translation_maps/marc_040a_translate_test.yaml +1 -0
- data/test/translation_maps/ruby_map.rb +10 -0
- data/test/translation_maps/translate_array_test.yaml +8 -0
- data/test/translation_maps/yaml_map.yaml +7 -0
- data/traject.gemspec +30 -0
- data/vendor/solrj/README +8 -0
- data/vendor/solrj/build.xml +39 -0
- data/vendor/solrj/ivy.xml +16 -0
- data/vendor/solrj/lib/commons-codec-1.7.jar +0 -0
- data/vendor/solrj/lib/commons-io-2.1.jar +0 -0
- data/vendor/solrj/lib/httpclient-4.2.3.jar +0 -0
- data/vendor/solrj/lib/httpcore-4.2.2.jar +0 -0
- data/vendor/solrj/lib/httpmime-4.2.3.jar +0 -0
- data/vendor/solrj/lib/jcl-over-slf4j-1.6.6.jar +0 -0
- data/vendor/solrj/lib/jul-to-slf4j-1.6.6.jar +0 -0
- data/vendor/solrj/lib/log4j-1.2.16.jar +0 -0
- data/vendor/solrj/lib/noggit-0.5.jar +0 -0
- data/vendor/solrj/lib/slf4j-api-1.6.6.jar +0 -0
- data/vendor/solrj/lib/slf4j-log4j12-1.6.6.jar +0 -0
- data/vendor/solrj/lib/solr-solrj-4.3.1-javadoc.jar +0 -0
- data/vendor/solrj/lib/solr-solrj-4.3.1-sources.jar +0 -0
- data/vendor/solrj/lib/solr-solrj-4.3.1.jar +0 -0
- data/vendor/solrj/lib/wstx-asl-3.2.7.jar +0 -0
- data/vendor/solrj/lib/zookeeper-3.4.5.jar +0 -0
- metadata +264 -0
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'test_helper'

require 'traject/indexer'
require 'traject/macros/marc21'

require 'base64'
require 'json'
require 'marc/record'
|
8
|
+
|
9
|
+
# See also marc_extractor_test.rb for more detailed tests on marc extraction,
|
10
|
+
# this is just a basic test to make sure our macro works passing through to there
|
11
|
+
# and other options.
|
12
|
+
describe "Traject::Macros::Marc21" do
  # Shortcut to the macro module. Kept as a block-local variable (captured by
  # the example closures below) rather than a constant: a constant assigned
  # inside a block is defined on Object and would leak into every other test.
  marc21 = Traject::Macros::Marc21

  # Every example gets a fresh indexer extended with the Marc21 macros, plus
  # the first record read from the manufacturing_consent.marc support file.
  before do
    @indexer = Traject::Indexer.new
    @indexer.extend Traject::Macros::Marc21
    @record = MARC::Reader.new(support_file_path("manufacturing_consent.marc")).to_a.first
  end

  describe "extract_marc" do
    it "extracts marc" do
      @indexer.instance_eval do
        to_field "title", extract_marc("245ab")
      end

      output = @indexer.map_record(@record)

      assert_equal ["Manufacturing consent : the political economy of the mass media /"], output["title"]
    end

    it "respects :first=>true option" do
      @indexer.instance_eval do
        to_field "other_id", extract_marc("035a", :first => true)
      end

      output = @indexer.map_record(@record)

      assert_length 1, output["other_id"]
    end

    it "trims punctuation with :trim_punctuation => true" do
      @indexer.instance_eval do
        to_field "title", extract_marc("245ab", :trim_punctuation => true)
      end

      output = @indexer.map_record(@record)

      assert ! output["title"].first.end_with?("/"), "does not end with /"
    end

    it "Marc21::trim_punctuation class method" do
      # input => expected result of trim_punctuation
      {
        "one two three"    => "one two three",

        "one two three,"   => "one two three",
        "one two three/"   => "one two three",
        "one two three;"   => "one two three",
        "one two three:"   => "one two three",
        "one two three ."  => "one two three .",
        "one two three."   => "one two three",

        "one two [three]"  => "one two [three]",
        "one two three]"   => "one two three",
        "[one two three"   => "one two three",
        "[one two three]"  => "one two three"
      }.each_pair do |input, expected|
        assert_equal expected, marc21.trim_punctuation(input)
      end
    end

    it "uses :translation_map" do
      @indexer.instance_eval do
        # :seperator is spelled the same way as the option key passed to
        # MarcExtractor.extract_by_spec in marc_extractor_test.rb.
        to_field "cataloging_agency", extract_marc("040a", :seperator => nil, :translation_map => "marc_040a_translate_test")
      end
      output = @indexer.map_record(@record)

      assert_equal ["Library of Congress"], output["cataloging_agency"]
    end
  end

  describe "serialized_marc" do
    it "serializes xml" do
      @indexer.instance_eval do
        to_field "marc_record", serialized_marc(:format => "xml")
      end
      output = @indexer.map_record(@record)

      assert_length 1, output["marc_record"]
      assert_kind_of String, output["marc_record"].first
      assert output["marc_record"].first.start_with?("<record xmlns='http://www.loc.gov/MARC21/slim'>"), "looks like serialized MarcXML"
    end

    # The example name says "UUEncoded", but the value is decoded below with
    # Base64 before its header is checked.
    it "serializes binary UUEncoded" do
      @indexer.instance_eval do
        to_field "marc_record", serialized_marc(:format => "binary")
      end
      output = @indexer.map_record(@record)

      assert_length 1, output["marc_record"]
      assert_kind_of String, output["marc_record"].first

      decoded = Base64.decode64( output["marc_record"].first )

      # just check the marc header for now
      assert_start_with "02067cam a2200469", decoded
    end

    it "serializes binary raw" do
      @indexer.instance_eval do
        to_field "marc_record", serialized_marc(:format => "binary", :binary_escape => false)
      end
      output = @indexer.map_record(@record)

      assert_length 1, output["marc_record"]
      assert_kind_of String, output["marc_record"].first

      # just check the marc header for now
      assert_start_with "02067cam a2200469", output["marc_record"].first
    end

    it "serializes json" do
      @indexer.instance_eval do
        to_field "marc_record", serialized_marc(:format => "json")
      end
      output = @indexer.map_record(@record)

      assert_length 1, output["marc_record"]

      # okay, let's actually deserialize it, why not: round-trip the JSON
      # back into a MARC::Record and compare with the source record.
      hash = JSON.parse( output["marc_record"].first )

      deserialized = MARC::Record.new_from_hash(hash)

      assert_equal @record, deserialized
    end
  end

  it "#extract_all_marc_values" do
    @indexer.instance_eval do
      to_field "text", extract_all_marc_values
    end
    output = @indexer.map_record(@record)

    assert_length 13, output["text"]
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Indexer Macros:" do
  # Fresh indexer and the first record of the manufacturing_consent.marc
  # support file for each example.
  before do
    @indexer = Traject::Indexer.new

    fixture_records = MARC::Reader.new(support_file_path("manufacturing_consent.marc")).to_a
    @record = fixture_records.first
  end

  it "works with simple literal" do
    @indexer.extend(Traject::Macros::Basic)
    @indexer.instance_eval { to_field "source", literal("MY LIBRARY") }

    assert_equal ["MY LIBRARY"], @indexer.map_record(@record)["source"]
  end

  it "works with macro AND block" do
    block_ran = false

    @indexer.extend(Traject::Macros::Basic)
    @indexer.instance_eval do
      # to_field is parenthesized so the do...end block is handed to
      # to_field itself, alongside the literal macro.
      to_field("source", literal("MY LIBRARY")) do |_record, accumulator, _context|
        block_ran = true
        accumulator << "SECOND VALUE"
      end
    end

    mapped = @indexer.map_record(@record)

    assert block_ran
    assert_equal ["MY LIBRARY", "SECOND VALUE"], mapped["source"]
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Traject::Indexer#map_record" do
  # Fresh indexer and the first record of the manufacturing_consent.marc
  # support file for each example.
  before do
    @indexer = Traject::Indexer.new

    fixture_records = MARC::Reader.new(support_file_path("manufacturing_consent.marc")).to_a
    @record = fixture_records.first
  end

  describe "with no indexing rules" do
    it "returns empty hash" do
      mapped = @indexer.map_record(@record)

      assert_kind_of Hash, mapped
      assert_empty mapped
    end
  end

  describe "#to_field" do
    it "works with block" do
      invoked = false

      @indexer.to_field("title") do |record, accumulator|
        assert_kind_of MARC::Record, record
        assert_kind_of Array, accumulator

        # the block closes over the example's local, so this is visible below
        invoked = true
        accumulator.push("Some Title")
      end

      mapped = @indexer.map_record(@record)

      assert invoked
      assert_kind_of Hash, mapped
      assert_equal ["Some Title"], mapped["title"]
    end

    it "works with a lambda arg" do
      invoked = false

      field_logic = ->(record, accumulator) {
        assert_kind_of MARC::Record, record
        assert_kind_of Array, accumulator

        # the lambda closes over the example's local, so this is visible below
        invoked = true
        accumulator.push("Some Title")
      }

      @indexer.to_field("title", field_logic)

      mapped = @indexer.map_record(@record)

      assert invoked
      assert_kind_of Hash, mapped
      assert_equal ["Some Title"], mapped["title"]
    end

    it "works with both lambda and Proc" do
      block_ran = false

      from_lambda = ->(_record, accumulator) { accumulator << "Lambda-provided Value" }

      @indexer.to_field("title", from_lambda) do |_record, accumulator|
        # the lambda's value must already be present when the block runs
        assert_includes accumulator, "Lambda-provided Value"
        accumulator << "Block-provided Value"

        block_ran = true
      end

      mapped = @indexer.map_record(@record)

      assert block_ran
      assert_includes mapped["title"], "Lambda-provided Value"
      assert_includes mapped["title"], "Block-provided Value"
    end
  end

  describe "multiple to_field blocks" do
    it "get called in order" do
      call_log = []

      @indexer.to_field("title") do |_record, accumulator|
        call_log << :first_one
        accumulator << "First"
      end
      @indexer.to_field("title") do |_record, accumulator|
        call_log << :second_one
        accumulator << "Second"
      end

      mapped = @indexer.map_record(@record)

      assert_equal [:first_one, :second_one], call_log
      assert_equal ["First", "Second"], mapped["title"]
    end
  end

  describe "context argument" do
    it "is third argument to block" do
      invoked = false

      @indexer.to_field("title") do |record, _accumulator, context|
        invoked = true

        assert_kind_of Traject::Indexer::Context, context

        assert_kind_of Hash, context.clipboard
        assert_kind_of Hash, context.output_hash

        assert_same record, context.source_record
        assert_same @indexer.settings, context.settings
      end

      @indexer.map_record(@record)

      assert invoked
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# A little Traject Writer that just keeps everything
|
4
|
+
# in an array, just added to settings for easy access
|
5
|
+
memory_writer_class = Class.new do
  # Keeps a reference to the given settings object and starts an empty list
  # under the "memory_writer.added" key, where every written hash is collected.
  def initialize(settings)
    @settings = settings
    @settings["memory_writer.added"] = []
  end

  # Appends +hash+ to the list held in settings.
  def put(hash)
    added.push(hash)
  end

  # Flags in settings that the writer has been closed.
  def close
    @settings["memory_writer.closed"] = true
  end

  private

  # The list of hashes written so far, as stored in settings.
  def added
    @settings["memory_writer.added"]
  end
end
|
19
|
+
|
20
|
+
describe "Traject::Indexer#process" do
  # Each example gets an indexer that writes through the in-memory writer
  # class defined above, and an open handle on the binary MARC support file.
  before do
    @indexer = Traject::Indexer.new
    @indexer.writer_class = memory_writer_class
    @file = File.open(support_file_path("test_data.utf8.mrc"))
  end

  # Release the file handle opened in `before`; nothing visible in this test
  # closes it otherwise. Guarded in case it was already closed.
  after do
    @file.close if @file && !@file.closed?
  end

  it "works" do
    @indexer.to_field("title") do |record, accumulator, context|
      accumulator << "ADDED TITLE"
      assert_equal "title", context.field_name
    end

    @indexer.process( @file )

    # The memory writer records everything it was given in settings.
    assert @indexer.settings["memory_writer.added"]
    assert_equal 30, @indexer.settings["memory_writer.added"].length
    assert_kind_of Hash, @indexer.settings["memory_writer.added"].first
    assert_equal ["ADDED TITLE"], @indexer.settings["memory_writer.added"].first["title"]

    # ...and flags that #close was called on it.
    assert @indexer.settings["memory_writer.closed"]
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Traject::Indexer#settings" do
  before { @indexer = Traject::Indexer.new }

  it "starts out default hash" do
    current = @indexer.settings

    assert_kind_of Hash, current
    assert_equal Traject::Indexer.default_settings, current
  end

  it "can take argument to set" do
    @indexer.settings("foo" => "foo", "bar" => "bar")

    assert_equal "foo", @indexer.settings["foo"]
    assert_equal "bar", @indexer.settings["bar"]
  end

  it "has settings DSL to set" do
    @indexer.instance_eval do
      settings { store "foo", "foo" }
    end

    assert_equal "foo", @indexer.settings["foo"]
  end

  it "merges new values, not completely replaces" do
    @indexer.settings("one" => "original", "two" => "original", "three" => "original", "four" => "original")

    # Later calls, in block form or hash form, override individual keys only.
    @indexer.settings do
      store "two", "second"
      store "three", "second"
    end

    @indexer.settings { store "three", "third" }

    @indexer.settings("four" => "fourth")

    expected = {"one" => "original", "two" => "second", "three" => "third", "four" => "fourth"}
    expected.each_pair do |key, value|
      assert_equal value, @indexer.settings[key]
    end
  end

  it "is indifferent between string and symbol" do
    # Write under the symbol, overwrite under the string, read via the symbol.
    @indexer.settings[:foo] = "foo 1"
    @indexer.settings["foo"] = "foo 2"

    assert_equal "foo 2", @indexer.settings[:foo]

    # Same check through the DSL, in the opposite key order.
    @indexer.settings do
      store "foo", "foo 3"
      store :foo, "foo 4"
    end

    assert_equal "foo 4", @indexer.settings["foo"]
  end
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
require 'traject/marc_extractor'
|
5
|
+
|
6
|
+
|
7
|
+
describe "Traject::MarcExtractor" do
  # These examples exercise MarcExtractor.parse_string_spec.
  describe "#parse_marc_spec" do
    it "parses single spec with all elements" do
      parsed = Traject::MarcExtractor.parse_string_spec("245|1*|abcg")

      assert_kind_of Hash, parsed
      assert_equal 1, parsed.keys.length
      assert_kind_of Hash, parsed["245"]

      # "1*": first indicator is "1", the "*" position comes back as nil
      assert_kind_of Array, parsed["245"][:indicators]
      assert_equal 2, parsed["245"][:indicators].length
      assert_equal "1", parsed["245"][:indicators][0]
      assert_nil parsed["245"][:indicators][1]

      assert_kind_of Array, parsed["245"][:subfields]
    end

    it "parses a mixed bag" do
      parsed = Traject::MarcExtractor.parse_string_spec("245abcde:810:700|*4|bcd")

      assert_length 3, parsed

      #245abcde
      assert parsed["245"]
      assert_nil parsed["245"][:indicators]
      assert_equal %w{a b c d e}, parsed["245"][:subfields]

      #810
      assert parsed["810"]
      assert_nil parsed["810"][:indicators]
      assert_nil parsed["810"][:subfields]

      #700-*4bcd
      assert parsed["700"]
      assert_equal [nil, "4"], parsed["700"][:indicators]
      assert_equal %w{b c d}, parsed["700"][:subfields]
    end

    it "parses fixed field byte offsets" do
      parsed = Traject::MarcExtractor.parse_string_spec("005[5]:008[7-10]")

      assert_equal 5, parsed["005"][:bytes]
      assert_equal((7..10), parsed["008"][:bytes])
    end
  end

  describe "#extract_by_spec" do
    before do
      @record = MARC::Reader.new(support_file_path("manufacturing_consent.marc")).to_a.first
    end

    describe "extracts a basic case" do
      before do
        parsed_spec = Traject::MarcExtractor.parse_string_spec("700abcdef:856|*2|:505|1*|:245ba")
        @values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec)
      end

      it "returns an array" do
        assert_kind_of Array, @values
      end

      it "handles no subfields given" do
        a856s = @record.find_all {|f| f.tag == "856"}
        # find_all always returns an Array, which is truthy even when empty,
        # so the precondition has to be checked with an emptiness assertion.
        refute_empty a856s, "Record must have 856 fields for this test to work"

        # With no subfields in the spec, each 856 is expected as all of its
        # subfield values joined with a space.
        a856s.each do |field|
          assert @values.include?( field.subfields.collect(&:value).join(" "))
        end
      end

      it "does not have 505, due to non-matching indicators" do
        assert ! @values.find {|s| s.include? "propaganda model"}
      end

      it "respects original record order, for both fields and subfields" do
        expected = ["Manufacturing consent : the political economy of the mass media /",
                    "Chomsky, Noam.",
                    "Contributor biographical information http://www.loc.gov/catdir/bios/random051/2001050014.html",
                    "Publisher description http://www.loc.gov/catdir/description/random044/2001050014.html"]
        assert_equal expected, @values
      end
    end

    describe "extracts fixed fields" do
      it ", complete" do
        parsed_spec = Traject::MarcExtractor.parse_string_spec("001")
        values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec)

        assert_equal ["2710183"], values
      end

      it ", single byte offset" do
        parsed_spec = Traject::MarcExtractor.parse_string_spec("008[5]")
        values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec)

        assert_equal ["1"], values
      end

      it ", byte range" do
        parsed_spec = Traject::MarcExtractor.parse_string_spec("008[7-10]")
        values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec)

        assert_equal ["2002"], values
      end
    end

    # NOTE: :seperator is the spelling of the option key these tests pass to
    # extract_by_spec; it is runtime text, so it is kept as is.
    describe "seperator argument" do
      it "causes non-join when nil" do
        parsed_spec = Traject::MarcExtractor.parse_string_spec("245")
        values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec, :seperator => nil)

        assert_length 3, values
      end

      it "can be non-default" do
        parsed_spec = Traject::MarcExtractor.parse_string_spec("245")
        values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec, :seperator => "!! ")

        assert_length 1, values
        assert_equal "Manufacturing consent :!! the political economy of the mass media /!! Edward S. Herman and Noam Chomsky ; with a new introduction by the authors.", values.first
      end
    end

    describe "extracts alternate script" do
      before do
        @record = MARC::Reader.new(support_file_path("hebrew880s.marc")).to_a.first
        @parsed_spec = Traject::MarcExtractor.parse_string_spec("245b")
      end

      it "from default :include" do
        values = Traject::MarcExtractor.extract_by_spec(@record, @parsed_spec)

        assert_length 2, values # both the original and the 880
        assert_equal ["ben Marṭin Buber le-Aharon Daṿid Gordon /", "בין מרטין בובר לאהרן דוד גורדון /"], values
      end

      it "with :only" do
        values = Traject::MarcExtractor.extract_by_spec(@record, @parsed_spec, :alternate_script => :only)

        assert_length 1, values
        assert_equal ["בין מרטין בובר לאהרן דוד גורדון /"], values
      end

      it "with false" do
        values = Traject::MarcExtractor.extract_by_spec(@record, @parsed_spec, :alternate_script => false)

        assert_length 1, values
        assert_equal ["ben Marṭin Buber le-Aharon Daṿid Gordon /"], values
      end
    end

    it "works with string second arg too" do
      values = Traject::MarcExtractor.extract_by_spec(@record, "245abc")

      assert_length 1, values
      assert values.first.include?("Manufacturing consent"), "Extracted value includes title"
    end
  end
end
|