traject 2.3.4 → 3.0.0.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +16 -9
- data/CHANGES.md +74 -1
- data/Gemfile +2 -1
- data/README.md +104 -53
- data/Rakefile +8 -1
- data/doc/indexing_rules.md +79 -63
- data/doc/programmatic_use.md +218 -0
- data/doc/settings.md +28 -1
- data/doc/xml.md +134 -0
- data/lib/traject.rb +5 -0
- data/lib/traject/array_writer.rb +34 -0
- data/lib/traject/command_line.rb +18 -22
- data/lib/traject/debug_writer.rb +2 -5
- data/lib/traject/experimental_nokogiri_streaming_reader.rb +276 -0
- data/lib/traject/hashie/indifferent_access_fix.rb +25 -0
- data/lib/traject/indexer.rb +321 -92
- data/lib/traject/indexer/context.rb +39 -13
- data/lib/traject/indexer/marc_indexer.rb +30 -0
- data/lib/traject/indexer/nokogiri_indexer.rb +30 -0
- data/lib/traject/indexer/settings.rb +36 -53
- data/lib/traject/indexer/step.rb +27 -33
- data/lib/traject/macros/marc21.rb +37 -12
- data/lib/traject/macros/nokogiri_macros.rb +43 -0
- data/lib/traject/macros/transformation.rb +162 -0
- data/lib/traject/marc_extractor.rb +2 -0
- data/lib/traject/ndj_reader.rb +1 -1
- data/lib/traject/nokogiri_reader.rb +179 -0
- data/lib/traject/oai_pmh_nokogiri_reader.rb +159 -0
- data/lib/traject/solr_json_writer.rb +19 -12
- data/lib/traject/thread_pool.rb +13 -0
- data/lib/traject/util.rb +14 -2
- data/lib/traject/version.rb +1 -1
- data/test/debug_writer_test.rb +3 -3
- data/test/delimited_writer_test.rb +3 -3
- data/test/experimental_nokogiri_streaming_reader_test.rb +169 -0
- data/test/indexer/context_test.rb +23 -13
- data/test/indexer/error_handler_test.rb +59 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +46 -46
- data/test/indexer/macros/marc21/extract_all_marc_values_test.rb +1 -1
- data/test/indexer/macros/marc21/extract_marc_test.rb +19 -9
- data/test/indexer/macros/marc21/serialize_marc_test.rb +4 -4
- data/test/indexer/macros/to_field_test.rb +2 -2
- data/test/indexer/macros/transformation_test.rb +177 -0
- data/test/indexer/map_record_test.rb +2 -3
- data/test/indexer/nokogiri_indexer_test.rb +103 -0
- data/test/indexer/process_record_test.rb +55 -0
- data/test/indexer/process_with_test.rb +148 -0
- data/test/indexer/read_write_test.rb +52 -2
- data/test/indexer/settings_test.rb +34 -24
- data/test/indexer/to_field_test.rb +27 -2
- data/test/marc_extractor_test.rb +7 -7
- data/test/marc_reader_test.rb +4 -4
- data/test/nokogiri_reader_test.rb +158 -0
- data/test/oai_pmh_nokogiri_reader_test.rb +23 -0
- data/test/solr_json_writer_test.rb +24 -28
- data/test/test_helper.rb +8 -2
- data/test/test_support/namespace-test.xml +7 -0
- data/test/test_support/nokogiri_demo_config.rb +17 -0
- data/test/test_support/oai-pmh-one-record-2.xml +24 -0
- data/test/test_support/oai-pmh-one-record-first.xml +24 -0
- data/test/test_support/sample-oai-no-namespace.xml +197 -0
- data/test/test_support/sample-oai-pmh.xml +197 -0
- data/test/thread_pool_test.rb +38 -0
- data/test/translation_map_test.rb +3 -3
- data/test/translation_maps/ruby_map.rb +2 -1
- data/test/translation_maps/yaml_map.yaml +2 -1
- data/traject.gemspec +4 -11
- metadata +92 -6
@@ -1,6 +1,8 @@
|
|
1
1
|
# Represents the context of a specific record being indexed, passed
|
2
2
|
# to indexing logic blocks
|
3
3
|
#
|
4
|
+
# Arg source_record_id_proc is a lambda that takes one arg (indexer-specific source record),
|
5
|
+
# and returns an ID for it suitable for use in log messages.
|
4
6
|
class Traject::Indexer
|
5
7
|
class Context
|
6
8
|
def initialize(hash_init = {})
|
@@ -17,9 +19,13 @@ class Traject::Indexer
|
|
17
19
|
end
|
18
20
|
|
19
21
|
attr_accessor :clipboard, :output_hash, :logger
|
20
|
-
attr_accessor :index_step, :source_record, :settings
|
21
|
-
# 1-based position in stream of processed records.
|
22
|
+
attr_accessor :index_step, :source_record, :settings, :source_record_id_proc
|
23
|
+
# 'position' is a 1-based position in stream of processed records.
|
22
24
|
attr_accessor :position
|
25
|
+
# sometimes we have multiple inputs, input_name describes the current one, and
|
26
|
+
# position_in_input the position of the record in the current input -- both can
|
27
|
+
# sometimes be blank when we don't know.
|
28
|
+
attr_accessor :input_name, :position_in_input
|
23
29
|
|
24
30
|
# Should we be skipping this record?
|
25
31
|
attr_accessor :skipmessage
|
@@ -41,19 +47,39 @@ class Traject::Indexer
|
|
41
47
|
# in output messages, especially since this method may sometimes
|
42
48
|
# return empty string if info on record id is not available.
|
43
49
|
#
|
44
|
-
# Returns
|
50
|
+
# Returns id from source_record (if we can get it from a source_record_id_proc),
|
51
|
+
# then a slash,then output_hash["id"] -- if both
|
45
52
|
# are present. Otherwise may return just one, or even an empty string.
|
46
|
-
#
|
47
|
-
# Likely override this for a future XML or other source format version.
|
48
53
|
def source_record_id
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
54
|
+
source_record_id_proc && source_record_id_proc.call(source_record)
|
55
|
+
end
|
56
|
+
|
57
|
+
# a string label that can be used to refer to a particular record in log messages and
|
58
|
+
# exceptions. Includes various parts depending on what we got.
|
59
|
+
def record_inspect
|
60
|
+
str = "<"
|
61
|
+
|
62
|
+
str << "record ##{position}" if position
|
63
|
+
|
64
|
+
if input_name && position_in_input
|
65
|
+
str << " (#{input_name} ##{position_in_input}), "
|
66
|
+
elsif position
|
67
|
+
str << ", "
|
68
|
+
end
|
69
|
+
|
70
|
+
if source_id = source_record_id
|
71
|
+
str << "source_id:#{source_id} "
|
72
|
+
end
|
73
|
+
|
74
|
+
if output_id = self.output_hash["id"]
|
75
|
+
str << "output_id:#{[output_id].join(',')}"
|
76
|
+
end
|
77
|
+
|
78
|
+
str.chomp!(" ")
|
79
|
+
str.chomp!(",")
|
80
|
+
str << ">"
|
81
|
+
|
82
|
+
str
|
57
83
|
end
|
58
84
|
|
59
85
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Traject
|
2
|
+
class Indexer
|
3
|
+
# An indexer sub-class that includes "extract_marc" and other macros from
|
4
|
+
# Traject::Macros::Marc21, and also adds some marc-specific default settings.
|
5
|
+
class MarcIndexer < ::Traject::Indexer
|
6
|
+
include Traject::Macros::Marc21
|
7
|
+
|
8
|
+
def self.default_settings
|
9
|
+
@default_settings ||= begin
|
10
|
+
marc_settings = {
|
11
|
+
"reader_class_name" => "Traject::MarcReader",
|
12
|
+
"marc_source.type" => "binary",
|
13
|
+
}
|
14
|
+
super.merge(marc_settings)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# Overridden from base Indexer, to get MARC 001 for log messages.
|
19
|
+
def source_record_id_proc
|
20
|
+
@source_record_id_proc ||= lambda do |source_marc_record|
|
21
|
+
if ( source_marc_record &&
|
22
|
+
source_marc_record.kind_of?(MARC::Record) &&
|
23
|
+
source_marc_record['001'] )
|
24
|
+
source_marc_record['001'].value
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'traject/nokogiri_reader'
|
2
|
+
require 'traject/macros/nokogiri_macros'
|
3
|
+
require 'traject/oai_pmh_nokogiri_reader'
|
4
|
+
|
5
|
+
module Traject
|
6
|
+
class Indexer
|
7
|
+
# An indexer sub-class for XML, where the source records in the pipeline are
|
8
|
+
# Nokogiri::XML::Document objects. It sets a default reader of NokogiriReader, and
|
9
|
+
# includes Traject::Macros::Nokogiri (with `extract_xpath`).
|
10
|
+
#
|
11
|
+
# See docs on XML use. (TODO)
|
12
|
+
class NokogiriIndexer < ::Traject::Indexer
|
13
|
+
include Traject::Macros::NokogiriMacros
|
14
|
+
|
15
|
+
def self.default_settings
|
16
|
+
@default_settings ||= super.merge("reader_class_name" => "Traject::NokogiriReader")
|
17
|
+
end
|
18
|
+
|
19
|
+
# Overridden from base Indexer, try an `id` attribute or element on record.
|
20
|
+
def source_record_id_proc
  # Returns a memoized lambda that takes one source record and returns an id
  # for it (for use in log messages), or nil when the record is not a
  # Nokogiri::XML::Node or carries no id.
  @source_record_id_proc ||= lambda do |source_xml_record|
    if source_xml_record && source_xml_record.kind_of?(Nokogiri::XML::Node)
      # Prefer an `id` attribute; otherwise fall back to the text of a child
      # <id> element. The element lookup is done in its own statement because
      # `el = node.at_xpath('./id') && el.text` parses as
      # `el = (node.at_xpath('./id') && el.text)`, which calls #text on a
      # still-nil `el` and raises NoMethodError.
      source_xml_record['id'] || begin
        id_element = source_xml_record.at_xpath('./id')
        id_element && id_element.text
      end
    end
  end
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -11,33 +11,55 @@ class Traject::Indexer
|
|
11
11
|
#
|
12
12
|
# method #provide(key, value) is added, to do like settings[key] ||= value,
|
13
13
|
# set only if not already set (but unlike ||=, nil or false can count as already set)
|
14
|
+
# provide WILL overwrite defaults.
|
14
15
|
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
16
|
+
# Or you can use standard Hash `store` which will overwrite already set values as well
|
17
|
+
# as defaults.
|
18
|
+
#
|
19
|
+
# Has kind of a weird 'defaults' system, where you tell the hash what its defaults
|
20
|
+
# are, but they aren't actually loaded until asked for (or you can call fill_in_defaults!
|
21
|
+
# to load them all for inspection), to accommodate the `provide` API, where a caller wants to set
|
22
|
+
# only if not already set, but DO overwrite defaults.
|
22
23
|
class Settings < Hash
|
24
|
+
# Just a hash with indifferent access and hash initializer, to use for
|
25
|
+
# our defaults hash.
|
26
|
+
class DefaultsHash < Hash
|
27
|
+
include Hashie::Extensions::MergeInitializer # can init with hash
|
28
|
+
include Hashie::Extensions::IndifferentAccess
|
29
|
+
end
|
30
|
+
|
23
31
|
include Hashie::Extensions::MergeInitializer # can init with hash
|
24
32
|
include Hashie::Extensions::IndifferentAccess
|
25
33
|
|
26
34
|
def initialize(*args)
|
27
35
|
super
|
36
|
+
|
37
|
+
@defaults = {}
|
38
|
+
|
28
39
|
self.default_proc = lambda do |hash, key|
|
29
|
-
if
|
30
|
-
return hash[key] =
|
40
|
+
if @defaults.has_key?(key)
|
41
|
+
return hash[key] = @defaults[key]
|
31
42
|
else
|
32
43
|
return nil
|
33
44
|
end
|
34
45
|
end
|
46
|
+
|
47
|
+
@defaults_filled = Concurrent::AtomicBoolean.new(false)
|
48
|
+
end
|
49
|
+
|
50
|
+
def with_defaults(defaults)
|
51
|
+
@defaults = DefaultsHash.new(defaults).freeze
|
52
|
+
self
|
53
|
+
end
|
54
|
+
|
55
|
+
def keys
|
56
|
+
super + @defaults.keys
|
35
57
|
end
|
36
58
|
|
37
59
|
# a cautious store, which only saves key=value if
|
38
60
|
# there was not already a value for #key. Can be used
|
39
61
|
# to set settings that can be overridden on command line,
|
40
|
-
# or general first-set-wins settings.
|
62
|
+
# or general first-set-wins settings. DOES set over defaults.
|
41
63
|
def provide(key, value)
|
42
64
|
unless has_key? key
|
43
65
|
store(key, value)
|
@@ -54,50 +76,11 @@ class Traject::Indexer
|
|
54
76
|
replace(reverse_merge(other_hash))
|
55
77
|
end
|
56
78
|
|
79
|
+
# Normally defaults are filled in on-demand, but you can trigger it here --
|
80
|
+
# but if you later try to load traject config, `provide` will no longer
|
81
|
+
# overwrite defaults!
|
57
82
|
def fill_in_defaults!
|
58
|
-
self.reverse_merge!(
|
59
|
-
end
|
60
|
-
|
61
|
-
|
62
|
-
def self.mri_defaults
|
63
|
-
{
|
64
|
-
# Reader defaults
|
65
|
-
"reader_class_name" => "Traject::MarcReader",
|
66
|
-
"marc_source.type" => "binary",
|
67
|
-
|
68
|
-
# Writer defaults
|
69
|
-
"writer_class_name" => "Traject::SolrJsonWriter",
|
70
|
-
"solr_writer.batch_size" => 100,
|
71
|
-
"solr_writer.thread_pool" => 1,
|
72
|
-
|
73
|
-
# Threading and logging
|
74
|
-
"processing_thread_pool" => self.default_processing_thread_pool,
|
75
|
-
"log.batch_size.severity" => "info",
|
76
|
-
|
77
|
-
# how to post-process the accumulator
|
78
|
-
"allow_nil_values" => false,
|
79
|
-
"allow_duplicate_values" => true,
|
80
|
-
|
81
|
-
"allow_empty_fields" => false,
|
82
|
-
}
|
83
|
-
end
|
84
|
-
|
85
|
-
def self.jruby_defaults
|
86
|
-
{
|
87
|
-
'reader_class_name' => "Traject::Marc4JReader",
|
88
|
-
'marc4j_reader.permissive' => true
|
89
|
-
}
|
90
|
-
end
|
91
|
-
|
92
|
-
|
93
|
-
def self.defaults
|
94
|
-
return @@defaults if defined? @@defaults
|
95
|
-
default_settings = self.mri_defaults
|
96
|
-
if defined? JRUBY_VERSION
|
97
|
-
default_settings.merge! self.jruby_defaults
|
98
|
-
end
|
99
|
-
|
100
|
-
@@defaults = default_settings
|
83
|
+
self.reverse_merge!(@defaults)
|
101
84
|
end
|
102
85
|
|
103
86
|
def inspect
|
data/lib/traject/indexer/step.rb
CHANGED
@@ -30,15 +30,15 @@ class Traject::Indexer
|
|
30
30
|
# Set the arity of the lambda expression just once, when we define it
|
31
31
|
def lambda=(lam)
|
32
32
|
@lambda_arity = 0 # assume
|
33
|
+
@lambda = lam
|
34
|
+
|
33
35
|
return unless lam
|
34
36
|
|
35
|
-
@lambda = lam
|
36
37
|
if @lambda.is_a?(Proc)
|
37
38
|
@lambda_arity = @lambda.arity
|
38
39
|
else
|
39
40
|
raise NamingError.new("argument to each_record must be a block/lambda, not a #{lam.class} #{self.inspect}")
|
40
41
|
end
|
41
|
-
|
42
42
|
end
|
43
43
|
|
44
44
|
# raises if bad data
|
@@ -89,17 +89,17 @@ class Traject::Indexer
|
|
89
89
|
end
|
90
90
|
|
91
91
|
|
92
|
-
# An indexing step definition for a "to_field" step to specific
|
93
|
-
# field.
|
92
|
+
# An indexing step definition for a "to_field" step to specific
|
93
|
+
# field. The first field name argument can be an array of multiple field
|
94
|
+
# names, the processed values will be added to each one.
|
94
95
|
class ToFieldStep
|
95
|
-
|
96
|
-
attr_reader :lambda
|
96
|
+
attr_reader :field_name, :block, :source_location, :procs
|
97
97
|
|
98
|
-
def initialize(
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
98
|
+
def initialize(field_name, procs, block, source_location)
|
99
|
+
@field_name = field_name.freeze
|
100
|
+
@procs = procs.freeze
|
101
|
+
@block = block.freeze
|
102
|
+
@source_location = source_location.freeze
|
103
103
|
|
104
104
|
validate!
|
105
105
|
end
|
@@ -108,18 +108,13 @@ class Traject::Indexer
|
|
108
108
|
true
|
109
109
|
end
|
110
110
|
|
111
|
-
def lambda=(lam)
|
112
|
-
@lambda = lam
|
113
|
-
@lambda_arity = @lambda ? @lambda.arity : 0
|
114
|
-
end
|
115
|
-
|
116
111
|
def validate!
|
117
112
|
|
118
|
-
|
119
|
-
raise NamingError.new("to_field requires the field name (as a string) as the first argument at #{self.source_location})")
|
113
|
+
unless (field_name.is_a?(String) && ! field_name.empty?) || (field_name.is_a?(Array) && field_name.all? { |f| f.is_a?(String) && ! f.empty? })
|
114
|
+
raise NamingError.new("to_field requires the field name (as a string), or an array of such, as the first argument at #{self.source_location})")
|
120
115
|
end
|
121
116
|
|
122
|
-
[self.
|
117
|
+
[*self.procs, self.block].each do |proc|
|
123
118
|
# allow negative arity, meaning variable/optional, trust em on that.
|
124
119
|
# but for positive arrity, we need 2 or 3 args
|
125
120
|
if proc && (proc.arity == 0 || proc.arity == 1 || proc.arity > 3)
|
@@ -130,26 +125,22 @@ class Traject::Indexer
|
|
130
125
|
|
131
126
|
# Override inspect for developer debug messages
|
132
127
|
def inspect
|
133
|
-
"(to_field #{self.field_name} at #{self.source_location})"
|
128
|
+
"(to_field #{self.field_name.inspect} at #{self.source_location})"
|
134
129
|
end
|
135
130
|
|
136
131
|
def execute(context)
|
137
132
|
accumulator = []
|
138
|
-
|
133
|
+
source_record = context.source_record
|
139
134
|
|
140
|
-
|
141
|
-
|
142
|
-
|
135
|
+
[*self.procs, self.block].each do |aProc|
|
136
|
+
next unless aProc
|
137
|
+
if aProc.arity == 2
|
138
|
+
aProc.call(source_record, accumulator)
|
143
139
|
else
|
144
|
-
|
140
|
+
aProc.call(source_record, accumulator, context)
|
145
141
|
end
|
146
142
|
end
|
147
143
|
|
148
|
-
if @block
|
149
|
-
@block.call(sr, accumulator, context)
|
150
|
-
end
|
151
|
-
|
152
|
-
|
153
144
|
add_accumulator_to_context!(accumulator, context)
|
154
145
|
return accumulator
|
155
146
|
end
|
@@ -165,10 +156,13 @@ class Traject::Indexer
|
|
165
156
|
accumulator.compact! unless context.settings[ALLOW_NIL_VALUES]
|
166
157
|
return if accumulator.empty? and not (context.settings[ALLOW_EMPTY_FIELDS])
|
167
158
|
|
168
|
-
|
159
|
+
# field_name can actually be an array of field names
|
160
|
+
Array(field_name).each do |a_field_name|
|
161
|
+
context.output_hash[a_field_name] ||= []
|
169
162
|
|
170
|
-
|
171
|
-
|
163
|
+
existing_accumulator = context.output_hash[a_field_name].concat(accumulator)
|
164
|
+
existing_accumulator.uniq! unless context.settings[ALLOW_DUPLICATE_VALUES]
|
165
|
+
end
|
172
166
|
end
|
173
167
|
end
|
174
168
|
|
@@ -11,8 +11,8 @@ module Traject::Macros
|
|
11
11
|
# def specific to Marc21.
|
12
12
|
module Marc21
|
13
13
|
|
14
|
-
# A
|
15
|
-
# field/substring specification
|
14
|
+
# A macro that will extract data from marc according to a string
|
15
|
+
# field/substring specification.
|
16
16
|
#
|
17
17
|
# First argument is a string spec suitable for the MarcExtractor, see
|
18
18
|
# MarcExtractor::parse_string_spec.
|
@@ -20,25 +20,42 @@ module Traject::Macros
|
|
20
20
|
# Second arg is optional options, including options valid on MarcExtractor.new,
|
21
21
|
# and others. By default, will de-duplicate results, but see :allow_duplicates
|
22
22
|
#
|
23
|
-
#
|
23
|
+
#
|
24
|
+
# * :allow_duplicates => boolean, default false, if set to true then will avoid
|
25
|
+
# de-duplicating the result array (array.uniq!)
|
26
|
+
#
|
27
|
+
# * :separator: (default ' ' (space)), what to use when joining multiple subfield matches from
|
28
|
+
# same field. Set to `nil` to leave them as separate values (which is actually default if only
|
29
|
+
# one subfield is given in spec, like `100a`). See MarcExtractor docs for more info.
|
30
|
+
#
|
31
|
+
# * :alternate_script: (default true). True, automatically include
|
32
|
+
# 'alternate script' MARC 880 linked fields corresponding to matched specifications. `false`, do
|
33
|
+
# not include. `:only` include _only_ linked 880s corresponding to spec, not base tags.
|
34
|
+
#
|
35
|
+
# ## Soft-Deprecated options: post-processing transformations
|
36
|
+
#
|
37
|
+
# These don't produce a deprecation warning and there is no planned horizon for them to go away, but the
|
38
|
+
# alternative of using additional transformation macros (from Traject::Macros::Transformation) composed with
|
39
|
+
# extract_marc is recommended.
|
40
|
+
#
|
41
|
+
# * :first => true: take only first value. **Instead**, use `extract_marc(whatever), first_only`
|
24
42
|
#
|
25
43
|
# * :translation_map => String: translate with named translation map looked up in load
|
26
|
-
# path, uses Tranject::TranslationMap.new(translation_map_arg)
|
44
|
+
# path, uses Traject::TranslationMap.new(translation_map_arg).
|
45
|
+
# **Instead**, use `extract_marc(whatever), translation_map(translation_map_arg)`
|
27
46
|
#
|
28
47
|
# * :trim_punctuation => true; trims leading/trailing punctuation using standard algorithms that
|
29
|
-
# have shown themselves useful with Marc, using Marc21.trim_punctuation
|
48
|
+
# have shown themselves useful with Marc, using Marc21.trim_punctuation. **Instead**, use
|
49
|
+
# `extract_marc(whatever), trim_punctuation`
|
30
50
|
#
|
31
|
-
# * :default => String: if otherwise empty, add default value
|
32
|
-
#
|
33
|
-
# * :allow_duplicates => boolean, default false, if set to true then will avoid
|
34
|
-
# de-duplicating the result array (array.uniq!)
|
51
|
+
# * :default => String: if otherwise empty, add default value. **Instead**, use `extract_marc(whatever), default("default value")`
|
35
52
|
#
|
36
53
|
#
|
37
54
|
# Examples:
|
38
55
|
#
|
39
|
-
# to_field("title"), extract_marc("245abcd",
|
40
|
-
# to_field("id"), extract_marc("001",
|
41
|
-
# to_field("geo"), extract_marc("040a", :separator => nil,
|
56
|
+
# to_field("title"), extract_marc("245abcd"), trim_punctuation
|
57
|
+
# to_field("id"), extract_marc("001"), first_only
|
58
|
+
# to_field("geo"), extract_marc("040a", :separator => nil), translation_map("marc040")
|
42
59
|
#
|
43
60
|
# If you'd like extract_marc functionality but you're not creating an indexer
|
44
61
|
# step, see Traject::Macros::Marc21.extract_marc_from module method.
|
@@ -122,6 +139,14 @@ module Traject::Macros
|
|
122
139
|
end
|
123
140
|
end
|
124
141
|
|
142
|
+
# A transformation macro version of trim_punctuation -- heuristics for trimming punctuation
|
143
|
+
# from AACR2/MARC style values, to get bare values.
|
144
|
+
def trim_punctuation
|
145
|
+
lambda do |rec, accumulator|
|
146
|
+
accumulator.collect! {|s| Marc21.trim_punctuation(s)}
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
125
150
|
|
126
151
|
# A list of symbols that are valid keys in the options hash
|
127
152
|
EXTRACT_MARC_VALID_OPTIONS = [:first, :trim_punctuation, :default,
|