traject 2.3.4 → 3.0.0.alpha.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +16 -9
- data/CHANGES.md +74 -1
- data/Gemfile +2 -1
- data/README.md +104 -53
- data/Rakefile +8 -1
- data/doc/indexing_rules.md +79 -63
- data/doc/programmatic_use.md +218 -0
- data/doc/settings.md +28 -1
- data/doc/xml.md +134 -0
- data/lib/traject.rb +5 -0
- data/lib/traject/array_writer.rb +34 -0
- data/lib/traject/command_line.rb +18 -22
- data/lib/traject/debug_writer.rb +2 -5
- data/lib/traject/experimental_nokogiri_streaming_reader.rb +276 -0
- data/lib/traject/hashie/indifferent_access_fix.rb +25 -0
- data/lib/traject/indexer.rb +321 -92
- data/lib/traject/indexer/context.rb +39 -13
- data/lib/traject/indexer/marc_indexer.rb +30 -0
- data/lib/traject/indexer/nokogiri_indexer.rb +30 -0
- data/lib/traject/indexer/settings.rb +36 -53
- data/lib/traject/indexer/step.rb +27 -33
- data/lib/traject/macros/marc21.rb +37 -12
- data/lib/traject/macros/nokogiri_macros.rb +43 -0
- data/lib/traject/macros/transformation.rb +162 -0
- data/lib/traject/marc_extractor.rb +2 -0
- data/lib/traject/ndj_reader.rb +1 -1
- data/lib/traject/nokogiri_reader.rb +179 -0
- data/lib/traject/oai_pmh_nokogiri_reader.rb +159 -0
- data/lib/traject/solr_json_writer.rb +19 -12
- data/lib/traject/thread_pool.rb +13 -0
- data/lib/traject/util.rb +14 -2
- data/lib/traject/version.rb +1 -1
- data/test/debug_writer_test.rb +3 -3
- data/test/delimited_writer_test.rb +3 -3
- data/test/experimental_nokogiri_streaming_reader_test.rb +169 -0
- data/test/indexer/context_test.rb +23 -13
- data/test/indexer/error_handler_test.rb +59 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +46 -46
- data/test/indexer/macros/marc21/extract_all_marc_values_test.rb +1 -1
- data/test/indexer/macros/marc21/extract_marc_test.rb +19 -9
- data/test/indexer/macros/marc21/serialize_marc_test.rb +4 -4
- data/test/indexer/macros/to_field_test.rb +2 -2
- data/test/indexer/macros/transformation_test.rb +177 -0
- data/test/indexer/map_record_test.rb +2 -3
- data/test/indexer/nokogiri_indexer_test.rb +103 -0
- data/test/indexer/process_record_test.rb +55 -0
- data/test/indexer/process_with_test.rb +148 -0
- data/test/indexer/read_write_test.rb +52 -2
- data/test/indexer/settings_test.rb +34 -24
- data/test/indexer/to_field_test.rb +27 -2
- data/test/marc_extractor_test.rb +7 -7
- data/test/marc_reader_test.rb +4 -4
- data/test/nokogiri_reader_test.rb +158 -0
- data/test/oai_pmh_nokogiri_reader_test.rb +23 -0
- data/test/solr_json_writer_test.rb +24 -28
- data/test/test_helper.rb +8 -2
- data/test/test_support/namespace-test.xml +7 -0
- data/test/test_support/nokogiri_demo_config.rb +17 -0
- data/test/test_support/oai-pmh-one-record-2.xml +24 -0
- data/test/test_support/oai-pmh-one-record-first.xml +24 -0
- data/test/test_support/sample-oai-no-namespace.xml +197 -0
- data/test/test_support/sample-oai-pmh.xml +197 -0
- data/test/thread_pool_test.rb +38 -0
- data/test/translation_map_test.rb +3 -3
- data/test/translation_maps/ruby_map.rb +2 -1
- data/test/translation_maps/yaml_map.yaml +2 -1
- data/traject.gemspec +4 -11
- metadata +92 -6
@@ -1,6 +1,8 @@
|
|
1
1
|
# Represents the context of a specific record being indexed, passed
|
2
2
|
# to indexing logic blocks
|
3
3
|
#
|
4
|
+
# Arg source_record_id_proc is a lambda that takes one arg (indexer-specific source record),
|
5
|
+
# and returns an ID for it suitable for use in log messages.
|
4
6
|
class Traject::Indexer
|
5
7
|
class Context
|
6
8
|
def initialize(hash_init = {})
|
@@ -17,9 +19,13 @@ class Traject::Indexer
|
|
17
19
|
end
|
18
20
|
|
19
21
|
attr_accessor :clipboard, :output_hash, :logger
|
20
|
-
attr_accessor :index_step, :source_record, :settings
|
21
|
-
# 1-based position in stream of processed records.
|
22
|
+
attr_accessor :index_step, :source_record, :settings, :source_record_id_proc
|
23
|
+
# 'position' is a 1-based position in stream of processed records.
|
22
24
|
attr_accessor :position
|
25
|
+
# sometimes we have multiple inputs, input_name describes the current one, and
|
26
|
+
# position_in_input the position of the record in the current input -- both can
|
27
|
+
# sometimes be blank when we don't know.
|
28
|
+
attr_accessor :input_name, :position_in_input
|
23
29
|
|
24
30
|
# Should we be skipping this record?
|
25
31
|
attr_accessor :skipmessage
|
@@ -41,19 +47,39 @@ class Traject::Indexer
|
|
41
47
|
# in output messages, especially since this method may sometimes
|
42
48
|
# return empty string if info on record id is not available.
|
43
49
|
#
|
44
|
-
# Returns
|
50
|
+
# Returns id from source_record (if we can get it from a source_record_id_proc),
|
51
|
+
# then a slash,then output_hash["id"] -- if both
|
45
52
|
# are present. Otherwise may return just one, or even an empty string.
|
46
|
-
#
|
47
|
-
# Likely override this for a future XML or other source format version.
|
48
53
|
# Returns the id of the source record as computed by source_record_id_proc,
# or a falsy value when no source_record_id_proc has been set.
def source_record_id
id_proc = source_record_id_proc
id_proc && id_proc.call(source_record)
end
|
56
|
+
|
57
|
+
# a string label that can be used to refer to a particular record in log messages and
|
58
|
+
# exceptions. Includes various parts depending on what we got.
|
59
|
+
# Builds a label for the current record, for use in log messages and
# exceptions. Depending on what is available it includes the stream
# position, the input name with position in that input, the source record
# id, and the output "id" value(s), wrapped in angle brackets.
def record_inspect
label = +"<"
label << "record ##{position}" if position

if input_name && position_in_input
label << " (#{input_name} ##{position_in_input}), "
elsif position
label << ", "
end

source_id = source_record_id
label << "source_id:#{source_id} " if source_id

output_id = output_hash["id"]
label << "output_id:#{[output_id].join(',')}" if output_id

# drop a dangling separator left by whichever part was added last
label.chomp!(" ")
label.chomp!(",")
label << ">"
end
|
58
84
|
|
59
85
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Traject
|
2
|
+
class Indexer
|
3
|
+
# An indexer sub-class that includes "extract_marc" and other macros from
|
4
|
+
# Traject::Macros::Marc21, and also adds some marc-specific default settings.
|
5
|
+
class MarcIndexer < ::Traject::Indexer
|
6
|
+
include Traject::Macros::Marc21
|
7
|
+
|
8
|
+
# Default settings for MARC indexing: the inherited defaults with the
# reader class and MARC source type merged on top. Memoized per class.
def self.default_settings
@default_settings ||= super.merge({
"reader_class_name" => "Traject::MarcReader",
"marc_source.type" => "binary"
})
end
|
17
|
+
|
18
|
+
# Overridden from base Indexer, to get MARC 001 for log messages.
|
19
|
+
# Memoized lambda taking one source record. For a MARC::Record that has an
# 001 field it returns that field's value; for anything else it returns nil.
def source_record_id_proc
@source_record_id_proc ||= lambda do |source_marc_record|
next unless source_marc_record.kind_of?(MARC::Record)

control_field = source_marc_record['001']
control_field.value if control_field
end
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'traject/nokogiri_reader'
|
2
|
+
require 'traject/macros/nokogiri_macros'
|
3
|
+
require 'traject/oai_pmh_nokogiri_reader'
|
4
|
+
|
5
|
+
module Traject
|
6
|
+
class Indexer
|
7
|
+
# An indexer sub-class for XML, where the source records in the pipeline are
|
8
|
+
# Nokogiri::XML::Document objects. It sets a default reader of NokogiriReader, and
|
9
|
+
# includes Traject::Macros::Nokogiri (with `extract_xpath`).
|
10
|
+
#
|
11
|
+
# See docs on XML use. (TODO)
|
12
|
+
class NokogiriIndexer < ::Traject::Indexer
|
13
|
+
include Traject::Macros::NokogiriMacros
|
14
|
+
|
15
|
+
# Default settings for XML indexing: the inherited defaults with the reader
# class switched to Traject::NokogiriReader. Memoized per class.
def self.default_settings
return @default_settings if @default_settings

@default_settings = super.merge("reader_class_name" => "Traject::NokogiriReader")
end
|
18
|
+
|
19
|
+
# Overridden from base Indexer, try an `id` attribute or element on record.
|
20
|
+
# Memoized lambda taking one source record. For a Nokogiri::XML::Node it
# returns the node's `id` attribute, or failing that the text of a direct
# `id` child element, or nil when neither exists. Returns nil for anything
# that is not a Nokogiri::XML::Node.
def source_record_id_proc
@source_record_id_proc ||= lambda do |source_xml_record|
if source_xml_record.kind_of?(Nokogiri::XML::Node)
# Look the element up in its own statement: written inline as
# `(el = node.at_xpath('./id') && el.text)`, `&&` binds tighter than `=`,
# so `el` is still nil when `el.text` is evaluated.
id_element = source_xml_record['id'] ? nil : source_xml_record.at_xpath('./id')
source_xml_record['id'] || (id_element && id_element.text)
end
end
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -11,33 +11,55 @@ class Traject::Indexer
|
|
11
11
|
#
|
12
12
|
# method #provide(key, value) is added, to do like settings[key] ||= value,
|
13
13
|
# set only if not already set (but unlike ||=, nil or false can count as already set)
|
14
|
+
# provide WILL overwrite defaults.
|
14
15
|
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
16
|
+
# Or you can use standard Hash `store` which will overwrite already set values as well
|
17
|
+
# as defaults.
|
18
|
+
#
|
19
|
+
# Has kind of a weird 'defaults' system, where you tell the hash what its defaults
|
20
|
+
# are, but they aren't actually loaded until asked for (or you can call fill_in_defaults!
|
21
|
+
# to load em all for inspection), to accommodate the `provide` API, where a caller wants to set
|
22
|
+
# only if not already set, but DO overwrite defaults.
|
22
23
|
class Settings < Hash
|
24
|
+
# Just a hash with indifferent access and hash initializer, to use for
|
25
|
+
# our defaults hash.
|
26
|
+
class DefaultsHash < Hash
|
27
|
+
include Hashie::Extensions::MergeInitializer # can init with hash
|
28
|
+
include Hashie::Extensions::IndifferentAccess
|
29
|
+
end
|
30
|
+
|
23
31
|
include Hashie::Extensions::MergeInitializer # can init with hash
|
24
32
|
include Hashie::Extensions::IndifferentAccess
|
25
33
|
|
26
34
|
def initialize(*args)
|
27
35
|
super
|
36
|
+
|
37
|
+
@defaults = {}
|
38
|
+
|
28
39
|
self.default_proc = lambda do |hash, key|
|
29
|
-
if
|
30
|
-
return hash[key] =
|
40
|
+
if @defaults.has_key?(key)
|
41
|
+
return hash[key] = @defaults[key]
|
31
42
|
else
|
32
43
|
return nil
|
33
44
|
end
|
34
45
|
end
|
46
|
+
|
47
|
+
@defaults_filled = Concurrent::AtomicBoolean.new(false)
|
48
|
+
end
|
49
|
+
|
50
|
+
def with_defaults(defaults)
|
51
|
+
@defaults = DefaultsHash.new(defaults).freeze
|
52
|
+
self
|
53
|
+
end
|
54
|
+
|
55
|
+
# All keys that have a value: those stored in the hash itself plus those
# that exist only in @defaults. Uses a set union so a key that is both
# stored and defaulted (e.g. after the default_proc copied the default into
# the hash on first read) is listed once rather than twice.
def keys
super | @defaults.keys
end
|
36
58
|
|
37
59
|
# a cautious store, which only saves key=value if
|
38
60
|
# there was not already a value for #key. Can be used
|
39
61
|
# to set settings that can be overridden on command line,
|
40
|
-
# or general first-set-wins settings.
|
62
|
+
# or general first-set-wins settings. DOES set over defaults.
|
41
63
|
def provide(key, value)
|
42
64
|
unless has_key? key
|
43
65
|
store(key, value)
|
@@ -54,50 +76,11 @@ class Traject::Indexer
|
|
54
76
|
replace(reverse_merge(other_hash))
|
55
77
|
end
|
56
78
|
|
79
|
+
# Normally defaults are filled in on-demand, but you can trigger it here --
|
80
|
+
# but if you later try to load traject config, `provide` will no longer
|
81
|
+
# overwrite defaults!
|
57
82
|
def fill_in_defaults!
|
58
|
-
self.reverse_merge!(
|
59
|
-
end
|
60
|
-
|
61
|
-
|
62
|
-
def self.mri_defaults
|
63
|
-
{
|
64
|
-
# Reader defaults
|
65
|
-
"reader_class_name" => "Traject::MarcReader",
|
66
|
-
"marc_source.type" => "binary",
|
67
|
-
|
68
|
-
# Writer defaults
|
69
|
-
"writer_class_name" => "Traject::SolrJsonWriter",
|
70
|
-
"solr_writer.batch_size" => 100,
|
71
|
-
"solr_writer.thread_pool" => 1,
|
72
|
-
|
73
|
-
# Threading and logging
|
74
|
-
"processing_thread_pool" => self.default_processing_thread_pool,
|
75
|
-
"log.batch_size.severity" => "info",
|
76
|
-
|
77
|
-
# how to post-process the accumulator
|
78
|
-
"allow_nil_values" => false,
|
79
|
-
"allow_duplicate_values" => true,
|
80
|
-
|
81
|
-
"allow_empty_fields" => false,
|
82
|
-
}
|
83
|
-
end
|
84
|
-
|
85
|
-
def self.jruby_defaults
|
86
|
-
{
|
87
|
-
'reader_class_name' => "Traject::Marc4JReader",
|
88
|
-
'marc4j_reader.permissive' => true
|
89
|
-
}
|
90
|
-
end
|
91
|
-
|
92
|
-
|
93
|
-
def self.defaults
|
94
|
-
return @@defaults if defined? @@defaults
|
95
|
-
default_settings = self.mri_defaults
|
96
|
-
if defined? JRUBY_VERSION
|
97
|
-
default_settings.merge! self.jruby_defaults
|
98
|
-
end
|
99
|
-
|
100
|
-
@@defaults = default_settings
|
83
|
+
self.reverse_merge!(@defaults)
|
101
84
|
end
|
102
85
|
|
103
86
|
def inspect
|
data/lib/traject/indexer/step.rb
CHANGED
@@ -30,15 +30,15 @@ class Traject::Indexer
|
|
30
30
|
# Set the arity of the lambda expression just once, when we define it
|
31
31
|
# Stores the step's lambda and records its arity once, at assignment time.
# A nil/false value is stored with arity 0. Anything truthy that is not a
# Proc is still stored, and then NamingError is raised.
def lambda=(lam)
@lambda = lam
@lambda_arity = 0 # assumed until we know better
return unless lam

unless lam.is_a?(Proc)
raise NamingError.new("argument to each_record must be a block/lambda, not a #{lam.class} #{self.inspect}")
end

@lambda_arity = lam.arity
end
|
43
43
|
|
44
44
|
# raises if bad data
|
@@ -89,17 +89,17 @@ class Traject::Indexer
|
|
89
89
|
end
|
90
90
|
|
91
91
|
|
92
|
-
# An indexing step definition for a "to_field" step to specific
|
93
|
-
# field.
|
92
|
+
# An indexing step definition for a "to_field" step to specific
|
93
|
+
# field. The first field name argument can be an array of multiple field
|
94
|
+
# names, the processed values will be added to each one.
|
94
95
|
class ToFieldStep
|
95
|
-
|
96
|
-
attr_reader :lambda
|
96
|
+
attr_reader :field_name, :block, :source_location, :procs
|
97
97
|
|
98
|
-
def initialize(
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
98
|
+
def initialize(field_name, procs, block, source_location)
|
99
|
+
@field_name = field_name.freeze
|
100
|
+
@procs = procs.freeze
|
101
|
+
@block = block.freeze
|
102
|
+
@source_location = source_location.freeze
|
103
103
|
|
104
104
|
validate!
|
105
105
|
end
|
@@ -108,18 +108,13 @@ class Traject::Indexer
|
|
108
108
|
true
|
109
109
|
end
|
110
110
|
|
111
|
-
def lambda=(lam)
|
112
|
-
@lambda = lam
|
113
|
-
@lambda_arity = @lambda ? @lambda.arity : 0
|
114
|
-
end
|
115
|
-
|
116
111
|
def validate!
|
117
112
|
|
118
|
-
|
119
|
-
raise NamingError.new("to_field requires the field name (as a string) as the first argument at #{self.source_location})")
|
113
|
+
unless (field_name.is_a?(String) && ! field_name.empty?) || (field_name.is_a?(Array) && field_name.all? { |f| f.is_a?(String) && ! f.empty? })
|
114
|
+
raise NamingError.new("to_field requires the field name (as a string), or an array of such, as the first argument at #{self.source_location})")
|
120
115
|
end
|
121
116
|
|
122
|
-
[self.
|
117
|
+
[*self.procs, self.block].each do |proc|
|
123
118
|
# allow negative arity, meaning variable/optional, trust em on that.
|
124
119
|
# but for positive arity, we need 2 or 3 args
|
125
120
|
if proc && (proc.arity == 0 || proc.arity == 1 || proc.arity > 3)
|
@@ -130,26 +125,22 @@ class Traject::Indexer
|
|
130
125
|
|
131
126
|
# Override inspect for developer debug messages
|
132
127
|
# Developer-facing description of the step: the field name (or array of
# field names) in inspect form, plus where the step was defined.
def inspect
format("(to_field %s at %s)", self.field_name.inspect, self.source_location)
end
|
135
130
|
|
136
131
|
# Runs every proc in `procs`, then `block`, against the context's source
# record, all sharing one accumulator array. Arity-2 procs get
# (record, accumulator); every other proc also gets the context. Missing
# (nil) entries are skipped. The accumulator is then handed to
# add_accumulator_to_context! and returned.
def execute(context)
accumulator = []
source_record = context.source_record

[*self.procs, self.block].each do |step_proc|
next unless step_proc

call_args = [source_record, accumulator]
call_args << context unless step_proc.arity == 2
step_proc.call(*call_args)
end

add_accumulator_to_context!(accumulator, context)
accumulator
end
|
@@ -165,10 +156,13 @@ class Traject::Indexer
|
|
165
156
|
accumulator.compact! unless context.settings[ALLOW_NIL_VALUES]
|
166
157
|
return if accumulator.empty? and not (context.settings[ALLOW_EMPTY_FIELDS])
|
167
158
|
|
168
|
-
|
159
|
+
# field_name can actually be an array of field names
|
160
|
+
Array(field_name).each do |a_field_name|
|
161
|
+
context.output_hash[a_field_name] ||= []
|
169
162
|
|
170
|
-
|
171
|
-
|
163
|
+
existing_accumulator = context.output_hash[a_field_name].concat(accumulator)
|
164
|
+
existing_accumulator.uniq! unless context.settings[ALLOW_DUPLICATE_VALUES]
|
165
|
+
end
|
172
166
|
end
|
173
167
|
end
|
174
168
|
|
@@ -11,8 +11,8 @@ module Traject::Macros
|
|
11
11
|
# def specific to Marc21.
|
12
12
|
module Marc21
|
13
13
|
|
14
|
-
# A
|
15
|
-
# field/substring specification
|
14
|
+
# A macro that will extract data from marc according to a string
|
15
|
+
# field/substring specification.
|
16
16
|
#
|
17
17
|
# First argument is a string spec suitable for the MarcExtractor, see
|
18
18
|
# MarcExtractor::parse_string_spec.
|
@@ -20,25 +20,42 @@ module Traject::Macros
|
|
20
20
|
# Second arg is optional options, including options valid on MarcExtractor.new,
|
21
21
|
# and others. By default, will de-duplicate results, but see :allow_duplicates
|
22
22
|
#
|
23
|
-
#
|
23
|
+
#
|
24
|
+
# * :allow_duplicates => boolean, default false, if set to true then will avoid
|
25
|
+
# de-duplicating the result array (array.uniq!)
|
26
|
+
#
|
27
|
+
# * :separator: (default ' ' (space)), what to use when joining multiple subfield matches from
|
28
|
+
# same field. Set to `nil` to leave them as separate values (which is actually default if only
|
29
|
+
# one subfield is given in spec, like `100a`). See MarcExtractor docs for more info.
|
30
|
+
#
|
31
|
+
# * :alternate_script: (default true). True, automatically include
|
32
|
+
# 'alternate script' MARC 880 linked fields corresponding to matched specifications. `false`, do
|
33
|
+
# not include. `:only` include _only_ linked 880s corresponding to spec, not base tags.
|
34
|
+
#
|
35
|
+
# ## Soft-Deprecated options: post-processing transformations
|
36
|
+
#
|
37
|
+
# These don't produce a deprecation warning and there is no planned horizon for them to go away, but the
|
38
|
+
# alternative of using additional transformation macros (from Traject::Macros::Transformation) composed with
|
39
|
+
# extract_marc is recommended.
|
40
|
+
#
|
41
|
+
# * :first => true: take only first value. **Instead**, use `extract_marc(whatever), first_only`
|
24
42
|
#
|
25
43
|
# * :translation_map => String: translate with named translation map looked up in load
|
26
|
-
# path, uses Tranject::TranslationMap.new(translation_map_arg)
|
44
|
+
# path, uses Traject::TranslationMap.new(translation_map_arg).
|
45
|
+
# **Instead**, use `extract_marc(whatever), translation_map(translation_map_arg)`
|
27
46
|
#
|
28
47
|
# * :trim_punctuation => true; trims leading/trailing punctuation using standard algorithms that
|
29
|
-
# have shown themselves useful with Marc, using Marc21.trim_punctuation
|
48
|
+
# have shown themselves useful with Marc, using Marc21.trim_punctuation. **Instead**, use
|
49
|
+
# `extract_marc(whatever), trim_punctuation`
|
30
50
|
#
|
31
|
-
# * :default => String: if otherwise empty, add default value
|
32
|
-
#
|
33
|
-
# * :allow_duplicates => boolean, default false, if set to true then will avoid
|
34
|
-
# de-duplicating the result array (array.uniq!)
|
51
|
+
# * :default => String: if otherwise empty, add default value. **Instead**, use `extract_marc(whatever), default("default value")`
|
35
52
|
#
|
36
53
|
#
|
37
54
|
# Examples:
|
38
55
|
#
|
39
|
-
# to_field("title"), extract_marc("245abcd",
|
40
|
-
# to_field("id"), extract_marc("001",
|
41
|
-
# to_field("geo"), extract_marc("040a", :separator => nil,
|
56
|
+
# to_field("title"), extract_marc("245abcd"), trim_punctuation
|
57
|
+
# to_field("id"), extract_marc("001"), first_only
|
58
|
+
# to_field("geo"), extract_marc("040a", :separator => nil), translation_map("marc040")
|
42
59
|
#
|
43
60
|
# If you'd like extract_marc functionality but you're not creating an indexer
|
44
61
|
# step, see Traject::Macros::Marc21.extract_marc_from module method.
|
@@ -122,6 +139,14 @@ module Traject::Macros
|
|
122
139
|
end
|
123
140
|
end
|
124
141
|
|
142
|
+
# A transformation macro version of trim_punctuation -- heuristics for trimming punctuation
|
143
|
+
# from AACR2/MARC style values, to get bare values.
|
144
|
+
# Transformation macro: returns a two-argument lambda that replaces every
# value in the accumulator, in place, with Marc21.trim_punctuation(value).
def trim_punctuation
->(_record, accumulator) do
accumulator.map! { |value| Marc21.trim_punctuation(value) }
end
end
|
149
|
+
|
125
150
|
|
126
151
|
# A list of symbols that are valid keys in the options hash
|
127
152
|
EXTRACT_MARC_VALID_OPTIONS = [:first, :trim_punctuation, :default,
|