traject 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +1 -0
  3. data/README.md +183 -191
  4. data/bench/bench.rb +1 -1
  5. data/doc/batch_execution.md +14 -0
  6. data/doc/extending.md +14 -12
  7. data/doc/indexing_rules.md +265 -0
  8. data/lib/traject/command_line.rb +12 -41
  9. data/lib/traject/debug_writer.rb +32 -13
  10. data/lib/traject/indexer.rb +101 -24
  11. data/lib/traject/indexer/settings.rb +18 -17
  12. data/lib/traject/json_writer.rb +32 -11
  13. data/lib/traject/line_writer.rb +6 -6
  14. data/lib/traject/macros/basic.rb +1 -1
  15. data/lib/traject/macros/marc21.rb +17 -13
  16. data/lib/traject/macros/marc21_semantics.rb +27 -25
  17. data/lib/traject/macros/marc_format_classifier.rb +39 -25
  18. data/lib/traject/marc4j_reader.rb +36 -22
  19. data/lib/traject/marc_extractor.rb +79 -75
  20. data/lib/traject/marc_reader.rb +33 -25
  21. data/lib/traject/mock_reader.rb +9 -10
  22. data/lib/traject/ndj_reader.rb +7 -7
  23. data/lib/traject/null_writer.rb +1 -1
  24. data/lib/traject/qualified_const_get.rb +12 -2
  25. data/lib/traject/solrj_writer.rb +61 -52
  26. data/lib/traject/thread_pool.rb +45 -45
  27. data/lib/traject/translation_map.rb +59 -27
  28. data/lib/traject/util.rb +3 -3
  29. data/lib/traject/version.rb +1 -1
  30. data/lib/traject/yaml_writer.rb +1 -1
  31. data/test/debug_writer_test.rb +7 -7
  32. data/test/indexer/each_record_test.rb +4 -4
  33. data/test/indexer/macros_marc21_semantics_test.rb +12 -12
  34. data/test/indexer/macros_marc21_test.rb +10 -10
  35. data/test/indexer/macros_test.rb +1 -1
  36. data/test/indexer/map_record_test.rb +6 -6
  37. data/test/indexer/read_write_test.rb +43 -4
  38. data/test/indexer/settings_test.rb +2 -2
  39. data/test/indexer/to_field_test.rb +8 -8
  40. data/test/marc4j_reader_test.rb +4 -4
  41. data/test/marc_extractor_test.rb +33 -25
  42. data/test/marc_format_classifier_test.rb +3 -3
  43. data/test/marc_reader_test.rb +2 -2
  44. data/test/test_helper.rb +3 -3
  45. data/test/test_support/demo_config.rb +52 -48
  46. data/test/translation_map_test.rb +22 -4
  47. data/test/translation_maps/bad_ruby.rb +2 -2
  48. data/test/translation_maps/both_map.rb +1 -1
  49. data/test/translation_maps/default_literal.rb +1 -1
  50. data/test/translation_maps/default_passthrough.rb +1 -1
  51. data/test/translation_maps/ruby_map.rb +1 -1
  52. metadata +7 -31
  53. data/doc/macros.md +0 -103
@@ -1,21 +1,40 @@
1
1
  require 'traject/line_writer'
2
2
 
3
- # A writer for Traject::Indexer that outputs each record as a series of
4
- # lines, prefixed by the id, one for each field and it's values.
5
- # Multiple values are separated by pipes
3
+ # The Traject::DebugWriter produces a simple, human-readable output format that's
4
+ # also amenable to simple computer processing (e.g., with a simple grep).
5
+ # It's the output format used when you pass the --debug-mode switch to traject on the command line.
6
6
  #
7
- # Applicable settings:
7
+ # Output format is three columns: id, output field, values (multiple
8
+ # values separated by '|'), and looks something like:
8
9
  #
9
- # - 'output_file' -- the name of the file to output to
10
- # - 'output_stream' -- alternately, the IO stream
11
- # - 'debug_writer.idfield' -- the solr field from which to pull the record ID (default: 'id')
12
- # - 'debug_writer.format' -- How to format the id/solr field/values (default: '%-12s %-25s %s')
13
-
14
-
10
+ # 000001580 edition [1st ed.]
11
+ # 000001580 format Book | Online | Print
12
+ # 000001580 geo Great Britain
13
+ # 000001580 id 000001580
14
+ # 000001580 isbn 0631126902
15
+ #
16
+ # ## Settings
17
+ #
18
+ # * 'output_file' -- the name of the file to output to (command line -o shortcut).
19
+ # * 'output_stream' -- alternately, the IO stream
20
+ # * 'debug_writer.idfield' -- the solr field from which to pull the record ID (default: 'id')
21
+ # * 'debug_writer.format' -- How to format the id/solr field/values (default: '%-12s %-25s %s')
22
+ #
23
+ # By default, with neither output_file nor output_stream provided, writes to stdout, which
24
+ # can be useful for debugging diagnosis.
25
+ #
26
+ # ## Example configuration file
27
+ #
28
+ # require 'traject/debug_writer'
29
+ #
30
+ # settings do
31
+ # provide "writer_class_name", "Traject::DebugWriter"
32
+ # provide "output_file", "out.txt"
33
+ # end
15
34
  class Traject::DebugWriter < Traject::LineWriter
16
35
  DEFAULT_FORMAT = '%-12s %-25s %s'
17
36
  DEFAULT_IDFIELD = 'id'
18
-
37
+
19
38
  def serialize(context)
20
39
  idfield = settings["debug_writer.idfield"] || DEFAULT_IDFIELD
21
40
  format = settings['debug_writer.format'] || DEFAULT_FORMAT
@@ -23,6 +42,6 @@ class Traject::DebugWriter < Traject::LineWriter
23
42
  lines = h.keys.sort.map {|k| format % [h[idfield].first, k, h[k].join(' | ')] }
24
43
  lines.push "\n"
25
44
  lines.join("\n")
26
- end
45
+ end
27
46
 
28
- end
47
+ end
@@ -11,8 +11,38 @@ require 'traject/solrj_writer'
11
11
 
12
12
  require 'traject/macros/marc21'
13
13
  require 'traject/macros/basic'
14
+
15
+ # This class does indexing for traject: Getting input records from a Reader
16
+ # class, mapping the input records to an output hash, and then sending the output
17
+ # hash off somewhere (usually Solr) with a Writer class.
18
+ #
19
+ # Traject config files are `instance_eval`d in an Indexer object, so `self` in
20
+ # a config file is an Indexer, and any Indexer methods can be called.
21
+ #
22
+ # However, certain Indexer methods exist almost entirely for the purpose of
23
+ # being called in config files; these methods are part of the expected
24
+ # Domain-Specific Language ("DSL") for config files, and will ordinarily
25
+ # form the bulk or entirety of config files:
26
+ #
27
+ # * #settings
28
+ # * #to_field
29
+ # * #each_record
30
+ # * #after_processing
31
+ # * #logger (rarely used in config files, but in some cases to set up custom logging config)
32
+ #
33
+ # If accessing a Traject::Indexer programmatically (instead of via command line with
34
+ # config files), additional methods of note include:
35
+ #
36
+ # # to process a stream of input records from configured Reader,
37
+ # # to configured Writer:
38
+ # indexer.process(io_stream)
39
+ #
40
+ # # To map a single input record manually to an output_hash,
41
+ # # ignoring Readers and Writers
42
+ # hash = indexer.map_record(record)
14
43
  #
15
- # == Readers and Writers
44
+ #
45
+ # ## Readers and Writers
16
46
  #
17
47
  # The Indexer has a modularized architecture for readers and writers, for where
18
48
  # source records come from (reader), and where output is sent to (writer).
@@ -73,28 +103,38 @@ class Traject::Indexer
73
103
  def initialize(arg_settings = {})
74
104
  @settings = Settings.new(arg_settings)
75
105
  @index_steps = []
106
+ @after_processing_steps = []
76
107
  end
77
108
 
78
- # The Indexer's settings are a hash of key/values -- not
79
- # nested, just one level -- of configuration settings. Keys
80
- # are strings.
109
+ # Part of the config file DSL, for writing settings values.
110
+ #
111
+ # The Indexer's settings consist of a hash-like Traject::Settings
112
+ # object. The settings hash is *not* nested hashes, just one level
113
+ # of configuration settings. Keys are always strings, and by convention
114
+ # use "." for namespacing, eg `log.file`
81
115
  #
82
- # The settings method with no arguments returns that hash.
116
+ # The settings method with no arguments returns that Settings object.
83
117
  #
84
118
  # With a hash and/or block argument, can be used to set
85
119
  # new key/values. Each call merges onto the existing settings
86
- # hash.
120
+ # hash. The block is `instance_eval`d in the context
121
+ # of the Traject::Settings object.
87
122
  #
88
123
  # indexer.settings("a" => "a", "b" => "b")
89
124
  #
90
125
  # indexer.settings do
91
- # store "b", "new b"
126
+ # provide "b", "new b"
92
127
  # end
93
128
  #
94
129
  # indexer.settings #=> {"a" => "a", "b" => "new b"}
95
130
  #
96
- # even with arguments, returns settings hash too, so can
97
- # be chained.
131
+ # Note the #provide method is defined on Traject::Settings to
132
+ # write to a setting only if previously not set. You can also
133
+ # use #store to force over-writing even if an existing setting.
134
+ #
135
+ # Even with arguments, Indexer#settings returns the Settings object,
136
+ # so method calls can be chained.
137
+ #
98
138
  def settings(new_settings = nil, &block)
99
139
  @settings.merge!(new_settings) if new_settings
100
140
 
@@ -103,6 +143,24 @@ class Traject::Indexer
103
143
  return @settings
104
144
  end
105
145
 
146
+ # Part of DSL, used to define an indexing mapping. Register logic
147
+ # to be called for each record, and generate values for a particular
148
+ # output field.
149
+ def to_field(field_name, aLambda = nil, &block)
150
+ @index_steps << ToFieldStep.new(field_name, aLambda, block, Traject::Util.extract_caller_location(caller.first) )
151
+ end
152
+
153
+ # Part of DSL, register logic to be called for each record
154
+ def each_record(aLambda = nil, &block)
155
+ @index_steps << EachRecordStep.new(aLambda, block, Traject::Util.extract_caller_location(caller.first) )
156
+ end
157
+
158
+ # Part of DSL, register logic to be called once at the end
159
+ # of processing a stream of records.
160
+ def after_processing(aLambda = nil, &block)
161
+ @after_processing_steps << AfterProcessingStep.new(aLambda, block, Traject::Util.extract_caller_location(caller.first))
162
+ end
163
+
106
164
  def logger
107
165
  @logger ||= create_logger
108
166
  end
@@ -149,20 +207,6 @@ class Traject::Indexer
149
207
  return logger
150
208
  end
151
209
 
152
-
153
-
154
-
155
-
156
- # Used to define an indexing mapping.
157
- def to_field(field_name, aLambda = nil, &block)
158
- @index_steps << ToFieldStep.new(field_name, aLambda, block, Traject::Util.extract_caller_location(caller.first) )
159
- end
160
-
161
- def each_record(aLambda = nil, &block)
162
- @index_steps << EachRecordStep.new(aLambda, block, Traject::Util.extract_caller_location(caller.first) )
163
- end
164
-
165
-
166
210
  # Processes a single record according to indexing rules set up in
167
211
  # this indexer. Returns the output hash (a hash whose keys are
168
212
  # string fields, and values are arrays of one or more values in that field)
@@ -293,7 +337,7 @@ class Traject::Indexer
293
337
  # of having it be bound to the original variable in a non-threadsafe way.
294
338
  # This is confusing, I might not be understanding things properly, but that's where i am.
295
339
  #thread_pool.maybe_in_thread_pool &make_lambda(count, record, writer)
296
- thread_pool.maybe_in_thread_pool do
340
+ thread_pool.maybe_in_thread_pool(record, settings, position) do |record, settings, position|
297
341
  context = Context.new(:source_record => record, :settings => settings, :position => position)
298
342
  context.logger = logger
299
343
  map_to_context!(context)
@@ -317,6 +361,15 @@ class Traject::Indexer
317
361
 
318
362
  writer.close if writer.respond_to?(:close)
319
363
 
364
+ @after_processing_steps.each do |step|
365
+ begin
366
+ step.execute
367
+ rescue Exception => e
368
+ logger.fatal("Unexpected exception #{e} when executing #{step}")
369
+ raise e
370
+ end
371
+ end
372
+
320
373
  elapsed = Time.now - start_time
321
374
  avg_rps = (count / elapsed)
322
375
  logger.info "finished Indexer#process: #{count} records in #{'%.3f' % elapsed} seconds; #{'%.1f' % avg_rps} records/second overall."
@@ -513,6 +566,30 @@ class Traject::Indexer
513
566
 
514
567
  end
515
568
 
569
+ # A class representing a block of logic called after
570
+ # processing, registered with #after_processing
571
+ class AfterProcessingStep
572
+ attr_accessor :lambda, :block, :source_location
573
+ def initialize(lambda, block, source_location)
574
+ self.lambda = lambda
575
+ self.block = block
576
+ self.source_location = source_location
577
+ end
578
+
579
+ # after_processing steps get no args yielded to
580
+ # their blocks, they just are what they are.
581
+ def execute
582
+ [lambda, block].each do |aProc|
583
+ next unless aProc
584
+ aProc.call
585
+ end
586
+ end
587
+
588
+ def inspect
589
+ "(after_processing at #{self.source_location}"
590
+ end
591
+ end
592
+
516
593
 
517
594
 
518
595
 
@@ -1,22 +1,23 @@
1
1
  require 'hashie'
2
2
 
3
- # A Hash of settings for a Traject::Indexer, which also ends up passed along
4
- # to other objects Traject::Indexer interacts with.
5
- #
6
- # Enhanced with a few features from Hashie, to make it for
7
- # instance string/symbol indifferent
8
- #
9
- # #provide(key, value) is added, to do like settings[key] ||= value,
10
- # set only if not already set (but unlike ||=, nil or false can count as already set)
11
- #
12
- # Also has an interesting 'defaults' system, meant to play along
13
- # with configuration file 'provide' statements. There is a built-in hash of
14
- # defaults, which will be lazily filled in if accessed and not yet
15
- # set. (nil can count as set, though!). If they haven't been lazily
16
- # set yet, then #provide will still fill them in. But you can also call
17
- # fill_in_defaults! to fill all defaults in, if you know configuration
18
- # files have all been loaded, and want to fill them in for inspection.
19
3
  class Traject::Indexer
4
+
5
+ # A Hash of settings for a Traject::Indexer, which also ends up passed along
6
+ # to other objects Traject::Indexer interacts with.
7
+ #
8
+ # Enhanced with a few features from Hashie, to make it for
9
+ # instance string/symbol indifferent
10
+ #
11
+ # method #provide(key, value) is added, to do like settings[key] ||= value,
12
+ # set only if not already set (but unlike ||=, nil or false can count as already set)
13
+ #
14
+ # Also has an interesting 'defaults' system, meant to play along
15
+ # with configuration file 'provide' statements. There is a built-in hash of
16
+ # defaults, which will be lazily filled in if accessed and not yet
17
+ # set. (nil can count as set, though!). If they haven't been lazily
18
+ # set yet, then #provide will still fill them in. But you can also call
19
+ # fill_in_defaults! to fill all defaults in, if you know configuration
20
+ # files have all been loaded, and want to fill them in for inspection.
20
21
  class Settings < Hash
21
22
  include Hashie::Extensions::MergeInitializer # can init with hash
22
23
  include Hashie::Extensions::IndifferentAccess
@@ -80,4 +81,4 @@ class Traject::Indexer
80
81
  end.inspect
81
82
  end
82
83
  end
83
- end
84
+ end
@@ -1,21 +1,42 @@
1
1
  require 'json'
2
2
  require 'traject/line_writer'
3
3
 
4
- # A writer for Traject::Indexer, that just writes out
5
- # all the output as Json. It's newline delimitted json, but
6
- # right now no checks to make sure there is no internal newlines
7
- # as whitespace in the json. TODO, add that.
4
+ # The JsonWriter outputs one JSON hash per record, separated by newlines.
5
+ #
6
+ # It's newline delimited json, which should be suitable for being
7
+ # read by simple NDJ readers. (TODO: We have no checks right now to
8
+ # make sure the standard json serializers we're using don't put any
9
+ # internal newlines as whitespace in the json. Which would break NDJ
10
+ # reading. Should we?)
8
11
  #
9
12
  # Should be thread-safe (ie, multiple worker threads can be calling #put
10
- # concurrently), by wrapping write to actual output file in a mutex synchronize.
13
+ # concurrently), because output to file is wrapped in a mutex synchronize.
11
14
  # This does not seem to affect performance much, as far as I could tell
12
15
  # benchmarking.
13
16
  #
14
- # You can force pretty-printing with setting 'json_writer.pretty_print' of boolean
15
- # true or string 'true'. Useful mostly for human checking of output.
17
+ # ## Settings
18
+ #
19
+ # * output_file A filename to send output; default will use stdout.
20
+ #
21
+ # * json_writer.pretty_print: [default: false]: Pretty-print (e.g., include newlines, indentation, etc.)
22
+ # each JSON record instead of just mashing it all together on one line. The default, no pretty-printing option
23
+ # produces one record per line, easy to process with another program.
24
+ #
25
+ # ## Example output
26
+ #
27
+ # Without pretty printing, you end up with something like this (just two records shown):
28
+ #
29
+ # {"id":["000001118"],"oclc":["ocm00085737"],"sdrnum":["sdr-nrlf.b170195454"],"isbn":["0137319924"],"lccn":["73120791"],"mainauthor":["Behavioral and Social Sciences Survey Committee. Psychiatry Panel."],"author":["Behavioral and Social Sciences Survey Committee. Psychiatry Panel.","Hamburg, David A., 1925-"],"author2":["Behavioral and Social Sciences Survey Committee. Psychiatry Panel.","Hamburg, David A., 1925-"],"authorSort":["Behavioral and Social Sciences Survey Committee. Psychiatry Panel."],"author_top":["Behavioral and Social Sciences Survey Committee. Psychiatry Panel.","Edited by David A. Hamburg.","Hamburg, David A., 1925- ed."],"title":["Psychiatry as a behavioral science."],"title_a":["Psychiatry as a behavioral science."],"title_ab":["Psychiatry as a behavioral science."],"title_c":["Edited by David A. Hamburg."],"titleSort":["Psychiatry as a behavioral science"],"title_top":["Psychiatry as a behavioral science."],"title_rest":["A Spectrum book"],"series2":["A Spectrum book"],"callnumber":["RC327 .B41"],"broad_subject":["Medicine"],"pubdate":[1970],"format":["Book","Online","Print"],"publisher":["Prentice-Hall"],"language":["English"],"language008":["eng"],"editor":["David A. Hamburg."]}
30
+ # {"id":["000000794"],"oclc":["ocm00067181"],"lccn":["78011026"],"mainauthor":["Clark, Albert Curtis, 1859-1937."],"author":["Clark, Albert Curtis, 1859-1937."],"authorSort":["Clark, Albert Curtis, 1859-1937."],"author_top":["Clark, Albert Curtis, 1859-1937."],"title":["The descent of manuscripts.","descent of manuscripts."],"title_a":["The descent of manuscripts.","descent of manuscripts."],"title_ab":["The descent of manuscripts.","descent of manuscripts."],"titleSort":["descent of manuscripts"],"title_top":["The descent of manuscripts."],"callnumber":["PA47 .C45 1970"],"broad_subject":["Language & Literature"],"pubdate":[1918],"format":["Book","Online","Print"],"publisher":["Clarendon Press"],"language":["English"],"language008":["eng"]}
31
+ #
32
+ # ## Example configuration file
33
+ #
34
+ # require 'traject/json_writer'
16
35
  #
17
- # Output will be sent to settings["output_file"] string path, or else
18
- # settings["output_stream"] (ruby IO object), or else stdout.
36
+ # settings do
37
+ # provide "writer_class_name", "Traject::JsonWriter"
38
+ # provide "output_file", "out.json"
39
+ # end
19
40
  class Traject::JsonWriter < Traject::LineWriter
20
41
 
21
42
  def serialize(context)
@@ -25,6 +46,6 @@ class Traject::JsonWriter < Traject::LineWriter
25
46
  else
26
47
  JSON.generate(hash)
27
48
  end
28
- end
49
+ end
29
50
 
30
- end
51
+ end
@@ -1,19 +1,19 @@
1
1
  require 'thread'
2
2
 
3
3
  # A writer for Traject::Indexer, that just writes out
4
- # all the output as serialized text with #puts.
4
+ # all the output as serialized text with #puts.
5
5
  #
6
6
  # Should be thread-safe (ie, multiple worker threads can be calling #put
7
7
  # concurrently), by wrapping write to actual output file in a mutex synchronize.
8
8
  # This does not seem to affect performance much, as far as I could tell
9
9
  # benchmarking.
10
10
  #
11
- # Output will be sent to settings["output_file"] string path, or else
12
- # settings["output_stream"] (ruby IO object), or else stdout.
11
+ # Output will be sent to `settings["output_file"]` string path, or else
12
+ # `settings["output_stream"]` (ruby IO object), or else stdout.
13
13
  #
14
14
  # This class can be sub-classed to write out different serialized
15
15
  # representations -- subclasses will just override the #serialize
16
- # method. For instance, see JsonWriter.
16
+ # method. For instance, see JsonWriter.
17
17
  class Traject::LineWriter
18
18
  attr_reader :settings
19
19
  attr_reader :write_mutex
@@ -29,7 +29,7 @@ class Traject::LineWriter
29
29
 
30
30
  def serialize(context)
31
31
  context.output_hash
32
- end
32
+ end
33
33
 
34
34
  def put(context)
35
35
  serialized = serialize(context)
@@ -56,4 +56,4 @@ class Traject::LineWriter
56
56
  @output_file.close unless (@output_file.nil? || @output_file.tty?)
57
57
  end
58
58
 
59
- end
59
+ end
@@ -6,4 +6,4 @@ module Traject::Macros
6
6
  end
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -20,29 +20,33 @@ module Traject::Macros
20
20
  # and others. By default, will de-duplicate results, but see :allow_duplicates
21
21
  #
22
22
  # * :first => true: take only first value
23
+ #
23
24
  # * :translation_map => String: translate with named translation map looked up in load
24
25
  # path, uses Traject::TranslationMap.new(translation_map_arg)
26
+ #
25
27
  # * :trim_punctuation => true; trims leading/trailing punctuation using standard algorithms that
26
28
  # have shown themselves useful with Marc, using Marc21.trim_punctuation
29
+ #
27
30
  # * :default => String: if otherwise empty, add default value
31
+ #
28
32
  # * :allow_duplicates => boolean, default false, if set to true then will avoid
29
33
  # de-duplicating the result array (array.uniq!)
30
34
  #
31
35
  #
32
36
  # Examples:
33
37
  #
34
- # to_field("title"), extract_marc("245abcd", :trim_punctuation => true)
35
- # to_field("id"), extract_marc("001", :first => true)
36
- # to_field("geo"), extract_marc("040a", :separator => nil, :translation_map => "marc040")
38
+ # to_field "title", extract_marc("245abcd", :trim_punctuation => true)
39
+ # to_field "id", extract_marc("001", :first => true)
40
+ # to_field "geo", extract_marc("040a", :separator => nil, :translation_map => "marc040")
37
41
  def extract_marc(spec, options = {})
38
-
42
+
39
43
  # Raise an error if there are any invalid options, indicating a
40
44
  # misspelled or illegal option, using a string instead of a symbol, etc.
41
-
45
+
42
46
  unless (options.keys - EXTRACT_MARC_VALID_OPTIONS).empty?
43
47
  raise RuntimeError.new("Illegal/Unknown argument '#{(options.keys - EXTRACT_MARC_VALID_OPTIONS).join(', ')}' in extract_marc at #{Traject::Util.extract_caller_location(caller.first)}")
44
48
  end
45
-
49
+
46
50
  only_first = options.delete(:first)
47
51
  trim_punctuation = options.delete(:trim_punctuation)
48
52
  default_value = options.delete(:default)
@@ -53,12 +57,12 @@ module Traject::Macros
53
57
  # ones, and not have to create a new one per-execution.
54
58
  #
55
59
  # Benchmarking shows for MarcExtractor at least, there is
56
- # significant performance advantage.
60
+ # significant performance advantage.
57
61
 
58
62
  if translation_map_arg = options.delete(:translation_map)
59
63
  translation_map = Traject::TranslationMap.new(translation_map_arg)
60
64
  end
61
-
65
+
62
66
 
63
67
  extractor = Traject::MarcExtractor.new(spec, options)
64
68
 
@@ -76,7 +80,7 @@ module Traject::Macros
76
80
  if trim_punctuation
77
81
  accumulator.collect! {|s| Marc21.trim_punctuation(s)}
78
82
  end
79
-
83
+
80
84
  unless allow_duplicates
81
85
  accumulator.uniq!
82
86
  end
@@ -84,14 +88,14 @@ module Traject::Macros
84
88
  if default_value && accumulator.empty?
85
89
  accumulator << default_value
86
90
  end
87
-
91
+
88
92
  end
89
93
  end
90
94
  # A list of symbols that are valid keys in the options hash
91
- EXTRACT_MARC_VALID_OPTIONS = [:first, :trim_punctuation, :default,
92
- :allow_duplicates, :separator, :translation_map,
95
+ EXTRACT_MARC_VALID_OPTIONS = [:first, :trim_punctuation, :default,
96
+ :allow_duplicates, :separator, :translation_map,
93
97
  :alternate_script]
94
-
98
+
95
99
  # Serializes complete marc record to a serialization format.
96
100
  # required param :format,
97
101
  # serialize_marc(:format => :binary)