traject 0.16.0 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +1 -0
  3. data/README.md +183 -191
  4. data/bench/bench.rb +1 -1
  5. data/doc/batch_execution.md +14 -0
  6. data/doc/extending.md +14 -12
  7. data/doc/indexing_rules.md +265 -0
  8. data/lib/traject/command_line.rb +12 -41
  9. data/lib/traject/debug_writer.rb +32 -13
  10. data/lib/traject/indexer.rb +101 -24
  11. data/lib/traject/indexer/settings.rb +18 -17
  12. data/lib/traject/json_writer.rb +32 -11
  13. data/lib/traject/line_writer.rb +6 -6
  14. data/lib/traject/macros/basic.rb +1 -1
  15. data/lib/traject/macros/marc21.rb +17 -13
  16. data/lib/traject/macros/marc21_semantics.rb +27 -25
  17. data/lib/traject/macros/marc_format_classifier.rb +39 -25
  18. data/lib/traject/marc4j_reader.rb +36 -22
  19. data/lib/traject/marc_extractor.rb +79 -75
  20. data/lib/traject/marc_reader.rb +33 -25
  21. data/lib/traject/mock_reader.rb +9 -10
  22. data/lib/traject/ndj_reader.rb +7 -7
  23. data/lib/traject/null_writer.rb +1 -1
  24. data/lib/traject/qualified_const_get.rb +12 -2
  25. data/lib/traject/solrj_writer.rb +61 -52
  26. data/lib/traject/thread_pool.rb +45 -45
  27. data/lib/traject/translation_map.rb +59 -27
  28. data/lib/traject/util.rb +3 -3
  29. data/lib/traject/version.rb +1 -1
  30. data/lib/traject/yaml_writer.rb +1 -1
  31. data/test/debug_writer_test.rb +7 -7
  32. data/test/indexer/each_record_test.rb +4 -4
  33. data/test/indexer/macros_marc21_semantics_test.rb +12 -12
  34. data/test/indexer/macros_marc21_test.rb +10 -10
  35. data/test/indexer/macros_test.rb +1 -1
  36. data/test/indexer/map_record_test.rb +6 -6
  37. data/test/indexer/read_write_test.rb +43 -4
  38. data/test/indexer/settings_test.rb +2 -2
  39. data/test/indexer/to_field_test.rb +8 -8
  40. data/test/marc4j_reader_test.rb +4 -4
  41. data/test/marc_extractor_test.rb +33 -25
  42. data/test/marc_format_classifier_test.rb +3 -3
  43. data/test/marc_reader_test.rb +2 -2
  44. data/test/test_helper.rb +3 -3
  45. data/test/test_support/demo_config.rb +52 -48
  46. data/test/translation_map_test.rb +22 -4
  47. data/test/translation_maps/bad_ruby.rb +2 -2
  48. data/test/translation_maps/both_map.rb +1 -1
  49. data/test/translation_maps/default_literal.rb +1 -1
  50. data/test/translation_maps/default_passthrough.rb +1 -1
  51. data/test/translation_maps/ruby_map.rb +1 -1
  52. metadata +7 -31
  53. data/doc/macros.md +0 -103
@@ -1,21 +1,40 @@
1
1
  require 'traject/line_writer'
2
2
 
3
- # A writer for Traject::Indexer that outputs each record as a series of
4
- # lines, prefixed by the id, one for each field and it's values.
5
- # Multiple values are separated by pipes
3
+ # The Traject::DebugWriter produces a simple, human-readable output format that's
4
+ # also amenable to simple computer processing (e.g., with a simple grep).
5
+ # It's the output format used when you pass the --debug-mode switch to traject on the command line.
6
6
  #
7
- # Applicable settings:
7
+ # Output format is three columns: id, output field, values (multiple
8
+ # values separated by '|'), and looks something like:
8
9
  #
9
- # - 'output_file' -- the name of the file to output to
10
- # - 'output_stream' -- alternately, the IO stream
11
- # - 'debug_writer.idfield' -- the solr field from which to pull the record ID (default: 'id')
12
- # - 'debug_writer.format' -- How to format the id/solr field/values (default: '%-12s %-25s %s')
13
-
14
-
10
+ # 000001580 edition [1st ed.]
11
+ # 000001580 format Book | Online | Print
12
+ # 000001580 geo Great Britain
13
+ # 000001580 id 000001580
14
+ # 000001580 isbn 0631126902
15
+ #
16
+ # ## Settings
17
+ #
18
+ # * 'output_file' -- the name of the file to output to (command line -o shortcut).
19
+ # * 'output_stream' -- alternately, the IO stream
20
+ # * 'debug_writer.idfield' -- the solr field from which to pull the record ID (default: 'id')
21
+ # * 'debug_writer.format' -- How to format the id/solr field/values (default: '%-12s %-25s %s')
22
+ #
23
+ # By default, with neither output_file nor output_stream provided, writes to stdout, which
24
+ # can be useful for debugging diagnosis.
25
+ #
26
+ # ## Example configuration file
27
+ #
28
+ # require 'traject/debug_writer'
29
+ #
30
+ # settings do
31
+ # provide "writer_class_name", "Traject::DebugWriter"
32
+ # provide "output_file", "out.txt"
33
+ # end
15
34
  class Traject::DebugWriter < Traject::LineWriter
16
35
  DEFAULT_FORMAT = '%-12s %-25s %s'
17
36
  DEFAULT_IDFIELD = 'id'
18
-
37
+
19
38
  def serialize(context)
20
39
  idfield = settings["debug_writer.idfield"] || DEFAULT_IDFIELD
21
40
  format = settings['debug_writer.format'] || DEFAULT_FORMAT
@@ -23,6 +42,6 @@ class Traject::DebugWriter < Traject::LineWriter
23
42
  lines = h.keys.sort.map {|k| format % [h[idfield].first, k, h[k].join(' | ')] }
24
43
  lines.push "\n"
25
44
  lines.join("\n")
26
- end
45
+ end
27
46
 
28
- end
47
+ end
@@ -11,8 +11,38 @@ require 'traject/solrj_writer'
11
11
 
12
12
  require 'traject/macros/marc21'
13
13
  require 'traject/macros/basic'
14
+
15
+ # This class does indexing for traject: Getting input records from a Reader
16
+ # class, mapping the input records to an output hash, and then sending the output
17
+ # hash off somewhere (usually Solr) with a Writer class.
18
+ #
19
+ # Traject config files are `instance_eval`d in an Indexer object, so `self` in
20
+ # a config file is an Indexer, and any Indexer methods can be called.
21
+ #
22
+ # However, certain Indexer methods exist almost entirely for the purpose of
23
+ # being called in config files; these methods are part of the expected
24
+ # Domain-Specific Language ("DSL") for config files, and will ordinarily
25
+ # form the bulk or entirety of config files:
26
+ #
27
+ # * #settings
28
+ # * #to_field
29
+ # * #each_record
30
+ # * #after_processing
31
+ # * #logger (rarely used in config files, but in some cases to set up custom logging config)
32
+ #
33
+ # If accessing a Traject::Indexer programmatically (instead of via command line with
34
+ # config files), additional methods of note include:
35
+ #
36
+ # # to process a stream of input records from configured Reader,
37
+ # # to configured Writer:
38
+ # indexer.process(io_stream)
39
+ #
40
+ # # To map a single input record manually to an output_hash,
41
+ # # ignoring Readers and Writers
42
+ # hash = indexer.map_record(record)
14
43
  #
15
- # == Readers and Writers
44
+ #
45
+ # ## Readers and Writers
16
46
  #
17
47
  # The Indexer has a modularized architecture for readers and writers, for where
18
48
  # source records come from (reader), and where output is sent to (writer).
@@ -73,28 +103,38 @@ class Traject::Indexer
73
103
  def initialize(arg_settings = {})
74
104
  @settings = Settings.new(arg_settings)
75
105
  @index_steps = []
106
+ @after_processing_steps = []
76
107
  end
77
108
 
78
- # The Indexer's settings are a hash of key/values -- not
79
- # nested, just one level -- of configuration settings. Keys
80
- # are strings.
109
+ # Part of the config file DSL, for writing settings values.
110
+ #
111
+ # The Indexer's settings consist of a hash-like Traject::Settings
112
+ # object. The settings hash is *not* nested hashes, just one level
113
+ # of configuration settings. Keys are always strings, and by convention
114
+ # use "." for namespacing, eg `log.file`
81
115
  #
82
- # The settings method with no arguments returns that hash.
116
+ # The settings method with no arguments returns that Settings object.
83
117
  #
84
118
  # With a hash and/or block argument, can be used to set
85
119
  # new key/values. Each call merges onto the existing settings
86
- # hash.
120
+ # hash. The block is `instance_eval`d in the context
121
+ # of the Traject::Settings object.
87
122
  #
88
123
  # indexer.settings("a" => "a", "b" => "b")
89
124
  #
90
125
  # indexer.settings do
91
- # store "b", "new b"
126
+ # provide "b", "new b"
92
127
  # end
93
128
  #
94
129
  # indexer.settings #=> {"a" => "a", "b" => "new b"}
95
130
  #
96
- # even with arguments, returns settings hash too, so can
97
- # be chained.
131
+ # Note the #provide method is defined on Traject::Settings to
132
+ # write to a setting only if previously not set. You can also
133
+ # use #store to force over-writing even if an existing setting.
134
+ #
135
+ # Even with arguments, Indexer#settings returns the Settings object
136
+ # too, so method calls can be chained.
137
+ #
98
138
  def settings(new_settings = nil, &block)
99
139
  @settings.merge!(new_settings) if new_settings
100
140
 
@@ -103,6 +143,24 @@ class Traject::Indexer
103
143
  return @settings
104
144
  end
105
145
 
146
+ # Part of DSL, used to define an indexing mapping. Register logic
147
+ # to be called for each record, and generate values for a particular
148
+ # output field.
149
+ def to_field(field_name, aLambda = nil, &block)
150
+ @index_steps << ToFieldStep.new(field_name, aLambda, block, Traject::Util.extract_caller_location(caller.first) )
151
+ end
152
+
153
+ # Part of DSL, register logic to be called for each record
154
+ def each_record(aLambda = nil, &block)
155
+ @index_steps << EachRecordStep.new(aLambda, block, Traject::Util.extract_caller_location(caller.first) )
156
+ end
157
+
158
+ # Part of DSL, register logic to be called once at the end
159
+ # of processing a stream of records.
160
+ def after_processing(aLambda = nil, &block)
161
+ @after_processing_steps << AfterProcessingStep.new(aLambda, block, Traject::Util.extract_caller_location(caller.first))
162
+ end
163
+
106
164
  def logger
107
165
  @logger ||= create_logger
108
166
  end
@@ -149,20 +207,6 @@ class Traject::Indexer
149
207
  return logger
150
208
  end
151
209
 
152
-
153
-
154
-
155
-
156
- # Used to define an indexing mapping.
157
- def to_field(field_name, aLambda = nil, &block)
158
- @index_steps << ToFieldStep.new(field_name, aLambda, block, Traject::Util.extract_caller_location(caller.first) )
159
- end
160
-
161
- def each_record(aLambda = nil, &block)
162
- @index_steps << EachRecordStep.new(aLambda, block, Traject::Util.extract_caller_location(caller.first) )
163
- end
164
-
165
-
166
210
  # Processes a single record according to indexing rules set up in
167
211
  # this indexer. Returns the output hash (a hash whose keys are
168
212
  # string fields, and values are arrays of one or more values in that field)
@@ -293,7 +337,7 @@ class Traject::Indexer
293
337
  # of having it be bound to the original variable in a non-threadsafe way.
294
338
  # This is confusing, I might not be understanding things properly, but that's where i am.
295
339
  #thread_pool.maybe_in_thread_pool &make_lambda(count, record, writer)
296
- thread_pool.maybe_in_thread_pool do
340
+ thread_pool.maybe_in_thread_pool(record, settings, position) do |record, settings, position|
297
341
  context = Context.new(:source_record => record, :settings => settings, :position => position)
298
342
  context.logger = logger
299
343
  map_to_context!(context)
@@ -317,6 +361,15 @@ class Traject::Indexer
317
361
 
318
362
  writer.close if writer.respond_to?(:close)
319
363
 
364
+ @after_processing_steps.each do |step|
365
+ begin
366
+ step.execute
367
+ rescue Exception => e
368
+ logger.fatal("Unexpected exception #{e} when executing #{step}")
369
+ raise e
370
+ end
371
+ end
372
+
320
373
  elapsed = Time.now - start_time
321
374
  avg_rps = (count / elapsed)
322
375
  logger.info "finished Indexer#process: #{count} records in #{'%.3f' % elapsed} seconds; #{'%.1f' % avg_rps} records/second overall."
@@ -513,6 +566,30 @@ class Traject::Indexer
513
566
 
514
567
  end
515
568
 
569
+ # A class representing a block of logic called after
570
+ # processing, registered with #after_processing
571
+ class AfterProcessingStep
572
+ attr_accessor :lambda, :block, :source_location
573
+ def initialize(lambda, block, source_location)
574
+ self.lambda = lambda
575
+ self.block = block
576
+ self.source_location = source_location
577
+ end
578
+
579
+ # after_processing steps get no args yielded to
580
+ # their blocks, they just are what they are.
581
+ def execute
582
+ [lambda, block].each do |aProc|
583
+ next unless aProc
584
+ aProc.call
585
+ end
586
+ end
587
+
588
+ def inspect
589
+ "(after_processing at #{self.source_location}"
590
+ end
591
+ end
592
+
516
593
 
517
594
 
518
595
 
@@ -1,22 +1,23 @@
1
1
  require 'hashie'
2
2
 
3
- # A Hash of settings for a Traject::Indexer, which also ends up passed along
4
- # to other objects Traject::Indexer interacts with.
5
- #
6
- # Enhanced with a few features from Hashie, to make it for
7
- # instance string/symbol indifferent
8
- #
9
- # #provide(key, value) is added, to do like settings[key] ||= value,
10
- # set only if not already set (but unlike ||=, nil or false can count as already set)
11
- #
12
- # Also has an interesting 'defaults' system, meant to play along
13
- # with configuration file 'provide' statements. There is a built-in hash of
14
- # defaults, which will be lazily filled in if accessed and not yet
15
- # set. (nil can count as set, though!). If they haven't been lazily
16
- # set yet, then #provide will still fill them in. But you can also call
17
- # fill_in_defaults! to fill all defaults in, if you know configuration
18
- # files have all been loaded, and want to fill them in for inspection.
19
3
  class Traject::Indexer
4
+
5
+ # A Hash of settings for a Traject::Indexer, which also ends up passed along
6
+ # to other objects Traject::Indexer interacts with.
7
+ #
8
+ # Enhanced with a few features from Hashie, to make it for
9
+ # instance string/symbol indifferent
10
+ #
11
+ # method #provide(key, value) is added, to do like settings[key] ||= value,
12
+ # set only if not already set (but unlike ||=, nil or false can count as already set)
13
+ #
14
+ # Also has an interesting 'defaults' system, meant to play along
15
+ # with configuration file 'provide' statements. There is a built-in hash of
16
+ # defaults, which will be lazily filled in if accessed and not yet
17
+ # set. (nil can count as set, though!). If they haven't been lazily
18
+ # set yet, then #provide will still fill them in. But you can also call
19
+ # fill_in_defaults! to fill all defaults in, if you know configuration
20
+ # files have all been loaded, and want to fill them in for inspection.
20
21
  class Settings < Hash
21
22
  include Hashie::Extensions::MergeInitializer # can init with hash
22
23
  include Hashie::Extensions::IndifferentAccess
@@ -80,4 +81,4 @@ class Traject::Indexer
80
81
  end.inspect
81
82
  end
82
83
  end
83
- end
84
+ end
@@ -1,21 +1,42 @@
1
1
  require 'json'
2
2
  require 'traject/line_writer'
3
3
 
4
- # A writer for Traject::Indexer, that just writes out
5
- # all the output as Json. It's newline delimitted json, but
6
- # right now no checks to make sure there is no internal newlines
7
- # as whitespace in the json. TODO, add that.
4
+ # The JsonWriter outputs one JSON hash per record, separated by newlines.
5
+ #
6
+ # It's newline delimited json, which should be suitable for being
7
+ # read by simple NDJ readers. (TODO: We have no checks right now to
8
+ # make sure the standard json serializers we're using don't put any
9
+ # internal newlines as whitespace in the json. Which would break NDJ
10
+ # reading. Should we?)
8
11
  #
9
12
  # Should be thread-safe (ie, multiple worker threads can be calling #put
10
- # concurrently), by wrapping write to actual output file in a mutex synchronize.
13
+ # concurrently), because output to file is wrapped in a mutex synchronize.
11
14
  # This does not seem to effect performance much, as far as I could tell
12
15
  # benchmarking.
13
16
  #
14
- # You can force pretty-printing with setting 'json_writer.pretty_print' of boolean
15
- # true or string 'true'. Useful mostly for human checking of output.
17
+ # ## Settings
18
+ #
19
+ # * output_file A filename to send output; default will use stdout.
20
+ #
21
+ # * json_writer.pretty_print: [default: false]: Pretty-print (e.g., include newlines, indentation, etc.)
22
+ # each JSON record instead of just mashing it all together on one line. The default, no pretty-printing option
23
+ # produces one record per line, easy to process with another program.
24
+ #
25
+ # ## Example output
26
+ #
27
+ # Without pretty printing, you end up with something like this (just two records shown):
28
+ #
29
+ # {"id":["000001118"],"oclc":["ocm00085737"],"sdrnum":["sdr-nrlf.b170195454"],"isbn":["0137319924"],"lccn":["73120791"],"mainauthor":["Behavioral and Social Sciences Survey Committee. Psychiatry Panel."],"author":["Behavioral and Social Sciences Survey Committee. Psychiatry Panel.","Hamburg, David A., 1925-"],"author2":["Behavioral and Social Sciences Survey Committee. Psychiatry Panel.","Hamburg, David A., 1925-"],"authorSort":["Behavioral and Social Sciences Survey Committee. Psychiatry Panel."],"author_top":["Behavioral and Social Sciences Survey Committee. Psychiatry Panel.","Edited by David A. Hamburg.","Hamburg, David A., 1925- ed."],"title":["Psychiatry as a behavioral science."],"title_a":["Psychiatry as a behavioral science."],"title_ab":["Psychiatry as a behavioral science."],"title_c":["Edited by David A. Hamburg."],"titleSort":["Psychiatry as a behavioral science"],"title_top":["Psychiatry as a behavioral science."],"title_rest":["A Spectrum book"],"series2":["A Spectrum book"],"callnumber":["RC327 .B41"],"broad_subject":["Medicine"],"pubdate":[1970],"format":["Book","Online","Print"],"publisher":["Prentice-Hall"],"language":["English"],"language008":["eng"],"editor":["David A. Hamburg."]}
30
+ # {"id":["000000794"],"oclc":["ocm00067181"],"lccn":["78011026"],"mainauthor":["Clark, Albert Curtis, 1859-1937."],"author":["Clark, Albert Curtis, 1859-1937."],"authorSort":["Clark, Albert Curtis, 1859-1937."],"author_top":["Clark, Albert Curtis, 1859-1937."],"title":["The descent of manuscripts.","descent of manuscripts."],"title_a":["The descent of manuscripts.","descent of manuscripts."],"title_ab":["The descent of manuscripts.","descent of manuscripts."],"titleSort":["descent of manuscripts"],"title_top":["The descent of manuscripts."],"callnumber":["PA47 .C45 1970"],"broad_subject":["Language & Literature"],"pubdate":[1918],"format":["Book","Online","Print"],"publisher":["Clarendon Press"],"language":["English"],"language008":["eng"]}
31
+ #
32
+ # ## Example configuration file
33
+ #
34
+ # require 'traject/json_writer'
16
35
  #
17
- # Output will be sent to settings["output_file"] string path, or else
18
- # settings["output_stream"] (ruby IO object), or else stdout.
36
+ # settings do
37
+ # provide "writer_class_name", "Traject::JsonWriter"
38
+ # provide "output_file", "out.json"
39
+ # end
19
40
  class Traject::JsonWriter < Traject::LineWriter
20
41
 
21
42
  def serialize(context)
@@ -25,6 +46,6 @@ class Traject::JsonWriter < Traject::LineWriter
25
46
  else
26
47
  JSON.generate(hash)
27
48
  end
28
- end
49
+ end
29
50
 
30
- end
51
+ end
@@ -1,19 +1,19 @@
1
1
  require 'thread'
2
2
 
3
3
  # A writer for Traject::Indexer, that just writes out
4
- # all the output as serialized text with #puts.
4
+ # all the output as serialized text with #puts.
5
5
  #
6
6
  # Should be thread-safe (ie, multiple worker threads can be calling #put
7
7
  # concurrently), by wrapping write to actual output file in a mutex synchronize.
8
8
  # This does not seem to affect performance much, as far as I could tell
9
9
  # benchmarking.
10
10
  #
11
- # Output will be sent to settings["output_file"] string path, or else
12
- # settings["output_stream"] (ruby IO object), or else stdout.
11
+ # Output will be sent to `settings["output_file"]` string path, or else
12
+ # `settings["output_stream"]` (ruby IO object), or else stdout.
13
13
  #
14
14
  # This class can be sub-classed to write out different serialized
15
15
  # representations -- subclasses will just override the #serialize
16
- # method. For instance, see JsonWriter.
16
+ # method. For instance, see JsonWriter.
17
17
  class Traject::LineWriter
18
18
  attr_reader :settings
19
19
  attr_reader :write_mutex
@@ -29,7 +29,7 @@ class Traject::LineWriter
29
29
 
30
30
  def serialize(context)
31
31
  context.output_hash
32
- end
32
+ end
33
33
 
34
34
  def put(context)
35
35
  serialized = serialize(context)
@@ -56,4 +56,4 @@ class Traject::LineWriter
56
56
  @output_file.close unless (@output_file.nil? || @output_file.tty?)
57
57
  end
58
58
 
59
- end
59
+ end
@@ -6,4 +6,4 @@ module Traject::Macros
6
6
  end
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -20,29 +20,33 @@ module Traject::Macros
20
20
  # and others. By default, will de-duplicate results, but see :allow_duplicates
21
21
  #
22
22
  # * :first => true: take only first value
23
+ #
23
24
  # * :translation_map => String: translate with named translation map looked up in load
24
25
  # path, uses Traject::TranslationMap.new(translation_map_arg)
26
+ #
25
27
  # * :trim_punctuation => true; trims leading/trailing punctuation using standard algorithms that
26
28
  # have shown themselves useful with Marc, using Marc21.trim_punctuation
29
+ #
27
30
  # * :default => String: if otherwise empty, add default value
31
+ #
28
32
  # * :allow_duplicates => boolean, default false, if set to true then will avoid
29
33
  # de-duplicating the result array (array.uniq!)
30
34
  #
31
35
  #
32
36
  # Examples:
33
37
  #
34
- # to_field("title"), extract_marc("245abcd", :trim_punctuation => true)
35
- # to_field("id"), extract_marc("001", :first => true)
36
- # to_field("geo"), extract_marc("040a", :separator => nil, :translation_map => "marc040")
38
+ # to_field("title", extract_marc("245abcd", :trim_punctuation => true))
39
+ # to_field("id", extract_marc("001", :first => true))
40
+ # to_field("geo", extract_marc("040a", :separator => nil, :translation_map => "marc040"))
37
41
  def extract_marc(spec, options = {})
38
-
42
+
39
43
  # Raise an error if there are any invalid options, indicating a
40
44
  # misspelled or illegal option, using a string instead of a symbol, etc.
41
-
45
+
42
46
  unless (options.keys - EXTRACT_MARC_VALID_OPTIONS).empty?
43
47
  raise RuntimeError.new("Illegal/Unknown argument '#{(options.keys - EXTRACT_MARC_VALID_OPTIONS).join(', ')}' in extract_marc at #{Traject::Util.extract_caller_location(caller.first)}")
44
48
  end
45
-
49
+
46
50
  only_first = options.delete(:first)
47
51
  trim_punctuation = options.delete(:trim_punctuation)
48
52
  default_value = options.delete(:default)
@@ -53,12 +57,12 @@ module Traject::Macros
53
57
  # ones, and not have to create a new one per-execution.
54
58
  #
55
59
  # Benchmarking shows for MarcExtractor at least, there is
56
- # significant performance advantage.
60
+ # significant performance advantage.
57
61
 
58
62
  if translation_map_arg = options.delete(:translation_map)
59
63
  translation_map = Traject::TranslationMap.new(translation_map_arg)
60
64
  end
61
-
65
+
62
66
 
63
67
  extractor = Traject::MarcExtractor.new(spec, options)
64
68
 
@@ -76,7 +80,7 @@ module Traject::Macros
76
80
  if trim_punctuation
77
81
  accumulator.collect! {|s| Marc21.trim_punctuation(s)}
78
82
  end
79
-
83
+
80
84
  unless allow_duplicates
81
85
  accumulator.uniq!
82
86
  end
@@ -84,14 +88,14 @@ module Traject::Macros
84
88
  if default_value && accumulator.empty?
85
89
  accumulator << default_value
86
90
  end
87
-
91
+
88
92
  end
89
93
  end
90
94
  # A list of symbols that are valid keys in the options hash
91
- EXTRACT_MARC_VALID_OPTIONS = [:first, :trim_punctuation, :default,
92
- :allow_duplicates, :separator, :translation_map,
95
+ EXTRACT_MARC_VALID_OPTIONS = [:first, :trim_punctuation, :default,
96
+ :allow_duplicates, :separator, :translation_map,
93
97
  :alternate_script]
94
-
98
+
95
99
  # Serializes complete marc record to a serialization format.
96
100
  # required param :format,
97
101
  # serialize_marc(:format => :binary)