RubyGems - traject - Versions diffs - 2.3.2-java → 2.3.3-java - Mend

traject 2.3.2-java → 2.3.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +4 -4
data/.travis.yml +2 -2
data/CHANGES.md +12 -2
data/index_bench/common.rb +1 -1
data/lib/traject/command_line.rb +2 -2
data/lib/traject/indexer/settings.rb +5 -1
data/lib/traject/macros/marc21.rb +1 -1
data/lib/traject/macros/marc21_semantics.rb +8 -7
data/lib/traject/macros/marc_format_classifier.rb +2 -1
data/lib/traject/mock_reader.rb +3 -2
data/lib/traject/ndj_reader.rb +1 -1
data/lib/traject/solr_json_writer.rb +2 -2
data/lib/traject/util.rb +18 -17
data/lib/traject/version.rb +1 -1
data/test/indexer/{macros_marc21_semantics_test.rb → macros/macros_marc21_semantics_test.rb} +2 -2
data/test/{marc21_macros_test.rb → indexer/macros/marc21/extract_all_marc_values_test.rb} +29 -2
data/test/indexer/macros/marc21/extract_marc_test.rb +125 -0
data/test/indexer/macros/marc21/serialize_marc_test.rb +73 -0
data/test/indexer/macros/marc21/trim_punctuation_test.rb +39 -0
data/test/indexer/{macros_test.rb → macros/to_field_test.rb} +1 -1
data/test/indexer/map_record_test.rb +1 -1
data/test/indexer/to_field_test.rb +1 -1
data/test/indexer/writer_test.rb +17 -10
data/test/test_support/demo_config.rb +3 -1
metadata +15 -11
data/test/indexer/macros_marc21_test.rb +0 -219

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: e3c3925ff899c2a8e0cce5267cd464dcdeb13652
-  data.tar.gz: 8b9f64ceca8f3e439ec3175acbbb808559c1b008
+  metadata.gz: 203a69835854bde2665c53aafaea20ceb731f431
+  data.tar.gz: 38579221ab60f0db3a05bec41e367ff6e06c4153
 SHA512:
-  metadata.gz: 93faea446b9f3a3b0c0922e9c8184d3546a513430d94db1ba735d381190a9ee2b0baecd612e187fc340d0318984abbe5fbd20517b81a4b267c8d119f4780f66f
-  data.tar.gz: 745fc458f87eaecad9fe5169b308d1278e073ff4b262401aefd131684133889f4eb419af37fbec9abeae8ba3cf38d39bdef73ea42f7aa01ba8e33a65352abbce
+  metadata.gz: 26969d8a05a35dbf5fd2d59391f4701353af387c14641a99e43beb32d9ee07de8ed6b0a1709091f03cc1db474ac4a24203969fc2a114d11edacc7e2f0e91963e
+  data.tar.gz: 6129937d2a7b6958368487ed38174458740d476c9513cc7f497b81f882aca9f6b61e36034ddbe59bfa9dacef5575bf7b5123702dc1f5c22072255eeba65afad2

data/.travis.yml CHANGED Viewed

@@ -6,8 +6,8 @@ rvm:
   - jruby-9.0.4.0
   - 1.9
   - 2.2
-  - 2.3.0
-  - rbx-2
+  - 2.3.3
+  - 2.4.0
 before_install:
   - gem update --system
   - gem uninstall bundler

data/CHANGES.md CHANGED Viewed

@@ -1,12 +1,22 @@
 # Changes
+## 2.3.3
+  * Further squash use of capture-variables ('$1', etc.)
+    to try to work around the non-thread-safety of
+    regexp in ruby
+  * Fix a bug in trim_punctuation where trailing
+    periods were being eliminated even if there
+    was a short string before them (e.g., 'Jr.')
+  * Begin to reorganize tests, starting with
+    the Marc21 macros
 ## 2.3.2
   * Change to `extract_marc` to work around a threadsafe problem in JRuby/MRI where
     regexps were unsafely shared between threads. (@codeforkjeff)
   * Make trim-punctuation safe for non-just-ASCII text (thanks to @dunn and @redlibrarian)
 ## 2.3.1
-  * Update README with more info aout new nil-related options
+  * Update README with more info about new nil-related options
 ## 2.3.0
   * Allow nil values, empty fields, and deduplication
@@ -21,7 +31,7 @@
       Set to `true` to pass empty fields on to the writer (with the value being an empty array)
 ## 2.2.1
-  * Had inadverntantly broken use of arrays as extract_marc specifications. Fixed.
+  * Had inadvertently broken use of arrays as extract_marc specifications. Fixed.
 ## 2.2.0
   * Change DebugWriter to be more forgiving (and informative) about missing record-id fields

data/index_bench/common.rb CHANGED Viewed

@@ -131,6 +131,6 @@ to_field "edition", extract_marc('250a')
 to_field 'language', marc_languages("008[35-37]:041a:041d:041e:041j")
 to_field 'language008', extract_marc('008[35-37]') do |r, acc|
-  acc.reject! {|x| x !~ /\S/} # ditch only spaces
+  acc.reject! {|x| !(/\S/.match(x)} # ditch all-spaces values
   acc.uniq!
 end

data/lib/traject/command_line.rb CHANGED Viewed

@@ -202,8 +202,8 @@ module Traject
       # `-s key=value` command line
       (options[:setting] || []).each do |setting_pair|
-        if setting_pair =~ /\A([^=]+)\=(.*)\Z/
-          key, value = $1, $2
+        if m  = /\A([^=]+)\=(.*)\Z/.match(setting_pair)
+          key, value = m[1], m[2]
           settings[key] = value
         else
           self.console.puts "Unrecognized setting argument '#{setting_pair}':"

data/lib/traject/indexer/settings.rb CHANGED Viewed

@@ -103,7 +103,11 @@ class Traject::Indexer
     def inspect
       # Keep any key ending in password out of the inspect
       self.inject({}) do |hash, (key, value)|
-        hash[key] = (key =~ /password\Z/) ? "[hidden]" : value
+        if /password\Z/.match(key)
+          hash[key] = "[hidden]"
+        else
+          hash[key] = value
+        end
         hash
       end.inspect
     end

data/lib/traject/macros/marc21.rb CHANGED Viewed

@@ -233,7 +233,7 @@ module Traject::Macros
       str = str.sub(/ *[ ,\/;:] *\Z/, '')
       # trailing period if it is preceded by at least three letters (possibly preceded and followed by whitespace)
-      str = str.sub(/( *[[:word:][:word:][:word:]])\. *\Z/, '\1')
+      str = str.sub(/( *[[:word:]]{3,})\. *\Z/, '\1')
       # single square bracket characters if they are the start and/or end
       #   chars and there are no internal square brackets.

data/lib/traject/macros/marc21_semantics.rb CHANGED Viewed

@@ -40,8 +40,8 @@ module Traject::Macros
          /x
     def self.oclcnum_extract(num)
-      if OCLCPAT.match(num)
-        return $1
+      if m = OCLCPAT.match(num)
+        return m[1]
       else
         return nil
       end
@@ -369,8 +369,8 @@ module Traject::Macros
         v260c = MarcExtractor.cached("260c", :separator => nil).extract(record).first
         # just try to take the first four digits out of there, we're not going to try
         # anything crazy.
-        if v260c =~ /(\d{4})/
-          found_date = $1.to_i
+        if m = /(\d{4})/.match(v260c)
+          found_date = m[1].to_i
         end
       end
@@ -408,7 +408,7 @@ module Traject::Macros
         candidates = extractor.extract(record)
         candidates.reject! do |candidate|
-          !(candidate =~ lcc_regex)
+          !(lcc_regex.match candidate)
         end
         accumulator.concat translation_map.translate_array!(candidates.collect {|a| a.lstrip.slice(0, 1)}).uniq
@@ -501,10 +501,11 @@ module Traject::Macros
         end)
         # weird ones
+        special_fields_regex = /\A\s*.+,\s+(ca.\s+)?\d\d\d\d?(-\d\d\d\d?)?( B\.C\.)?[.,; ]*\Z/
         extractor_special_fields.each_matching_line(record) do |field, spec, extractor|
           field.subfields.each do |sf|
             next unless sf.code == 'y'
-            if sf.value =~ /\A\s*.+,\s+(ca.\s+)?\d\d\d\d?(-\d\d\d\d?)?( B\.C\.)?[.,; ]*\Z/
+            if special_fields_regex.match(sf.value)
               # it's our pattern, add the $a in please
               accumulator << "#{field['a']}#{separator}#{sf.value.sub(/\. *\Z/, '')}"
             else
@@ -562,7 +563,7 @@ module Traject::Macros
       marc_field.subfields.each_with_index do |sf, i|
         # ignore non-alphabetic, like numeric control subfields
-        next unless sf.code =~ /\A[a-z]\Z/
+        next unless /\A[a-z]\Z/.match(sf.code)
         prefix = if subd_prefix_codes.include? sf.code
           subd_separator

data/lib/traject/macros/marc_format_classifier.rb CHANGED Viewed

@@ -105,7 +105,8 @@ module Traject
       def proceeding?
         @proceeding_q ||= begin
           ! record.find do |field|
-            field.tag.slice(0) == '6' && field.subfields.find {|sf| sf.code == "v" && sf.value =~ /^\s*(C|c)ongresses\.?\s*$/}
+            field.tag.slice(0) == '6' &&
+                field.subfields.find {|sf| sf.code == "v" && /^\s*(C|c)ongresses\.?\s*$/.match(sf.value) }
           end.nil?
         end
       end

data/lib/traject/mock_reader.rb CHANGED Viewed

@@ -40,15 +40,16 @@ module Traject
       this_file_iter = file_io.each_line
       while true
         line = this_file_iter.next
-        break if line =~ /^\_\_END\_\_/
+        break if /^\_\_END\_\_/.match line
       end
       begin
         while true
           json = this_file_iter.next
-          next unless json =~ /\S/
+          next unless /\S/.match json
           records << MARC::Record.new_from_hash(JSON.parse(json))
         end
       rescue StopIteration

data/lib/traject/ndj_reader.rb CHANGED Viewed

@@ -12,7 +12,7 @@ class Traject::NDJReader
   def initialize(input_stream, settings)
     @settings = settings
     @input_stream = input_stream
-    if @settings['command_line.filename'] =~ /\.gz$/
+    if /\.gz\Z/.match(@settings['command_line.filename'])
       @input_stream = Zlib::GzipReader.new(@input_stream, :external_encoding => "UTF-8")
     end
   end

data/lib/traject/solr_json_writer.rb CHANGED Viewed

@@ -236,7 +236,7 @@ class Traject::SolrJsonWriter
   # If we've got a solr.update_url, make sure it's ok
   def check_solr_update_url(url)
-    unless url =~ /^#{URI::regexp}$/
+    unless /^#{URI::regexp}$/.match(url)
       raise ArgumentError.new("#{self.class.name} setting `solr.update_url` doesn't look like a URL: `#{url}`")
     end
     url
@@ -249,7 +249,7 @@ class Traject::SolrJsonWriter
     end
     # Not a URL? Bail
-    unless url =~ /^#{URI::regexp}$/
+    unless  /^#{URI::regexp}$/.match(url)
       raise ArgumentError.new("#{self.class.name} setting `solr.url` doesn't look like a URL: `#{url}`")
     end

data/lib/traject/util.rb CHANGED Viewed

@@ -5,14 +5,14 @@ module Traject
     def self.exception_to_log_message(e)
       indent = "    "
-      msg  = indent + "Exception: " + e.class.name + ": " + e.message + "\n"
+      msg = indent + "Exception: " + e.class.name + ": " + e.message + "\n"
       msg += indent + e.backtrace.first + "\n"
-      if (e.respond_to?(:getRootCause) && e.getRootCause && e != e.getRootCause )
+      if (e.respond_to?(:getRootCause) && e.getRootCause && e != e.getRootCause)
         caused_by = e.getRootCause
-        msg += indent + "Caused by\n"
-        msg += indent + caused_by.class.name + ": " + caused_by.message + "\n"
-        msg += indent + caused_by.backtrace.first + "\n"
+        msg       += indent + "Caused by\n"
+        msg       += indent + caused_by.class.name + ": " + caused_by.message + "\n"
+        msg       += indent + caused_by.backtrace.first + "\n"
       end
       return msg
@@ -37,8 +37,8 @@ module Traject
       # For a SyntaxError, we really need to grep it from the
       # exception message, it really appears to be nowhere else. Ugh.
       if exception.kind_of? SyntaxError
-        if exception.message =~ /:(\d+):/
-          return $1.to_i
+        if m = /:(\d+):/.match(exception.message)
+          return m[1].to_i
         end
       end
@@ -48,9 +48,9 @@ module Traject
       # exception.backtrace_locations exists in MRI 2.1+, which makes
       # our task a lot easier. But not yet in JRuby 1.7.x, so we got to
       # handle the old way of having to parse the strings in backtrace too.
-      if ( exception.respond_to?(:backtrace_locations) &&
-           exception.backtrace_locations &&
-           exception.backtrace_locations.length > 0 )
+      if (exception.respond_to?(:backtrace_locations) &&
+          exception.backtrace_locations &&
+          exception.backtrace_locations.length > 0)
         location = exception.backtrace_locations.find do |bt|
           bt.path == file_path
         end
@@ -58,8 +58,10 @@ module Traject
       else # have to parse string backtrace
         exception.backtrace.each do |line|
           if line.start_with?(file_path)
-            return $1.to_i if line =~ /\A.*\:(\d+)\:in/
-            break
+            if m = /\A.*\:(\d+)\:in/.match(line)
+              return m[1].to_i
+              break
+            end
           end
         end
         # if we got here, we have nothing
@@ -75,14 +77,14 @@ module Traject
     # returned array will actually be of Thread::Backtrace::Location elements.
     def self.backtrace_from_config(file_path, exception)
       filtered_trace = []
-      found = false
+      found          = false
       # MRI 2.1+ has exception.backtrace_locations which makes
       # this a lot easier, but JRuby 1.7.x doesn't yet, so we
       # need to do it both ways.
-      if ( exception.respond_to?(:backtrace_locations) &&
-           exception.backtrace_locations &&
-           exception.backtrace_locations.length > 0 )
+      if (exception.respond_to?(:backtrace_locations) &&
+          exception.backtrace_locations &&
+          exception.backtrace_locations.length > 0)
         exception.backtrace_locations.each do |location|
           filtered_trace << location
@@ -100,7 +102,6 @@ module Traject
     end
     # Ruby stdlib queue lacks a 'drain' function, we write one.
     #
     # Removes everything currently in the ruby stdlib queue, and returns

data/lib/traject/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Traject
-  VERSION = "2.3.2"
+  VERSION = "2.3.3"
 end

data/test/indexer/{macros_marc21_semantics_test.rb → macros/macros_marc21_semantics_test.rb} RENAMED Viewed

@@ -169,7 +169,7 @@ describe "Traject::Macros::Marc21Semantics" do
     # there are way too many edge cases for us to test em all, but we'll test some of em.
     it "works when there's no date information" do
-      assert_equal nil,  Marc21Semantics.publication_date(empty_record)
+      assert_nil Marc21Semantics.publication_date(empty_record)
     end
     it "uses macro correctly with no date info" do
@@ -189,7 +189,7 @@ describe "Traject::Macros::Marc21Semantics" do
     end
     it "returns nil when the records really got nothing" do
       @record = MARC::Reader.new(support_file_path  "emptyish_record.marc").to_a.first
-      assert_equal nil, Marc21Semantics.publication_date(@record)
+      assert_nil Marc21Semantics.publication_date(@record)
     end
     it "estimates with a single 'u'" do
       @record = MARC::Reader.new(support_file_path  "date_with_u.marc").to_a.first

data/test/{marc21_macros_test.rb → indexer/macros/marc21/extract_all_marc_values_test.rb} RENAMED Viewed

@@ -1,11 +1,20 @@
-# Encoding: UTF-8
 require 'test_helper'
+require 'traject/indexer'
 require 'traject/macros/marc21'
+require 'json'
+require 'marc'
 include Traject::Macros::Marc21
 describe "The extract_all_marc_values macro" do
+  before do
+    @indexer = Traject::Indexer.new
+    @record  = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
+  end
   it "is fine with no arguments" do
     assert(extract_all_marc_values)
@@ -20,4 +29,22 @@ describe "The extract_all_marc_values macro" do
       extract_all_marc_values(from: 100, to: '999')
     end
   end
+  it "#extract_all_marc_values" do
+    @indexer.instance_eval do
+      to_field "text", extract_all_marc_values
+    end
+    output = @indexer.map_record(@record)
+    assert_length 13, output["text"]
+  end
 end

data/test/indexer/macros/marc21/extract_marc_test.rb ADDED Viewed

@@ -0,0 +1,125 @@
+require 'test_helper'
+require 'traject/indexer'
+require 'traject/macros/marc21'
+require 'json'
+require 'marc'
+include Traject::Macros::Marc21
+describe "extract_marc" do
+  before do
+    @indexer = Traject::Indexer.new
+    @record  = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
+  end
+  it "extracts marc" do
+    @indexer.instance_eval do
+      to_field "title", extract_marc("245ab")
+    end
+    output = @indexer.map_record(@record)
+    assert_equal ["Manufacturing consent : the political economy of the mass media /"], output["title"]
+    assert_equal({}, @indexer.map_record(empty_record))
+  end
+  it "respects :first=>true option" do
+    @indexer.instance_eval do
+      to_field "other_id", extract_marc("035a", :first => true)
+    end
+    output = @indexer.map_record(@record)
+    assert_length 1, output["other_id"]
+  end
+  it "trims punctuation with :trim_punctuation => true" do
+    @indexer.instance_eval do
+      to_field "title", extract_marc("245ab", :trim_punctuation => true)
+    end
+    output = @indexer.map_record(@record)
+    assert_equal ["Manufacturing consent : the political economy of the mass media"], output["title"]
+    assert_equal({}, @indexer.map_record(empty_record))
+  end
+  it "respects :default option" do
+    @indexer.instance_eval do
+      to_field "only_default", extract_marc("9999", :default => "DEFAULT VALUE")
+    end
+    output = @indexer.map_record(@record)
+    assert_equal ["DEFAULT VALUE"], output["only_default"]
+  end
+  it "de-duplicates by default, respects :allow_duplicates" do
+    # Add a second 008
+    f = @record.fields('008').first
+    @record.append(f)
+    @indexer.instance_eval do
+      to_field "lang1", extract_marc('008[35-37]')
+      to_field "lang2", extract_marc('008[35-37]', :allow_duplicates => true)
+    end
+    output = @indexer.map_record(@record)
+    assert_equal ["eng"], output['lang1']
+    assert_equal ["eng", "eng"], output['lang2']
+    assert_equal({}, @indexer.map_record(empty_record))
+  end
+  it "fails on an extra/misspelled argument to extract_marc" do
+    assert_raises(RuntimeError) do
+      @indexer.instance_eval do
+        to_field "foo", extract_marc("9999", :misspelled => "Who cares")
+      end
+    end
+  end
+  it "throws away nil values unless settings['allow_nil_values]'" do
+    @indexer.instance_eval do
+      to_field 'default_nil', extract_marc('9999', :default => nil)
+    end
+    output = @indexer.map_record(@record)
+    assert_nil output['default_nil']
+  end
+  it "allows nil values if settings['allow_nil_values]'" do
+    @indexer.settings do |s|
+      s['allow_nil_values'] = true
+    end
+    @indexer.instance_eval do
+      to_field 'default_nil', extract_marc('9999', :default => nil)
+    end
+    output = @indexer.map_record(@record)
+    assert_equal [nil], output['default_nil']
+  end
+  it "uses :translation_map" do
+    @indexer.instance_eval do
+      to_field "cataloging_agency", extract_marc("040a", :separator => nil, :translation_map => "marc_040a_translate_test")
+    end
+    output = @indexer.map_record(@record)
+    assert_equal ["Library of Congress"], output["cataloging_agency"]
+  end
+  it "supports #extract_marc_from module method" do
+    output_arr = ::Traject::Macros::Marc21.extract_marc_from(@record, "245ab", :trim_punctuation => true)
+    assert_equal ["Manufacturing consent : the political economy of the mass media"], output_arr
+  end
+end

data/test/indexer/macros/marc21/serialize_marc_test.rb ADDED Viewed

@@ -0,0 +1,73 @@
+require 'test_helper'
+require 'traject/indexer'
+require 'traject/macros/marc21'
+require 'json'
+require 'marc'
+include Traject::Macros::Marc21
+describe "serialized_marc" do
+  before do
+    @indexer = Traject::Indexer.new
+    @record  = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
+  end
+  it "serializes xml" do
+    @indexer.instance_eval do
+      to_field "marc_record", serialized_marc(:format => "xml")
+    end
+    output = @indexer.map_record(@record)
+    assert_length 1, output["marc_record"]
+    assert_kind_of String, output["marc_record"].first
+    roundtrip_record = MARC::XMLReader.new(StringIO.new(output["marc_record"].first)).first
+    assert_equal @record, roundtrip_record
+  end
+  it "serializes binary UUEncoded" do
+    @indexer.instance_eval do
+      to_field "marc_record", serialized_marc(:format => "binary")
+    end
+    output = @indexer.map_record(@record)
+    assert_length 1, output["marc_record"]
+    assert_kind_of String, output["marc_record"].first
+    decoded = Base64.decode64(output["marc_record"].first)
+    # just check the marc header for now
+    assert_start_with "02067cam a2200469", decoded
+  end
+  it "serializes binary raw" do
+    @indexer.instance_eval do
+      to_field "marc_record", serialized_marc(:format => "binary", :binary_escape => false)
+    end
+    output = @indexer.map_record(@record)
+    assert_length 1, output["marc_record"]
+    assert_kind_of String, output["marc_record"].first
+    # just check the marc header for now
+    assert_start_with "02067cam a2200469", output["marc_record"].first
+  end
+  it "serializes json" do
+    @indexer.instance_eval do
+      to_field "marc_record", serialized_marc(:format => "json")
+    end
+    output = @indexer.map_record(@record)
+    assert_length 1, output["marc_record"]
+    # okay, let's actually deserialize it, why not
+    hash = JSON.parse(output["marc_record"].first)
+    deserialized = MARC::Record.new_from_hash(hash)
+    assert_equal @record, deserialized
+  end
+end

data/test/indexer/macros/marc21/trim_punctuation_test.rb ADDED Viewed

@@ -0,0 +1,39 @@
+# encoding: UTF-8
+require 'test_helper'
+require 'traject/indexer'
+require 'traject/macros/marc21'
+include Traject::Macros::Marc21
+describe "trim_punctuation" do
+  # TODO: test coverage for trim_punctuation
+  # trim_punctuation isn't super-complicated code, and yet we've found a few bugs
+  # in it already. Needs more test coverage.
+  it "Works as expected" do
+    assert_equal "one two three", Traject::Macros::Marc21.trim_punctuation("one two three")
+    assert_equal "one two three", Traject::Macros::Marc21.trim_punctuation("one two three,")
+    assert_equal "one two three", Traject::Macros::Marc21.trim_punctuation("one two three/")
+    assert_equal "one two three", Traject::Macros::Marc21.trim_punctuation("one two three;")
+    assert_equal "one two three", Traject::Macros::Marc21.trim_punctuation("one two three:")
+    assert_equal "one two three .", Traject::Macros::Marc21.trim_punctuation("one two three .")
+    assert_equal "one two three", Traject::Macros::Marc21.trim_punctuation("one two three.")
+    assert_equal "one two three...", Traject::Macros::Marc21.trim_punctuation("one two three...")
+    assert_equal "one two three", Traject::Macros::Marc21.trim_punctuation(" one two three.")
+    assert_equal "one two [three]", Traject::Macros::Marc21.trim_punctuation("one two [three]")
+    assert_equal "one two three", Traject::Macros::Marc21.trim_punctuation("one two three]")
+    assert_equal "one two three", Traject::Macros::Marc21.trim_punctuation("[one two three")
+    assert_equal "one two three", Traject::Macros::Marc21.trim_punctuation("[one two three]")
+    # This one was a bug before
+    assert_equal "Feminism and art", Traject::Macros::Marc21.trim_punctuation("Feminism and art.")
+    assert_equal "Le réve", Traject::Macros::Marc21.trim_punctuation("Le réve.")
+    # This one was a bug on the bug
+    assert_equal "Bill Dueber, Jr.", Traject::Macros::Marc21.trim_punctuation("Bill Dueber, Jr.")
+  end
+end

data/test/indexer/{macros_test.rb → macros/to_field_test.rb} RENAMED Viewed

@@ -1,6 +1,6 @@
 require 'test_helper'
-describe "Indexer Macros:" do
+describe "Indexer Macros#to_field" do
   before do
     @indexer = Traject::Indexer.new
     @record = MARC::Reader.new(support_file_path  "manufacturing_consent.marc").to_a.first

data/test/indexer/map_record_test.rb CHANGED Viewed

@@ -192,7 +192,7 @@ describe "Traject::Indexer#map_record" do
       end
       @indexer.to_field('radical') do |rec, acc, context|
-        context.skip!("Chomsky!") if rec['245'].to_s =~ /Chomsky/
+        context.skip!("Chomsky!") if rec['245'].to_s  =~ /Chomsky/
       end
       @indexer.to_field('afterSkip') do |rec, acc|

data/test/indexer/to_field_test.rb CHANGED Viewed

@@ -58,7 +58,7 @@ describe "Traject::Indexer.to_field" do
       acc = ['hello']
     end
     output = @indexer.map_record('never looked at')
-    assert_equal nil, output['foo']
+    assert_nil output['foo']
   end
   it "allows use of accumulator.replace" do

data/test/indexer/writer_test.rb CHANGED Viewed

@@ -2,21 +2,28 @@ require 'test_helper'
 require 'traject/yaml_writer'
 describe "The writer on Traject::Indexer" do
-  let(:indexer) { Traject::Indexer.new("solr.url" => "http://example.com") }
-  it "has a default" do
-    assert_instance_of Traject::SolrJsonWriter, indexer.writer
-    assert_equal Traject::SolrJsonWriter, indexer.writer_class
+  let(:indexer) { Traject::Indexer.new("solr.url" => "http://localhost.com") }
+  # TODO: fix default writer test
+  # Fails in the absence of a configured
+  # network interface.
+  describe "default writer from index" do
+    it "has a default" do
+      # assert_instance_of Traject::SolrJsonWriter, indexer.writer
+      # assert_equal Traject::SolrJsonWriter, indexer.writer_class
+     skip "Fails in the absence of a configured network interface."
+    end
   end
-  describe "when the writer is set in config" do
+  describe "when the writer is set in config" do
     let(:writer) { Traject::YamlWriter.new({}) }
     let(:indexer) { Traject::Indexer.new(
-      "solr.url" => "http://example.com",
-      "writer_class" => 'Traject::SolrJsonWriter',
-      "writer"   => writer
-      )}
+        "solr.url"     => "http://example.com",
+        "writer_class" => 'Traject::SolrJsonWriter',
+        "writer"       => writer
+    ) }
     it "uses writer from config" do
       assert_equal writer, indexer.writer

data/test/test_support/demo_config.rb CHANGED Viewed

@@ -120,7 +120,9 @@ to_field "discipline_facet",  marc_lcc_to_broad_category(:default => nil) do |re
     if call_type == "sudoc"
       # we choose to call it:
       accumulator << "Government Publication"
-    elsif call_type.nil? || call_type == "lc" || field['a'] =~ Traject::Macros::Marc21Semantics::LCC_REGEX
+    elsif call_type.nil? ||
+          call_type == "lc" ||
+        Traject::Macros::Marc21Semantics::LCC_REGEX.match(field['a'])
       # run it through the map
       s = field['a']
       s = s.slice(0, 1) if s

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: traject
 version: !ruby/object:Gem::Version
-  version: 2.3.2
+  version: 2.3.3
 platform: java
 authors:
 - Jonathan Rochkind
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-11-03 00:00:00.000000000 Z
+date: 2017-01-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -268,15 +268,17 @@ files:
 - test/indexer/context_test.rb
 - test/indexer/each_record_test.rb
 - test/indexer/load_config_file_test.rb
-- test/indexer/macros_marc21_semantics_test.rb
-- test/indexer/macros_marc21_test.rb
-- test/indexer/macros_test.rb
+- test/indexer/macros/macros_marc21_semantics_test.rb
+- test/indexer/macros/marc21/extract_all_marc_values_test.rb
+- test/indexer/macros/marc21/extract_marc_test.rb
+- test/indexer/macros/marc21/serialize_marc_test.rb
+- test/indexer/macros/marc21/trim_punctuation_test.rb
+- test/indexer/macros/to_field_test.rb
 - test/indexer/map_record_test.rb
 - test/indexer/read_write_test.rb
 - test/indexer/settings_test.rb
 - test/indexer/to_field_test.rb
 - test/indexer/writer_test.rb
-- test/marc21_macros_test.rb
 - test/marc_extractor_test.rb
 - test/marc_format_classifier_test.rb
 - test/marc_reader_test.rb
@@ -343,7 +345,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.4
+rubygems_version: 2.6.8
 signing_key:
 specification_version: 4
 summary: Index MARC to Solr; or generally process source records to hash-like structures
@@ -353,15 +355,17 @@ test_files:
 - test/indexer/context_test.rb
 - test/indexer/each_record_test.rb
 - test/indexer/load_config_file_test.rb
-- test/indexer/macros_marc21_semantics_test.rb
-- test/indexer/macros_marc21_test.rb
-- test/indexer/macros_test.rb
+- test/indexer/macros/macros_marc21_semantics_test.rb
+- test/indexer/macros/marc21/extract_all_marc_values_test.rb
+- test/indexer/macros/marc21/extract_marc_test.rb
+- test/indexer/macros/marc21/serialize_marc_test.rb
+- test/indexer/macros/marc21/trim_punctuation_test.rb
+- test/indexer/macros/to_field_test.rb
 - test/indexer/map_record_test.rb
 - test/indexer/read_write_test.rb
 - test/indexer/settings_test.rb
 - test/indexer/to_field_test.rb
 - test/indexer/writer_test.rb
-- test/marc21_macros_test.rb
 - test/marc_extractor_test.rb
 - test/marc_format_classifier_test.rb
 - test/marc_reader_test.rb

data/test/indexer/macros_marc21_test.rb DELETED Viewed

@@ -1,219 +0,0 @@
-require 'test_helper'
-require 'traject/indexer'
-require 'traject/macros/marc21'
-require 'json'
-require 'marc'
-# See also marc_extractor_test.rb for more detailed tests on marc extraction,
-# this is just a basic test to make sure our macro works passing through to there
-# and other options.
-describe "Traject::Macros::Marc21" do
-  Marc21 = Traject::Macros::Marc21 # shortcut
-  before do
-    @indexer = Traject::Indexer.new
-    @record  = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
-  end
-  describe "extract_marc" do
-    it "extracts marc" do
-      @indexer.instance_eval do
-        to_field "title", extract_marc("245ab")
-      end
-      output = @indexer.map_record(@record)
-      assert_equal ["Manufacturing consent : the political economy of the mass media /"], output["title"]
-      assert_equal({}, @indexer.map_record(empty_record))
-    end
-    it "respects :first=>true option" do
-      @indexer.instance_eval do
-        to_field "other_id", extract_marc("035a", :first => true)
-      end
-      output = @indexer.map_record(@record)
-      assert_length 1, output["other_id"]
-    end
-    it "trims punctuation with :trim_punctuation => true" do
-      @indexer.instance_eval do
-        to_field "title", extract_marc("245ab", :trim_punctuation => true)
-      end
-      output = @indexer.map_record(@record)
-      assert_equal ["Manufacturing consent : the political economy of the mass media"], output["title"]
-      assert_equal({}, @indexer.map_record(empty_record))
-    end
-    it "respects :default option" do
-      @indexer.instance_eval do
-        to_field "only_default", extract_marc("9999", :default => "DEFAULT VALUE")
-      end
-      output = @indexer.map_record(@record)
-      assert_equal ["DEFAULT VALUE"], output["only_default"]
-    end
-    it "de-duplicates by default, respects :allow_duplicates" do
-      # Add a second 008
-      f = @record.fields('008').first
-      @record.append(f)
-      @indexer.instance_eval do
-        to_field "lang1", extract_marc('008[35-37]')
-        to_field "lang2", extract_marc('008[35-37]', :allow_duplicates => true)
-      end
-      output = @indexer.map_record(@record)
-      assert_equal ["eng"], output['lang1']
-      assert_equal ["eng", "eng"], output['lang2']
-      assert_equal({}, @indexer.map_record(empty_record))
-    end
-    it "fails on an extra/misspelled argument to extract_marc" do
-      assert_raises(RuntimeError) do
-        @indexer.instance_eval do
-          to_field "foo", extract_marc("9999", :misspelled => "Who cares")
-        end
-      end
-    end
-    it "throws away nil values unless settings['allow_nil_values]'" do
-      @indexer.instance_eval do
-        to_field 'default_nil', extract_marc('9999', :default => nil)
-      end
-      output = @indexer.map_record(@record)
-      assert_nil output['default_nil']
-    end
-    it "allows nil values if settings['allow_nil_values]'" do
-      @indexer.settings do |s|
-        s['allow_nil_values'] = true
-      end
-      @indexer.instance_eval do
-        to_field 'default_nil', extract_marc('9999', :default => nil)
-      end
-      output = @indexer.map_record(@record)
-      assert_equal [nil], output['default_nil']
-    end
-    it "Marc21::trim_punctuation class method" do
-      assert_equal "one two three", Marc21.trim_punctuation("one two three")
-      assert_equal "one two three", Marc21.trim_punctuation("one two three,")
-      assert_equal "one two three", Marc21.trim_punctuation("one two three/")
-      assert_equal "one two three", Marc21.trim_punctuation("one two three;")
-      assert_equal "one two three", Marc21.trim_punctuation("one two three:")
-      assert_equal "one two three .", Marc21.trim_punctuation("one two three .")
-      assert_equal "one two three", Marc21.trim_punctuation("one two three.")
-      assert_equal "one two three...", Marc21.trim_punctuation("one two three...")
-      assert_equal "one two three", Marc21.trim_punctuation(" one two three.")
-      assert_equal "one two [three]", Marc21.trim_punctuation("one two [three]")
-      assert_equal "one two three", Marc21.trim_punctuation("one two three]")
-      assert_equal "one two three", Marc21.trim_punctuation("[one two three")
-      assert_equal "one two three", Marc21.trim_punctuation("[one two three]")
-      # This one was a bug before
-      assert_equal "Feminism and art", Marc21.trim_punctuation("Feminism and art.")
-      assert_equal "Le réve", Marc21.trim_punctuation("Le réve.") # this assertion currently fails
-    end
-    it "uses :translation_map" do
-      @indexer.instance_eval do
-        to_field "cataloging_agency", extract_marc("040a", :separator => nil, :translation_map => "marc_040a_translate_test")
-      end
-      output = @indexer.map_record(@record)
-      assert_equal ["Library of Congress"], output["cataloging_agency"]
-    end
-  end
-  it "supports #extract_marc_from module method" do
-    output_arr = ::Traject::Macros::Marc21.extract_marc_from(@record, "245ab", :trim_punctuation => true)
-    assert_equal ["Manufacturing consent : the political economy of the mass media"], output_arr
-  end
-  describe "serialized_marc" do
-    it "serializes xml" do
-      @indexer.instance_eval do
-        to_field "marc_record", serialized_marc(:format => "xml")
-      end
-      output = @indexer.map_record(@record)
-      assert_length 1, output["marc_record"]
-      assert_kind_of String, output["marc_record"].first
-      roundtrip_record = MARC::XMLReader.new(StringIO.new(output["marc_record"].first)).first
-      assert_equal @record, roundtrip_record
-    end
-    it "serializes binary UUEncoded" do
-      @indexer.instance_eval do
-        to_field "marc_record", serialized_marc(:format => "binary")
-      end
-      output = @indexer.map_record(@record)
-      assert_length 1, output["marc_record"]
-      assert_kind_of String, output["marc_record"].first
-      decoded = Base64.decode64(output["marc_record"].first)
-      # just check the marc header for now
-      assert_start_with "02067cam a2200469", decoded
-    end
-    it "serializes binary raw" do
-      @indexer.instance_eval do
-        to_field "marc_record", serialized_marc(:format => "binary", :binary_escape => false)
-      end
-      output = @indexer.map_record(@record)
-      assert_length 1, output["marc_record"]
-      assert_kind_of String, output["marc_record"].first
-      # just check the marc header for now
-      assert_start_with "02067cam a2200469", output["marc_record"].first
-    end
-    it "serializes json" do
-      @indexer.instance_eval do
-        to_field "marc_record", serialized_marc(:format => "json")
-      end
-      output = @indexer.map_record(@record)
-      assert_length 1, output["marc_record"]
-      # okay, let's actually deserialize it, why not
-      hash = JSON.parse(output["marc_record"].first)
-      deserialized = MARC::Record.new_from_hash(hash)
-      assert_equal @record, deserialized
-    end
-  end
-  it "#extract_all_marc_values" do
-    @indexer.instance_eval do
-      to_field "text", extract_all_marc_values
-    end
-    output = @indexer.map_record(@record)
-    assert_length 13, output["text"]
-  end
-end