traject 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +7 -0
- data/Gemfile +5 -1
- data/README.md +65 -17
- data/bench/bench.rb +30 -0
- data/bin/traject +4 -169
- data/doc/batch_execution.md +177 -0
- data/doc/extending.md +182 -0
- data/doc/other_commands.md +49 -0
- data/doc/settings.md +6 -2
- data/lib/traject.rb +1 -0
- data/lib/traject/command_line.rb +296 -0
- data/lib/traject/debug_writer.rb +28 -0
- data/lib/traject/indexer.rb +84 -20
- data/lib/traject/indexer/settings.rb +9 -1
- data/lib/traject/json_writer.rb +15 -38
- data/lib/traject/line_writer.rb +59 -0
- data/lib/traject/macros/marc21.rb +10 -5
- data/lib/traject/macros/marc21_semantics.rb +57 -25
- data/lib/traject/marc4j_reader.rb +9 -26
- data/lib/traject/marc_extractor.rb +121 -48
- data/lib/traject/mock_reader.rb +87 -0
- data/lib/traject/mock_writer.rb +34 -0
- data/lib/traject/solrj_writer.rb +1 -22
- data/lib/traject/util.rb +107 -1
- data/lib/traject/version.rb +1 -1
- data/lib/traject/yaml_writer.rb +9 -0
- data/test/debug_writer_test.rb +38 -0
- data/test/indexer/each_record_test.rb +27 -2
- data/test/indexer/macros_marc21_semantics_test.rb +12 -1
- data/test/indexer/settings_test.rb +9 -2
- data/test/indexer/to_field_test.rb +35 -5
- data/test/marc4j_reader_test.rb +3 -0
- data/test/marc_extractor_test.rb +94 -20
- data/test/test_support/demo_config.rb +6 -3
- data/traject.gemspec +1 -2
- metadata +17 -20
    
        data/lib/traject/version.rb
    CHANGED
    
    
| @@ -0,0 +1,38 @@ | |
| 1 | 
            +
            require 'test_helper'
         | 
| 2 | 
            +
            require 'stringio'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require 'traject/debug_writer'
         | 
| 5 | 
            +
            require 'traject'
         | 
| 6 | 
            +
            require 'marc'
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            describe 'Simple output' do
         | 
| 9 | 
            +
              before do
         | 
| 10 | 
            +
                @record = MARC::Reader.new(support_file_path  "manufacturing_consent.marc").to_a.first
         | 
| 11 | 
            +
                @indexer = Traject::Indexer.new
         | 
| 12 | 
            +
                @indexer.instance_eval do
         | 
| 13 | 
            +
                  to_field "id", extract_marc("001", :first => true)
         | 
| 14 | 
            +
                  to_field "title", extract_marc("245ab")
         | 
| 15 | 
            +
                end
         | 
| 16 | 
            +
                @io = StringIO.new
         | 
| 17 | 
            +
                @writer = Traject::DebugWriter.new("output_stream" => @io)
         | 
| 18 | 
            +
                
         | 
| 19 | 
            +
                @id = "2710183"
         | 
| 20 | 
            +
                @title = "Manufacturing consent : the political economy of the mass media /"
         | 
| 21 | 
            +
              end
         | 
| 22 | 
            +
              
         | 
| 23 | 
            +
              it "does a simple output" do
         | 
| 24 | 
            +
                @writer.put Traject::Indexer::Context.new(:output_hash => @indexer.map_record(@record))
         | 
| 25 | 
            +
                expected = [
         | 
| 26 | 
            +
                  "#{@id} id #{@id}",
         | 
| 27 | 
            +
                  "#{@id} title #{@title}",
         | 
| 28 | 
            +
                  "\n"
         | 
| 29 | 
            +
                ]
         | 
| 30 | 
            +
                assert_equal expected.join("\n").gsub(/\s/, ''), @io.string.gsub(/\s/, '')
         | 
| 31 | 
            +
                @writer.close
         | 
| 32 | 
            +
                
         | 
| 33 | 
            +
              end
         | 
| 34 | 
            +
              
         | 
| 35 | 
            +
            end
         | 
| 36 | 
            +
                
         | 
| 37 | 
            +
                
         | 
| 38 | 
            +
                
         | 
| @@ -7,13 +7,13 @@ describe "Traject::Indexer#each_record" do | |
| 7 7 |  | 
| 8 8 | 
             
              describe "checks arguments" do
         | 
| 9 9 | 
             
                it "rejects no-arg block" do
         | 
| 10 | 
            -
                  assert_raises( | 
| 10 | 
            +
                  assert_raises(Traject::Indexer::ArityError) do
         | 
| 11 11 | 
             
                    @indexer.each_record do
         | 
| 12 12 | 
             
                    end
         | 
| 13 13 | 
             
                  end
         | 
| 14 14 | 
             
                end
         | 
| 15 15 | 
             
                it "rejects three-arg block" do
         | 
| 16 | 
            -
                  assert_raises( | 
| 16 | 
            +
                  assert_raises(Traject::Indexer::ArityError) do
         | 
| 17 17 | 
             
                    @indexer.each_record do |one, two, three|
         | 
| 18 18 | 
             
                    end
         | 
| 19 19 | 
             
                  end
         | 
| @@ -30,5 +30,30 @@ describe "Traject::Indexer#each_record" do | |
| 30 30 | 
             
                  @indexer.each_record do |*variable|
         | 
| 31 31 | 
             
                  end
         | 
| 32 32 | 
             
                end
         | 
| 33 | 
            +
             | 
| 34 | 
            +
                it "finds first (only) field on each_record error" do
         | 
| 35 | 
            +
                  begin
         | 
| 36 | 
            +
                    @indexer.to_field('foo') {|one, two| }
         | 
| 37 | 
            +
                    @indexer.each_record {|one, two, three| }   # bad arity
         | 
| 38 | 
            +
                    flunk("Should have rejected bad arity ")
         | 
| 39 | 
            +
                  rescue Traject::Indexer::ArityError => e
         | 
| 40 | 
            +
                    assert_match(/foo/, e.message)
         | 
| 41 | 
            +
                  rescue 
         | 
| 42 | 
            +
                    flunk("Should only fail with a ArityError")
         | 
| 43 | 
            +
                  end
         | 
| 44 | 
            +
                end
         | 
| 45 | 
            +
                
         | 
| 46 | 
            +
                it "rejects each_record with a name (e.g., using a to_field syntax)" do
         | 
| 47 | 
            +
                  assert_raises(Traject::Indexer::NamingError) do
         | 
| 48 | 
            +
                    @indexer.each_record('bad_name') {|one, two| }
         | 
| 49 | 
            +
                  end
         | 
| 50 | 
            +
                end
         | 
| 51 | 
            +
                
         | 
| 52 | 
            +
                it "reject each_record with no arguments/blocks at all" do
         | 
| 53 | 
            +
                  assert_raises(ArgumentError) do
         | 
| 54 | 
            +
                    @indexer.each_record()
         | 
| 55 | 
            +
                  end
         | 
| 56 | 
            +
                end
         | 
| 57 | 
            +
             | 
| 33 58 | 
             
              end
         | 
| 34 59 | 
             
            end
         | 
| @@ -25,7 +25,18 @@ describe "Traject::Macros::Marc21Semantics" do | |
| 25 25 | 
             
                end
         | 
| 26 26 | 
             
                output = @indexer.map_record(@record)
         | 
| 27 27 |  | 
| 28 | 
            -
                assert_equal %w{ | 
| 28 | 
            +
                assert_equal %w{47971712},  output["oclcnum"]
         | 
| 29 | 
            +
              end
         | 
| 30 | 
            +
             | 
| 31 | 
            +
              it "#marc_series_facet" do
         | 
| 32 | 
            +
                @record = MARC::Reader.new(support_file_path  "louis_armstrong.marc").to_a.first
         | 
| 33 | 
            +
             | 
| 34 | 
            +
                @indexer.instance_eval do
         | 
| 35 | 
            +
                  to_field "series_facet", marc_series_facet
         | 
| 36 | 
            +
                end
         | 
| 37 | 
            +
                output = @indexer.map_record(@record)
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                assert_equal ["Big bands."], output["series_facet"]
         | 
| 29 40 | 
             
              end
         | 
| 30 41 |  | 
| 31 42 | 
             
              describe "marc_sortable_author" do
         | 
| @@ -114,8 +114,15 @@ describe "Traject::Indexer#settings" do | |
| 114 114 | 
             
                assert_equal "new", settings["c"]
         | 
| 115 115 | 
             
              end
         | 
| 116 116 |  | 
| 117 | 
            -
              describe " | 
| 118 | 
            -
             | 
| 117 | 
            +
              describe "inspect" do
         | 
| 118 | 
            +
                it "keeps keys ending in 'password' out of inspect" do
         | 
| 119 | 
            +
                  settings = Traject::Indexer::Settings.new("a" => "a", 
         | 
| 120 | 
            +
                    "password" => "password", "some_password" => "password",
         | 
| 121 | 
            +
                    "some.password" => "password")
         | 
| 122 | 
            +
             | 
| 123 | 
            +
                  parsed = eval( settings.inspect )
         | 
| 124 | 
            +
                  assert_equal( {"a" => "a", "password" => "[hidden]", "some_password" => "[hidden]", "some.password" => "[hidden]"}, parsed)
         | 
| 125 | 
            +
                end
         | 
| 119 126 | 
             
              end
         | 
| 120 127 |  | 
| 121 128 | 
             
            end
         | 
| @@ -6,20 +6,23 @@ describe "Traject::Indexer.to_field" do | |
| 6 6 | 
             
              end
         | 
| 7 7 | 
             
              describe "checks it's arguments" do
         | 
| 8 8 | 
             
                it "rejects nil first arg" do
         | 
| 9 | 
            -
                  assert_raises( | 
| 9 | 
            +
                  assert_raises(Traject::Indexer::NamingError) { @indexer.to_field(nil) }
         | 
| 10 10 | 
             
                end
         | 
| 11 11 | 
             
                it "rejects empty string first arg" do
         | 
| 12 | 
            -
                  assert_raises( | 
| 12 | 
            +
                  assert_raises(Traject::Indexer::NamingError) {@indexer.to_field("")}
         | 
| 13 13 | 
             
                end
         | 
| 14 | 
            +
                it "rejects non-string first arg" do
         | 
| 15 | 
            +
                  assert_raises(Traject::Indexer::NamingError) {@indexer.to_field(:symbol)}
         | 
| 16 | 
            +
                end
         | 
| 17 | 
            +
                
         | 
| 14 18 | 
             
                it "rejects one-arg lambda" do
         | 
| 15 | 
            -
                  assert_raises( | 
| 19 | 
            +
                  assert_raises(Traject::Indexer::ArityError) do
         | 
| 16 20 | 
             
                    @indexer.to_field("foo") do |one_arg|
         | 
| 17 | 
            -
             | 
| 18 21 | 
             
                    end
         | 
| 19 22 | 
             
                  end
         | 
| 20 23 | 
             
                end
         | 
| 21 24 | 
             
                it "rejects four-arg lambda" do
         | 
| 22 | 
            -
                  assert_raises( | 
| 25 | 
            +
                  assert_raises(Traject::Indexer::ArityError) do 
         | 
| 23 26 | 
             
                    @indexer.to_field("foo") do |one_arg, two_arg, three_arg, four_arg|
         | 
| 24 27 | 
             
                    end
         | 
| 25 28 | 
             
                  end
         | 
| @@ -36,4 +39,31 @@ describe "Traject::Indexer.to_field" do | |
| 36 39 | 
             
                  end
         | 
| 37 40 | 
             
                end
         | 
| 38 41 | 
             
              end
         | 
| 42 | 
            +
              
         | 
| 43 | 
            +
              describe "gives location in error message" do
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                it "finds no previous field on initial error" do
         | 
| 46 | 
            +
                  begin
         | 
| 47 | 
            +
                    @indexer.to_field('') {|one, two| }   # bad field name
         | 
| 48 | 
            +
                    flunk("Should have rejected empty field name")
         | 
| 49 | 
            +
                  rescue Traject::Indexer::NamingError => e
         | 
| 50 | 
            +
                    assert_match(/no previous named fields/, e.message)
         | 
| 51 | 
            +
                  rescue 
         | 
| 52 | 
            +
                    flunk("Should only fail with a NamingError")
         | 
| 53 | 
            +
                  end
         | 
| 54 | 
            +
                end
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                it "finds first (only) field on error" do
         | 
| 57 | 
            +
                  begin
         | 
| 58 | 
            +
                    @indexer.to_field('foo') {|one, two| }
         | 
| 59 | 
            +
                    @indexer.to_field('') {|one, two| }   # bad field name
         | 
| 60 | 
            +
                    flunk("Should have rejected empty field name")
         | 
| 61 | 
            +
                  rescue Traject::Indexer::NamingError => e
         | 
| 62 | 
            +
                    assert_match(/foo/, e.message)
         | 
| 63 | 
            +
                  rescue 
         | 
| 64 | 
            +
                    flunk("Should only fail with a NamingError")
         | 
| 65 | 
            +
                  end
         | 
| 66 | 
            +
                end
         | 
| 67 | 
            +
              end
         | 
| 68 | 
            +
              
         | 
| 39 69 | 
             
            end
         | 
    
        data/test/marc4j_reader_test.rb
    CHANGED
    
    | @@ -54,6 +54,9 @@ describe "Marc4JReader" do | |
| 54 54 | 
             
                # it's legal, it probably looks weird as a string literal
         | 
| 55 55 | 
             
                # below, depending on your editor.
         | 
| 56 56 | 
             
                assert_equal "Por uma outra globalização :", a245a
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                # Set leader byte to proper for unicode
         | 
| 59 | 
            +
                assert_equal 'a', array.first.leader[9]
         | 
| 57 60 | 
             
              end
         | 
| 58 61 |  | 
| 59 62 |  | 
    
        data/test/marc_extractor_test.rb
    CHANGED
    
    | @@ -50,6 +50,67 @@ describe "Traject::MarcExtractor" do | |
| 50 50 | 
             
                  assert_equal 5, parsed["005"][:bytes]
         | 
| 51 51 | 
             
                  assert_equal 7..10, parsed["008"][:bytes]
         | 
| 52 52 | 
             
                end
         | 
| 53 | 
            +
                
         | 
| 54 | 
            +
                it "allows arrays of specs" do
         | 
| 55 | 
            +
                  parsed = Traject::MarcExtractor.parse_string_spec %w(
         | 
| 56 | 
            +
                    245abcde
         | 
| 57 | 
            +
                    810
         | 
| 58 | 
            +
                    700|*4|bcd
         | 
| 59 | 
            +
                  )
         | 
| 60 | 
            +
                  assert_length 3, parsed
         | 
| 61 | 
            +
                end
         | 
| 62 | 
            +
                
         | 
| 63 | 
            +
                it "allows mixture of array and colon-delimited specs" do
         | 
| 64 | 
            +
                  parsed = Traject::MarcExtractor.parse_string_spec %w(
         | 
| 65 | 
            +
                    245abcde
         | 
| 66 | 
            +
                    100:110:111
         | 
| 67 | 
            +
                    810
         | 
| 68 | 
            +
                    700|*4|bcd
         | 
| 69 | 
            +
                  )
         | 
| 70 | 
            +
                  assert_length 6, parsed
         | 
| 71 | 
            +
                end
         | 
| 72 | 
            +
                  
         | 
| 73 | 
            +
                
         | 
| 74 | 
            +
              end
         | 
| 75 | 
            +
             | 
| 76 | 
            +
              # Mostly an internal method, not neccesarily API, but
         | 
| 77 | 
            +
              # an important one, so we unit test some parts of it.
         | 
| 78 | 
            +
              describe "#spec_covering_field" do
         | 
| 79 | 
            +
                describe "for alternate script tags" do
         | 
| 80 | 
            +
                  before do
         | 
| 81 | 
            +
                    @record = MARC::Reader.new(support_file_path  "hebrew880s.marc").to_a.first
         | 
| 82 | 
            +
                    @extractor = Traject::MarcExtractor.new("245")
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                    @a245 = @record.fields.find {|f| f.tag == "245"}
         | 
| 85 | 
            +
                    assert ! @a245.nil?, "Found a 245 to test"
         | 
| 86 | 
            +
             | 
| 87 | 
            +
                    @a880_245 = @record.fields.find do |field|
         | 
| 88 | 
            +
                      (field.tag == "880") && field['6'] &&
         | 
| 89 | 
            +
                      "245" == field['6'].slice(0,3)
         | 
| 90 | 
            +
                    end
         | 
| 91 | 
            +
                    assert ! @a880_245.nil?, "Found an 880-245 to test"
         | 
| 92 | 
            +
             | 
| 93 | 
            +
                    @a880_100 = @record.fields.find do |field|
         | 
| 94 | 
            +
                      (field.tag == "880") && field['6'] &&
         | 
| 95 | 
            +
                      "100" == field['6'].slice(0,3)
         | 
| 96 | 
            +
                    end
         | 
| 97 | 
            +
             | 
| 98 | 
            +
                    assert ! @a880_100.nil?, "Found an 880-100 to test"
         | 
| 99 | 
            +
                  end
         | 
| 100 | 
            +
                  it "finds spec for relevant 880" do
         | 
| 101 | 
            +
                    assert_equal( {}, @extractor.spec_covering_field(@a880_245) )
         | 
| 102 | 
            +
                    assert_nil        @extractor.spec_covering_field(@a880_100)
         | 
| 103 | 
            +
                  end
         | 
| 104 | 
            +
                  it "does not find spec for 880 if disabled" do
         | 
| 105 | 
            +
                    @extractor = Traject::MarcExtractor.new("245", :alternate_script => false)
         | 
| 106 | 
            +
                    assert_nil @extractor.spec_covering_field(@a880_245) 
         | 
| 107 | 
            +
                  end
         | 
| 108 | 
            +
                  it "finds only 880 if so configured" do
         | 
| 109 | 
            +
                    @extractor = Traject::MarcExtractor.new("245", :alternate_script => :only)
         | 
| 110 | 
            +
                    assert_nil @extractor.spec_covering_field(@a245) 
         | 
| 111 | 
            +
                    assert_equal({},  @extractor.spec_covering_field(@a880_245))
         | 
| 112 | 
            +
                  end
         | 
| 113 | 
            +
                end
         | 
| 53 114 | 
             
              end
         | 
| 54 115 |  | 
| 55 116 | 
             
              describe "#extract_by_spec" do
         | 
| @@ -60,7 +121,7 @@ describe "Traject::MarcExtractor" do | |
| 60 121 | 
             
                describe "extracts a basic case" do
         | 
| 61 122 | 
             
                  before do
         | 
| 62 123 | 
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("700abcdef:856|*2|:505|1*|:245ba")
         | 
| 63 | 
            -
                    @values = Traject::MarcExtractor. | 
| 124 | 
            +
                    @values = Traject::MarcExtractor.new(parsed_spec).extract(@record)
         | 
| 64 125 | 
             
                  end
         | 
| 65 126 |  | 
| 66 127 | 
             
                  it "returns an array" do
         | 
| @@ -94,19 +155,19 @@ describe "Traject::MarcExtractor" do | |
| 94 155 | 
             
                describe "extracts fixed fields" do
         | 
| 95 156 | 
             
                  it ", complete" do
         | 
| 96 157 | 
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("001")
         | 
| 97 | 
            -
                    values = Traject::MarcExtractor. | 
| 158 | 
            +
                    values = Traject::MarcExtractor.new(parsed_spec).extract(@record)
         | 
| 98 159 |  | 
| 99 160 | 
             
                    assert_equal ["2710183"], values
         | 
| 100 161 | 
             
                  end
         | 
| 101 162 | 
             
                  it ", single byte offset" do
         | 
| 102 163 | 
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("008[5]")
         | 
| 103 | 
            -
                    values = Traject::MarcExtractor. | 
| 164 | 
            +
                    values = Traject::MarcExtractor.new(parsed_spec).extract(@record)
         | 
| 104 165 |  | 
| 105 166 | 
             
                    assert_equal ["1"], values
         | 
| 106 167 | 
             
                  end
         | 
| 107 168 | 
             
                  it ", byte range" do
         | 
| 108 169 | 
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("008[7-10]")
         | 
| 109 | 
            -
                    values = Traject::MarcExtractor. | 
| 170 | 
            +
                    values = Traject::MarcExtractor.new(parsed_spec).extract(@record)
         | 
| 110 171 |  | 
| 111 172 | 
             
                    assert_equal ["2002"], values
         | 
| 112 173 | 
             
                  end
         | 
| @@ -115,14 +176,14 @@ describe "Traject::MarcExtractor" do | |
| 115 176 | 
             
                describe "seperator argument" do
         | 
| 116 177 | 
             
                  it "causes non-join when nil" do
         | 
| 117 178 | 
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("245")
         | 
| 118 | 
            -
                    values = Traject::MarcExtractor. | 
| 179 | 
            +
                    values = Traject::MarcExtractor.new(parsed_spec, :seperator => nil).extract(@record)
         | 
| 119 180 |  | 
| 120 181 | 
             
                    assert_length 3, values
         | 
| 121 182 | 
             
                  end
         | 
| 122 183 |  | 
| 123 184 | 
             
                  it "can be non-default" do
         | 
| 124 185 | 
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("245")
         | 
| 125 | 
            -
                    values = Traject::MarcExtractor. | 
| 186 | 
            +
                    values = Traject::MarcExtractor.new(parsed_spec, :seperator => "!! ").extract(@record)
         | 
| 126 187 |  | 
| 127 188 | 
             
                    assert_length 1, values
         | 
| 128 189 | 
             
                    assert_equal "Manufacturing consent :!! the political economy of the mass media /!! Edward S. Herman and Noam Chomsky ; with a new introduction by the authors.", values.first
         | 
| @@ -136,19 +197,19 @@ describe "Traject::MarcExtractor" do | |
| 136 197 | 
             
                  end
         | 
| 137 198 | 
             
                  it "from default :include" do
         | 
| 138 199 |  | 
| 139 | 
            -
                    values = Traject::MarcExtractor. | 
| 200 | 
            +
                    values = Traject::MarcExtractor.new(@parsed_spec).extract(@record)
         | 
| 140 201 |  | 
| 141 202 | 
             
                    assert_length 2, values # both the original and the 880
         | 
| 142 203 | 
             
                    assert_equal ["ben Marṭin Buber le-Aharon Daṿid Gordon /", "בין מרטין בובר לאהרן דוד גורדון /"], values
         | 
| 143 204 | 
             
                  end
         | 
| 144 205 | 
             
                  it "with :only" do
         | 
| 145 | 
            -
                    values = Traject::MarcExtractor. | 
| 206 | 
            +
                    values = Traject::MarcExtractor.new(@parsed_spec, :alternate_script => :only).extract(@record)
         | 
| 146 207 |  | 
| 147 208 | 
             
                    assert_length 1, values
         | 
| 148 209 | 
             
                    assert_equal ["בין מרטין בובר לאהרן דוד גורדון /"], values
         | 
| 149 210 | 
             
                  end
         | 
| 150 211 | 
             
                  it "with false" do
         | 
| 151 | 
            -
                    values = Traject::MarcExtractor. | 
| 212 | 
            +
                    values = Traject::MarcExtractor.new(@parsed_spec, :alternate_script => false).extract(@record)
         | 
| 152 213 |  | 
| 153 214 | 
             
                    assert_length 1, values
         | 
| 154 215 | 
             
                    assert_equal ["ben Marṭin Buber le-Aharon Daṿid Gordon /"], values
         | 
| @@ -156,22 +217,22 @@ describe "Traject::MarcExtractor" do | |
| 156 217 | 
             
                end
         | 
| 157 218 |  | 
| 158 219 | 
             
                it "works with string second arg too" do
         | 
| 159 | 
            -
                  values = Traject::MarcExtractor. | 
| 220 | 
            +
                  values = Traject::MarcExtractor.new("245abc").extract(@record)
         | 
| 160 221 |  | 
| 161 222 | 
             
                  assert_length 1, values
         | 
| 162 223 | 
             
                  assert values.first.include?("Manufacturing consent"), "Extracted value includes title"
         | 
| 163 224 | 
             
                end
         | 
| 164 225 |  | 
| 165 226 | 
             
                it "returns empty array if no matching tags" do
         | 
| 166 | 
            -
                  values = Traject::MarcExtractor. | 
| 227 | 
            +
                  values = Traject::MarcExtractor.new("999abc").extract(@record)
         | 
| 167 228 | 
             
                  assert_equal [], values
         | 
| 168 229 |  | 
| 169 | 
            -
                  values = Traject::MarcExtractor. | 
| 230 | 
            +
                  values = Traject::MarcExtractor.new("999").extract(@record)
         | 
| 170 231 | 
             
                  assert_equal [], values
         | 
| 171 232 | 
             
                end
         | 
| 172 233 |  | 
| 173 | 
            -
                it "returns empty array if matching tag but no subfield" do | 
| 174 | 
            -
                  values = Traject::MarcExtractor. | 
| 234 | 
            +
                it "returns empty array if matching tag but no subfield" do
         | 
| 235 | 
            +
                  values = Traject::MarcExtractor.new("245xyz").extract(@record)
         | 
| 175 236 | 
             
                  assert_equal [], values
         | 
| 176 237 | 
             
                end
         | 
| 177 238 |  | 
| @@ -180,7 +241,7 @@ describe "Traject::MarcExtractor" do | |
| 180 241 | 
             
              describe "with bad data" do
         | 
| 181 242 | 
             
                it "can ignore an 880 with no $6" do
         | 
| 182 243 | 
             
                  @record = MARC::Reader.new(support_file_path  "880_with_no_6.utf8.marc").to_a.first
         | 
| 183 | 
            -
                  values = Traject::MarcExtractor. | 
| 244 | 
            +
                  values = Traject::MarcExtractor.new("001").extract(@record)
         | 
| 184 245 | 
             
                  assert_equal ["3468569"], values
         | 
| 185 246 | 
             
                end
         | 
| 186 247 | 
             
              end
         | 
| @@ -188,11 +249,11 @@ describe "Traject::MarcExtractor" do | |
| 188 249 | 
             
              describe "#each_matching_line" do
         | 
| 189 250 | 
             
                before do
         | 
| 190 251 | 
             
                  @record = MARC::Reader.new(support_file_path  "manufacturing_consent.marc").to_a.first
         | 
| 191 | 
            -
                  @extractor = Traject::MarcExtractor.new( | 
| 252 | 
            +
                  @extractor = Traject::MarcExtractor.new("245abc")
         | 
| 192 253 | 
             
                end
         | 
| 193 254 | 
             
                it "yields two args" do
         | 
| 194 255 | 
             
                  called = false
         | 
| 195 | 
            -
                  @extractor.each_matching_line do |field, spec|
         | 
| 256 | 
            +
                  @extractor.each_matching_line(@record) do |field, spec|
         | 
| 196 257 | 
             
                    called = true
         | 
| 197 258 | 
             
                    assert_kind_of MARC::DataField, field
         | 
| 198 259 | 
             
                    assert_kind_of Hash, spec
         | 
| @@ -201,7 +262,7 @@ describe "Traject::MarcExtractor" do | |
| 201 262 | 
             
                end
         | 
| 202 263 | 
             
                it "yields three args" do
         | 
| 203 264 | 
             
                  called = false
         | 
| 204 | 
            -
                  @extractor.each_matching_line do |field, spec, extractor|
         | 
| 265 | 
            +
                  @extractor.each_matching_line(@record) do |field, spec, extractor|
         | 
| 205 266 | 
             
                    called = true
         | 
| 206 267 | 
             
                    assert_kind_of MARC::DataField, field
         | 
| 207 268 | 
             
                    assert_kind_of Hash, spec
         | 
| @@ -215,16 +276,29 @@ describe "Traject::MarcExtractor" do | |
| 215 276 | 
             
              describe "#collect_matching_lines" do
         | 
| 216 277 | 
             
                before do
         | 
| 217 278 | 
             
                  @record = MARC::Reader.new(support_file_path  "manufacturing_consent.marc").to_a.first
         | 
| 218 | 
            -
                  @extractor = Traject::MarcExtractor.new( | 
| 279 | 
            +
                  @extractor = Traject::MarcExtractor.new("245abc")
         | 
| 219 280 | 
             
                end
         | 
| 220 281 | 
             
                it "collects with custom block" do
         | 
| 221 | 
            -
                  results = @extractor.collect_matching_lines do |field, spec, extractor|
         | 
| 282 | 
            +
                  results = @extractor.collect_matching_lines(@record) do |field, spec, extractor|
         | 
| 222 283 | 
             
                    extractor.collect_subfields(field, spec)
         | 
| 223 284 | 
             
                  end
         | 
| 224 285 | 
             
                  assert_equal ["Manufacturing consent : the political economy of the mass media / Edward S. Herman and Noam Chomsky ; with a new introduction by the authors."], results
         | 
| 225 286 | 
             
                end
         | 
| 226 287 | 
             
              end
         | 
| 227 288 |  | 
| 289 | 
            +
              describe "MarcExtractor.cached" do
         | 
| 290 | 
            +
                it "creates" do
         | 
| 291 | 
            +
                  ext = Traject::MarcExtractor.cached("245abc", :seperator => nil)
         | 
| 292 | 
            +
                  assert_equal({"245"=>{:subfields=>["a", "b", "c"]}}, ext.spec_hash)
         | 
| 293 | 
            +
                  assert ext.options[:seperator].nil?, "extractor options[:seperator] is nil"
         | 
| 294 | 
            +
                end
         | 
| 295 | 
            +
                it "caches" do
         | 
| 296 | 
            +
                  ext1 = Traject::MarcExtractor.cached("245abc", :seperator => nil)
         | 
| 297 | 
            +
                  ext2 = Traject::MarcExtractor.cached("245abc", :seperator => nil)
         | 
| 298 | 
            +
             | 
| 299 | 
            +
                  assert_same ext1, ext2
         | 
| 300 | 
            +
                end
         | 
| 301 | 
            +
              end
         | 
| 228 302 |  | 
| 229 303 |  | 
| 230 304 | 
             
            end
         | 
| @@ -105,11 +105,14 @@ to_field "pub_date",          marc_publication_date | |
| 105 105 |  | 
| 106 106 | 
             
            # LCC to broad class, start with built-in from marc record, but then do our own for local
         | 
| 107 107 | 
             
            # call numbers.
         | 
| 108 | 
            -
            lcc_map | 
| 108 | 
            +
            lcc_map             = Traject::TranslationMap.new("lcc_top_level")
         | 
| 109 | 
            +
            holdings_extractor  = Traject::MarcExtractor.new("991:937")
         | 
| 110 | 
            +
            sudoc_extractor     = Traject::MarcExtractor.new("086a", :seperator =>nil)
         | 
| 111 | 
            +
             | 
| 109 112 | 
             
            to_field "discipline_facet",  marc_lcc_to_broad_category(:default => nil) do |record, accumulator|
         | 
| 110 113 | 
             
              # add in our local call numbers
         | 
| 111 114 | 
             
              accumulator.concat(
         | 
| 112 | 
            -
                 | 
| 115 | 
            +
                holdings_extractor.collect_matching_lines(record) do |field, spec, extractor|
         | 
| 113 116 | 
             
                    # we output call type 'processor' in subfield 'f' of our holdings
         | 
| 114 117 | 
             
                    # fields, that sort of maybe tells us if it's an LCC field.
         | 
| 115 118 | 
             
                    # When the data is right, which it often isn't.
         | 
| @@ -130,7 +133,7 @@ to_field "discipline_facet",  marc_lcc_to_broad_category(:default => nil) do |re | |
| 130 133 |  | 
| 131 134 | 
             
              # If it's got an 086, we'll put it in "Government Publication", to be
         | 
| 132 135 | 
             
              # consistent with when we do that from a local SuDoc call #.
         | 
| 133 | 
            -
              if  | 
| 136 | 
            +
              if sudoc_extractor.extract(record).length > 0
         | 
| 134 137 | 
             
                accumulator << "Government Publication"
         | 
| 135 138 | 
             
              end
         | 
| 136 139 |  |