traject 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +7 -0
 - data/Gemfile +5 -1
 - data/README.md +65 -17
 - data/bench/bench.rb +30 -0
 - data/bin/traject +4 -169
 - data/doc/batch_execution.md +177 -0
 - data/doc/extending.md +182 -0
 - data/doc/other_commands.md +49 -0
 - data/doc/settings.md +6 -2
 - data/lib/traject.rb +1 -0
 - data/lib/traject/command_line.rb +296 -0
 - data/lib/traject/debug_writer.rb +28 -0
 - data/lib/traject/indexer.rb +84 -20
 - data/lib/traject/indexer/settings.rb +9 -1
 - data/lib/traject/json_writer.rb +15 -38
 - data/lib/traject/line_writer.rb +59 -0
 - data/lib/traject/macros/marc21.rb +10 -5
 - data/lib/traject/macros/marc21_semantics.rb +57 -25
 - data/lib/traject/marc4j_reader.rb +9 -26
 - data/lib/traject/marc_extractor.rb +121 -48
 - data/lib/traject/mock_reader.rb +87 -0
 - data/lib/traject/mock_writer.rb +34 -0
 - data/lib/traject/solrj_writer.rb +1 -22
 - data/lib/traject/util.rb +107 -1
 - data/lib/traject/version.rb +1 -1
 - data/lib/traject/yaml_writer.rb +9 -0
 - data/test/debug_writer_test.rb +38 -0
 - data/test/indexer/each_record_test.rb +27 -2
 - data/test/indexer/macros_marc21_semantics_test.rb +12 -1
 - data/test/indexer/settings_test.rb +9 -2
 - data/test/indexer/to_field_test.rb +35 -5
 - data/test/marc4j_reader_test.rb +3 -0
 - data/test/marc_extractor_test.rb +94 -20
 - data/test/test_support/demo_config.rb +6 -3
 - data/traject.gemspec +1 -2
 - metadata +17 -20
 
    
        data/lib/traject/version.rb
    CHANGED
    
    
| 
         @@ -0,0 +1,38 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'stringio'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            require 'traject/debug_writer'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'traject'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'marc'
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            describe 'Simple output' do
         
     | 
| 
      
 9 
     | 
    
         
            +
              before do
         
     | 
| 
      
 10 
     | 
    
         
            +
                @record = MARC::Reader.new(support_file_path  "manufacturing_consent.marc").to_a.first
         
     | 
| 
      
 11 
     | 
    
         
            +
                @indexer = Traject::Indexer.new
         
     | 
| 
      
 12 
     | 
    
         
            +
                @indexer.instance_eval do
         
     | 
| 
      
 13 
     | 
    
         
            +
                  to_field "id", extract_marc("001", :first => true)
         
     | 
| 
      
 14 
     | 
    
         
            +
                  to_field "title", extract_marc("245ab")
         
     | 
| 
      
 15 
     | 
    
         
            +
                end
         
     | 
| 
      
 16 
     | 
    
         
            +
                @io = StringIO.new
         
     | 
| 
      
 17 
     | 
    
         
            +
                @writer = Traject::DebugWriter.new("output_stream" => @io)
         
     | 
| 
      
 18 
     | 
    
         
            +
                
         
     | 
| 
      
 19 
     | 
    
         
            +
                @id = "2710183"
         
     | 
| 
      
 20 
     | 
    
         
            +
                @title = "Manufacturing consent : the political economy of the mass media /"
         
     | 
| 
      
 21 
     | 
    
         
            +
              end
         
     | 
| 
      
 22 
     | 
    
         
            +
              
         
     | 
| 
      
 23 
     | 
    
         
            +
              it "does a simple output" do
         
     | 
| 
      
 24 
     | 
    
         
            +
                @writer.put Traject::Indexer::Context.new(:output_hash => @indexer.map_record(@record))
         
     | 
| 
      
 25 
     | 
    
         
            +
                expected = [
         
     | 
| 
      
 26 
     | 
    
         
            +
                  "#{@id} id #{@id}",
         
     | 
| 
      
 27 
     | 
    
         
            +
                  "#{@id} title #{@title}",
         
     | 
| 
      
 28 
     | 
    
         
            +
                  "\n"
         
     | 
| 
      
 29 
     | 
    
         
            +
                ]
         
     | 
| 
      
 30 
     | 
    
         
            +
                assert_equal expected.join("\n").gsub(/\s/, ''), @io.string.gsub(/\s/, '')
         
     | 
| 
      
 31 
     | 
    
         
            +
                @writer.close
         
     | 
| 
      
 32 
     | 
    
         
            +
                
         
     | 
| 
      
 33 
     | 
    
         
            +
              end
         
     | 
| 
      
 34 
     | 
    
         
            +
              
         
     | 
| 
      
 35 
     | 
    
         
            +
            end
         
     | 
| 
      
 36 
     | 
    
         
            +
                
         
     | 
| 
      
 37 
     | 
    
         
            +
                
         
     | 
| 
      
 38 
     | 
    
         
            +
                
         
     | 
| 
         @@ -7,13 +7,13 @@ describe "Traject::Indexer#each_record" do 
     | 
|
| 
       7 
7 
     | 
    
         | 
| 
       8 
8 
     | 
    
         
             
              describe "checks arguments" do
         
     | 
| 
       9 
9 
     | 
    
         
             
                it "rejects no-arg block" do
         
     | 
| 
       10 
     | 
    
         
            -
                  assert_raises( 
     | 
| 
      
 10 
     | 
    
         
            +
                  assert_raises(Traject::Indexer::ArityError) do
         
     | 
| 
       11 
11 
     | 
    
         
             
                    @indexer.each_record do
         
     | 
| 
       12 
12 
     | 
    
         
             
                    end
         
     | 
| 
       13 
13 
     | 
    
         
             
                  end
         
     | 
| 
       14 
14 
     | 
    
         
             
                end
         
     | 
| 
       15 
15 
     | 
    
         
             
                it "rejects three-arg block" do
         
     | 
| 
       16 
     | 
    
         
            -
                  assert_raises( 
     | 
| 
      
 16 
     | 
    
         
            +
                  assert_raises(Traject::Indexer::ArityError) do
         
     | 
| 
       17 
17 
     | 
    
         
             
                    @indexer.each_record do |one, two, three|
         
     | 
| 
       18 
18 
     | 
    
         
             
                    end
         
     | 
| 
       19 
19 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -30,5 +30,30 @@ describe "Traject::Indexer#each_record" do 
     | 
|
| 
       30 
30 
     | 
    
         
             
                  @indexer.each_record do |*variable|
         
     | 
| 
       31 
31 
     | 
    
         
             
                  end
         
     | 
| 
       32 
32 
     | 
    
         
             
                end
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                it "finds first (only) field on each_record error" do
         
     | 
| 
      
 35 
     | 
    
         
            +
                  begin
         
     | 
| 
      
 36 
     | 
    
         
            +
                    @indexer.to_field('foo') {|one, two| }
         
     | 
| 
      
 37 
     | 
    
         
            +
                    @indexer.each_record {|one, two, three| }   # bad arity
         
     | 
| 
      
 38 
     | 
    
         
            +
                    flunk("Should have rejected bad arity ")
         
     | 
| 
      
 39 
     | 
    
         
            +
                  rescue Traject::Indexer::ArityError => e
         
     | 
| 
      
 40 
     | 
    
         
            +
                    assert_match(/foo/, e.message)
         
     | 
| 
      
 41 
     | 
    
         
            +
                  rescue 
         
     | 
| 
      
 42 
     | 
    
         
            +
                    flunk("Should only fail with a ArityError")
         
     | 
| 
      
 43 
     | 
    
         
            +
                  end
         
     | 
| 
      
 44 
     | 
    
         
            +
                end
         
     | 
| 
      
 45 
     | 
    
         
            +
                
         
     | 
| 
      
 46 
     | 
    
         
            +
                it "rejects each_record with a name (e.g., using a to_field syntax)" do
         
     | 
| 
      
 47 
     | 
    
         
            +
                  assert_raises(Traject::Indexer::NamingError) do
         
     | 
| 
      
 48 
     | 
    
         
            +
                    @indexer.each_record('bad_name') {|one, two| }
         
     | 
| 
      
 49 
     | 
    
         
            +
                  end
         
     | 
| 
      
 50 
     | 
    
         
            +
                end
         
     | 
| 
      
 51 
     | 
    
         
            +
                
         
     | 
| 
      
 52 
     | 
    
         
            +
                it "reject each_record with no arguments/blocks at all" do
         
     | 
| 
      
 53 
     | 
    
         
            +
                  assert_raises(ArgumentError) do
         
     | 
| 
      
 54 
     | 
    
         
            +
                    @indexer.each_record()
         
     | 
| 
      
 55 
     | 
    
         
            +
                  end
         
     | 
| 
      
 56 
     | 
    
         
            +
                end
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
       33 
58 
     | 
    
         
             
              end
         
     | 
| 
       34 
59 
     | 
    
         
             
            end
         
     | 
| 
         @@ -25,7 +25,18 @@ describe "Traject::Macros::Marc21Semantics" do 
     | 
|
| 
       25 
25 
     | 
    
         
             
                end
         
     | 
| 
       26 
26 
     | 
    
         
             
                output = @indexer.map_record(@record)
         
     | 
| 
       27 
27 
     | 
    
         | 
| 
       28 
     | 
    
         
            -
                assert_equal %w{ 
     | 
| 
      
 28 
     | 
    
         
            +
                assert_equal %w{47971712},  output["oclcnum"]
         
     | 
| 
      
 29 
     | 
    
         
            +
              end
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
              it "#marc_series_facet" do
         
     | 
| 
      
 32 
     | 
    
         
            +
                @record = MARC::Reader.new(support_file_path  "louis_armstrong.marc").to_a.first
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                @indexer.instance_eval do
         
     | 
| 
      
 35 
     | 
    
         
            +
                  to_field "series_facet", marc_series_facet
         
     | 
| 
      
 36 
     | 
    
         
            +
                end
         
     | 
| 
      
 37 
     | 
    
         
            +
                output = @indexer.map_record(@record)
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
                assert_equal ["Big bands."], output["series_facet"]
         
     | 
| 
       29 
40 
     | 
    
         
             
              end
         
     | 
| 
       30 
41 
     | 
    
         | 
| 
       31 
42 
     | 
    
         
             
              describe "marc_sortable_author" do
         
     | 
| 
         @@ -114,8 +114,15 @@ describe "Traject::Indexer#settings" do 
     | 
|
| 
       114 
114 
     | 
    
         
             
                assert_equal "new", settings["c"]
         
     | 
| 
       115 
115 
     | 
    
         
             
              end
         
     | 
| 
       116 
116 
     | 
    
         | 
| 
       117 
     | 
    
         
            -
              describe " 
     | 
| 
       118 
     | 
    
         
            -
             
     | 
| 
      
 117 
     | 
    
         
            +
              describe "inspect" do
         
     | 
| 
      
 118 
     | 
    
         
            +
                it "keeps keys ending in 'password' out of inspect" do
         
     | 
| 
      
 119 
     | 
    
         
            +
                  settings = Traject::Indexer::Settings.new("a" => "a", 
         
     | 
| 
      
 120 
     | 
    
         
            +
                    "password" => "password", "some_password" => "password",
         
     | 
| 
      
 121 
     | 
    
         
            +
                    "some.password" => "password")
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
                  parsed = eval( settings.inspect )
         
     | 
| 
      
 124 
     | 
    
         
            +
                  assert_equal( {"a" => "a", "password" => "[hidden]", "some_password" => "[hidden]", "some.password" => "[hidden]"}, parsed)
         
     | 
| 
      
 125 
     | 
    
         
            +
                end
         
     | 
| 
       119 
126 
     | 
    
         
             
              end
         
     | 
| 
       120 
127 
     | 
    
         | 
| 
       121 
128 
     | 
    
         
             
            end
         
     | 
| 
         @@ -6,20 +6,23 @@ describe "Traject::Indexer.to_field" do 
     | 
|
| 
       6 
6 
     | 
    
         
             
              end
         
     | 
| 
       7 
7 
     | 
    
         
             
              describe "checks it's arguments" do
         
     | 
| 
       8 
8 
     | 
    
         
             
                it "rejects nil first arg" do
         
     | 
| 
       9 
     | 
    
         
            -
                  assert_raises( 
     | 
| 
      
 9 
     | 
    
         
            +
                  assert_raises(Traject::Indexer::NamingError) { @indexer.to_field(nil) }
         
     | 
| 
       10 
10 
     | 
    
         
             
                end
         
     | 
| 
       11 
11 
     | 
    
         
             
                it "rejects empty string first arg" do
         
     | 
| 
       12 
     | 
    
         
            -
                  assert_raises( 
     | 
| 
      
 12 
     | 
    
         
            +
                  assert_raises(Traject::Indexer::NamingError) {@indexer.to_field("")}
         
     | 
| 
       13 
13 
     | 
    
         
             
                end
         
     | 
| 
      
 14 
     | 
    
         
            +
                it "rejects non-string first arg" do
         
     | 
| 
      
 15 
     | 
    
         
            +
                  assert_raises(Traject::Indexer::NamingError) {@indexer.to_field(:symbol)}
         
     | 
| 
      
 16 
     | 
    
         
            +
                end
         
     | 
| 
      
 17 
     | 
    
         
            +
                
         
     | 
| 
       14 
18 
     | 
    
         
             
                it "rejects one-arg lambda" do
         
     | 
| 
       15 
     | 
    
         
            -
                  assert_raises( 
     | 
| 
      
 19 
     | 
    
         
            +
                  assert_raises(Traject::Indexer::ArityError) do
         
     | 
| 
       16 
20 
     | 
    
         
             
                    @indexer.to_field("foo") do |one_arg|
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
21 
     | 
    
         
             
                    end
         
     | 
| 
       19 
22 
     | 
    
         
             
                  end
         
     | 
| 
       20 
23 
     | 
    
         
             
                end
         
     | 
| 
       21 
24 
     | 
    
         
             
                it "rejects four-arg lambda" do
         
     | 
| 
       22 
     | 
    
         
            -
                  assert_raises( 
     | 
| 
      
 25 
     | 
    
         
            +
                  assert_raises(Traject::Indexer::ArityError) do 
         
     | 
| 
       23 
26 
     | 
    
         
             
                    @indexer.to_field("foo") do |one_arg, two_arg, three_arg, four_arg|
         
     | 
| 
       24 
27 
     | 
    
         
             
                    end
         
     | 
| 
       25 
28 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -36,4 +39,31 @@ describe "Traject::Indexer.to_field" do 
     | 
|
| 
       36 
39 
     | 
    
         
             
                  end
         
     | 
| 
       37 
40 
     | 
    
         
             
                end
         
     | 
| 
       38 
41 
     | 
    
         
             
              end
         
     | 
| 
      
 42 
     | 
    
         
            +
              
         
     | 
| 
      
 43 
     | 
    
         
            +
              describe "gives location in error message" do
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
                it "finds no previous field on initial error" do
         
     | 
| 
      
 46 
     | 
    
         
            +
                  begin
         
     | 
| 
      
 47 
     | 
    
         
            +
                    @indexer.to_field('') {|one, two| }   # bad field name
         
     | 
| 
      
 48 
     | 
    
         
            +
                    flunk("Should have rejected empty field name")
         
     | 
| 
      
 49 
     | 
    
         
            +
                  rescue Traject::Indexer::NamingError => e
         
     | 
| 
      
 50 
     | 
    
         
            +
                    assert_match(/no previous named fields/, e.message)
         
     | 
| 
      
 51 
     | 
    
         
            +
                  rescue 
         
     | 
| 
      
 52 
     | 
    
         
            +
                    flunk("Should only fail with a NamingError")
         
     | 
| 
      
 53 
     | 
    
         
            +
                  end
         
     | 
| 
      
 54 
     | 
    
         
            +
                end
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                it "finds first (only) field on error" do
         
     | 
| 
      
 57 
     | 
    
         
            +
                  begin
         
     | 
| 
      
 58 
     | 
    
         
            +
                    @indexer.to_field('foo') {|one, two| }
         
     | 
| 
      
 59 
     | 
    
         
            +
                    @indexer.to_field('') {|one, two| }   # bad field name
         
     | 
| 
      
 60 
     | 
    
         
            +
                    flunk("Should have rejected empty field name")
         
     | 
| 
      
 61 
     | 
    
         
            +
                  rescue Traject::Indexer::NamingError => e
         
     | 
| 
      
 62 
     | 
    
         
            +
                    assert_match(/foo/, e.message)
         
     | 
| 
      
 63 
     | 
    
         
            +
                  rescue 
         
     | 
| 
      
 64 
     | 
    
         
            +
                    flunk("Should only fail with a NamingError")
         
     | 
| 
      
 65 
     | 
    
         
            +
                  end
         
     | 
| 
      
 66 
     | 
    
         
            +
                end
         
     | 
| 
      
 67 
     | 
    
         
            +
              end
         
     | 
| 
      
 68 
     | 
    
         
            +
              
         
     | 
| 
       39 
69 
     | 
    
         
             
            end
         
     | 
    
        data/test/marc4j_reader_test.rb
    CHANGED
    
    | 
         @@ -54,6 +54,9 @@ describe "Marc4JReader" do 
     | 
|
| 
       54 
54 
     | 
    
         
             
                # it's legal, it probably looks weird as a string literal
         
     | 
| 
       55 
55 
     | 
    
         
             
                # below, depending on your editor.
         
     | 
| 
       56 
56 
     | 
    
         
             
                assert_equal "Por uma outra globalização :", a245a
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
                # Set leader byte to proper for unicode
         
     | 
| 
      
 59 
     | 
    
         
            +
                assert_equal 'a', array.first.leader[9]
         
     | 
| 
       57 
60 
     | 
    
         
             
              end
         
     | 
| 
       58 
61 
     | 
    
         | 
| 
       59 
62 
     | 
    
         | 
    
        data/test/marc_extractor_test.rb
    CHANGED
    
    | 
         @@ -50,6 +50,67 @@ describe "Traject::MarcExtractor" do 
     | 
|
| 
       50 
50 
     | 
    
         
             
                  assert_equal 5, parsed["005"][:bytes]
         
     | 
| 
       51 
51 
     | 
    
         
             
                  assert_equal 7..10, parsed["008"][:bytes]
         
     | 
| 
       52 
52 
     | 
    
         
             
                end
         
     | 
| 
      
 53 
     | 
    
         
            +
                
         
     | 
| 
      
 54 
     | 
    
         
            +
                it "allows arrays of specs" do
         
     | 
| 
      
 55 
     | 
    
         
            +
                  parsed = Traject::MarcExtractor.parse_string_spec %w(
         
     | 
| 
      
 56 
     | 
    
         
            +
                    245abcde
         
     | 
| 
      
 57 
     | 
    
         
            +
                    810
         
     | 
| 
      
 58 
     | 
    
         
            +
                    700|*4|bcd
         
     | 
| 
      
 59 
     | 
    
         
            +
                  )
         
     | 
| 
      
 60 
     | 
    
         
            +
                  assert_length 3, parsed
         
     | 
| 
      
 61 
     | 
    
         
            +
                end
         
     | 
| 
      
 62 
     | 
    
         
            +
                
         
     | 
| 
      
 63 
     | 
    
         
            +
                it "allows mixture of array and colon-delimited specs" do
         
     | 
| 
      
 64 
     | 
    
         
            +
                  parsed = Traject::MarcExtractor.parse_string_spec %w(
         
     | 
| 
      
 65 
     | 
    
         
            +
                    245abcde
         
     | 
| 
      
 66 
     | 
    
         
            +
                    100:110:111
         
     | 
| 
      
 67 
     | 
    
         
            +
                    810
         
     | 
| 
      
 68 
     | 
    
         
            +
                    700|*4|bcd
         
     | 
| 
      
 69 
     | 
    
         
            +
                  )
         
     | 
| 
      
 70 
     | 
    
         
            +
                  assert_length 6, parsed
         
     | 
| 
      
 71 
     | 
    
         
            +
                end
         
     | 
| 
      
 72 
     | 
    
         
            +
                  
         
     | 
| 
      
 73 
     | 
    
         
            +
                
         
     | 
| 
      
 74 
     | 
    
         
            +
              end
         
     | 
| 
      
 75 
     | 
    
         
            +
             
     | 
| 
      
 76 
     | 
    
         
            +
              # Mostly an internal method, not necessarily API, but
         
     | 
| 
      
 77 
     | 
    
         
            +
              # an important one, so we unit test some parts of it.
         
     | 
| 
      
 78 
     | 
    
         
            +
              describe "#spec_covering_field" do
         
     | 
| 
      
 79 
     | 
    
         
            +
                describe "for alternate script tags" do
         
     | 
| 
      
 80 
     | 
    
         
            +
                  before do
         
     | 
| 
      
 81 
     | 
    
         
            +
                    @record = MARC::Reader.new(support_file_path  "hebrew880s.marc").to_a.first
         
     | 
| 
      
 82 
     | 
    
         
            +
                    @extractor = Traject::MarcExtractor.new("245")
         
     | 
| 
      
 83 
     | 
    
         
            +
             
     | 
| 
      
 84 
     | 
    
         
            +
                    @a245 = @record.fields.find {|f| f.tag == "245"}
         
     | 
| 
      
 85 
     | 
    
         
            +
                    assert ! @a245.nil?, "Found a 245 to test"
         
     | 
| 
      
 86 
     | 
    
         
            +
             
     | 
| 
      
 87 
     | 
    
         
            +
                    @a880_245 = @record.fields.find do |field|
         
     | 
| 
      
 88 
     | 
    
         
            +
                      (field.tag == "880") && field['6'] &&
         
     | 
| 
      
 89 
     | 
    
         
            +
                      "245" == field['6'].slice(0,3)
         
     | 
| 
      
 90 
     | 
    
         
            +
                    end
         
     | 
| 
      
 91 
     | 
    
         
            +
                    assert ! @a880_245.nil?, "Found an 880-245 to test"
         
     | 
| 
      
 92 
     | 
    
         
            +
             
     | 
| 
      
 93 
     | 
    
         
            +
                    @a880_100 = @record.fields.find do |field|
         
     | 
| 
      
 94 
     | 
    
         
            +
                      (field.tag == "880") && field['6'] &&
         
     | 
| 
      
 95 
     | 
    
         
            +
                      "100" == field['6'].slice(0,3)
         
     | 
| 
      
 96 
     | 
    
         
            +
                    end
         
     | 
| 
      
 97 
     | 
    
         
            +
             
     | 
| 
      
 98 
     | 
    
         
            +
                    assert ! @a880_100.nil?, "Found an 880-100 to test"
         
     | 
| 
      
 99 
     | 
    
         
            +
                  end
         
     | 
| 
      
 100 
     | 
    
         
            +
                  it "finds spec for relevant 880" do
         
     | 
| 
      
 101 
     | 
    
         
            +
                    assert_equal( {}, @extractor.spec_covering_field(@a880_245) )
         
     | 
| 
      
 102 
     | 
    
         
            +
                    assert_nil        @extractor.spec_covering_field(@a880_100)
         
     | 
| 
      
 103 
     | 
    
         
            +
                  end
         
     | 
| 
      
 104 
     | 
    
         
            +
                  it "does not find spec for 880 if disabled" do
         
     | 
| 
      
 105 
     | 
    
         
            +
                    @extractor = Traject::MarcExtractor.new("245", :alternate_script => false)
         
     | 
| 
      
 106 
     | 
    
         
            +
                    assert_nil @extractor.spec_covering_field(@a880_245) 
         
     | 
| 
      
 107 
     | 
    
         
            +
                  end
         
     | 
| 
      
 108 
     | 
    
         
            +
                  it "finds only 880 if so configured" do
         
     | 
| 
      
 109 
     | 
    
         
            +
                    @extractor = Traject::MarcExtractor.new("245", :alternate_script => :only)
         
     | 
| 
      
 110 
     | 
    
         
            +
                    assert_nil @extractor.spec_covering_field(@a245) 
         
     | 
| 
      
 111 
     | 
    
         
            +
                    assert_equal({},  @extractor.spec_covering_field(@a880_245))
         
     | 
| 
      
 112 
     | 
    
         
            +
                  end
         
     | 
| 
      
 113 
     | 
    
         
            +
                end
         
     | 
| 
       53 
114 
     | 
    
         
             
              end
         
     | 
| 
       54 
115 
     | 
    
         | 
| 
       55 
116 
     | 
    
         
             
              describe "#extract_by_spec" do
         
     | 
| 
         @@ -60,7 +121,7 @@ describe "Traject::MarcExtractor" do 
     | 
|
| 
       60 
121 
     | 
    
         
             
                describe "extracts a basic case" do
         
     | 
| 
       61 
122 
     | 
    
         
             
                  before do
         
     | 
| 
       62 
123 
     | 
    
         
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("700abcdef:856|*2|:505|1*|:245ba")
         
     | 
| 
       63 
     | 
    
         
            -
                    @values = Traject::MarcExtractor. 
     | 
| 
      
 124 
     | 
    
         
            +
                    @values = Traject::MarcExtractor.new(parsed_spec).extract(@record)
         
     | 
| 
       64 
125 
     | 
    
         
             
                  end
         
     | 
| 
       65 
126 
     | 
    
         | 
| 
       66 
127 
     | 
    
         
             
                  it "returns an array" do
         
     | 
| 
         @@ -94,19 +155,19 @@ describe "Traject::MarcExtractor" do 
     | 
|
| 
       94 
155 
     | 
    
         
             
                describe "extracts fixed fields" do
         
     | 
| 
       95 
156 
     | 
    
         
             
                  it ", complete" do
         
     | 
| 
       96 
157 
     | 
    
         
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("001")
         
     | 
| 
       97 
     | 
    
         
            -
                    values = Traject::MarcExtractor. 
     | 
| 
      
 158 
     | 
    
         
            +
                    values = Traject::MarcExtractor.new(parsed_spec).extract(@record)
         
     | 
| 
       98 
159 
     | 
    
         | 
| 
       99 
160 
     | 
    
         
             
                    assert_equal ["2710183"], values
         
     | 
| 
       100 
161 
     | 
    
         
             
                  end
         
     | 
| 
       101 
162 
     | 
    
         
             
                  it ", single byte offset" do
         
     | 
| 
       102 
163 
     | 
    
         
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("008[5]")
         
     | 
| 
       103 
     | 
    
         
            -
                    values = Traject::MarcExtractor. 
     | 
| 
      
 164 
     | 
    
         
            +
                    values = Traject::MarcExtractor.new(parsed_spec).extract(@record)
         
     | 
| 
       104 
165 
     | 
    
         | 
| 
       105 
166 
     | 
    
         
             
                    assert_equal ["1"], values
         
     | 
| 
       106 
167 
     | 
    
         
             
                  end
         
     | 
| 
       107 
168 
     | 
    
         
             
                  it ", byte range" do
         
     | 
| 
       108 
169 
     | 
    
         
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("008[7-10]")
         
     | 
| 
       109 
     | 
    
         
            -
                    values = Traject::MarcExtractor. 
     | 
| 
      
 170 
     | 
    
         
            +
                    values = Traject::MarcExtractor.new(parsed_spec).extract(@record)
         
     | 
| 
       110 
171 
     | 
    
         | 
| 
       111 
172 
     | 
    
         
             
                    assert_equal ["2002"], values
         
     | 
| 
       112 
173 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -115,14 +176,14 @@ describe "Traject::MarcExtractor" do 
     | 
|
| 
       115 
176 
     | 
    
         
             
                describe "seperator argument" do
         
     | 
| 
       116 
177 
     | 
    
         
             
                  it "causes non-join when nil" do
         
     | 
| 
       117 
178 
     | 
    
         
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("245")
         
     | 
| 
       118 
     | 
    
         
            -
                    values = Traject::MarcExtractor. 
     | 
| 
      
 179 
     | 
    
         
            +
                    values = Traject::MarcExtractor.new(parsed_spec, :seperator => nil).extract(@record)
         
     | 
| 
       119 
180 
     | 
    
         | 
| 
       120 
181 
     | 
    
         
             
                    assert_length 3, values
         
     | 
| 
       121 
182 
     | 
    
         
             
                  end
         
     | 
| 
       122 
183 
     | 
    
         | 
| 
       123 
184 
     | 
    
         
             
                  it "can be non-default" do
         
     | 
| 
       124 
185 
     | 
    
         
             
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("245")
         
     | 
| 
       125 
     | 
    
         
            -
                    values = Traject::MarcExtractor. 
     | 
| 
      
 186 
     | 
    
         
            +
                    values = Traject::MarcExtractor.new(parsed_spec, :seperator => "!! ").extract(@record)
         
     | 
| 
       126 
187 
     | 
    
         | 
| 
       127 
188 
     | 
    
         
             
                    assert_length 1, values
         
     | 
| 
       128 
189 
     | 
    
         
             
                    assert_equal "Manufacturing consent :!! the political economy of the mass media /!! Edward S. Herman and Noam Chomsky ; with a new introduction by the authors.", values.first
         
     | 
| 
         @@ -136,19 +197,19 @@ describe "Traject::MarcExtractor" do 
     | 
|
| 
       136 
197 
     | 
    
         
             
                  end
         
     | 
| 
       137 
198 
     | 
    
         
             
                  it "from default :include" do
         
     | 
| 
       138 
199 
     | 
    
         | 
| 
       139 
     | 
    
         
            -
                    values = Traject::MarcExtractor. 
     | 
| 
      
 200 
     | 
    
         
            +
                    values = Traject::MarcExtractor.new(@parsed_spec).extract(@record)
         
     | 
| 
       140 
201 
     | 
    
         | 
| 
       141 
202 
     | 
    
         
             
                    assert_length 2, values # both the original and the 880
         
     | 
| 
       142 
203 
     | 
    
         
             
                    assert_equal ["ben Marṭin Buber le-Aharon Daṿid Gordon /", "בין מרטין בובר לאהרן דוד גורדון /"], values
         
     | 
| 
       143 
204 
     | 
    
         
             
                  end
         
     | 
| 
       144 
205 
     | 
    
         
             
                  it "with :only" do
         
     | 
| 
       145 
     | 
    
         
            -
                    values = Traject::MarcExtractor. 
     | 
| 
      
 206 
     | 
    
         
            +
                    values = Traject::MarcExtractor.new(@parsed_spec, :alternate_script => :only).extract(@record)
         
     | 
| 
       146 
207 
     | 
    
         | 
| 
       147 
208 
     | 
    
         
             
                    assert_length 1, values
         
     | 
| 
       148 
209 
     | 
    
         
             
                    assert_equal ["בין מרטין בובר לאהרן דוד גורדון /"], values
         
     | 
| 
       149 
210 
     | 
    
         
             
                  end
         
     | 
| 
       150 
211 
     | 
    
         
             
                  it "with false" do
         
     | 
| 
       151 
     | 
    
         
            -
                    values = Traject::MarcExtractor. 
     | 
| 
      
 212 
     | 
    
         
            +
                    values = Traject::MarcExtractor.new(@parsed_spec, :alternate_script => false).extract(@record)
         
     | 
| 
       152 
213 
     | 
    
         | 
| 
       153 
214 
     | 
    
         
             
                    assert_length 1, values
         
     | 
| 
       154 
215 
     | 
    
         
             
                    assert_equal ["ben Marṭin Buber le-Aharon Daṿid Gordon /"], values
         
     | 
| 
         @@ -156,22 +217,22 @@ describe "Traject::MarcExtractor" do 
     | 
|
| 
       156 
217 
     | 
    
         
             
                end
         
     | 
| 
       157 
218 
     | 
    
         | 
| 
       158 
219 
     | 
    
         
             
                it "works with string second arg too" do
         
     | 
| 
       159 
     | 
    
         
            -
                  values = Traject::MarcExtractor. 
     | 
| 
      
 220 
     | 
    
         
            +
                  values = Traject::MarcExtractor.new("245abc").extract(@record)
         
     | 
| 
       160 
221 
     | 
    
         | 
| 
       161 
222 
     | 
    
         
             
                  assert_length 1, values
         
     | 
| 
       162 
223 
     | 
    
         
             
                  assert values.first.include?("Manufacturing consent"), "Extracted value includes title"
         
     | 
| 
       163 
224 
     | 
    
         
             
                end
         
     | 
| 
       164 
225 
     | 
    
         | 
| 
       165 
226 
     | 
    
         
             
                it "returns empty array if no matching tags" do
         
     | 
| 
       166 
     | 
    
         
            -
                  values = Traject::MarcExtractor. 
     | 
| 
      
 227 
     | 
    
         
            +
                  values = Traject::MarcExtractor.new("999abc").extract(@record)
         
     | 
| 
       167 
228 
     | 
    
         
             
                  assert_equal [], values
         
     | 
| 
       168 
229 
     | 
    
         | 
| 
       169 
     | 
    
         
            -
                  values = Traject::MarcExtractor. 
     | 
| 
      
 230 
     | 
    
         
            +
                  values = Traject::MarcExtractor.new("999").extract(@record)
         
     | 
| 
       170 
231 
     | 
    
         
             
                  assert_equal [], values
         
     | 
| 
       171 
232 
     | 
    
         
             
                end
         
     | 
| 
       172 
233 
     | 
    
         | 
| 
       173 
     | 
    
         
            -
                it "returns empty array if matching tag but no subfield" do 
     | 
| 
       174 
     | 
    
         
            -
                  values = Traject::MarcExtractor. 
     | 
| 
      
 234 
     | 
    
         
            +
                it "returns empty array if matching tag but no subfield" do
         
     | 
| 
      
 235 
     | 
    
         
            +
                  values = Traject::MarcExtractor.new("245xyz").extract(@record)
         
     | 
| 
       175 
236 
     | 
    
         
             
                  assert_equal [], values
         
     | 
| 
       176 
237 
     | 
    
         
             
                end
         
     | 
| 
       177 
238 
     | 
    
         | 
| 
         @@ -180,7 +241,7 @@ describe "Traject::MarcExtractor" do 
     | 
|
| 
       180 
241 
     | 
    
         
             
              describe "with bad data" do
         
     | 
| 
       181 
242 
     | 
    
         
             
                it "can ignore an 880 with no $6" do
         
     | 
| 
       182 
243 
     | 
    
         
             
                  @record = MARC::Reader.new(support_file_path  "880_with_no_6.utf8.marc").to_a.first
         
     | 
| 
       183 
     | 
    
         
            -
                  values = Traject::MarcExtractor. 
     | 
| 
      
 244 
     | 
    
         
            +
                  values = Traject::MarcExtractor.new("001").extract(@record)
         
     | 
| 
       184 
245 
     | 
    
         
             
                  assert_equal ["3468569"], values
         
     | 
| 
       185 
246 
     | 
    
         
             
                end
         
     | 
| 
       186 
247 
     | 
    
         
             
              end
         
     | 
| 
         @@ -188,11 +249,11 @@ describe "Traject::MarcExtractor" do 
     | 
|
| 
       188 
249 
     | 
    
         
             
              describe "#each_matching_line" do
         
     | 
| 
       189 
250 
     | 
    
         
             
                before do
         
     | 
| 
       190 
251 
     | 
    
         
             
                  @record = MARC::Reader.new(support_file_path  "manufacturing_consent.marc").to_a.first
         
     | 
| 
       191 
     | 
    
         
            -
                  @extractor = Traject::MarcExtractor.new( 
     | 
| 
      
 252 
     | 
    
         
            +
                  @extractor = Traject::MarcExtractor.new("245abc")
         
     | 
| 
       192 
253 
     | 
    
         
             
                end
         
     | 
| 
       193 
254 
     | 
    
         
             
                it "yields two args" do
         
     | 
| 
       194 
255 
     | 
    
         
             
                  called = false
         
     | 
| 
       195 
     | 
    
         
            -
                  @extractor.each_matching_line do |field, spec|
         
     | 
| 
      
 256 
     | 
    
         
            +
                  @extractor.each_matching_line(@record) do |field, spec|
         
     | 
| 
       196 
257 
     | 
    
         
             
                    called = true
         
     | 
| 
       197 
258 
     | 
    
         
             
                    assert_kind_of MARC::DataField, field
         
     | 
| 
       198 
259 
     | 
    
         
             
                    assert_kind_of Hash, spec
         
     | 
| 
         @@ -201,7 +262,7 @@ describe "Traject::MarcExtractor" do 
     | 
|
| 
       201 
262 
     | 
    
         
             
                end
         
     | 
| 
       202 
263 
     | 
    
         
             
                it "yields three args" do
         
     | 
| 
       203 
264 
     | 
    
         
             
                  called = false
         
     | 
| 
       204 
     | 
    
         
            -
                  @extractor.each_matching_line do |field, spec, extractor|
         
     | 
| 
      
 265 
     | 
    
         
            +
                  @extractor.each_matching_line(@record) do |field, spec, extractor|
         
     | 
| 
       205 
266 
     | 
    
         
             
                    called = true
         
     | 
| 
       206 
267 
     | 
    
         
             
                    assert_kind_of MARC::DataField, field
         
     | 
| 
       207 
268 
     | 
    
         
             
                    assert_kind_of Hash, spec
         
     | 
| 
         @@ -215,16 +276,29 @@ describe "Traject::MarcExtractor" do 
     | 
|
| 
       215 
276 
     | 
    
         
             
              describe "#collect_matching_lines" do
         
     | 
| 
       216 
277 
     | 
    
         
             
                before do
         
     | 
| 
       217 
278 
     | 
    
         
             
                  @record = MARC::Reader.new(support_file_path  "manufacturing_consent.marc").to_a.first
         
     | 
| 
       218 
     | 
    
         
            -
                  @extractor = Traject::MarcExtractor.new( 
     | 
| 
      
 279 
     | 
    
         
            +
                  @extractor = Traject::MarcExtractor.new("245abc")
         
     | 
| 
       219 
280 
     | 
    
         
             
                end
         
     | 
| 
       220 
281 
     | 
    
         
             
                it "collects with custom block" do
         
     | 
| 
       221 
     | 
    
         
            -
                  results = @extractor.collect_matching_lines do |field, spec, extractor|
         
     | 
| 
      
 282 
     | 
    
         
            +
                  results = @extractor.collect_matching_lines(@record) do |field, spec, extractor|
         
     | 
| 
       222 
283 
     | 
    
         
             
                    extractor.collect_subfields(field, spec)
         
     | 
| 
       223 
284 
     | 
    
         
             
                  end
         
     | 
| 
       224 
285 
     | 
    
         
             
                  assert_equal ["Manufacturing consent : the political economy of the mass media / Edward S. Herman and Noam Chomsky ; with a new introduction by the authors."], results
         
     | 
| 
       225 
286 
     | 
    
         
             
                end
         
     | 
| 
       226 
287 
     | 
    
         
             
              end
         
     | 
| 
       227 
288 
     | 
    
         | 
| 
      
 289 
     | 
    
         
            +
              describe "MarcExtractor.cached" do
         
     | 
| 
      
 290 
     | 
    
         
            +
                it "creates" do
         
     | 
| 
      
 291 
     | 
    
         
            +
                  ext = Traject::MarcExtractor.cached("245abc", :seperator => nil)
         
     | 
| 
      
 292 
     | 
    
         
            +
                  assert_equal({"245"=>{:subfields=>["a", "b", "c"]}}, ext.spec_hash)
         
     | 
| 
      
 293 
     | 
    
         
            +
                  assert ext.options[:seperator].nil?, "extractor options[:seperator] is nil"
         
     | 
| 
      
 294 
     | 
    
         
            +
                end
         
     | 
| 
      
 295 
     | 
    
         
            +
                it "caches" do
         
     | 
| 
      
 296 
     | 
    
         
            +
                  ext1 = Traject::MarcExtractor.cached("245abc", :seperator => nil)
         
     | 
| 
      
 297 
     | 
    
         
            +
                  ext2 = Traject::MarcExtractor.cached("245abc", :seperator => nil)
         
     | 
| 
      
 298 
     | 
    
         
            +
             
     | 
| 
      
 299 
     | 
    
         
            +
                  assert_same ext1, ext2
         
     | 
| 
      
 300 
     | 
    
         
            +
                end
         
     | 
| 
      
 301 
     | 
    
         
            +
              end
         
     | 
| 
       228 
302 
     | 
    
         | 
| 
       229 
303 
     | 
    
         | 
| 
       230 
304 
     | 
    
         
             
            end
         
     | 
| 
         @@ -105,11 +105,14 @@ to_field "pub_date",          marc_publication_date 
     | 
|
| 
       105 
105 
     | 
    
         | 
| 
       106 
106 
     | 
    
         
             
            # LCC to broad class, start with built-in from marc record, but then do our own for local
         
     | 
| 
       107 
107 
     | 
    
         
             
            # call numbers.
         
     | 
| 
       108 
     | 
    
         
            -
            lcc_map 
     | 
| 
      
 108 
     | 
    
         
            +
            lcc_map             = Traject::TranslationMap.new("lcc_top_level")
         
     | 
| 
      
 109 
     | 
    
         
            +
            holdings_extractor  = Traject::MarcExtractor.new("991:937")
         
     | 
| 
      
 110 
     | 
    
         
            +
            sudoc_extractor     = Traject::MarcExtractor.new("086a", :seperator =>nil)
         
     | 
| 
      
 111 
     | 
    
         
            +
             
     | 
| 
       109 
112 
     | 
    
         
             
            to_field "discipline_facet",  marc_lcc_to_broad_category(:default => nil) do |record, accumulator|
         
     | 
| 
       110 
113 
     | 
    
         
             
              # add in our local call numbers
         
     | 
| 
       111 
114 
     | 
    
         
             
              accumulator.concat(
         
     | 
| 
       112 
     | 
    
         
            -
                 
     | 
| 
      
 115 
     | 
    
         
            +
                holdings_extractor.collect_matching_lines(record) do |field, spec, extractor|
         
     | 
| 
       113 
116 
     | 
    
         
             
                    # we output call type 'processor' in subfield 'f' of our holdings
         
     | 
| 
       114 
117 
     | 
    
         
             
                    # fields, that sort of maybe tells us if it's an LCC field.
         
     | 
| 
       115 
118 
     | 
    
         
             
                    # When the data is right, which it often isn't.
         
     | 
| 
         @@ -130,7 +133,7 @@ to_field "discipline_facet",  marc_lcc_to_broad_category(:default => nil) do |re 
     | 
|
| 
       130 
133 
     | 
    
         | 
| 
       131 
134 
     | 
    
         
             
              # If it's got an 086, we'll put it in "Government Publication", to be
         
     | 
| 
       132 
135 
     | 
    
         
             
              # consistent with when we do that from a local SuDoc call #.
         
     | 
| 
       133 
     | 
    
         
            -
              if  
     | 
| 
      
 136 
     | 
    
         
            +
              if sudoc_extractor.extract(record).length > 0
         
     | 
| 
       134 
137 
     | 
    
         
             
                accumulator << "Government Publication"
         
     | 
| 
       135 
138 
     | 
    
         
             
              end
         
     | 
| 
       136 
139 
     | 
    
         |