traject 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
 - data/Gemfile +4 -0
 - data/LICENSE.txt +22 -0
 - data/README.md +346 -0
 - data/Rakefile +16 -0
 - data/bin/traject +153 -0
 - data/doc/macros.md +103 -0
 - data/doc/settings.md +34 -0
 - data/lib/traject.rb +10 -0
 - data/lib/traject/indexer.rb +196 -0
 - data/lib/traject/json_writer.rb +51 -0
 - data/lib/traject/macros/basic.rb +9 -0
 - data/lib/traject/macros/marc21.rb +145 -0
 - data/lib/traject/marc_extractor.rb +206 -0
 - data/lib/traject/marc_reader.rb +61 -0
 - data/lib/traject/qualified_const_get.rb +30 -0
 - data/lib/traject/solrj_writer.rb +120 -0
 - data/lib/traject/translation_map.rb +184 -0
 - data/lib/traject/version.rb +3 -0
 - data/test/indexer/macros_marc21_test.rb +146 -0
 - data/test/indexer/macros_test.rb +40 -0
 - data/test/indexer/map_record_test.rb +120 -0
 - data/test/indexer/read_write_test.rb +47 -0
 - data/test/indexer/settings_test.rb +65 -0
 - data/test/marc_extractor_test.rb +168 -0
 - data/test/marc_reader_test.rb +29 -0
 - data/test/solrj_writer_test.rb +106 -0
 - data/test/test_helper.rb +28 -0
 - data/test/test_support/hebrew880s.marc +1 -0
 - data/test/test_support/manufacturing_consent.marc +1 -0
 - data/test/test_support/test_data.utf8.marc.xml +2609 -0
 - data/test/test_support/test_data.utf8.mrc +1 -0
 - data/test/translation_map_test.rb +98 -0
 - data/test/translation_maps/bad_ruby.rb +8 -0
 - data/test/translation_maps/bad_yaml.yaml +1 -0
 - data/test/translation_maps/both_map.rb +1 -0
 - data/test/translation_maps/both_map.yaml +1 -0
 - data/test/translation_maps/default_literal.rb +10 -0
 - data/test/translation_maps/default_passthrough.rb +10 -0
 - data/test/translation_maps/marc_040a_translate_test.yaml +1 -0
 - data/test/translation_maps/ruby_map.rb +10 -0
 - data/test/translation_maps/translate_array_test.yaml +8 -0
 - data/test/translation_maps/yaml_map.yaml +7 -0
 - data/traject.gemspec +30 -0
 - data/vendor/solrj/README +8 -0
 - data/vendor/solrj/build.xml +39 -0
 - data/vendor/solrj/ivy.xml +16 -0
 - data/vendor/solrj/lib/commons-codec-1.7.jar +0 -0
 - data/vendor/solrj/lib/commons-io-2.1.jar +0 -0
 - data/vendor/solrj/lib/httpclient-4.2.3.jar +0 -0
 - data/vendor/solrj/lib/httpcore-4.2.2.jar +0 -0
 - data/vendor/solrj/lib/httpmime-4.2.3.jar +0 -0
 - data/vendor/solrj/lib/jcl-over-slf4j-1.6.6.jar +0 -0
 - data/vendor/solrj/lib/jul-to-slf4j-1.6.6.jar +0 -0
 - data/vendor/solrj/lib/log4j-1.2.16.jar +0 -0
 - data/vendor/solrj/lib/noggit-0.5.jar +0 -0
 - data/vendor/solrj/lib/slf4j-api-1.6.6.jar +0 -0
 - data/vendor/solrj/lib/slf4j-log4j12-1.6.6.jar +0 -0
 - data/vendor/solrj/lib/solr-solrj-4.3.1-javadoc.jar +0 -0
 - data/vendor/solrj/lib/solr-solrj-4.3.1-sources.jar +0 -0
 - data/vendor/solrj/lib/solr-solrj-4.3.1.jar +0 -0
 - data/vendor/solrj/lib/wstx-asl-3.2.7.jar +0 -0
 - data/vendor/solrj/lib/zookeeper-3.4.5.jar +0 -0
 - metadata +264 -0
 
| 
         @@ -0,0 +1,146 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'traject/indexer'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'traject/macros/marc21'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            require 'json'
         
     | 
| 
      
 7 
     | 
    
         
            +
            require 'marc/record'
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            # See also marc_extractor_test.rb for more detailed tests of MARC extraction.
            # This is just a basic test to make sure our macros work, passing their
            # arguments through to the extractor and honoring the other options.
         
     | 
| 
      
 12 
     | 
    
         
            +
            # Specs for the macros that Traject::Macros::Marc21 mixes into an indexer
            # (extract_marc, serialized_marc, extract_all_marc_values) and for the
            # Marc21.trim_punctuation module-level helper.
            describe "Traject::Macros::Marc21" do
              Marc21 = Traject::Macros::Marc21 # shortcut

              # Each example gets a fresh indexer extended with the Marc21 macros, and
              # the first record read from the manufacturing_consent.marc support file.
              before do
                @indexer = Traject::Indexer.new
                @indexer.extend Traject::Macros::Marc21
                @record = MARC::Reader.new(support_file_path("manufacturing_consent.marc")).to_a.first
              end

              describe "extract_marc" do
                it "extracts marc" do
                  @indexer.instance_eval do
                    to_field "title", extract_marc("245ab")
                  end

                  output = @indexer.map_record(@record)

                  assert_equal ["Manufacturing consent : the political economy of the mass media /"], output["title"]
                end

                it "respects :first=>true option" do
                  @indexer.instance_eval do
                    to_field "other_id", extract_marc("035a", :first => true)
                  end

                  output = @indexer.map_record(@record)

                  # assert_length is not defined in this file; presumably a custom
                  # assertion supplied by test_helper.
                  assert_length 1, output["other_id"]
                end

                it "trims punctuation with :trim_punctuation => true" do
                  @indexer.instance_eval do
                    to_field "title", extract_marc("245ab", :trim_punctuation => true)
                  end

                  output = @indexer.map_record(@record)

                  # Without the option the extracted title ends in " /" (see the
                  # "extracts marc" example above in this group).
                  assert ! output["title"].first.end_with?("/"), "does not end with /"
                end

                it "Marc21::trim_punctuation class method" do
                  # No trailing punctuation: returned unchanged.
                  assert_equal "one two three", Marc21.trim_punctuation("one two three")

                  # A single trailing , / ; : is removed.
                  assert_equal "one two three", Marc21.trim_punctuation("one two three,")
                  assert_equal "one two three", Marc21.trim_punctuation("one two three/")
                  assert_equal "one two three", Marc21.trim_punctuation("one two three;")
                  assert_equal "one two three", Marc21.trim_punctuation("one two three:")
                  # A period preceded by a space is kept; one directly after a word is removed.
                  assert_equal "one two three .", Marc21.trim_punctuation("one two three .")
                  assert_equal "one two three", Marc21.trim_punctuation("one two three.")

                  # Brackets inside the value are kept; brackets at either end of the
                  # whole value are stripped.
                  assert_equal "one two [three]", Marc21.trim_punctuation("one two [three]")
                  assert_equal "one two three", Marc21.trim_punctuation("one two three]")
                  assert_equal "one two three", Marc21.trim_punctuation("[one two three")
                  assert_equal "one two three", Marc21.trim_punctuation("[one two three]")
                end

                it "uses :translation_map" do
                  @indexer.instance_eval do
                    # NOTE(review): the :seperator spelling is kept exactly as written;
                    # presumably it matches the option name the extractor accepts in
                    # this version -- confirm before "correcting" it.
                    to_field "cataloging_agency", extract_marc("040a", :seperator => nil, :translation_map => "marc_040a_translate_test")
                  end
                  output = @indexer.map_record(@record)

                  assert_equal ["Library of Congress"], output["cataloging_agency"]
                end
              end

              describe "serialized_marc" do
                it "serializes xml" do
                  @indexer.instance_eval do
                    to_field "marc_record", serialized_marc(:format => "xml")
                  end
                  output = @indexer.map_record(@record)

                  assert_length 1, output["marc_record"]
                  assert_kind_of String, output["marc_record"].first
                  assert output["marc_record"].first.start_with?("<record xmlns='http://www.loc.gov/MARC21/slim'>"), "looks like serialized MarcXML"
                end

                # Binary output with no :binary_escape option is decoded below with
                # Base64.decode64, so the example is named for Base64 rather than
                # uuencoding.
                it "serializes binary Base64-encoded" do
                  @indexer.instance_eval do
                    to_field "marc_record", serialized_marc(:format => "binary")
                  end
                  output = @indexer.map_record(@record)

                  assert_length 1, output["marc_record"]
                  assert_kind_of String, output["marc_record"].first

                  decoded = Base64.decode64( output["marc_record"].first )

                  # just check the marc header for now
                  # (assert_start_with is presumably another test_helper assertion)
                  assert_start_with "02067cam a2200469", decoded
                end

                it "serializes binary raw" do
                  @indexer.instance_eval do
                    to_field "marc_record", serialized_marc(:format => "binary", :binary_escape => false)
                  end
                  output = @indexer.map_record(@record)

                  assert_length 1, output["marc_record"]
                  assert_kind_of String, output["marc_record"].first

                  # just check the marc header for now; with :binary_escape => false
                  # the value is inspected directly, without decoding
                  assert_start_with "02067cam a2200469", output["marc_record"].first
                end

                it "serializes json" do
                  @indexer.instance_eval do
                    to_field "marc_record", serialized_marc(:format => "json")
                  end
                  output = @indexer.map_record(@record)

                  assert_length 1, output["marc_record"]

                  # okay, let's actually deserialize it, why not

                  hash = JSON.parse( output["marc_record"].first )

                  deserialized = MARC::Record.new_from_hash(hash)

                  # Round trip: the rebuilt record must equal the fixture record.
                  assert_equal @record, deserialized
                end
              end

              it "#extract_all_marc_values" do
                @indexer.instance_eval do
                  to_field "text", extract_all_marc_values
                end
                output = @indexer.map_record(@record)

                # The expected count (13) is specific to the fixture record.
                assert_length 13, output["text"]
              end
            end
         
     | 
| 
         @@ -0,0 +1,40 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # Specs for the Traject::Macros::Basic `literal` macro, used on its own
            # and combined with a block passed to the same to_field call.
            describe("Indexer Macros:") do
              # Fresh fixture record and indexer for every example.
              before do
                marc_path = support_file_path("manufacturing_consent.marc")
                @record   = MARC::Reader.new(marc_path).to_a.first
                @indexer  = Traject::Indexer.new
              end

              it("works with simple literal") do
                @indexer.extend(Traject::Macros::Basic)
                @indexer.instance_eval { to_field("source", literal("MY LIBRARY")) }

                mapped = @indexer.map_record(@record)

                assert_equal(["MY LIBRARY"], mapped["source"])
              end

              it("works with macro AND block") do
                block_ran = false

                @indexer.extend(Traject::Macros::Basic)
                @indexer.instance_eval do
                  # The parentheses keep the do-block attached to to_field rather
                  # than to literal.
                  to_field("source", literal("MY LIBRARY")) do |_record, values, _context|
                    block_ran = true
                    values << "SECOND VALUE"
                  end
                end

                mapped = @indexer.map_record(@record)

                assert(block_ran)
                assert_equal(["MY LIBRARY", "SECOND VALUE"], mapped["source"])
              end
            end
         
     | 
| 
         @@ -0,0 +1,120 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            describe "Traject::Indexer#map_record" do 
         
     | 
| 
      
 4 
     | 
    
         
            +
              # Runs before each example: loads the first record of the
              # manufacturing_consent.marc support file and builds a new indexer.
              before do
                marc_path = support_file_path("manufacturing_consent.marc")
                @record   = MARC::Reader.new(marc_path).to_a.first
                @indexer  = Traject::Indexer.new
              end
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
              # An indexer that has had no to_field rules registered.
              describe("with no indexing rules") do
                it("returns empty hash") do
                  mapped = @indexer.map_record(@record)

                  assert_kind_of(Hash, mapped)
                  assert_empty(mapped)
                end
              end
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
              # to_field accepts its indexing logic as a block, as a lambda
              # argument, or as both in the same call.
              describe("#to_field") do
                it("works with block") do
                  block_ran = false

                  @indexer.to_field("title") do |marc, values|
                    assert_kind_of(MARC::Record, marc)
                    assert_kind_of(Array, values)

                    block_ran = true # visible to the example through the closure
                    values << "Some Title"
                  end

                  mapped = @indexer.map_record(@record)

                  assert(block_ran)
                  assert_kind_of(Hash, mapped)
                  assert_equal(["Some Title"], mapped["title"])
                end

                it("works with a lambda arg") do
                  lambda_ran = false

                  title_logic = lambda do |marc, values|
                    assert_kind_of(MARC::Record, marc)
                    assert_kind_of(Array, values)

                    lambda_ran = true # visible to the example through the closure
                    values << "Some Title"
                  end

                  @indexer.to_field("title", title_logic)

                  mapped = @indexer.map_record(@record)

                  assert(lambda_ran)
                  assert_kind_of(Hash, mapped)
                  assert_equal(["Some Title"], mapped["title"])
                end

                it("works with both lambda and Proc") do
                  block_ran = false

                  from_lambda = lambda { |_marc, values| values << "Lambda-provided Value" }

                  @indexer.to_field("title", from_lambda) do |_marc, values|
                    # The lambda's value must already be present when the block runs.
                    assert_includes(values, "Lambda-provided Value")
                    values << "Block-provided Value"

                    block_ran = true
                  end

                  mapped = @indexer.map_record(@record)

                  assert(block_ran)
                  assert_includes(mapped["title"], "Lambda-provided Value")
                  assert_includes(mapped["title"], "Block-provided Value")
                end
              end
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
              # Several to_field declarations may target the same output field; this
              # spec checks they run in declaration order and that their values are
              # collected in that same order.
              describe "multiple to_field blocks" do
                it "get called in order" do
                  invocations = []

                  # Register two blocks for "title"; each records that it ran and
                  # contributes one value.
                  [[:first_one, "First"], [:second_one, "Second"]].each do |marker, value|
                    @indexer.to_field("title") do |_rec, acc|
                      invocations << marker
                      acc << value
                    end
                  end

                  mapped = @indexer.map_record(@record)

                  assert_equal [:first_one, :second_one], invocations
                  assert_equal ["First", "Second"], mapped["title"]
                end
              end
         
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
      
 99 
     | 
    
         
            +
              # A to_field block that declares a third parameter receives a
              # Traject::Indexer::Context describing the record being mapped.
              describe "context argument" do
                it "is third argument to block" do
                  block_ran = false

                  @indexer.to_field("title") do |source, _values, ctx|
                    block_ran = true

                    assert_kind_of Traject::Indexer::Context, ctx

                    # Both containers exposed by the context are Hashes.
                    assert_kind_of Hash, ctx.clipboard
                    assert_kind_of Hash, ctx.output_hash

                    # The context points at the very same record and settings objects.
                    assert_same source, ctx.source_record
                    assert_same @indexer.settings, ctx.settings
                  end

                  @indexer.map_record(@record)

                  # Guards against the block (and its assertions) never running.
                  assert block_ran
                end
              end
         
     | 
| 
      
 119 
     | 
    
         
            +
              
         
     | 
| 
      
 120 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,47 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # A small in-memory Traject writer for tests. Everything it receives is
            # kept in an array stored in the settings object it was built with, so a
            # test can inspect the results through those settings.
            memory_writer_class = Class.new do
              # settings - hash-like object this writer records its state into.
              #
              # Both keys owned by this writer are reset here, so state left behind
              # by an earlier writer on the same settings object (in particular a
              # stale "closed" flag) cannot leak into a new run.
              def initialize(settings)
                @settings = settings
                @settings["memory_writer.added"]  = []
                @settings["memory_writer.closed"] = false
              end

              # Records one output hash, in the order received.
              def put(hash)
                @settings["memory_writer.added"] << hash
              end

              # Flags the writer as closed so a test can verify close was called.
              def close
                @settings["memory_writer.closed"] = true
              end
            end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            # End-to-end check of Indexer#process: records are read from a MARC
            # file, mapped through a to_field block, and handed to the writer, which
            # is closed at the end.
            describe "Traject::Indexer#process" do
              before do
                @indexer = Traject::Indexer.new
                @indexer.writer_class = memory_writer_class
                @file = File.open(support_file_path("test_data.utf8.mrc"))
              end

              # Release the handle opened in `before` so file descriptors do not
              # accumulate across test runs. The closed? guard keeps this safe if
              # the handle has already been closed by the time the hook runs.
              after do
                @file.close if @file && !@file.closed?
              end

              it "works" do
                @indexer.to_field("title") do |record, accumulator, context|
                  accumulator << "ADDED TITLE"
                  assert_equal "title", context.field_name
                end

                @indexer.process(@file)

                added = @indexer.settings["memory_writer.added"]

                assert added
                # The test expects 30 records to be written from the fixture file.
                assert_equal 30, added.length
                assert_kind_of Hash, added.first
                assert_equal ["ADDED TITLE"], added.first["title"]

                assert @indexer.settings["memory_writer.closed"]
              end
            end
         
     | 
| 
         @@ -0,0 +1,65 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # Specs for Indexer#settings: reading, setting via a hash argument,
            # setting via the block DSL (`store`), merge semantics, and
            # string/symbol key indifference.
            describe "Traject::Indexer#settings" do
              before { @indexer = Traject::Indexer.new }

              it "starts out default hash" do
                assert_kind_of Hash, @indexer.settings
                assert_equal Traject::Indexer.default_settings, @indexer.settings
              end

              it "can take argument to set" do
                @indexer.settings("foo" => "foo", "bar" => "bar")

                # Each key was stored with a value equal to its own name.
                %w[foo bar].each do |name|
                  assert_equal name, @indexer.settings[name]
                end
              end

              it "has settings DSL to set" do
                @indexer.instance_eval do
                  settings { store "foo", "foo" }
                end

                assert_equal "foo", @indexer.settings["foo"]
              end

              it "merges new values, not completely replaces" do
                @indexer.settings(
                  "one"   => "original",
                  "two"   => "original",
                  "three" => "original",
                  "four"  => "original"
                )

                # Later calls override only the keys they mention.
                @indexer.settings do
                  store "two", "second"
                  store "three", "second"
                end

                @indexer.settings { store "three", "third" }

                @indexer.settings("four" => "fourth")

                expected = {
                  "one"   => "original",
                  "two"   => "second",
                  "three" => "third",
                  "four"  => "fourth"
                }
                expected.each_pair do |name, wanted|
                  assert_equal wanted, @indexer.settings[name]
                end
              end

              it "is indifferent between string and symbol" do
                # Direct assignment: the string key overwrites the symbol key.
                @indexer.settings[:foo] = "foo 1"
                @indexer.settings["foo"] = "foo 2"

                assert_equal "foo 2", @indexer.settings[:foo]

                # Same through the DSL: the symbol key overwrites the string key.
                @indexer.settings do
                  store "foo", "foo 3"
                  store :foo, "foo 4"
                end

                assert_equal "foo 4", @indexer.settings["foo"]
              end
            end
         
     | 
| 
         @@ -0,0 +1,168 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # encoding: UTF-8
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'test_helper'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'traject/marc_extractor'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            describe "Traject::MarcExtractor" do
         
     | 
| 
      
 8 
     | 
    
         
            +
              # Specs for turning a string spec such as "245|1*|abcg" into the
              # parsed hash form, via MarcExtractor.parse_string_spec.
              describe "#parse_marc_spec" do
                it "parses single spec with all elements" do
                  parsed = Traject::MarcExtractor.parse_string_spec("245|1*|abcg")

                  assert_kind_of Hash, parsed
                  assert_equal 1, parsed.keys.length

                  field_spec = parsed["245"]
                  assert_kind_of Hash, field_spec

                  # "1*" yields a two-slot array; the "*" slot is expected to be nil.
                  indicators = field_spec[:indicators]
                  assert_kind_of Array, indicators
                  assert_equal 2, indicators.length
                  assert_equal "1", indicators[0]
                  assert_nil indicators[1]

                  assert_kind_of Array, field_spec[:subfields]
                end

                it "parses a mixed bag" do
                  parsed = Traject::MarcExtractor.parse_string_spec("245abcde:810:700|*4|bcd")

                  assert_length 3, parsed

                  # 245abcde -- subfields only
                  title_spec = parsed["245"]
                  assert title_spec
                  assert_nil title_spec[:indicators]
                  assert_equal %w{a b c d e}, title_spec[:subfields]

                  # 810 -- bare tag, nothing else specified
                  bare_spec = parsed["810"]
                  assert bare_spec
                  assert_nil bare_spec[:indicators]
                  assert_nil bare_spec[:subfields]

                  # 700|*4|bcd -- second indicator plus subfields
                  full_spec = parsed["700"]
                  assert full_spec
                  assert_equal [nil, "4"], full_spec[:indicators]
                  assert_equal %w{b c d}, full_spec[:subfields]
                end

                it "parses fixed field byte offsets" do
                  parsed = Traject::MarcExtractor.parse_string_spec("005[5]:008[7-10]")

                  # A single offset is an Integer, a span is a Range.
                  assert_equal(5, parsed["005"][:bytes])
                  assert_equal(7..10, parsed["008"][:bytes])
                end
              end
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
              describe "#extract_by_spec" do
         
     | 
| 
      
 55 
     | 
    
         
            +
                # Use the first record of the "manufacturing_consent.marc" support
                # file as the default record for the specs below.
                before do
                  reader = MARC::Reader.new(support_file_path("manufacturing_consent.marc"))
                  @record = reader.to_a.first
                end
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
                # Extraction with a multi-part spec mixing subfield lists, indicator
                # constraints and a bare tag.
                describe "extracts a basic case" do
                  before do
                    parsed_spec = Traject::MarcExtractor.parse_string_spec("700abcdef:856|*2|:505|1*|:245ba")
                    @values = Traject::MarcExtractor.extract_by_spec(@record, parsed_spec)
                  end

                  it "returns an array" do
                    assert_kind_of Array, @values
                  end

                  it "handles no subfields given" do
                    a856s = @record.find_all { |f| f.tag == "856" }
                    # find_all returns an Array, which is truthy even when empty, so
                    # emptiness must be checked explicitly; otherwise the loop below
                    # would pass without asserting anything.
                    refute_empty a856s, "Record must have 856 fields for this test to work"

                    # With no subfields in the spec, every subfield value of the
                    # field is expected, joined by a single space.
                    a856s.each do |field|
                      assert_includes @values, field.subfields.collect(&:value).join(" ")
                    end
                  end

                  it "does not have 505, due to non-matching indicators" do
                    refute(@values.any? { |s| s.include?("propaganda model") })
                  end

                  it "respects original record order, for both fields and subfields" do
                    expected = ["Manufacturing consent : the political economy of the mass media /",
                                "Chomsky, Noam.",
                                "Contributor biographical information http://www.loc.gov/catdir/bios/random051/2001050014.html",
                                "Publisher description http://www.loc.gov/catdir/description/random044/2001050014.html"]
                    assert_equal expected, @values
                  end
                end
         
     | 
| 
      
 92 
     | 
    
         
            +
             
     | 
| 
      
 93 
     | 
    
         
            +
                # Control (fixed) fields: whole value, one byte offset, or a byte range.
                describe "extracts fixed fields" do
                  it ", complete" do
                    spec = Traject::MarcExtractor.parse_string_spec("001")
                    extracted = Traject::MarcExtractor.extract_by_spec(@record, spec)

                    assert_equal ["2710183"], extracted
                  end

                  it ", single byte offset" do
                    spec = Traject::MarcExtractor.parse_string_spec("008[5]")
                    extracted = Traject::MarcExtractor.extract_by_spec(@record, spec)

                    assert_equal ["1"], extracted
                  end

                  it ", byte range" do
                    spec = Traject::MarcExtractor.parse_string_spec("008[7-10]")
                    extracted = Traject::MarcExtractor.extract_by_spec(@record, spec)

                    assert_equal ["2002"], extracted
                  end
                end
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
                # The :seperator option (spelled as the option key is used in this
                # file) controls how the subfield values of one field are joined.
                describe "seperator argument" do
                  it "causes non-join when nil" do
                    spec = Traject::MarcExtractor.parse_string_spec("245")
                    extracted = Traject::MarcExtractor.extract_by_spec(@record, spec, :seperator => nil)

                    # With no joining, each subfield value is its own entry.
                    assert_length 3, extracted
                  end

                  it "can be non-default" do
                    spec = Traject::MarcExtractor.parse_string_spec("245")
                    extracted = Traject::MarcExtractor.extract_by_spec(@record, spec, :seperator => "!! ")

                    joined = "Manufacturing consent :!! the political economy of the mass media /!! Edward S. Herman and Noam Chomsky ; with a new introduction by the authors."

                    assert_length 1, extracted
                    assert_equal joined, extracted.first
                  end
                end
         
     | 
| 
      
 130 
     | 
    
         
            +
             
     | 
| 
      
 131 
     | 
    
         
            +
                # The :alternate_script option decides whether linked 880
                # (alternate script) values are returned alongside, instead of, or
                # not at all with the original field's values.
                describe "extracts alternate script" do
                  before do
                    reader = MARC::Reader.new(support_file_path("hebrew880s.marc"))
                    @record = reader.to_a.first
                    @parsed_spec = Traject::MarcExtractor.parse_string_spec("245b")
                  end

                  it "from default :include" do
                    extracted = Traject::MarcExtractor.extract_by_spec(@record, @parsed_spec)

                    # both the original and the 880
                    assert_length 2, extracted
                    assert_equal ["ben Marṭin Buber le-Aharon Daṿid Gordon /", "בין מרטין בובר לאהרן דוד גורדון /"], extracted
                  end

                  it "with :only" do
                    extracted = Traject::MarcExtractor.extract_by_spec(@record, @parsed_spec, :alternate_script => :only)

                    assert_length 1, extracted
                    assert_equal ["בין מרטין בובר לאהרן דוד גורדון /"], extracted
                  end

                  it "with false" do
                    extracted = Traject::MarcExtractor.extract_by_spec(@record, @parsed_spec, :alternate_script => false)

                    assert_length 1, extracted
                    assert_equal ["ben Marṭin Buber le-Aharon Daṿid Gordon /"], extracted
                  end
                end
         
     | 
| 
      
 156 
     | 
    
         
            +
             
     | 
| 
      
 157 
     | 
    
         
            +
                # The spec is handed over as a plain string ("245abc") rather than as
                # the result of parse_string_spec.
                # NOTE(review): @record is assigned outside this block, presumably by
                # an enclosing `before` hook -- confirm.
                it "works with string second arg too" do
                  extracted = Traject::MarcExtractor.extract_by_spec(@record, "245abc")

                  assert_length(1, extracted)
                  assert(extracted.first.include?("Manufacturing consent"), "Extracted value includes title")
                end
         
     | 
| 
      
 163 
     | 
    
         
            +
             
     | 
| 
      
 164 
     | 
    
         
            +
              end
         
     | 
| 
      
 165 
     | 
    
         
            +
             
     | 
| 
      
 166 
     | 
    
         
            +
             
     | 
| 
      
 167 
     | 
    
         
            +
             
     | 
| 
      
 168 
     | 
    
         
            +
            end
         
     |