traject_profiling 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e0af0cc816155f0d24efd1e61b3291aff736b9a
4
- data.tar.gz: 339785fe91fe7114c85627ee48935c9e1e366904
3
+ metadata.gz: bb75507d672134b5852f53c2621f7e2e1babfeaa
4
+ data.tar.gz: 82bc9804e6dd9514ab6f2f8e4bb2644446c802ed
5
5
  SHA512:
6
- metadata.gz: 8eb43cb020eadf138e87d2dbeedfff53f086507d87060dd0c31649328050979c45903163412ae3bd83a4921cdb5e0fbe6d999018feccbf6a32fa862b3fdfa1fb
7
- data.tar.gz: f1d6e12641c74351ce0ffec992afc921b8e9120c9666f9f0c1421fa1dbdb2560d63dbf8f893994ece7f0bf957beacc53750c5e64dbca777f8ca782125f023477
6
+ metadata.gz: f893ff54c6044570384ea62e1856b56be230ae4c6190b8cc7675bdd59cb73ff8fb758f0fa14de5a0a3d87ef58cd1eaf7dd8b0555f3a1d969dbbf27dc00bf4104
7
+ data.tar.gz: 799a0ba326b39e56aeebdf124d6f74e68c2e465a78f1e80d0750fb74c0b4d6e33862d5bddd5773242e746e8ff1c36c1ba054bc415d873ae75f87bb91b3ba5939
@@ -0,0 +1 @@
1
+ service_name: travis-ci
@@ -0,0 +1,26 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ require: rubocop-rspec
4
+
5
+ Metrics/LineLength:
6
+ Max: 120
7
+
8
+ Metrics/MethodLength:
9
+ CountComments: false # count full line comments?
10
+ Max: 20
11
+
12
+ Style/EmptyLinesAroundBlockBody:
13
+ Enabled: false
14
+
15
+ Style/EmptyLinesAroundClassBody:
16
+ Enabled: false
17
+
18
+ Style/EmptyLinesAroundModuleBody:
19
+ Enabled: false
20
+
21
+ Style/SpaceAroundEqualsInParameterDefault:
22
+ EnforcedStyle: no_space
23
+
24
+ RSpec/DescribeClass:
25
+ Enabled: false
26
+
@@ -0,0 +1,23 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2015-10-01 15:34:04 -0700 using RuboCop version 0.33.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 2
10
+ Lint/HandleExceptions:
11
+ Exclude:
12
+ - 'Rakefile'
13
+
14
+ # Offense count: 1
15
+ Style/Documentation:
16
+ Exclude:
17
+ - 'lib/traject/profiling/version.rb'
18
+
19
+ # Offense count: 1
20
+ # Cop supports --auto-correct.
21
+ # Configuration parameters: WordRegex.
22
+ Style/WordArray:
23
+ MinSize: 2
@@ -1,5 +1,5 @@
1
1
  language: ruby
2
- script: rake ci
2
+ script: rake spec
3
3
  rvm:
4
4
  - ruby-head
5
5
  - 2.2.3
@@ -9,6 +9,8 @@ rvm:
9
9
  matrix:
10
10
  allow_failures:
11
11
  - rvm: ruby-head
12
+ - rvm: jruby-9.0.0.0 # pry-byebug (interactive debugging gem) doesn't load for jruby
13
+ - rvm: jruby-1.7.20 # pry-byebug doesn't load for jruby
12
14
  fast_finish: true
13
15
  env:
14
16
  global:
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Build Status](https://travis-ci.org/sul-dlss/traject_profiling.svg?branch=master)](https://travis-ci.org/sul-dlss/traject_profiling) [![Coverage Status](https://coveralls.io/repos/sul-dlss/traject_profiling/badge.png)](https://coveralls.io/r/sul-dlss/traject_profiling) [![Dependency Status](https://gemnasium.com/sul-dlss/traject_profiling.svg)](https://gemnasium.com/sul-dlss/traject_profiling) [![Gem Version](https://badge.fury.io/rb/traject_profiling.svg)](http://badge.fury.io/rb/traject_profiling)
2
+
1
3
  # traject_profiling
2
4
 
3
5
  Traject macros to provide profiling information on MARC bibliographic records.
@@ -15,11 +17,12 @@ extend Traject::Profiling::Macros
15
17
 
16
18
  to_field 'id', extract_marc('001', :first=>true)
17
19
  to_field 'f700count', field_count('700')
18
- to_field 'f700ind1', field_ind_vals('700', '1')
20
+ to_field 'f700ind1', field_ind('700', '1') # 700 ind1 values
19
21
  to_field 'f700ind2', field_ind('700', '2')
20
- to_field 'f700subflds', field_subfield_codes('700')
21
- to_field 'f880_for', f880_tags
22
- to_field 'f880_for', f880_tags_and_subfields
22
+ to_field 'f700codes', field_codes('700') # subfield codes used in 700 fields
23
+ to_field 'f880_for', tags_with_880s()
24
+ to_field 'f880codes_for_700', tag_codes_in_880s('700')
25
+ to_field 'orphan_880s', tags_for_unassociated_880s
23
26
 
24
27
  ```
25
28
 
data/Rakefile CHANGED
@@ -1,17 +1,17 @@
1
1
  begin
2
2
  require 'bundler'
3
- require "bundler/gem_tasks"
3
+ require 'bundler/gem_tasks'
4
4
  rescue LoadError => e
5
5
  warn e.message
6
- warn "Run `gem install bundler` to install Bundler."
7
- exit -1
6
+ warn 'Run `gem install bundler` to install Bundler.'
7
+ exit(-1)
8
8
  end
9
9
 
10
10
  begin
11
11
  Bundler.setup(:development)
12
12
  rescue Bundler::BundlerError => e
13
13
  warn e.message
14
- warn "Run `bundle install` to install missing gems."
14
+ warn 'Run `bundle install` to install missing gems.'
15
15
  exit e.status_code
16
16
  end
17
17
 
@@ -20,7 +20,7 @@ require 'rake'
20
20
  begin
21
21
  require 'yard'
22
22
  YARD::Rake::YardocTask.new
23
- task :doc => :yard
23
+ task doc: :yard
24
24
  rescue LoadError
25
25
  # yard not available - we're probably on a prod environment or need to run bundle install
26
26
  end
@@ -28,8 +28,7 @@ end
28
28
  begin
29
29
  require 'rspec/core/rake_task'
30
30
  RSpec::Core::RakeTask.new(:spec)
31
- task :default => :spec
31
+ task default: :spec
32
32
  rescue LoadError
33
- # rspec not available - we're probably on a prod environment or need to run bundle install
33
+ # rspec not available - we're probably on a prod environment or need to run bundle install
34
34
  end
35
-
@@ -1,5 +1,6 @@
1
- require "traject/profiling/version"
2
- require "traject/profiling/field_macros"
1
+ require 'traject/profiling/version'
2
+ require 'traject/profiling/field_macros'
3
+ require 'traject/profiling/f880_macros'
3
4
 
4
5
  module Traject
5
6
  class Profiling
@@ -0,0 +1,68 @@
1
+ module Traject
2
+ class Profiling
3
+
4
+ # traject "macros" to be used with #to_field in a traject config file
5
+ module Macros
6
+
7
+ # Get the tags of fields associated with every 880 field
8
+ # If multiple 880s are linked to the same tag, there is a single output value for each unique tag unless dedup=false
9
+ # @param [Boolean] dedup - set to false if duplicate values should produce duplicate output values
10
+ # The tag is the first three characters of each 880 field's subfield 6 (linkage) value,
11
+ # e.g. 880 has subfield 6 with value 245-01, so '245' is added to the accumulator.
12
+ # If the record has no 880 fields, accumulator is not altered (field should be missing in output_hash)
13
+ # @return [lambda] lambda expression appropriate for "to_field", with the tags linked from the
14
+ # record's 880 fields added to the lambda's accumulator param
15
+ def tags_with_880s(dedup=true)
16
+ lambda do |record, accumulator, _context|
17
+ record.each_by_tag('880') do |field|
18
+ tag = field['6'][0, 3]
19
+ if dedup
20
+ accumulator << tag unless accumulator.include? tag
21
+ else
22
+ accumulator << tag
23
+ end
24
+ end
25
+ end
26
+ end
27
+
28
+ # gets all the subfield codes in 880s for a tag in a marc record.
29
+ # If no occurrences of the 880 for the tag in the marc record, accumulator is not
30
+ # altered (field should be missing in output_hash).
31
+ # If multiple occurrences for a code, there is a single output value for each unique subfield code unless dedup=false.
32
+ # @param [Boolean] dedup - set to false if duplicate values should produce duplicate output values
33
+ def tag_codes_in_880s(tag, dedup=true)
34
+ lambda do |record, accumulator, _context|
35
+ codes = []
36
+ record.each_by_tag('880') do |field|
37
+ tag_in_880 = field['6'][0, 3]
38
+ if tag_in_880 == tag
39
+ codes << field.codes(dedup)
40
+ codes.flatten!
41
+ if dedup
42
+ accumulator.replace codes.uniq - ['6']
43
+ else
44
+ accumulator.replace codes - ['6'] # 6 is a non-repeatable code
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
50
+
51
+ # Get the tag of the associated field of an 880 when the |6 linkage occurrence number is 00 or
52
+ # when the linkage refers to a field not present in the Marc::Record object.
53
+ # e.g. 880 has subfield 6 with value 260-00, so '260' is added to the accumulator.
54
+ # @param [Boolean] dedup - currently unused by this macro: a tag is output once per matching 880, so duplicates are not removed
55
+ def tags_for_unassociated_880s(dedup=true)
56
+ lambda do |record, accumulator, _context|
57
+ record.each_by_tag('880') do |field|
58
+ if field['6'][4, 2] == '00' || record.fields(field['6'][0, 3]).empty?
59
+ accumulator << field['6'][0, 3]
60
+ end
61
+ end
62
+ end
63
+ end
64
+
65
+ end # module Macros
66
+
67
+ end # Profiling class
68
+ end # Traject module
@@ -1,25 +1,73 @@
1
1
  module Traject
2
2
  class Profiling
3
3
 
4
+ # traject "macros" to be used with #to_field in a traject config file
4
5
  module Macros
5
6
 
6
- # to_field 'f100ind1', field_ind('100', '1')
7
- # to_field 'f100ind2', field_ind('100', '2')
8
- # to_field 'f100subflds', profile_subfields('100')
9
-
10
- # counts the number of occurrences of a field in a marc record.
7
+ # counts the number of occurrences of a tag in a marc record.
11
8
  # If no occurrences, accumulator is not altered (field should be missing in output_hash)
12
- # @param [String] tag - marc field tag; three chars (usually but not neccesarily numeric)
9
+ # @param [String] tag - marc field tag; three chars (usually but not necessarily numeric)
13
10
  # @return [lambda] lambda expression appropriate for "to_field", with the number of marc fields
14
11
  # matching the tag param added to in the lambda's accumulator param
15
12
  def field_count(tag)
16
- return lambda do |record, accumulator, context|
13
+ lambda do |record, accumulator, _context|
17
14
  num_fields = record.fields(tag).size
18
15
  accumulator << num_fields.to_s if num_fields > 0
19
16
  end
20
17
  end
21
18
 
19
+ # gets all the values of an indicator for a tag in a marc record.
20
+ # If no occurrences of the tag in the marc record, accumulator is not
21
+ # altered (field should be missing in output_hash).
22
+ # If multiple occurrences, there is a single output value for each unique indicator value unless dedup=false.
23
+ # @param [String] tag - marc field tag; three chars (usually but not necessarily numeric)
24
+ # @param [Object] which_ind - can be '1' or '2' (Strings) or 1 or 2 (int);
25
+ # any other value and accumulator is not altered (field should be missing in output_hash)
26
+ # @param [Boolean] dedup - set to false if duplicate values should produce duplicate output values
27
+ # @return [lambda] lambda expression appropriate for "to_field", with the values of the specified
28
+ # indicator for tag param added to in the lambda's accumulator param
29
+ def field_ind(tag, which_ind, dedup=true)
30
+ lambda do |record, accumulator, _context|
31
+ ind_vals = []
32
+ record.each_by_tag(tag) do |fld|
33
+ case which_ind
34
+ when '1', 1
35
+ ind_vals << fld.indicator1.to_s
36
+ when '2', 2
37
+ ind_vals << fld.indicator2.to_s
38
+ end
39
+ end
40
+ if dedup
41
+ accumulator.replace ind_vals.uniq
42
+ else
43
+ accumulator.replace ind_vals
44
+ end
45
+ end
46
+ end
47
+
48
+ # gets all the subfield codes for a tag in a marc record.
49
+ # If no occurrences of the tag in the marc record, accumulator is not
50
+ # altered (field should be missing in output_hash).
51
+ # If multiple occurrences, there is a single output value for each unique subfield code unless dedup=false.
52
+ # @param [String] tag - marc field tag; three chars (usually but not necessarily numeric)
53
+ # @param [Boolean] dedup - set to false if duplicate values should produce duplicate output values
54
+ # @return [lambda] lambda expression appropriate for "to_field", with the subfield codes
55
+ # for tag param added to in the lambda's accumulator param
56
+ def field_codes(tag, dedup=true)
57
+ lambda do |record, accumulator, _context|
58
+ codes = []
59
+ record.each_by_tag(tag) do |fld|
60
+ codes << fld.codes(dedup)
61
+ end
62
+ if dedup
63
+ accumulator.replace codes.flatten.uniq
64
+ else
65
+ accumulator.replace codes.flatten
66
+ end
67
+ end
68
+ end
69
+
22
70
  end # module Macros
23
71
 
24
- end
25
- end
72
+ end # Profiling class
73
+ end # Traject module
@@ -1,5 +1,5 @@
1
1
  module Traject
2
2
  class Profiling
3
- VERSION = "0.0.1"
3
+ VERSION = '0.0.2'
4
4
  end
5
5
  end
@@ -0,0 +1,603 @@
1
+ RSpec.describe 'f880_macros' do
2
+
3
+ let!(:indexer) do
4
+ i = Traject::Indexer.new
5
+ i.instance_eval do
6
+ extend Traject::Profiling::Macros
7
+ end
8
+ i
9
+ end # let! indexer
10
+
11
+ context 'tags_with_880s' do
12
+ it 'include tag for each 880' do
13
+ marcxml =
14
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
15
+ <leader>01942cam a2200505Ia 4500</leader>
16
+ <controlfield tag="001">f880s</controlfield>
17
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
18
+ <datafield ind1="1" ind2="0" tag="245">
19
+ <subfield code="6">880-01</subfield>
20
+ <subfield code="a">Fen nu de pu tao =</subfield>
21
+ <subfield code="b">The grapes of wrath /</subfield>
22
+ </datafield>
23
+ <datafield ind1=" " ind2=" " tag="260">
24
+ <subfield code="6">880-02</subfield>
25
+ <subfield code="a">Shanghai Shi :</subfield>
26
+ <subfield code="c">2003.</subfield>
27
+ </datafield>
28
+ <datafield ind1="1" ind2="0" tag="880">
29
+ <subfield code="6">245-01</subfield>
30
+ <subfield code="a">愤怒的葡萄 =</subfield>
31
+ <subfield code="b">The grapes of wrath /</subfield>
32
+ </datafield>
33
+ <datafield ind1=" " ind2=" " tag="880">
34
+ <subfield code="6">260-02</subfield>
35
+ <subfield code="a">上海市 :</subfield>
36
+ <subfield code="c">2003.</subfield>
37
+ </datafield>
38
+ </record>'
39
+ indexer.instance_eval do
40
+ to_field 'tags_w_880s', tags_with_880s
41
+ end
42
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq %w(245 260)
43
+ end
44
+ it 'assoc fields do not need to be consecutive' do
45
+ marcxml =
46
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
47
+ <leader>01942cam a2200505Ia 4500</leader>
48
+ <controlfield tag="001">f880s</controlfield>
49
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
50
+ <datafield ind1="1" ind2="0" tag="245">
51
+ <subfield code="6">880-01</subfield>
52
+ <subfield code="a">Fen nu de pu tao =</subfield>
53
+ </datafield>
54
+ <datafield ind1=" " ind2=" " tag="260">
55
+ <subfield code="6">880-03</subfield>
56
+ <subfield code="a">Shanghai Shi :</subfield>
57
+ </datafield>
58
+ <datafield ind1="1" ind2="0" tag="880">
59
+ <subfield code="6">245-01</subfield>
60
+ <subfield code="a">愤怒的葡萄 =</subfield>
61
+ </datafield>
62
+ <datafield ind1=" " ind2=" " tag="880">
63
+ <subfield code="6">260-03</subfield>
64
+ <subfield code="a">上海市 :</subfield>
65
+ </datafield>
66
+ </record>'
67
+ indexer.instance_eval do
68
+ to_field 'tags_w_880s', tags_with_880s
69
+ end
70
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq %w(245 260)
71
+ end
72
+ it 'do not include tags without 880s (no |6 )' do
73
+ marcxml =
74
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
75
+ <leader>01942cam a2200505Ia 4500</leader>
76
+ <controlfield tag="001">f880s</controlfield>
77
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
78
+ <datafield ind1="1" ind2=" " tag="100">
79
+ <subfield code="a">Steinbeck, John,</subfield>
80
+ </datafield>
81
+ <datafield ind1="1" ind2="0" tag="245">
82
+ <subfield code="6">880-01</subfield>
83
+ <subfield code="a">Fen nu de pu tao =</subfield>
84
+ </datafield>
85
+ <datafield ind1="1" ind2="0" tag="880">
86
+ <subfield code="6">245-01</subfield>
87
+ <subfield code="a">愤怒的葡萄 =</subfield>
88
+ </datafield>
89
+ </record>'
90
+ indexer.instance_eval do
91
+ to_field 'tags_w_880s', tags_with_880s
92
+ end
93
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).not_to include('100')
94
+ end
95
+ it 'do not include tags without 880s (even if they have |6 )' do
96
+ marcxml =
97
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
98
+ <leader>01942cam a2200505Ia 4500</leader>
99
+ <controlfield tag="001">f880s</controlfield>
100
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
101
+ <datafield ind1="1" ind2=" " tag="100">
102
+ <subfield code="6">880-02</subfield>
103
+ <subfield code="a">Steinbeck, John,</subfield>
104
+ </datafield>
105
+ <datafield ind1="1" ind2="0" tag="245">
106
+ <subfield code="6">880-01</subfield>
107
+ <subfield code="a">Fen nu de pu tao =</subfield>
108
+ </datafield>
109
+ <datafield ind1="1" ind2="0" tag="880">
110
+ <subfield code="6">245-01</subfield>
111
+ <subfield code="a">愤怒的葡萄 =</subfield>
112
+ </datafield>
113
+ </record>'
114
+ indexer.instance_eval do
115
+ to_field 'tags_w_880s', tags_with_880s
116
+ end
117
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).not_to include('100')
118
+ end
119
+ it 'include 880 even if assoc field missing (and not 00 assoc number)' do
120
+ marcxml =
121
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
122
+ <leader>01942cam a2200505Ia 4500</leader>
123
+ <controlfield tag="001">f880s</controlfield>
124
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
125
+ <datafield ind1="1" ind2="0" tag="245">
126
+ <subfield code="6">880-01</subfield>
127
+ <subfield code="a">Fen nu de pu tao =</subfield>
128
+ </datafield>
129
+ <datafield ind1="1" ind2="0" tag="880">
130
+ <subfield code="6">245-01</subfield>
131
+ <subfield code="a">愤怒的葡萄 =</subfield>
132
+ </datafield>
133
+ <datafield ind1=" " ind2=" " tag="880">
134
+ <subfield code="6">250-02</subfield>
135
+ <subfield code="a">第1版.</subfield>
136
+ </datafield>
137
+ </record>'
138
+ indexer.instance_eval do
139
+ to_field 'tags_w_880s', tags_with_880s
140
+ end
141
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to include('250')
142
+ end
143
+ it 'include 880s with 00 assoc number' do
144
+ marcxml =
145
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
146
+ <leader>01942cam a2200505Ia 4500</leader>
147
+ <controlfield tag="001">f880s</controlfield>
148
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
149
+ <datafield ind1="1" ind2="0" tag="245">
150
+ <subfield code="a">The grapes of wrath /</subfield>
151
+ </datafield>
152
+ <datafield ind1="1" ind2="0" tag="880">
153
+ <subfield code="6">246-00</subfield>
154
+ <subfield code="a">愤怒的葡萄</subfield>
155
+ </datafield>
156
+ </record>'
157
+ indexer.instance_eval do
158
+ to_field 'tags_w_880s', tags_with_880s
159
+ end
160
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq ['246']
161
+ end
162
+ it 'script identified in 880 |6' do
163
+ marcxml =
164
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
165
+ <leader>01564cam a2200409 a 4500</leader>
166
+ <controlfield tag="001">13850373</controlfield>
167
+ <controlfield tag="008">050125s2004 cc a b 000 0 chi d</controlfield>
168
+ <datafield tag="700" ind1="1" ind2=" ">
169
+ <subfield code="6">880-04</subfield>
170
+ <subfield code="a">Qu, Wei</subfield>
171
+ </datafield>
172
+ <datafield tag="880" ind1="1" ind2=" ">
173
+ <subfield code="6">700-04/$1</subfield>
174
+ <subfield code="a">&#x66F2;&#x4F1F;</subfield>
175
+ </datafield>
176
+ </record>'
177
+ indexer.instance_eval do
178
+ to_field 'tags_w_880s', tags_with_880s
179
+ end
180
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq ['700']
181
+ end
182
+ it 'repeated tags with 880s are deduped by default' do
183
+ marcxml =
184
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
185
+ <leader>01564cam a2200409 a 4500</leader>
186
+ <controlfield tag="001">13850373</controlfield>
187
+ <controlfield tag="008">050125s2004 cc a b 000 0 chi d</controlfield>
188
+ <datafield tag="245" ind1="0" ind2="0">
189
+ <subfield code="a"> Haerbin Youtai ren =</subfield>
190
+ <subfield code="b">Collection of research papers on Harbin Jews /</subfield>
191
+ </datafield>
192
+ <datafield tag="700" ind1="1" ind2=" ">
193
+ <subfield code="6">880-04</subfield>
194
+ <subfield code="a">Qu, Wei</subfield>
195
+ </datafield>
196
+ <datafield tag="700" ind1="1" ind2=" ">
197
+ <subfield code="6">880-05</subfield>
198
+ <subfield code="a">Li, Shuxiao.</subfield>
199
+ </datafield>
200
+ <datafield tag="880" ind1="1" ind2=" ">
201
+ <subfield code="6">700-04/$1</subfield>
202
+ <subfield code="a">&#x66F2;&#x4F1F;</subfield>
203
+ </datafield>
204
+ <datafield tag="880" ind1="1" ind2=" ">
205
+ <subfield code="6">700-05/$1</subfield>
206
+ <subfield code="a">&#x674E;&#x8FF0;&#x7B11;.</subfield>
207
+ </datafield>
208
+ </record>'
209
+ indexer.instance_eval do
210
+ to_field 'tags_w_880s', tags_with_880s
211
+ end
212
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq ['700']
213
+ end
214
+ it 'dedup=false' do
215
+ marcxml =
216
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
217
+ <leader>01564cam a2200409 a 4500</leader>
218
+ <controlfield tag="001">13850373</controlfield>
219
+ <controlfield tag="008">050125s2004 cc a b 000 0 chi d</controlfield>
220
+ <datafield tag="245" ind1="0" ind2="0">
221
+ <subfield code="a"> Haerbin Youtai ren =</subfield>
222
+ <subfield code="b">Collection of research papers on Harbin Jews /</subfield>
223
+ </datafield>
224
+ <datafield tag="700" ind1="1" ind2=" ">
225
+ <subfield code="6">880-04</subfield>
226
+ <subfield code="a">Qu, Wei</subfield>
227
+ </datafield>
228
+ <datafield tag="700" ind1="1" ind2=" ">
229
+ <subfield code="6">880-05</subfield>
230
+ <subfield code="a">Li, Shuxiao.</subfield>
231
+ </datafield>
232
+ <datafield tag="880" ind1="1" ind2=" ">
233
+ <subfield code="6">700-04/$1</subfield>
234
+ <subfield code="a">&#x66F2;&#x4F1F;</subfield>
235
+ </datafield>
236
+ <datafield tag="880" ind1="1" ind2=" ">
237
+ <subfield code="6">700-05/$1</subfield>
238
+ <subfield code="a">&#x674E;&#x8FF0;&#x7B11;.</subfield>
239
+ </datafield>
240
+ </record>'
241
+ indexer.instance_eval do
242
+ to_field 'tags_w_880s', tags_with_880s(false)
243
+ end
244
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq %w(700 700)
245
+ end
246
+ it 'do not include tags without 880s' do
247
+ marcxml =
248
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
249
+ <leader>01942cam a2200505Ia 4500</leader>
250
+ <controlfield tag="001">f880s</controlfield>
251
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
252
+ <datafield ind1="1" ind2="0" tag="245">
253
+ <subfield code="6">880-01</subfield>
254
+ <subfield code="a">title</subfield>
255
+ </datafield>
256
+ </record>'
257
+ indexer.instance_eval do
258
+ to_field 'tags_w_880s', tags_with_880s
259
+ end
260
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq nil
261
+ end
262
+ it 'no 880s: field not in output_hash' do
263
+ marcxml =
264
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
265
+ <leader>01052cam a2200313 i 4500</leader>
266
+ <controlfield tag="001">880s</controlfield>
267
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
268
+ </record>'
269
+ indexer.instance_eval do
270
+ to_field 'tags_w_880s', tags_with_880s
271
+ end
272
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq nil
273
+ end
274
+ end # tags_with_880s
275
+
276
+
277
+ context 'tag_codes_in_880s' do
278
+ it 'gets all codes other than 6' do
279
+ marcxml =
280
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
281
+ <leader>01942cam a2200505Ia 4500</leader>
282
+ <controlfield tag="001">f880s</controlfield>
283
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
284
+ <datafield ind1="1" ind2="0" tag="245">
285
+ <subfield code="6">880-01</subfield>
286
+ <subfield code="a">Fen nu de pu tao =</subfield>
287
+ </datafield>
288
+ <datafield ind1="1" ind2="0" tag="880">
289
+ <subfield code="6">245-01</subfield>
290
+ <subfield code="a">愤怒的葡萄 =</subfield>
291
+ <subfield code="b">The grapes of wrath /</subfield>
292
+ <subfield code="c">斯坦培克著 ; 胡仲持译.</subfield>
293
+ </datafield>
294
+ </record>'
295
+ indexer.instance_eval do
296
+ to_field 'codes_in_880_for_245', tag_codes_in_880s('245')
297
+ end
298
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_245']).to eq %w(a b c)
299
+ end
300
+ it 'repeated tags without repeated subfield codes' do
301
+ marcxml =
302
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
303
+ <leader>01052cam a2200313 i 4500</leader>
304
+ <controlfield tag="001">field_codes</controlfield>
305
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
306
+ <datafield ind1=" " ind2=" " tag="666">
307
+ <subfield code="6">880-01</subfield>
308
+ <subfield code="a">suba</subfield>
309
+ </datafield>
310
+ <datafield ind1=" " ind2=" " tag="666">
311
+ <subfield code="6">880-02</subfield>
312
+ <subfield code="a">suba</subfield>
313
+ </datafield>
314
+ <datafield ind1="1" ind2="0" tag="880">
315
+ <subfield code="6">666-01</subfield>
316
+ <subfield code="a">愤怒的葡萄 =</subfield>
317
+ <subfield code="b">The grapes of wrath /</subfield>
318
+ </datafield>
319
+ <datafield ind1="1" ind2="0" tag="880">
320
+ <subfield code="6">666-02</subfield>
321
+ <subfield code="c">斯坦培克著 ; 胡仲持译.</subfield>
322
+ <subfield code="d">斯坦培克著 ; 胡仲持译.</subfield>
323
+ </datafield>
324
+ </record>'
325
+ indexer.instance_eval do
326
+ to_field 'codes_in_880_for_666', tag_codes_in_880s('666')
327
+ end
328
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_666']).to eq %w(a b c d)
329
+ end
330
+ it 'multiple occurrences of single subfield in single tags' do
331
+ marcxml =
332
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
333
+ <leader>01052cam a2200313 i 4500</leader>
334
+ <controlfield tag="001">field_codes</controlfield>
335
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
336
+ <datafield tag="880" ind1="1" ind2=" ">
337
+ <subfield code="6">490-05/$1</subfield>
338
+ <subfield code="a">&#x671D;&#x9BAE;&#x3000;&#x6642;&#x4EE3;&#x3000;&#x79C1;&#x64B0;&#x3000;&#x9091;&#x8A8C;&#x3000;&#xFF1B;</subfield>
339
+ <subfield code="v">12.</subfield>
340
+ <subfield code="a">&#x4EAC;&#x757F;&#x9053; ;</subfield>
341
+ <subfield code="v">12</subfield>
342
+ </datafield>
343
+ </record>'
344
+ indexer.instance_eval do
345
+ to_field 'codes_in_880_for_490', tag_codes_in_880s('490')
346
+ end
347
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_490']).to eq %w(a v)
348
+ end
349
+ it 'multiple occurrences of subfields in multiple tags' do
350
+ marcxml =
351
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
352
+ <leader>01052cam a2200313 i 4500</leader>
353
+ <controlfield tag="001">field_codes</controlfield>
354
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
355
+ <datafield ind1=" " ind2=" " tag="666">
356
+ <subfield code="6">880-01</subfield>
357
+ <subfield code="a">suba</subfield>
358
+ </datafield>
359
+ <datafield ind1=" " ind2=" " tag="666">
360
+ <subfield code="6">880-02</subfield>
361
+ <subfield code="a">suba</subfield>
362
+ </datafield>
363
+ <datafield ind1="1" ind2="0" tag="880">
364
+ <subfield code="6">666-01</subfield>
365
+ <subfield code="a">愤怒的葡萄 =</subfield>
366
+ <subfield code="b">The grapes of wrath /</subfield>
367
+ </datafield>
368
+ <datafield ind1="1" ind2="0" tag="880">
369
+ <subfield code="6">666-02</subfield>
370
+ <subfield code="a">斯坦培克著 ; 胡仲持译.</subfield>
371
+ <subfield code="b">斯坦培克著 ; 胡仲持译.</subfield>
372
+ </datafield>
373
+ </record>'
374
+ indexer.instance_eval do
375
+ to_field 'codes_in_880_for_666', tag_codes_in_880s('666')
376
+ end
377
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_666']).to eq %w(a b)
378
+ end
379
+ it 'gets numeric codes' do
380
+ marcxml =
381
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
382
+ <leader>01942cam a2200505Ia 4500</leader>
383
+ <controlfield tag="001">f880s</controlfield>
384
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
385
+ <datafield ind1="1" ind2="0" tag="245">
386
+ <subfield code="6">880-01</subfield>
387
+ <subfield code="a">Fen nu de pu tao =</subfield>
388
+ </datafield>
389
+ <datafield ind1="1" ind2="0" tag="880">
390
+ <subfield code="6">245-01</subfield>
391
+ <subfield code="0">(OCoLC)fst01140873</subfield>
392
+ <subfield code="a">愤怒的葡萄 =</subfield>
393
+ </datafield>
394
+ </record>'
395
+ indexer.instance_eval do
396
+ to_field 'codes_in_880_for_245', tag_codes_in_880s('245')
397
+ end
398
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_245']).to include('0')
399
+ end
400
+ it 'gets non-alphanum codes' do
401
+ marcxml =
402
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
403
+ <leader>01942cam a2200505Ia 4500</leader>
404
+ <controlfield tag="001">f880s</controlfield>
405
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
406
+ <datafield ind1="1" ind2="0" tag="245">
407
+ <subfield code="6">880-01</subfield>
408
+ <subfield code="a">Fen nu de pu tao =</subfield>
409
+ </datafield>
410
+ <datafield ind1="1" ind2="0" tag="880">
411
+ <subfield code="6">245-01</subfield>
412
+ <subfield code="a">愤怒的葡萄 =</subfield>
413
+ <subfield code="=">^A885612</subfield>
414
+ </datafield>
415
+ </record>'
416
+ indexer.instance_eval do
417
+ to_field 'codes_in_880_for_245', tag_codes_in_880s('245')
418
+ end
419
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_245']).to include('=')
420
+ end
421
+ it 'dedup=false' do
422
+ marcxml =
423
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
424
+ <leader>01052cam a2200313 i 4500</leader>
425
+ <controlfield tag="001">field_codes</controlfield>
426
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
427
+ <datafield tag="880" ind1="1" ind2=" ">
428
+ <subfield code="6">490-05/$1</subfield>
429
+ <subfield code="a">&#x671D;&#x9BAE;&#x3000;&#x6642;&#x4EE3;&#x3000;&#x79C1;&#x64B0;&#x3000;&#x9091;&#x8A8C;&#x3000;&#xFF1B;</subfield>
430
+ <subfield code="v">12.</subfield>
431
+ <subfield code="a">&#x4EAC;&#x757F;&#x9053; ;</subfield>
432
+ <subfield code="v">12</subfield>
433
+ </datafield>
434
+ </record>'
435
+ indexer.instance_eval do
436
+ to_field 'codes_in_880_for_490', tag_codes_in_880s('490', false)
437
+ end
438
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_490']).to eq %w(a v a v)
439
+ end
440
+ it 'no 880s: field not in output_hash' do
441
+ marcxml =
442
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
443
+ <leader>01052cam a2200313 i 4500</leader>
444
+ <controlfield tag="001">880s</controlfield>
445
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
446
+ </record>'
447
+ indexer.instance_eval do
448
+ to_field 'f245_880_codes', tag_codes_in_880s('245')
449
+ end
450
+ expect(indexer.map_record(parse_marc(marcxml))['f245_880_codes']).to eq nil
451
+ end
452
+ it 'no assoc 880s: field not in output hash' do
453
+ marcxml =
454
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
455
+ <leader>01052cam a2200313 i 4500</leader>
456
+ <controlfield tag="001">880s</controlfield>
457
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
458
+ <datafield ind1="1" ind2="0" tag="245">
459
+ <subfield code="6">880-01</subfield>
460
+ <subfield code="a">title</subfield>
461
+ </datafield>
462
+ <datafield ind1="1" ind2="0" tag="880">
463
+ <subfield code="6">245-01</subfield>
464
+ <subfield code="a">愤怒的葡萄</subfield>
465
+ </datafield>
466
+ </record>'
467
+ indexer.instance_eval do
468
+ to_field 'f246_880_codes', tag_codes_in_880s('246')
469
+ end
470
+ expect(indexer.map_record(parse_marc(marcxml))['f246_880_codes']).to eq nil
471
+ end
472
+ end # tag_codes_in_880s
473
+
474
+ context 'tags_for_unassociated_880s' do
475
+ it 'takes first 3 chars when occurrence number is 00' do
476
+ marcxml =
477
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
478
+ <leader>01942cam a2200505Ia 4500</leader>
479
+ <controlfield tag="001">f880s</controlfield>
480
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
481
+ <datafield ind1="1" ind2="0" tag="245">
482
+ <subfield code="a">title</subfield>
483
+ </datafield>
484
+ <datafield ind1="1" ind2="0" tag="880">
485
+ <subfield code="6">245-00</subfield>
486
+ <subfield code="a">愤怒的葡萄</subfield>
487
+ </datafield>
488
+ <datafield ind1="1" ind2="0" tag="880">
489
+ <subfield code="6">zZZ-00</subfield>
490
+ <subfield code="a">anything</subfield>
491
+ </datafield>
492
+ </record>'
493
+ indexer.instance_eval do
494
+ to_field 'orphan_880s', tags_for_unassociated_880s
495
+ end
496
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq %w(245 zZZ)
497
+ end
498
+ it "doesn't require associated field for -00" do
499
+ marcxml =
500
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
501
+ <leader>01942cam a2200505Ia 4500</leader>
502
+ <controlfield tag="001">f880s</controlfield>
503
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
504
+ <datafield ind1="1" ind2="0" tag="880">
505
+ <subfield code="6">246-00</subfield>
506
+ <subfield code="a">愤怒的葡萄</subfield>
507
+ </datafield>
508
+ </record>'
509
+ indexer.instance_eval do
510
+ to_field 'orphan_880s', tags_for_unassociated_880s
511
+ end
512
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq ['246']
513
+ end
514
+ it "included if assoc field is missing" do
515
+ marcxml =
516
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
517
+ <leader>01942cam a2200505Ia 4500</leader>
518
+ <controlfield tag="001">f880s</controlfield>
519
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
520
+ <datafield ind1="1" ind2="0" tag="245">
521
+ <subfield code="a">Fen nu de pu tao =</subfield>
522
+ </datafield>
523
+ <datafield ind1=" " ind2=" " tag="880">
524
+ <subfield code="6">250-02</subfield>
525
+ <subfield code="a">第1版.</subfield>
526
+ </datafield>
527
+ </record>'
528
+ indexer.instance_eval do
529
+ to_field 'orphan_880s', tags_for_unassociated_880s
530
+ end
531
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq ['250']
532
+ end
533
+ it 'dedup=false' do
534
+ marcxml =
535
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
536
+ <leader>01942cam a2200505Ia 4500</leader>
537
+ <controlfield tag="001">f880s</controlfield>
538
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
539
+ <datafield ind1="1" ind2="0" tag="880">
540
+ <subfield code="6">246-00</subfield>
541
+ <subfield code="a">愤怒的葡萄</subfield>
542
+ </datafield>
543
+ <datafield ind1="1" ind2="0" tag="880">
544
+ <subfield code="6">246-00</subfield>
545
+ <subfield code="a">愤怒的葡萄</subfield>
546
+ </datafield>
547
+ </record>'
548
+ indexer.instance_eval do
549
+ to_field 'orphan_880s', tags_for_unassociated_880s
550
+ end
551
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq %w(246 246)
552
+ end
553
+ it "ignores tags referring to non-existent 880s" do
554
+ marcxml =
555
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
556
+ <leader>01942cam a2200505Ia 4500</leader>
557
+ <controlfield tag="001">f880s</controlfield>
558
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
559
+ <datafield ind1="1" ind2="0" tag="245">
560
+ <subfield code="6">880-01</subfield>
561
+ <subfield code="a">title</subfield>
562
+ </datafield>
563
+ </record>'
564
+ indexer.instance_eval do
565
+ to_field 'orphan_880s', tags_for_unassociated_880s
566
+ end
567
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq nil
568
+ end
569
+ it 'no 880s: field not in output_hash' do
570
+ marcxml =
571
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
572
+ <leader>01052cam a2200313 i 4500</leader>
573
+ <controlfield tag="001">880s</controlfield>
574
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
575
+ </record>'
576
+ indexer.instance_eval do
577
+ to_field 'orphan_880s', tags_for_unassociated_880s
578
+ end
579
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq nil
580
+ end
581
+ it 'no unassoc 880s: field not in output hash' do
582
+ marcxml =
583
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
584
+ <leader>01052cam a2200313 i 4500</leader>
585
+ <controlfield tag="001">880s</controlfield>
586
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
587
+ <datafield ind1="1" ind2="0" tag="245">
588
+ <subfield code="6">880-01</subfield>
589
+ <subfield code="a">title</subfield>
590
+ </datafield>
591
+ <datafield ind1="1" ind2="0" tag="880">
592
+ <subfield code="6">245-01</subfield>
593
+ <subfield code="a">愤怒的葡萄</subfield>
594
+ </datafield>
595
+ </record>'
596
+ indexer.instance_eval do
597
+ to_field 'orphan_880s', tags_for_unassociated_880s
598
+ end
599
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq nil
600
+ end
601
+ end # tags_for_unassociated_880s
602
+
603
+ end