traject_profiling 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e0af0cc816155f0d24efd1e61b3291aff736b9a
4
- data.tar.gz: 339785fe91fe7114c85627ee48935c9e1e366904
3
+ metadata.gz: bb75507d672134b5852f53c2621f7e2e1babfeaa
4
+ data.tar.gz: 82bc9804e6dd9514ab6f2f8e4bb2644446c802ed
5
5
  SHA512:
6
- metadata.gz: 8eb43cb020eadf138e87d2dbeedfff53f086507d87060dd0c31649328050979c45903163412ae3bd83a4921cdb5e0fbe6d999018feccbf6a32fa862b3fdfa1fb
7
- data.tar.gz: f1d6e12641c74351ce0ffec992afc921b8e9120c9666f9f0c1421fa1dbdb2560d63dbf8f893994ece7f0bf957beacc53750c5e64dbca777f8ca782125f023477
6
+ metadata.gz: f893ff54c6044570384ea62e1856b56be230ae4c6190b8cc7675bdd59cb73ff8fb758f0fa14de5a0a3d87ef58cd1eaf7dd8b0555f3a1d969dbbf27dc00bf4104
7
+ data.tar.gz: 799a0ba326b39e56aeebdf124d6f74e68c2e465a78f1e80d0750fb74c0b4d6e33862d5bddd5773242e746e8ff1c36c1ba054bc415d873ae75f87bb91b3ba5939
@@ -0,0 +1 @@
1
+ service_name: travis-ci
@@ -0,0 +1,26 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ require: rubocop-rspec
4
+
5
+ Metrics/LineLength:
6
+ Max: 120
7
+
8
+ Metrics/MethodLength:
9
+ CountComments: false # count full line comments?
10
+ Max: 20
11
+
12
+ Style/EmptyLinesAroundBlockBody:
13
+ Enabled: false
14
+
15
+ Style/EmptyLinesAroundClassBody:
16
+ Enabled: false
17
+
18
+ Style/EmptyLinesAroundModuleBody:
19
+ Enabled: false
20
+
21
+ Style/SpaceAroundEqualsInParameterDefault:
22
+ EnforcedStyle: no_space
23
+
24
+ RSpec/DescribeClass:
25
+ Enabled: false
26
+
@@ -0,0 +1,23 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2015-10-01 15:34:04 -0700 using RuboCop version 0.33.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 2
10
+ Lint/HandleExceptions:
11
+ Exclude:
12
+ - 'Rakefile'
13
+
14
+ # Offense count: 1
15
+ Style/Documentation:
16
+ Exclude:
17
+ - 'lib/traject/profiling/version.rb'
18
+
19
+ # Offense count: 1
20
+ # Cop supports --auto-correct.
21
+ # Configuration parameters: WordRegex.
22
+ Style/WordArray:
23
+ MinSize: 2
@@ -1,5 +1,5 @@
1
1
  language: ruby
2
- script: rake ci
2
+ script: rake spec
3
3
  rvm:
4
4
  - ruby-head
5
5
  - 2.2.3
@@ -9,6 +9,8 @@ rvm:
9
9
  matrix:
10
10
  allow_failures:
11
11
  - rvm: ruby-head
12
+ - rvm: jruby-9.0.0.0 # pry-byebug (interactive debugging gem) doesn't load for jruby
13
+ - rvm: jruby-1.7.20 # pry-byebug doesn't load for jruby
12
14
  fast_finish: true
13
15
  env:
14
16
  global:
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Build Status](https://travis-ci.org/sul-dlss/traject_profiling.svg?branch=master)](https://travis-ci.org/sul-dlss/traject_profiling) [![Coverage Status](https://coveralls.io/repos/sul-dlss/traject_profiling/badge.png)](https://coveralls.io/r/sul-dlss/traject_profiling) [![Dependency Status](https://gemnasium.com/sul-dlss/traject_profiling.svg)](https://gemnasium.com/sul-dlss/traject_profiling) [![Gem Version](https://badge.fury.io/rb/traject_profiling.svg)](http://badge.fury.io/rb/traject_profiling)
2
+
1
3
  # traject_profiling
2
4
 
3
5
  Traject macros to provide profiling information on MARC bibliographic records.
@@ -15,11 +17,12 @@ extend Traject::Profiling::Macros
15
17
 
16
18
  to_field 'id', extract_marc('001', :first=>true)
17
19
  to_field 'f700count', field_count('700')
18
- to_field 'f700ind1', field_ind_vals('700', '1')
20
+ to_field 'f700ind1', field_ind('700', '1') # 700 ind1 values
19
21
  to_field 'f700ind2', field_ind('700', '2')
20
- to_field 'f700subflds', field_subfield_codes('700')
21
- to_field 'f880_for', f880_tags
22
- to_field 'f880_for', f880_tags_and_subfields
22
+ to_field 'f700codes', field_codes('700') # subfield codes used in 700 fields
23
+ to_field 'f880_for', tags_with_880s()
24
+ to_field 'f880codes_for_700', tag_codes_in_880s('700')
25
+ to_field 'orphan_880s', tags_for_unassociated_880s
23
26
 
24
27
  ```
25
28
 
data/Rakefile CHANGED
@@ -1,17 +1,17 @@
1
1
  begin
2
2
  require 'bundler'
3
- require "bundler/gem_tasks"
3
+ require 'bundler/gem_tasks'
4
4
  rescue LoadError => e
5
5
  warn e.message
6
- warn "Run `gem install bundler` to install Bundler."
7
- exit -1
6
+ warn 'Run `gem install bundler` to install Bundler.'
7
+ exit(-1)
8
8
  end
9
9
 
10
10
  begin
11
11
  Bundler.setup(:development)
12
12
  rescue Bundler::BundlerError => e
13
13
  warn e.message
14
- warn "Run `bundle install` to install missing gems."
14
+ warn 'Run `bundle install` to install missing gems.'
15
15
  exit e.status_code
16
16
  end
17
17
 
@@ -20,7 +20,7 @@ require 'rake'
20
20
  begin
21
21
  require 'yard'
22
22
  YARD::Rake::YardocTask.new
23
- task :doc => :yard
23
+ task doc: :yard
24
24
  rescue LoadError
25
25
  # yard not available - we're probably on a prod environment or need to run bundle install
26
26
  end
@@ -28,8 +28,7 @@ end
28
28
  begin
29
29
  require 'rspec/core/rake_task'
30
30
  RSpec::Core::RakeTask.new(:spec)
31
- task :default => :spec
31
+ task default: :spec
32
32
  rescue LoadError
33
- # rspec not available - we're probably on a prod environment or need to run bundle install
33
+ # rspec not available - we're probably on a prod environment or need to run bundle install
34
34
  end
35
-
@@ -1,5 +1,6 @@
1
- require "traject/profiling/version"
2
- require "traject/profiling/field_macros"
1
+ require 'traject/profiling/version'
2
+ require 'traject/profiling/field_macros'
3
+ require 'traject/profiling/f880_macros'
3
4
 
4
5
  module Traject
5
6
  class Profiling
@@ -0,0 +1,68 @@
1
module Traject
  class Profiling

    # traject "macros" to be used with #to_field in a traject config file
    module Macros

      # Gets the tags of the fields that the 880 fields in a marc record are linked to.
      # The linked tag is taken from the first three characters of each 880's subfield 6
      # (e.g. an 880 with |6 "245-01" contributes "245").
      # 880 fields without a subfield 6 are skipped.
      # If there are no 880 fields, accumulator is not altered (field should be missing in output_hash).
      # If multiple 880s link to the same tag, there is a single output value for the tag unless dedup=false.
      # @param [Boolean] dedup - set to false if duplicate values should produce duplicate output values
      # @return [lambda] lambda expression appropriate for "to_field", with the linked tags
      #   appended to the lambda's accumulator param
      def tags_with_880s(dedup=true)
        lambda do |record, accumulator, _context|
          record.each_by_tag('880') do |field|
            linkage = field['6']
            next if linkage.nil? # no linkage subfield: nothing to report for this 880
            tag = linkage[0, 3]
            accumulator << tag unless dedup && accumulator.include?(tag)
          end
        end
      end

      # Gets all the subfield codes used in the 880 fields linked to a tag in a marc record.
      # An 880 is considered linked to the tag when the first three characters of its subfield 6 equal tag.
      # 880 fields without a subfield 6 are skipped.
      # If no 880 is linked to the tag, accumulator is not altered (field should be missing in output_hash).
      # Otherwise the accumulator contents are replaced by the codes found; code '6' is never included.
      # If multiple occurrences of a code, there is a single output value for each unique subfield code
      # unless dedup=false.
      # @param [String] tag - marc field tag the 880 must be linked to; three chars
      # @param [Boolean] dedup - set to false if duplicate values should produce duplicate output values
      # @return [lambda] lambda expression appropriate for "to_field", with the subfield codes
      #   placed in the lambda's accumulator param
      def tag_codes_in_880s(tag, dedup=true)
        lambda do |record, accumulator, _context|
          codes = []
          matched = false
          record.each_by_tag('880') do |field|
            linkage = field['6']
            next unless linkage && linkage[0, 3] == tag
            matched = true
            codes.concat(field.codes(dedup))
          end
          # only touch the accumulator when at least one 880 was linked to the tag
          accumulator.replace((dedup ? codes.uniq : codes) - ['6']) if matched
        end
      end

      # Gets the tag of the associated field of an 880 when the |6 linkage occurrence number is 00 or
      # when the linkage refers to a field not present in the marc record.
      # e.g. 880 has subfield 6 with value 260-00, so '260' is added to the accumulator.
      # 880 fields without a subfield 6 are skipped.
      # If multiple such 880s refer to the same tag, there is a single output value for the tag
      # unless dedup=false.
      # @param [Boolean] dedup - set to false if duplicate values should produce duplicate output values
      # @return [lambda] lambda expression appropriate for "to_field", with the tags of the
      #   unassociated 880s appended to the lambda's accumulator param
      def tags_for_unassociated_880s(dedup=true)
        lambda do |record, accumulator, _context|
          record.each_by_tag('880') do |field|
            linkage = field['6']
            next if linkage.nil?
            tag = linkage[0, 3]
            # chars 5-6 of the linkage hold the occurrence number
            next unless linkage[4, 2] == '00' || record.fields(tag).empty?
            accumulator << tag unless dedup && accumulator.include?(tag)
          end
        end
      end

    end # module Macros

  end # Profiling class
end # Traject module
@@ -1,25 +1,73 @@
1
1
  module Traject
2
2
  class Profiling
3
3
 
4
+ # traject "macros" to be used with #to_field in a traject config file
4
5
  module Macros
5
6
 
6
- # to_field 'f100ind1', field_ind('100', '1')
7
- # to_field 'f100ind2', field_ind('100', '2')
8
- # to_field 'f100subflds', profile_subfields('100')
9
-
10
- # counts the number of occurrences of a field in a marc record.
7
+ # counts the number of occurrences of a tag in a marc record.
11
8
  # If no occurrences, accumulator is not altered (field should be missing in output_hash)
12
- # @param [String] tag - marc field tag; three chars (usually but not neccesarily numeric)
9
+ # @param [String] tag - marc field tag; three chars (usually but not necessarily numeric)
13
10
  # @return [lambda] lambda expression appropriate for "to_field", with the number of marc fields
14
11
  # matching the tag param added to in the lambda's accumulator param
15
12
  def field_count(tag)
16
- return lambda do |record, accumulator, context|
13
+ lambda do |record, accumulator, _context|
17
14
  num_fields = record.fields(tag).size
18
15
  accumulator << num_fields.to_s if num_fields > 0
19
16
  end
20
17
  end
21
18
 
19
+ # gets the all the values of an indicator for a tag in a marc record.
20
+ # If no occurrences of the tag in the marc record, accumulator is not
21
+ # altered (field should be missing in output_hash).
22
+ # If multiple occurrences, there is a single output value for each unique indicator value unless dedup=false.
23
+ # @param [String] tag - marc field tag; three chars (usually but not necessarily numeric)
24
+ # @param [Object] which_ind - can be '1' or '2' (Strings) or 1 or 2 (int);
25
+ # any other value and accumulator is not altered (field should be missing in output_hash)
26
+ # @param [Boolean] dedup - set to false if duplicate values should produce duplicate output values
27
+ # @return [lambda] lambda expression appropriate for "to_field", with the values of the specified
28
+ # indicator for tag param added to in the lambda's accumulator param
29
+ def field_ind(tag, which_ind, dedup=true)
30
+ lambda do |record, accumulator, _context|
31
+ ind_vals = []
32
+ record.each_by_tag(tag) do |fld|
33
+ case which_ind
34
+ when '1', 1
35
+ ind_vals << fld.indicator1.to_s
36
+ when '2', 2
37
+ ind_vals << fld.indicator2.to_s
38
+ end
39
+ end
40
+ if dedup
41
+ accumulator.replace ind_vals.uniq
42
+ else
43
+ accumulator.replace ind_vals
44
+ end
45
+ end
46
+ end
47
+
48
+ # gets the all the subfield codes for a tag in a marc record.
49
+ # If no occurrences of the tag in the marc record, accumulator is not
50
+ # altered (field should be missing in output_hash).
51
+ # If multiple occurrences, there is a single output value for each unique subfield code unless dedup=false.
52
+ # @param [String] tag - marc field tag; three chars (usually but not necessarily numeric)
53
+ # @param [Boolean] dedup - set to false if duplicate values should produce duplicate output values
54
+ # @return [lambda] lambda expression appropriate for "to_field", with the subfield codes
55
+ # for tag param added to in the lambda's accumulator param
56
+ def field_codes(tag, dedup=true)
57
+ lambda do |record, accumulator, _context|
58
+ codes = []
59
+ record.each_by_tag(tag) do |fld|
60
+ codes << fld.codes(dedup)
61
+ end
62
+ if dedup
63
+ accumulator.replace codes.flatten.uniq
64
+ else
65
+ accumulator.replace codes.flatten
66
+ end
67
+ end
68
+ end
69
+
22
70
  end # module Macros
23
71
 
24
- end
25
- end
72
+ end # Profiling class
73
+ end # Traject module
@@ -1,5 +1,5 @@
1
1
  module Traject
2
2
  class Profiling
3
- VERSION = "0.0.1"
3
+ VERSION = '0.0.2'
4
4
  end
5
5
  end
@@ -0,0 +1,603 @@
1
+ RSpec.describe 'f880_macros' do
2
+
3
+ let!(:indexer) do
4
+ i = Traject::Indexer.new
5
+ i.instance_eval do
6
+ extend Traject::Profiling::Macros
7
+ end
8
+ i
9
+ end # let! indexer
10
+
11
+ context 'tags_with_880s' do
12
+ it 'include tag for each 880' do
13
+ marcxml =
14
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
15
+ <leader>01942cam a2200505Ia 4500</leader>
16
+ <controlfield tag="001">f880s</controlfield>
17
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
18
+ <datafield ind1="1" ind2="0" tag="245">
19
+ <subfield code="6">880-01</subfield>
20
+ <subfield code="a">Fen nu de pu tao =</subfield>
21
+ <subfield code="b">The grapes of wrath /</subfield>
22
+ </datafield>
23
+ <datafield ind1=" " ind2=" " tag="260">
24
+ <subfield code="6">880-02</subfield>
25
+ <subfield code="a">Shanghai Shi :</subfield>
26
+ <subfield code="c">2003.</subfield>
27
+ </datafield>
28
+ <datafield ind1="1" ind2="0" tag="880">
29
+ <subfield code="6">245-01</subfield>
30
+ <subfield code="a">愤怒的葡萄 =</subfield>
31
+ <subfield code="b">The grapes of wrath /</subfield>
32
+ </datafield>
33
+ <datafield ind1=" " ind2=" " tag="880">
34
+ <subfield code="6">260-02</subfield>
35
+ <subfield code="a">上海市 :</subfield>
36
+ <subfield code="c">2003.</subfield>
37
+ </datafield>
38
+ </record>'
39
+ indexer.instance_eval do
40
+ to_field 'tags_w_880s', tags_with_880s
41
+ end
42
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq %w(245 260)
43
+ end
44
+ it 'assoc fields do not need to be consecutive' do
45
+ marcxml =
46
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
47
+ <leader>01942cam a2200505Ia 4500</leader>
48
+ <controlfield tag="001">f880s</controlfield>
49
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
50
+ <datafield ind1="1" ind2="0" tag="245">
51
+ <subfield code="6">880-01</subfield>
52
+ <subfield code="a">Fen nu de pu tao =</subfield>
53
+ </datafield>
54
+ <datafield ind1=" " ind2=" " tag="260">
55
+ <subfield code="6">880-03</subfield>
56
+ <subfield code="a">Shanghai Shi :</subfield>
57
+ </datafield>
58
+ <datafield ind1="1" ind2="0" tag="880">
59
+ <subfield code="6">245-01</subfield>
60
+ <subfield code="a">愤怒的葡萄 =</subfield>
61
+ </datafield>
62
+ <datafield ind1=" " ind2=" " tag="880">
63
+ <subfield code="6">260-03</subfield>
64
+ <subfield code="a">上海市 :</subfield>
65
+ </datafield>
66
+ </record>'
67
+ indexer.instance_eval do
68
+ to_field 'tags_w_880s', tags_with_880s
69
+ end
70
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq %w(245 260)
71
+ end
72
+ it 'do not include tags without 880s (no |6 )' do
73
+ marcxml =
74
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
75
+ <leader>01942cam a2200505Ia 4500</leader>
76
+ <controlfield tag="001">f880s</controlfield>
77
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
78
+ <datafield ind1="1" ind2=" " tag="100">
79
+ <subfield code="a">Steinbeck, John,</subfield>
80
+ </datafield>
81
+ <datafield ind1="1" ind2="0" tag="245">
82
+ <subfield code="6">880-01</subfield>
83
+ <subfield code="a">Fen nu de pu tao =</subfield>
84
+ </datafield>
85
+ <datafield ind1="1" ind2="0" tag="880">
86
+ <subfield code="6">245-01</subfield>
87
+ <subfield code="a">愤怒的葡萄 =</subfield>
88
+ </datafield>
89
+ </record>'
90
+ indexer.instance_eval do
91
+ to_field 'tags_w_880s', tags_with_880s
92
+ end
93
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).not_to include('100')
94
+ end
95
+ it 'do not include tags without 880s (even if they have |6 )' do
96
+ marcxml =
97
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
98
+ <leader>01942cam a2200505Ia 4500</leader>
99
+ <controlfield tag="001">f880s</controlfield>
100
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
101
+ <datafield ind1="1" ind2=" " tag="100">
102
+ <subfield code="6">880-02</subfield>
103
+ <subfield code="a">Steinbeck, John,</subfield>
104
+ </datafield>
105
+ <datafield ind1="1" ind2="0" tag="245">
106
+ <subfield code="6">880-01</subfield>
107
+ <subfield code="a">Fen nu de pu tao =</subfield>
108
+ </datafield>
109
+ <datafield ind1="1" ind2="0" tag="880">
110
+ <subfield code="6">245-01</subfield>
111
+ <subfield code="a">愤怒的葡萄 =</subfield>
112
+ </datafield>
113
+ </record>'
114
+ indexer.instance_eval do
115
+ to_field 'tags_w_880s', tags_with_880s
116
+ end
117
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).not_to include('100')
118
+ end
119
+ it 'include 880 even if assoc field missing (and not 00 assoc number)' do
120
+ marcxml =
121
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
122
+ <leader>01942cam a2200505Ia 4500</leader>
123
+ <controlfield tag="001">f880s</controlfield>
124
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
125
+ <datafield ind1="1" ind2="0" tag="245">
126
+ <subfield code="6">880-01</subfield>
127
+ <subfield code="a">Fen nu de pu tao =</subfield>
128
+ </datafield>
129
+ <datafield ind1="1" ind2="0" tag="880">
130
+ <subfield code="6">245-01</subfield>
131
+ <subfield code="a">愤怒的葡萄 =</subfield>
132
+ </datafield>
133
+ <datafield ind1=" " ind2=" " tag="880">
134
+ <subfield code="6">250-02</subfield>
135
+ <subfield code="a">第1版.</subfield>
136
+ </datafield>
137
+ </record>'
138
+ indexer.instance_eval do
139
+ to_field 'tags_w_880s', tags_with_880s
140
+ end
141
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to include('250')
142
+ end
143
+ it 'include 880s with 00 assoc number' do
144
+ marcxml =
145
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
146
+ <leader>01942cam a2200505Ia 4500</leader>
147
+ <controlfield tag="001">f880s</controlfield>
148
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
149
+ <datafield ind1="1" ind2="0" tag="245">
150
+ <subfield code="a">The grapes of wrath /</subfield>
151
+ </datafield>
152
+ <datafield ind1="1" ind2="0" tag="880">
153
+ <subfield code="6">246-00</subfield>
154
+ <subfield code="a">愤怒的葡萄</subfield>
155
+ </datafield>
156
+ </record>'
157
+ indexer.instance_eval do
158
+ to_field 'tags_w_880s', tags_with_880s
159
+ end
160
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq ['246']
161
+ end
162
+ it 'script identified in 880 |6' do
163
+ marcxml =
164
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
165
+ <leader>01564cam a2200409 a 4500</leader>
166
+ <controlfield tag="001">13850373</controlfield>
167
+ <controlfield tag="008">050125s2004 cc a b 000 0 chi d</controlfield>
168
+ <datafield tag="700" ind1="1" ind2=" ">
169
+ <subfield code="6">880-04</subfield>
170
+ <subfield code="a">Qu, Wei</subfield>
171
+ </datafield>
172
+ <datafield tag="880" ind1="1" ind2=" ">
173
+ <subfield code="6">700-04/$1</subfield>
174
+ <subfield code="a">&#x66F2;&#x4F1F;</subfield>
175
+ </datafield>
176
+ </record>'
177
+ indexer.instance_eval do
178
+ to_field 'tags_w_880s', tags_with_880s
179
+ end
180
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq ['700']
181
+ end
182
+ it 'repeated tags with 880s are deduped by default' do
183
+ marcxml =
184
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
185
+ <leader>01564cam a2200409 a 4500</leader>
186
+ <controlfield tag="001">13850373</controlfield>
187
+ <controlfield tag="008">050125s2004 cc a b 000 0 chi d</controlfield>
188
+ <datafield tag="245" ind1="0" ind2="0">
189
+ <subfield code="a"> Haerbin Youtai ren =</subfield>
190
+ <subfield code="b">Collection of research papers on Harbin Jews /</subfield>
191
+ </datafield>
192
+ <datafield tag="700" ind1="1" ind2=" ">
193
+ <subfield code="6">880-04</subfield>
194
+ <subfield code="a">Qu, Wei</subfield>
195
+ </datafield>
196
+ <datafield tag="700" ind1="1" ind2=" ">
197
+ <subfield code="6">880-05</subfield>
198
+ <subfield code="a">Li, Shuxiao.</subfield>
199
+ </datafield>
200
+ <datafield tag="880" ind1="1" ind2=" ">
201
+ <subfield code="6">700-04/$1</subfield>
202
+ <subfield code="a">&#x66F2;&#x4F1F;</subfield>
203
+ </datafield>
204
+ <datafield tag="880" ind1="1" ind2=" ">
205
+ <subfield code="6">700-05/$1</subfield>
206
+ <subfield code="a">&#x674E;&#x8FF0;&#x7B11;.</subfield>
207
+ </datafield>
208
+ </record>'
209
+ indexer.instance_eval do
210
+ to_field 'tags_w_880s', tags_with_880s
211
+ end
212
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq ['700']
213
+ end
214
+ it 'dedup=false' do
215
+ marcxml =
216
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
217
+ <leader>01564cam a2200409 a 4500</leader>
218
+ <controlfield tag="001">13850373</controlfield>
219
+ <controlfield tag="008">050125s2004 cc a b 000 0 chi d</controlfield>
220
+ <datafield tag="245" ind1="0" ind2="0">
221
+ <subfield code="a"> Haerbin Youtai ren =</subfield>
222
+ <subfield code="b">Collection of research papers on Harbin Jews /</subfield>
223
+ </datafield>
224
+ <datafield tag="700" ind1="1" ind2=" ">
225
+ <subfield code="6">880-04</subfield>
226
+ <subfield code="a">Qu, Wei</subfield>
227
+ </datafield>
228
+ <datafield tag="700" ind1="1" ind2=" ">
229
+ <subfield code="6">880-05</subfield>
230
+ <subfield code="a">Li, Shuxiao.</subfield>
231
+ </datafield>
232
+ <datafield tag="880" ind1="1" ind2=" ">
233
+ <subfield code="6">700-04/$1</subfield>
234
+ <subfield code="a">&#x66F2;&#x4F1F;</subfield>
235
+ </datafield>
236
+ <datafield tag="880" ind1="1" ind2=" ">
237
+ <subfield code="6">700-05/$1</subfield>
238
+ <subfield code="a">&#x674E;&#x8FF0;&#x7B11;.</subfield>
239
+ </datafield>
240
+ </record>'
241
+ indexer.instance_eval do
242
+ to_field 'tags_w_880s', tags_with_880s(false)
243
+ end
244
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq %w(700 700)
245
+ end
246
+ it 'do not include tags without 880s' do
247
+ marcxml =
248
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
249
+ <leader>01942cam a2200505Ia 4500</leader>
250
+ <controlfield tag="001">f880s</controlfield>
251
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
252
+ <datafield ind1="1" ind2="0" tag="245">
253
+ <subfield code="6">880-01</subfield>
254
+ <subfield code="a">title</subfield>
255
+ </datafield>
256
+ </record>'
257
+ indexer.instance_eval do
258
+ to_field 'tags_w_880s', tags_with_880s
259
+ end
260
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq nil
261
+ end
262
+ it 'no 880s: field not in output_hash' do
263
+ marcxml =
264
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
265
+ <leader>01052cam a2200313 i 4500</leader>
266
+ <controlfield tag="001">880s</controlfield>
267
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
268
+ </record>'
269
+ indexer.instance_eval do
270
+ to_field 'tags_w_880s', tags_with_880s
271
+ end
272
+ expect(indexer.map_record(parse_marc(marcxml))['tags_w_880s']).to eq nil
273
+ end
274
+ end # tags_with_880s
275
+
276
+
277
+ context 'tag_codes_in_880s' do
278
+ it 'gets all codes other than 6' do
279
+ marcxml =
280
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
281
+ <leader>01942cam a2200505Ia 4500</leader>
282
+ <controlfield tag="001">f880s</controlfield>
283
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
284
+ <datafield ind1="1" ind2="0" tag="245">
285
+ <subfield code="6">880-01</subfield>
286
+ <subfield code="a">Fen nu de pu tao =</subfield>
287
+ </datafield>
288
+ <datafield ind1="1" ind2="0" tag="880">
289
+ <subfield code="6">245-01</subfield>
290
+ <subfield code="a">愤怒的葡萄 =</subfield>
291
+ <subfield code="b">The grapes of wrath /</subfield>
292
+ <subfield code="c">斯坦培克著 ; 胡仲持译.</subfield>
293
+ </datafield>
294
+ </record>'
295
+ indexer.instance_eval do
296
+ to_field 'codes_in_880_for_245', tag_codes_in_880s('245')
297
+ end
298
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_245']).to eq %w(a b c)
299
+ end
300
+ it 'repeated tags without repeated subfield codes' do
301
+ marcxml =
302
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
303
+ <leader>01052cam a2200313 i 4500</leader>
304
+ <controlfield tag="001">field_codes</controlfield>
305
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
306
+ <datafield ind1=" " ind2=" " tag="666">
307
+ <subfield code="6">880-01</subfield>
308
+ <subfield code="a">suba</subfield>
309
+ </datafield>
310
+ <datafield ind1=" " ind2=" " tag="666">
311
+ <subfield code="6">880-02</subfield>
312
+ <subfield code="a">suba</subfield>
313
+ </datafield>
314
+ <datafield ind1="1" ind2="0" tag="880">
315
+ <subfield code="6">666-01</subfield>
316
+ <subfield code="a">愤怒的葡萄 =</subfield>
317
+ <subfield code="b">The grapes of wrath /</subfield>
318
+ </datafield>
319
+ <datafield ind1="1" ind2="0" tag="880">
320
+ <subfield code="6">666-02</subfield>
321
+ <subfield code="c">斯坦培克著 ; 胡仲持译.</subfield>
322
+ <subfield code="d">斯坦培克著 ; 胡仲持译.</subfield>
323
+ </datafield>
324
+ </record>'
325
+ indexer.instance_eval do
326
+ to_field 'codes_in_880_for_666', tag_codes_in_880s('666')
327
+ end
328
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_666']).to eq %w(a b c d)
329
+ end
330
+ it 'multiple occurrences of single subfield in single tags' do
331
+ marcxml =
332
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
333
+ <leader>01052cam a2200313 i 4500</leader>
334
+ <controlfield tag="001">field_codes</controlfield>
335
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
336
+ <datafield tag="880" ind1="1" ind2=" ">
337
+ <subfield code="6">490-05/$1</subfield>
338
+ <subfield code="a">&#x671D;&#x9BAE;&#x3000;&#x6642;&#x4EE3;&#x3000;&#x79C1;&#x64B0;&#x3000;&#x9091;&#x8A8C;&#x3000;&#xFF1B;</subfield>
339
+ <subfield code="v">12.</subfield>
340
+ <subfield code="a">&#x4EAC;&#x757F;&#x9053; ;</subfield>
341
+ <subfield code="v">12</subfield>
342
+ </datafield>
343
+ </record>'
344
+ indexer.instance_eval do
345
+ to_field 'codes_in_880_for_490', tag_codes_in_880s('490')
346
+ end
347
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_490']).to eq %w(a v)
348
+ end
349
+ it 'multiple occurrences of subfields in multiple tags' do
350
+ marcxml =
351
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
352
+ <leader>01052cam a2200313 i 4500</leader>
353
+ <controlfield tag="001">field_codes</controlfield>
354
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
355
+ <datafield ind1=" " ind2=" " tag="666">
356
+ <subfield code="6">880-01</subfield>
357
+ <subfield code="a">suba</subfield>
358
+ </datafield>
359
+ <datafield ind1=" " ind2=" " tag="666">
360
+ <subfield code="6">880-02</subfield>
361
+ <subfield code="a">suba</subfield>
362
+ </datafield>
363
+ <datafield ind1="1" ind2="0" tag="880">
364
+ <subfield code="6">666-01</subfield>
365
+ <subfield code="a">愤怒的葡萄 =</subfield>
366
+ <subfield code="b">The grapes of wrath /</subfield>
367
+ </datafield>
368
+ <datafield ind1="1" ind2="0" tag="880">
369
+ <subfield code="6">666-02</subfield>
370
+ <subfield code="a">斯坦培克著 ; 胡仲持译.</subfield>
371
+ <subfield code="b">斯坦培克著 ; 胡仲持译.</subfield>
372
+ </datafield>
373
+ </record>'
374
+ indexer.instance_eval do
375
+ to_field 'codes_in_880_for_666', tag_codes_in_880s('666')
376
+ end
377
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_666']).to eq %w(a b)
378
+ end
379
+ it 'gets numeric codes' do
380
+ marcxml =
381
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
382
+ <leader>01942cam a2200505Ia 4500</leader>
383
+ <controlfield tag="001">f880s</controlfield>
384
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
385
+ <datafield ind1="1" ind2="0" tag="245">
386
+ <subfield code="6">880-01</subfield>
387
+ <subfield code="a">Fen nu de pu tao =</subfield>
388
+ </datafield>
389
+ <datafield ind1="1" ind2="0" tag="880">
390
+ <subfield code="6">245-01</subfield>
391
+ <subfield code="0">(OCoLC)fst01140873</subfield>
392
+ <subfield code="a">愤怒的葡萄 =</subfield>
393
+ </datafield>
394
+ </record>'
395
+ indexer.instance_eval do
396
+ to_field 'codes_in_880_for_245', tag_codes_in_880s('245')
397
+ end
398
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_245']).to include('0')
399
+ end
400
+ it 'gets non-alphanum codes' do
401
+ marcxml =
402
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
403
+ <leader>01942cam a2200505Ia 4500</leader>
404
+ <controlfield tag="001">f880s</controlfield>
405
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
406
+ <datafield ind1="1" ind2="0" tag="245">
407
+ <subfield code="6">880-01</subfield>
408
+ <subfield code="a">Fen nu de pu tao =</subfield>
409
+ </datafield>
410
+ <datafield ind1="1" ind2="0" tag="880">
411
+ <subfield code="6">245-01</subfield>
412
+ <subfield code="a">愤怒的葡萄 =</subfield>
413
+ <subfield code="=">^A885612</subfield>
414
+ </datafield>
415
+ </record>'
416
+ indexer.instance_eval do
417
+ to_field 'codes_in_880_for_245', tag_codes_in_880s('245')
418
+ end
419
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_245']).to include('=')
420
+ end
421
+ it 'dedup=false' do
422
+ marcxml =
423
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
424
+ <leader>01052cam a2200313 i 4500</leader>
425
+ <controlfield tag="001">field_codes</controlfield>
426
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
427
+ <datafield tag="880" ind1="1" ind2=" ">
428
+ <subfield code="6">490-05/$1</subfield>
429
+ <subfield code="a">&#x671D;&#x9BAE;&#x3000;&#x6642;&#x4EE3;&#x3000;&#x79C1;&#x64B0;&#x3000;&#x9091;&#x8A8C;&#x3000;&#xFF1B;</subfield>
430
+ <subfield code="v">12.</subfield>
431
+ <subfield code="a">&#x4EAC;&#x757F;&#x9053; ;</subfield>
432
+ <subfield code="v">12</subfield>
433
+ </datafield>
434
+ </record>'
435
+ indexer.instance_eval do
436
+ to_field 'codes_in_880_for_490', tag_codes_in_880s('490', false)
437
+ end
438
+ expect(indexer.map_record(parse_marc(marcxml))['codes_in_880_for_490']).to eq %w(a v a v)
439
+ end
440
+ it 'no 880s: field not in output_hash' do
441
+ marcxml =
442
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
443
+ <leader>01052cam a2200313 i 4500</leader>
444
+ <controlfield tag="001">880s</controlfield>
445
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
446
+ </record>'
447
+ indexer.instance_eval do
448
+ to_field 'f245_880_codes', tag_codes_in_880s('245')
449
+ end
450
+ expect(indexer.map_record(parse_marc(marcxml))['f245_880_codes']).to eq nil
451
+ end
452
+ it 'no assoc 880s: field not in output hash' do
453
+ marcxml =
454
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
455
+ <leader>01052cam a2200313 i 4500</leader>
456
+ <controlfield tag="001">880s</controlfield>
457
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
458
+ <datafield ind1="1" ind2="0" tag="245">
459
+ <subfield code="6">880-01</subfield>
460
+ <subfield code="a">title</subfield>
461
+ </datafield>
462
+ <datafield ind1="1" ind2="0" tag="880">
463
+ <subfield code="6">245-01</subfield>
464
+ <subfield code="a">愤怒的葡萄</subfield>
465
+ </datafield>
466
+ </record>'
467
+ indexer.instance_eval do
468
+ to_field 'f246_880_codes', tag_codes_in_880s('246')
469
+ end
470
+ expect(indexer.map_record(parse_marc(marcxml))['f246_880_codes']).to eq nil
471
+ end
472
+ end # tag_codes_in_880s
473
+
474
+ context 'tags_for_unassociated_880s' do
475
+ it 'takes first 3 chars when occurrence number is 00' do
476
+ marcxml =
477
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
478
+ <leader>01942cam a2200505Ia 4500</leader>
479
+ <controlfield tag="001">f880s</controlfield>
480
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
481
+ <datafield ind1="1" ind2="0" tag="245">
482
+ <subfield code="a">title</subfield>
483
+ </datafield>
484
+ <datafield ind1="1" ind2="0" tag="880">
485
+ <subfield code="6">245-00</subfield>
486
+ <subfield code="a">愤怒的葡萄</subfield>
487
+ </datafield>
488
+ <datafield ind1="1" ind2="0" tag="880">
489
+ <subfield code="6">zZZ-00</subfield>
490
+ <subfield code="a">anything</subfield>
491
+ </datafield>
492
+ </record>'
493
+ indexer.instance_eval do
494
+ to_field 'orphan_880s', tags_for_unassociated_880s
495
+ end
496
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq %w(245 zZZ)
497
+ end
498
+ it "doesn't require associated field for -00" do
499
+ marcxml =
500
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
501
+ <leader>01942cam a2200505Ia 4500</leader>
502
+ <controlfield tag="001">f880s</controlfield>
503
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
504
+ <datafield ind1="1" ind2="0" tag="880">
505
+ <subfield code="6">246-00</subfield>
506
+ <subfield code="a">愤怒的葡萄</subfield>
507
+ </datafield>
508
+ </record>'
509
+ indexer.instance_eval do
510
+ to_field 'orphan_880s', tags_for_unassociated_880s
511
+ end
512
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq ['246']
513
+ end
514
+ it "included if assoc field is missing" do
515
+ marcxml =
516
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
517
+ <leader>01942cam a2200505Ia 4500</leader>
518
+ <controlfield tag="001">f880s</controlfield>
519
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
520
+ <datafield ind1="1" ind2="0" tag="245">
521
+ <subfield code="a">Fen nu de pu tao =</subfield>
522
+ </datafield>
523
+ <datafield ind1=" " ind2=" " tag="880">
524
+ <subfield code="6">250-02</subfield>
525
+ <subfield code="a">第1版.</subfield>
526
+ </datafield>
527
+ </record>'
528
+ indexer.instance_eval do
529
+ to_field 'orphan_880s', tags_for_unassociated_880s
530
+ end
531
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq ['250']
532
+ end
533
+ it 'dedup=false' do
534
+ marcxml =
535
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
536
+ <leader>01942cam a2200505Ia 4500</leader>
537
+ <controlfield tag="001">f880s</controlfield>
538
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
539
+ <datafield ind1="1" ind2="0" tag="880">
540
+ <subfield code="6">246-00</subfield>
541
+ <subfield code="a">愤怒的葡萄</subfield>
542
+ </datafield>
543
+ <datafield ind1="1" ind2="0" tag="880">
544
+ <subfield code="6">246-00</subfield>
545
+ <subfield code="a">愤怒的葡萄</subfield>
546
+ </datafield>
547
+ </record>'
548
+ indexer.instance_eval do
549
+ to_field 'orphan_880s', tags_for_unassociated_880s
550
+ end
551
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq %w(246 246)
552
+ end
553
+ it "ignores tags referring to non-existent 880s" do
554
+ marcxml =
555
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
556
+ <leader>01942cam a2200505Ia 4500</leader>
557
+ <controlfield tag="001">f880s</controlfield>
558
+ <controlfield tag="008">140709s2003 cc 000 1 chird</controlfield>
559
+ <datafield ind1="1" ind2="0" tag="245">
560
+ <subfield code="6">880-01</subfield>
561
+ <subfield code="a">title</subfield>
562
+ </datafield>
563
+ </record>'
564
+ indexer.instance_eval do
565
+ to_field 'orphan_880s', tags_for_unassociated_880s
566
+ end
567
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq nil
568
+ end
569
+ it 'no 880s: field not in output_hash' do
570
+ marcxml =
571
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
572
+ <leader>01052cam a2200313 i 4500</leader>
573
+ <controlfield tag="001">880s</controlfield>
574
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
575
+ </record>'
576
+ indexer.instance_eval do
577
+ to_field 'orphan_880s', tags_for_unassociated_880s
578
+ end
579
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq nil
580
+ end
581
+ it 'no unassoc 880s: field not in output hash' do
582
+ marcxml =
583
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
584
+ <leader>01052cam a2200313 i 4500</leader>
585
+ <controlfield tag="001">880s</controlfield>
586
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
587
+ <datafield ind1="1" ind2="0" tag="245">
588
+ <subfield code="6">880-01</subfield>
589
+ <subfield code="a">title</subfield>
590
+ </datafield>
591
+ <datafield ind1="1" ind2="0" tag="880">
592
+ <subfield code="6">245-01</subfield>
593
+ <subfield code="a">愤怒的葡萄</subfield>
594
+ </datafield>
595
+ </record>'
596
+ indexer.instance_eval do
597
+ to_field 'orphan_880s', tags_for_unassociated_880s
598
+ end
599
+ expect(indexer.map_record(parse_marc(marcxml))['orphan_880s']).to eq nil
600
+ end
601
+ end # tags_for_unassociated_880s
602
+
603
+ end