traject_profiling 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,19 @@
1
-
2
1
  RSpec.describe 'field_macros' do
3
2
 
4
- let!(:indexer) {
3
+ let!(:indexer) do
5
4
  i = Traject::Indexer.new
6
5
  i.instance_eval do
7
6
  extend Traject::Profiling::Macros
8
7
  end
9
8
  i
10
- }
9
+ end # let! indexer
11
10
 
12
- context "field_count" do
13
- let!(:record) {
11
+ context 'field_count' do
12
+ let!(:record) do
14
13
  marcxml_str =
15
14
  '<record xmlns="http://www.loc.gov/MARC21/slim">
16
15
  <leader>01052cam a2200313 i 4500</leader>
17
- <controlfield tag="001">245a</controlfield>
16
+ <controlfield tag="001">field_count</controlfield>
18
17
  <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
19
18
  <datafield ind1="1" ind2="0" tag="245">
20
19
  <subfield code="a">Slippery noodles</subfield>
@@ -27,38 +26,361 @@ RSpec.describe 'field_macros' do
27
26
  </datafield>
28
27
  </record>'
29
28
  parse_marc(marcxml_str)
30
- }
29
+ end # let! record
31
30
 
32
31
  it 'single occurrence of tag' do
33
32
  indexer.instance_eval do
34
- to_field '245count', field_count('245')
33
+ to_field 'f245count', field_count('245')
35
34
  end
36
- output = indexer.map_record(record)
37
- expect(output['245count']).to eq ['1']
35
+ expect(indexer.map_record(record)['f245count']).to eq ['1']
38
36
  end
39
37
  it 'mult occurrences of tag' do
40
38
  indexer.instance_eval do
41
- to_field '700count', field_count('700')
39
+ to_field 'f700count', field_count('700')
42
40
  end
43
- output = indexer.map_record(record)
44
- expect(output['700count']).to eq ['2']
41
+ expect(indexer.map_record(record)['f700count']).to eq ['2']
45
42
  end
46
43
  it 'no occurrences of tag: field not in output_hash' do
47
44
  indexer.instance_eval do
48
- to_field '100count', field_count('100')
45
+ to_field 'f100count', field_count('100')
49
46
  end
50
- output = indexer.map_record(record)
51
- expect(output['100count']).to eq nil
47
+ expect(indexer.map_record(record)['f100count']).to eq nil
52
48
  end
53
49
  end # field_count
54
50
 
55
- end
51
+ context 'field_ind' do
52
+ let!(:record) do
53
+ marcxml_str =
54
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
55
+ <leader>01052cam a2200313 i 4500</leader>
56
+ <controlfield tag="001">field_ind</controlfield>
57
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
58
+ <datafield ind1="1" ind2="2" tag="100">
59
+ <subfield code="a">numeric indicators</subfield>
60
+ </datafield>
61
+ <datafield ind1=" " ind2="_" tag="700">
62
+ <subfield code="a">blank and punctuation indicators</subfield>
63
+ </datafield>
64
+ <datafield ind1="a" ind2="b" tag="800" >
65
+ <subfield code="a">alpha indicators</subfield>
66
+ </datafield>
67
+ </record>'
68
+ parse_marc(marcxml_str)
69
+ end # let! record
70
+
71
+ it 'uses first indicator when second param is 1 (string)' do
72
+ indexer.instance_eval do
73
+ to_field 'f100ind1', field_ind('100', '1')
74
+ end
75
+ expect(indexer.map_record(record)['f100ind1']).to eq ['1']
76
+ end
77
+ it 'uses first indicator when second param is 1 (int)' do
78
+ indexer.instance_eval do
79
+ to_field 'f100ind1', field_ind('100', 1)
80
+ end
81
+ expect(indexer.map_record(record)['f100ind1']).to eq ['1']
82
+ end
83
+ it 'uses second indicator when second param is 2 (string)' do
84
+ indexer.instance_eval do
85
+ to_field 'f100ind2', field_ind('100', '2')
86
+ end
87
+ expect(indexer.map_record(record)['f100ind2']).to eq ['2']
88
+ end
89
+ it 'uses second indicator when second param is 2 (int)' do
90
+ indexer.instance_eval do
91
+ to_field 'f100ind2', field_ind('100', 2)
92
+ end
93
+ expect(indexer.map_record(record)['f100ind2']).to eq ['2']
94
+ end
95
+ it 'returns nil (field not in output_hash) when second param is not 1 or 2' do
96
+ indexer.instance_eval do
97
+ to_field 'f100ind_3', field_ind('100', '3')
98
+ to_field 'f100ind_first', field_ind('100', 'first')
99
+ to_field 'f100ind_a', field_ind('100', 'a')
100
+ end
101
+ output_hash = indexer.map_record(record)
102
+ expect(output_hash['f100ind_3']).to eq nil
103
+ expect(output_hash['f100ind_first']).to eq nil
104
+ expect(output_hash['f100ind_a']).to eq nil
105
+ end
106
+ it 'single instance of tag returns single char value' do
107
+ indexer.instance_eval do
108
+ to_field 'f100ind1', field_ind('100', 1)
109
+ end
110
+ expect(indexer.map_record(record)['f100ind1']).to eq ['1']
111
+ end
112
+ it 'multiple instances of tag all with same ind value returns unrepeated char value' do
113
+ marcxml =
114
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
115
+ <leader>01052cam a2200313 i 4500</leader>
116
+ <controlfield tag="001">field_ind</controlfield>
117
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
118
+ <datafield ind1="1" ind2=" " tag="700">
119
+ <subfield code="a">numeric indicators</subfield>
120
+ </datafield>
121
+ <datafield ind1="1" ind2=" " tag="700">
122
+ <subfield code="a">blank and punctuation indicators</subfield>
123
+ </datafield>
124
+ </record>'
125
+ indexer.instance_eval do
126
+ to_field 'f700ind1', field_ind('700', 1)
127
+ end
128
+ expect(indexer.map_record(parse_marc(marcxml))['f700ind1']).to eq ['1']
129
+ end
130
+ it 'each char used in indicator is a separate value' do
131
+ marcxml =
132
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
133
+ <leader>01052cam a2200313 i 4500</leader>
134
+ <controlfield tag="001">field_ind</controlfield>
135
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
136
+ <datafield ind1="1" ind2=" " tag="700">
137
+ <subfield code="a">numeric indicators</subfield>
138
+ </datafield>
139
+ <datafield ind1="2" ind2=" " tag="700">
140
+ <subfield code="a">blank and punctuation indicators</subfield>
141
+ </datafield>
142
+ <datafield ind1="3" ind2=" " tag="700">
143
+ <subfield code="a">blank and punctuation indicators</subfield>
144
+ </datafield>
145
+ </record>'
146
+ indexer.instance_eval do
147
+ to_field 'f700ind1', field_ind('700', 1)
148
+ end
149
+ expect(indexer.map_record(parse_marc(marcxml))['f700ind1']).to eq %w(1 2 3)
150
+ end
151
+ it 'blank value included' do
152
+ indexer.instance_eval do
153
+ to_field 'f700ind1', field_ind('700', 1)
154
+ end
155
+ expect(indexer.map_record(record)['f700ind1']).to eq [' ']
156
+ end
157
+ it 'non-alphanum values included' do
158
+ indexer.instance_eval do
159
+ to_field 'f700ind2', field_ind('700', 2)
160
+ end
161
+ expect(indexer.map_record(record)['f700ind2']).to eq ['_']
162
+ end
163
+ it 'no occurrences of tag: field not in output_hash' do
164
+ indexer.instance_eval do
165
+ to_field 'f245ind1', field_ind('245', 1)
166
+ end
167
+ expect(indexer.map_record(record)['f245ind1']).to eq nil
168
+ end
169
+ context 'dedup=false' do
170
+ it 'multiple occurrences of single value' do
171
+ marcxml =
172
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
173
+ <leader>01052cam a2200313 i 4500</leader>
174
+ <controlfield tag="001">field_ind</controlfield>
175
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
176
+ <datafield ind1="1" ind2=" " tag="700">
177
+ <subfield code="a">numeric indicators</subfield>
178
+ </datafield>
179
+ <datafield ind1="1" ind2=" " tag="700">
180
+ <subfield code="a">blank and punctuation indicators</subfield>
181
+ </datafield>
182
+ </record>'
183
+ indexer.instance_eval do
184
+ to_field 'f700ind1', field_ind('700', 1, false)
185
+ to_field 'f700ind2', field_ind('700', 2, false)
186
+ end
187
+ expect(indexer.map_record(parse_marc(marcxml))['f700ind1']).to eq ['1', '1']
188
+ expect(indexer.map_record(parse_marc(marcxml))['f700ind2']).to eq [' ', ' ']
189
+ end
190
+ end # dedup=false
191
+ end # field_ind
192
+
193
+ context 'field_codes' do
194
+ it 'single occurrence of single subfield in single tag' do
195
+ marcxml_str =
196
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
197
+ <leader>01052cam a2200313 i 4500</leader>
198
+ <controlfield tag="001">field_codes</controlfield>
199
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
200
+ <datafield ind1=" " ind2=" " tag="035">
201
+ <subfield code="a">(OCoLC-I)872526434</subfield>
202
+ </datafield>
203
+ </record>'
204
+ indexer.instance_eval do
205
+ to_field 'f_codes', field_codes('035')
206
+ end
207
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq ['a']
208
+ end
209
+ it 'single occurrence of multiple subfields in single tag' do
210
+ marcxml_str =
211
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
212
+ <leader>01052cam a2200313 i 4500</leader>
213
+ <controlfield tag="001">field_codes</controlfield>
214
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
215
+ <datafield ind1=" " ind2=" " tag="300">
216
+ <subfield code="a">2 videodiscs :</subfield>
217
+ <subfield code="b">sound, color ;</subfield>
218
+ <subfield code="c">4 3/4 in. +</subfield>
219
+ <subfield code="e">2 booklets (24 cm)</subfield>
220
+ </datafield>
221
+ </record>'
222
+ indexer.instance_eval do
223
+ to_field 'f_codes', field_codes('300')
224
+ end
225
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(a b c e)
226
+ end
227
+ it 'single occurrence of multiple subfields in multiple tags' do
228
+ marcxml_str =
229
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
230
+ <leader>01052cam a2200313 i 4500</leader>
231
+ <controlfield tag="001">field_codes</controlfield>
232
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
233
+ <datafield ind1=" " ind2="4" tag="666">
234
+ <subfield code="a">suba</subfield>
235
+ <subfield code="b">subb</subfield>
236
+ </datafield>
237
+ <datafield ind1=" " ind2="4" tag="666">
238
+ <subfield code="c">subc</subfield>
239
+ <subfield code="d">subd</subfield>
240
+ </datafield>
241
+ </record>'
242
+ indexer.instance_eval do
243
+ to_field 'f_codes', field_codes('666')
244
+ end
245
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(a b c d)
246
+ end
247
+ it 'multiple occurrences of single subfield in single tags' do
248
+ marcxml_str =
249
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
250
+ <leader>01052cam a2200313 i 4500</leader>
251
+ <controlfield tag="001">field_codes</controlfield>
252
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
253
+ <datafield tag="040" ind1=" " ind2=" ">
254
+ <subfield code="a">BTCTA</subfield>
255
+ <subfield code="c">BTCTA</subfield>
256
+ <subfield code="d">OHX</subfield>
257
+ <subfield code="d">YDXCP</subfield>
258
+ </datafield>
259
+ </record>'
260
+ indexer.instance_eval do
261
+ to_field 'f_codes', field_codes('040')
262
+ end
263
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(a c d)
264
+ end
265
+ it 'multiple occurrences of subfields in multiple tags' do
266
+ marcxml_str =
267
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
268
+ <leader>01052cam a2200313 i 4500</leader>
269
+ <controlfield tag="001">field_codes</controlfield>
270
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
271
+ <datafield ind1=" " ind2="0" tag="650">
272
+ <subfield code="a">Food habits</subfield>
273
+ <subfield code="z">China</subfield>
274
+ <subfield code="x">History.</subfield>
275
+ </datafield>
276
+ <datafield ind1=" " ind2="0" tag="650">
277
+ <subfield code="a">Cooking</subfield>
278
+ <subfield code="z">China</subfield>
279
+ <subfield code="x">History.</subfield>
280
+ </datafield>
281
+ </record>'
282
+ indexer.instance_eval do
283
+ to_field 'f_codes', field_codes('650')
284
+ end
285
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(a z x)
286
+ end
287
+ it 'numeric subfields included' do
288
+ marcxml_str =
289
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
290
+ <leader>01052cam a2200313 i 4500</leader>
291
+ <controlfield tag="001">field_codes</controlfield>
292
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
293
+ <datafield ind1="1" ind2="0" tag="245">
294
+ <subfield code="6">880-01</subfield>
295
+ <subfield code="a">suba</subfield>
296
+ <subfield code="b">subb</subfield>
297
+ <subfield code="c">subc</subfield>
298
+ </datafield>
299
+ </record>'
300
+ indexer.instance_eval do
301
+ to_field 'f_codes', field_codes('245')
302
+ end
303
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(6 a b c)
304
+ end
305
+ it 'non-alphanum values included' do
306
+ marcxml_str =
307
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
308
+ <leader>01052cam a2200313 i 4500</leader>
309
+ <controlfield tag="001">field_codes</controlfield>
310
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
311
+ <datafield ind1=" " ind2="0" tag="650">
312
+ <subfield code="a">Food habits</subfield>
313
+ <subfield code="z">China</subfield>
314
+ <subfield code="x">History.</subfield>
315
+ <subfield code="=">^A2383609</subfield>
316
+ </datafield>
317
+ <datafield ind1="0" ind2="2" tag="730">
318
+ <subfield code="i">Contains (work):</subfield>
319
+ <subfield code="a">Te doy mis ojos.</subfield>
320
+ <subfield code="?">UNAUTHORIZED</subfield>
321
+ </datafield>
322
+ </record>'
323
+ indexer.instance_eval do
324
+ to_field 'f650_codes', field_codes('650')
325
+ to_field 'f730_codes', field_codes('730')
326
+ end
327
+ output_hash = indexer.map_record(parse_marc(marcxml_str))
328
+ expect(output_hash['f650_codes']).to eq %w(a z x =)
329
+ expect(output_hash['f730_codes']).to eq %w(i a ?)
330
+ end
331
+ it 'no occurrences of tag: field not in output_hash' do
332
+ marcxml_str =
333
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
334
+ <leader>01052cam a2200313 i 4500</leader>
335
+ <controlfield tag="001">field_codes</controlfield>
336
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
337
+ </record>'
338
+ indexer.instance_eval do
339
+ to_field 'f_codes', field_codes('245')
340
+ end
341
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq nil
342
+ end
343
+ context 'dedup=false' do
344
+ it 'multiple occurrences of single subfield in single tags' do
345
+ marcxml_str =
346
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
347
+ <leader>01052cam a2200313 i 4500</leader>
348
+ <controlfield tag="001">field_codes</controlfield>
349
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
350
+ <datafield tag="040" ind1=" " ind2=" ">
351
+ <subfield code="a">BTCTA</subfield>
352
+ <subfield code="c">BTCTA</subfield>
353
+ <subfield code="d">OHX</subfield>
354
+ <subfield code="d">YDXCP</subfield>
355
+ <subfield code="d">DLC</subfield>
356
+ </datafield>
357
+ </record>'
358
+ indexer.instance_eval do
359
+ to_field 'f_codes', field_codes('040', false)
360
+ end
361
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(a c d d d)
362
+ end
363
+ it 'multiple occurrences of subfields in multiple tags' do
364
+ marcxml_str =
365
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
366
+ <leader>01052cam a2200313 i 4500</leader>
367
+ <controlfield tag="001">field_codes</controlfield>
368
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
369
+ <datafield tag="505" ind1=" " ind2=" ">
370
+ <subfield code="t">blah</subfield>
371
+ <subfield code="t">blah</subfield>
372
+ </datafield>
373
+ <datafield tag="505" ind1=" " ind2=" ">
374
+ <subfield code="t">blah</subfield>
375
+ <subfield code="t">blah</subfield>
376
+ </datafield>
377
+ </record>'
378
+ indexer.instance_eval do
379
+ to_field 'f_codes', field_codes('505', false)
380
+ end
381
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(t t t t)
382
+ end
383
+ end # dedup=false
384
+ end # field_codes
56
385
 
57
- # @param [String] marcxml_str an xml representation of a MARC record
58
- # @raise [Marc::Exception] if nil returned from MARC::XMLReader
59
- # @return [MARC::Record] parsed marc_record
60
- def parse_marc(marcxml_str)
61
- marc_record = MARC::XMLReader.new(StringIO.new(marcxml_str)).to_a.first
62
- fail(MARC::Exception, "unable to parse marc record: " + marcxml_str, caller) if marc_record.nil?
63
- marc_record
64
386
  end
@@ -1,3 +1,6 @@
1
+ require 'coveralls'
2
+ Coveralls.wear!
3
+
1
4
  require 'traject'
2
5
  require 'traject/profiling'
3
6
 
@@ -29,8 +32,8 @@ RSpec.configure do |config|
29
32
  config.disable_monkey_patching!
30
33
 
31
34
  # This setting enables warnings. It's recommended, but in some cases may
32
- # be too noisy due to issues in dependencies.
33
- # config.warnings = true
35
+ # be too noisy due to issues in dependencies.
36
+ # config.warnings = true
34
37
 
35
38
  # Many RSpec users commonly either run the entire suite or an individual
36
39
  # file, and it's useful to allow more verbose output when running an
@@ -45,7 +48,7 @@ RSpec.configure do |config|
45
48
  # Print the 10 slowest examples and example groups at the
46
49
  # end of the spec run, to help surface which specs are running
47
50
  # particularly slow.
48
- #config.profile_examples = 10
51
+ # config.profile_examples = 10
49
52
 
50
53
  # Run specs in random order to surface order dependencies. If you find an
51
54
  # order dependency and want to debug it, you can fix the order by providing
@@ -54,3 +57,12 @@ RSpec.configure do |config|
54
57
  config.order = :random
55
58
 
56
59
  end
60
+
61
+ # @param [String] marcxml_str an xml representation of a MARC record
62
+ # @raise [MARC::Exception] if nil returned from MARC::XMLReader
63
+ # @return [MARC::Record] parsed marc_record
64
+ def parse_marc(marcxml_str)
65
+ marc_record = MARC::XMLReader.new(StringIO.new(marcxml_str)).to_a.first
66
+ fail(MARC::Exception, 'unable to parse marc record: ' + marcxml_str, caller) if marc_record.nil?
67
+ marc_record
68
+ end
@@ -1,30 +1,33 @@
1
1
  # coding: utf-8
2
- lib = File.expand_path("../lib/", __FILE__)
2
+ lib = File.expand_path('../lib/', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  require 'traject/profiling/version'
6
6
 
7
7
  Gem::Specification.new do |spec|
8
- spec.name = "traject_profiling"
8
+ spec.name = 'traject_profiling'
9
9
  spec.version = Traject::Profiling::VERSION
10
- spec.authors = ["Naomi Dushay"]
11
- spec.email = ["ndushay@stanford.edu"]
12
- spec.summary = %q{Traject macros to provide profiling information on MARC bibliographic records.}
13
- spec.description = %q{Profiling macros for MARC bib records; meant to be used with traject to index MARC records into Solr.}
14
- spec.homepage = "https://github.com/sul-dlss/traject_profiling.git"
15
- spec.license = "Apache 2.0"
10
+ spec.authors = ['Naomi Dushay']
11
+ spec.email = ['ndushay@stanford.edu']
12
+ spec.summary = 'Traject macros to provide profiling information on MARC bibliographic records.'
13
+ spec.description = 'Profiling macros for MARC bib records; meant to be used with traject to index into Solr.'
14
+ spec.homepage = 'https://github.com/sul-dlss/traject_profiling.git'
15
+ spec.license = 'Apache-2.0'
16
16
 
17
17
  spec.files = `git ls-files -z`.split("\x0")
18
18
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
19
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
- spec.require_paths = ["lib", "lib/traject", "lib/traject/profiling"]
20
+ spec.require_paths = ['lib', 'lib/traject', 'lib/traject/profiling']
21
21
 
22
22
  spec.add_runtime_dependency 'traject'
23
23
 
24
- spec.add_development_dependency "bundler"
25
- spec.add_development_dependency "rake"
26
- spec.add_development_dependency "yard"
27
- spec.add_development_dependency "rspec"
28
- spec.add_development_dependency "pry"
29
- spec.add_development_dependency "pry-byebug"
24
+ spec.add_development_dependency 'bundler'
25
+ spec.add_development_dependency 'rake'
26
+ spec.add_development_dependency 'yard'
27
+ spec.add_development_dependency 'rspec'
28
+ spec.add_development_dependency 'pry' # interactive debugging gem
29
+ spec.add_development_dependency 'pry-byebug' # interactive debugging gem
30
+ spec.add_development_dependency 'coveralls'
31
+ spec.add_development_dependency 'rubocop'
32
+ spec.add_development_dependency 'rubocop-rspec'
30
33
  end