traject_profiling 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,20 +1,19 @@
1
-
2
1
  RSpec.describe 'field_macros' do
3
2
 
4
- let!(:indexer) {
3
+ let!(:indexer) do
5
4
  i = Traject::Indexer.new
6
5
  i.instance_eval do
7
6
  extend Traject::Profiling::Macros
8
7
  end
9
8
  i
10
- }
9
+ end # let! indexer
11
10
 
12
- context "field_count" do
13
- let!(:record) {
11
+ context 'field_count' do
12
+ let!(:record) do
14
13
  marcxml_str =
15
14
  '<record xmlns="http://www.loc.gov/MARC21/slim">
16
15
  <leader>01052cam a2200313 i 4500</leader>
17
- <controlfield tag="001">245a</controlfield>
16
+ <controlfield tag="001">field_count</controlfield>
18
17
  <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
19
18
  <datafield ind1="1" ind2="0" tag="245">
20
19
  <subfield code="a">Slippery noodles</subfield>
@@ -27,38 +26,361 @@ RSpec.describe 'field_macros' do
27
26
  </datafield>
28
27
  </record>'
29
28
  parse_marc(marcxml_str)
30
- }
29
+ end # let! record
31
30
 
32
31
  it 'single occurrence of tag' do
33
32
  indexer.instance_eval do
34
- to_field '245count', field_count('245')
33
+ to_field 'f245count', field_count('245')
35
34
  end
36
- output = indexer.map_record(record)
37
- expect(output['245count']).to eq ['1']
35
+ expect(indexer.map_record(record)['f245count']).to eq ['1']
38
36
  end
39
37
  it 'mult occurrences of tag' do
40
38
  indexer.instance_eval do
41
- to_field '700count', field_count('700')
39
+ to_field 'f700count', field_count('700')
42
40
  end
43
- output = indexer.map_record(record)
44
- expect(output['700count']).to eq ['2']
41
+ expect(indexer.map_record(record)['f700count']).to eq ['2']
45
42
  end
46
43
  it 'no occurrences of tag: field not in output_hash' do
47
44
  indexer.instance_eval do
48
- to_field '100count', field_count('100')
45
+ to_field 'f100count', field_count('100')
49
46
  end
50
- output = indexer.map_record(record)
51
- expect(output['100count']).to eq nil
47
+ expect(indexer.map_record(record)['f100count']).to eq nil
52
48
  end
53
49
  end # field_count
54
50
 
55
- end
51
+ context 'field_ind' do
52
+ let!(:record) do
53
+ marcxml_str =
54
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
55
+ <leader>01052cam a2200313 i 4500</leader>
56
+ <controlfield tag="001">field_ind</controlfield>
57
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
58
+ <datafield ind1="1" ind2="2" tag="100">
59
+ <subfield code="a">numeric indicators</subfield>
60
+ </datafield>
61
+ <datafield ind1=" " ind2="_" tag="700">
62
+ <subfield code="a">blank and punctuation indicators</subfield>
63
+ </datafield>
64
+ <datafield ind1="a" ind2="b" tag="800" >
65
+ <subfield code="a">alpha indicators</subfield>
66
+ </datafield>
67
+ </record>'
68
+ parse_marc(marcxml_str)
69
+ end # let! record
70
+
71
+ it 'uses first indicator when second param is 1 (string)' do
72
+ indexer.instance_eval do
73
+ to_field 'f100ind1', field_ind('100', '1')
74
+ end
75
+ expect(indexer.map_record(record)['f100ind1']).to eq ['1']
76
+ end
77
+ it 'uses first indicator when second param is 1 (int)' do
78
+ indexer.instance_eval do
79
+ to_field 'f100ind1', field_ind('100', 1)
80
+ end
81
+ expect(indexer.map_record(record)['f100ind1']).to eq ['1']
82
+ end
83
+ it 'uses second indicator when second param is 2 (string)' do
84
+ indexer.instance_eval do
85
+ to_field 'f100ind2', field_ind('100', '2')
86
+ end
87
+ expect(indexer.map_record(record)['f100ind2']).to eq ['2']
88
+ end
89
+ it 'uses second indicator when second param is 2 (int)' do
90
+ indexer.instance_eval do
91
+ to_field 'f100ind2', field_ind('100', 2)
92
+ end
93
+ expect(indexer.map_record(record)['f100ind2']).to eq ['2']
94
+ end
95
+ it 'returns nil (field not in output_hash) when second param is not 1 or 2' do
96
+ indexer.instance_eval do
97
+ to_field 'f100ind_3', field_ind('100', '3')
98
+ to_field 'f100ind_first', field_ind('100', 'first')
99
+ to_field 'f100ind_a', field_ind('100', 'a')
100
+ end
101
+ output_hash = indexer.map_record(record)
102
+ expect(output_hash['f100ind_3']).to eq nil
103
+ expect(output_hash['f100ind_first']).to eq nil
104
+ expect(output_hash['f100ind_a']).to eq nil
105
+ end
106
+ it 'single instance of tag returns single char value' do
107
+ indexer.instance_eval do
108
+ to_field 'f100ind1', field_ind('100', 1)
109
+ end
110
+ expect(indexer.map_record(record)['f100ind1']).to eq ['1']
111
+ end
112
+ it 'multiple instances of tag all with same ind value returns unrepeated char value' do
113
+ marcxml =
114
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
115
+ <leader>01052cam a2200313 i 4500</leader>
116
+ <controlfield tag="001">field_ind</controlfield>
117
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
118
+ <datafield ind1="1" ind2=" " tag="700">
119
+ <subfield code="a">numeric indicators</subfield>
120
+ </datafield>
121
+ <datafield ind1="1" ind2=" " tag="700">
122
+ <subfield code="a">blank and punctuation indicators</subfield>
123
+ </datafield>
124
+ </record>'
125
+ indexer.instance_eval do
126
+ to_field 'f700ind1', field_ind('700', 1)
127
+ end
128
+ expect(indexer.map_record(parse_marc(marcxml))['f700ind1']).to eq ['1']
129
+ end
130
+ it 'each char used in indicator is a separate value' do
131
+ marcxml =
132
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
133
+ <leader>01052cam a2200313 i 4500</leader>
134
+ <controlfield tag="001">field_ind</controlfield>
135
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
136
+ <datafield ind1="1" ind2=" " tag="700">
137
+ <subfield code="a">numeric indicators</subfield>
138
+ </datafield>
139
+ <datafield ind1="2" ind2=" " tag="700">
140
+ <subfield code="a">blank and punctuation indicators</subfield>
141
+ </datafield>
142
+ <datafield ind1="3" ind2=" " tag="700">
143
+ <subfield code="a">blank and punctuation indicators</subfield>
144
+ </datafield>
145
+ </record>'
146
+ indexer.instance_eval do
147
+ to_field 'f700ind1', field_ind('700', 1)
148
+ end
149
+ expect(indexer.map_record(parse_marc(marcxml))['f700ind1']).to eq %w(1 2 3)
150
+ end
151
+ it 'blank value included' do
152
+ indexer.instance_eval do
153
+ to_field 'f700ind1', field_ind('700', 1)
154
+ end
155
+ expect(indexer.map_record(record)['f700ind1']).to eq [' ']
156
+ end
157
+ it 'non-alphanum values included' do
158
+ indexer.instance_eval do
159
+ to_field 'f700ind2', field_ind('700', 2)
160
+ end
161
+ expect(indexer.map_record(record)['f700ind2']).to eq ['_']
162
+ end
163
+ it 'no occurrences of tag: field not in output_hash' do
164
+ indexer.instance_eval do
165
+ to_field 'f245ind1', field_ind('245', 1)
166
+ end
167
+ expect(indexer.map_record(record)['f245ind1']).to eq nil
168
+ end
169
+ context 'dedup=false' do
170
+ it 'multiple occurrences of single value' do
171
+ marcxml =
172
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
173
+ <leader>01052cam a2200313 i 4500</leader>
174
+ <controlfield tag="001">field_ind</controlfield>
175
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
176
+ <datafield ind1="1" ind2=" " tag="700">
177
+ <subfield code="a">numeric indicators</subfield>
178
+ </datafield>
179
+ <datafield ind1="1" ind2=" " tag="700">
180
+ <subfield code="a">blank and punctuation indicators</subfield>
181
+ </datafield>
182
+ </record>'
183
+ indexer.instance_eval do
184
+ to_field 'f700ind1', field_ind('700', 1, false)
185
+ to_field 'f700ind2', field_ind('700', 2, false)
186
+ end
187
+ expect(indexer.map_record(parse_marc(marcxml))['f700ind1']).to eq ['1', '1']
188
+ expect(indexer.map_record(parse_marc(marcxml))['f700ind2']).to eq [' ', ' ']
189
+ end
190
+ end # dedup=false
191
+ end # field_ind
192
+
193
+ context 'field_codes' do
194
+ it 'single occurrence of single subfield in single tag' do
195
+ marcxml_str =
196
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
197
+ <leader>01052cam a2200313 i 4500</leader>
198
+ <controlfield tag="001">field_codes</controlfield>
199
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
200
+ <datafield ind1=" " ind2=" " tag="035">
201
+ <subfield code="a">(OCoLC-I)872526434</subfield>
202
+ </datafield>
203
+ </record>'
204
+ indexer.instance_eval do
205
+ to_field 'f_codes', field_codes('035')
206
+ end
207
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq ['a']
208
+ end
209
+ it 'single occurrence of multiple subfields in single tag' do
210
+ marcxml_str =
211
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
212
+ <leader>01052cam a2200313 i 4500</leader>
213
+ <controlfield tag="001">field_codes</controlfield>
214
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
215
+ <datafield ind1=" " ind2=" " tag="300">
216
+ <subfield code="a">2 videodiscs :</subfield>
217
+ <subfield code="b">sound, color ;</subfield>
218
+ <subfield code="c">4 3/4 in. +</subfield>
219
+ <subfield code="e">2 booklets (24 cm)</subfield>
220
+ </datafield>
221
+ </record>'
222
+ indexer.instance_eval do
223
+ to_field 'f_codes', field_codes('300')
224
+ end
225
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(a b c e)
226
+ end
227
+ it 'single occurrence of multiple subfields in multiple tags' do
228
+ marcxml_str =
229
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
230
+ <leader>01052cam a2200313 i 4500</leader>
231
+ <controlfield tag="001">field_codes</controlfield>
232
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
233
+ <datafield ind1=" " ind2="4" tag="666">
234
+ <subfield code="a">suba</subfield>
235
+ <subfield code="b">subb</subfield>
236
+ </datafield>
237
+ <datafield ind1=" " ind2="4" tag="666">
238
+ <subfield code="c">subc</subfield>
239
+ <subfield code="d">subd</subfield>
240
+ </datafield>
241
+ </record>'
242
+ indexer.instance_eval do
243
+ to_field 'f_codes', field_codes('666')
244
+ end
245
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(a b c d)
246
+ end
247
+ it 'multiple occurrences of single subfield in single tags' do
248
+ marcxml_str =
249
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
250
+ <leader>01052cam a2200313 i 4500</leader>
251
+ <controlfield tag="001">field_codes</controlfield>
252
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
253
+ <datafield tag="040" ind1=" " ind2=" ">
254
+ <subfield code="a">BTCTA</subfield>
255
+ <subfield code="c">BTCTA</subfield>
256
+ <subfield code="d">OHX</subfield>
257
+ <subfield code="d">YDXCP</subfield>
258
+ </datafield>
259
+ </record>'
260
+ indexer.instance_eval do
261
+ to_field 'f_codes', field_codes('040')
262
+ end
263
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(a c d)
264
+ end
265
+ it 'multiple occurrences of subfields in multiple tags' do
266
+ marcxml_str =
267
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
268
+ <leader>01052cam a2200313 i 4500</leader>
269
+ <controlfield tag="001">field_codes</controlfield>
270
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
271
+ <datafield ind1=" " ind2="0" tag="650">
272
+ <subfield code="a">Food habits</subfield>
273
+ <subfield code="z">China</subfield>
274
+ <subfield code="x">History.</subfield>
275
+ </datafield>
276
+ <datafield ind1=" " ind2="0" tag="650">
277
+ <subfield code="a">Cooking</subfield>
278
+ <subfield code="z">China</subfield>
279
+ <subfield code="x">History.</subfield>
280
+ </datafield>
281
+ </record>'
282
+ indexer.instance_eval do
283
+ to_field 'f_codes', field_codes('650')
284
+ end
285
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(a z x)
286
+ end
287
+ it 'numeric subfields included' do
288
+ marcxml_str =
289
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
290
+ <leader>01052cam a2200313 i 4500</leader>
291
+ <controlfield tag="001">field_codes</controlfield>
292
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
293
+ <datafield ind1="1" ind2="0" tag="245">
294
+ <subfield code="6">880-01</subfield>
295
+ <subfield code="a">suba</subfield>
296
+ <subfield code="b">subb</subfield>
297
+ <subfield code="c">subc</subfield>
298
+ </datafield>
299
+ </record>'
300
+ indexer.instance_eval do
301
+ to_field 'f_codes', field_codes('245')
302
+ end
303
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(6 a b c)
304
+ end
305
+ it 'non-alphanum values included' do
306
+ marcxml_str =
307
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
308
+ <leader>01052cam a2200313 i 4500</leader>
309
+ <controlfield tag="001">field_codes</controlfield>
310
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
311
+ <datafield ind1=" " ind2="0" tag="650">
312
+ <subfield code="a">Food habits</subfield>
313
+ <subfield code="z">China</subfield>
314
+ <subfield code="x">History.</subfield>
315
+ <subfield code="=">^A2383609</subfield>
316
+ </datafield>
317
+ <datafield ind1="0" ind2="2" tag="730">
318
+ <subfield code="i">Contains (work):</subfield>
319
+ <subfield code="a">Te doy mis ojos.</subfield>
320
+ <subfield code="?">UNAUTHORIZED</subfield>
321
+ </datafield>
322
+ </record>'
323
+ indexer.instance_eval do
324
+ to_field 'f650_codes', field_codes('650')
325
+ to_field 'f730_codes', field_codes('730')
326
+ end
327
+ output_hash = indexer.map_record(parse_marc(marcxml_str))
328
+ expect(output_hash['f650_codes']).to eq %w(a z x =)
329
+ expect(output_hash['f730_codes']).to eq %w(i a ?)
330
+ end
331
+ it 'no occurrences of tag: field not in output_hash' do
332
+ marcxml_str =
333
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
334
+ <leader>01052cam a2200313 i 4500</leader>
335
+ <controlfield tag="001">field_codes</controlfield>
336
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
337
+ </record>'
338
+ indexer.instance_eval do
339
+ to_field 'f_codes', field_codes('245')
340
+ end
341
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq nil
342
+ end
343
+ context 'dedup=false' do
344
+ it 'multiple occurrences of single subfield in single tags' do
345
+ marcxml_str =
346
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
347
+ <leader>01052cam a2200313 i 4500</leader>
348
+ <controlfield tag="001">field_codes</controlfield>
349
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
350
+ <datafield tag="040" ind1=" " ind2=" ">
351
+ <subfield code="a">BTCTA</subfield>
352
+ <subfield code="c">BTCTA</subfield>
353
+ <subfield code="d">OHX</subfield>
354
+ <subfield code="d">YDXCP</subfield>
355
+ <subfield code="d">DLC</subfield>
356
+ </datafield>
357
+ </record>'
358
+ indexer.instance_eval do
359
+ to_field 'f_codes', field_codes('040', false)
360
+ end
361
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(a c d d d)
362
+ end
363
+ it 'multiple occurrences of subfields in multiple tags' do
364
+ marcxml_str =
365
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
366
+ <leader>01052cam a2200313 i 4500</leader>
367
+ <controlfield tag="001">field_codes</controlfield>
368
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
369
+ <datafield tag="505" ind1=" " ind2=" ">
370
+ <subfield code="t">blah</subfield>
371
+ <subfield code="t">blah</subfield>
372
+ </datafield>
373
+ <datafield tag="505" ind1=" " ind2=" ">
374
+ <subfield code="t">blah</subfield>
375
+ <subfield code="t">blah</subfield>
376
+ </datafield>
377
+ </record>'
378
+ indexer.instance_eval do
379
+ to_field 'f_codes', field_codes('505', false)
380
+ end
381
+ expect(indexer.map_record(parse_marc(marcxml_str))['f_codes']).to eq %w(t t t t)
382
+ end
383
+ end # dedup=false
384
+ end # field_codes
56
385
 
57
- # @param [String] marcxml_str an xml representation of a MARC record
58
- # @raise [Marc::Exception] if nil returned from MARC::XMLReader
59
- # @return [MARC::Record] parsed marc_record
60
- def parse_marc(marcxml_str)
61
- marc_record = MARC::XMLReader.new(StringIO.new(marcxml_str)).to_a.first
62
- fail(MARC::Exception, "unable to parse marc record: " + marcxml_str, caller) if marc_record.nil?
63
- marc_record
64
386
  end
@@ -1,3 +1,6 @@
1
+ require 'coveralls'
2
+ Coveralls.wear!
3
+
1
4
  require 'traject'
2
5
  require 'traject/profiling'
3
6
 
@@ -29,8 +32,8 @@ RSpec.configure do |config|
29
32
  config.disable_monkey_patching!
30
33
 
31
34
  # This setting enables warnings. It's recommended, but in some cases may
32
- # be too noisy due to issues in dependencies.
33
- # config.warnings = true
35
+ # be too noisy due to issues in dependencies.
36
+ # config.warnings = true
34
37
 
35
38
  # Many RSpec users commonly either run the entire suite or an individual
36
39
  # file, and it's useful to allow more verbose output when running an
@@ -45,7 +48,7 @@ RSpec.configure do |config|
45
48
  # Print the 10 slowest examples and example groups at the
46
49
  # end of the spec run, to help surface which specs are running
47
50
  # particularly slow.
48
- #config.profile_examples = 10
51
+ # config.profile_examples = 10
49
52
 
50
53
  # Run specs in random order to surface order dependencies. If you find an
51
54
  # order dependency and want to debug it, you can fix the order by providing
@@ -54,3 +57,12 @@ RSpec.configure do |config|
54
57
  config.order = :random
55
58
 
56
59
  end
60
+
61
+ # @param [String] marcxml_str an xml representation of a MARC record
62
+ # @raise [MARC::Exception] if nil returned from MARC::XMLReader
63
+ # @return [MARC::Record] parsed marc_record
64
+ def parse_marc(marcxml_str)
65
+ marc_record = MARC::XMLReader.new(StringIO.new(marcxml_str)).to_a.first
66
+ fail(MARC::Exception, 'unable to parse marc record: ' + marcxml_str, caller) if marc_record.nil?
67
+ marc_record
68
+ end
@@ -1,30 +1,33 @@
1
1
  # coding: utf-8
2
- lib = File.expand_path("../lib/", __FILE__)
2
+ lib = File.expand_path('../lib/', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  require 'traject/profiling/version'
6
6
 
7
7
  Gem::Specification.new do |spec|
8
- spec.name = "traject_profiling"
8
+ spec.name = 'traject_profiling'
9
9
  spec.version = Traject::Profiling::VERSION
10
- spec.authors = ["Naomi Dushay"]
11
- spec.email = ["ndushay@stanford.edu"]
12
- spec.summary = %q{Traject macros to provide profiling information on MARC bibliographic records.}
13
- spec.description = %q{Profiling macros for MARC bib records; meant to be used with traject to index MARC records into Solr.}
14
- spec.homepage = "https://github.com/sul-dlss/traject_profiling.git"
15
- spec.license = "Apache 2.0"
10
+ spec.authors = ['Naomi Dushay']
11
+ spec.email = ['ndushay@stanford.edu']
12
+ spec.summary = 'Traject macros to provide profiling information on MARC bibliographic records.'
13
+ spec.description = 'Profiling macros for MARC bib records; meant to be used with traject to index into Solr.'
14
+ spec.homepage = 'https://github.com/sul-dlss/traject_profiling.git'
15
+ spec.license = 'Apache-2.0'
16
16
 
17
17
  spec.files = `git ls-files -z`.split("\x0")
18
18
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
19
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
- spec.require_paths = ["lib", "lib/traject", "lib/traject/profiling"]
20
+ spec.require_paths = ['lib', 'lib/traject', 'lib/traject/profiling']
21
21
 
22
22
  spec.add_runtime_dependency 'traject'
23
23
 
24
- spec.add_development_dependency "bundler"
25
- spec.add_development_dependency "rake"
26
- spec.add_development_dependency "yard"
27
- spec.add_development_dependency "rspec"
28
- spec.add_development_dependency "pry"
29
- spec.add_development_dependency "pry-byebug"
24
+ spec.add_development_dependency 'bundler'
25
+ spec.add_development_dependency 'rake'
26
+ spec.add_development_dependency 'yard'
27
+ spec.add_development_dependency 'rspec'
28
+ spec.add_development_dependency 'pry' # interactive debugging gem
29
+ spec.add_development_dependency 'pry-byebug' # interactive debugging gem
30
+ spec.add_development_dependency 'coveralls'
31
+ spec.add_development_dependency 'rubocop'
32
+ spec.add_development_dependency 'rubocop-rspec'
30
33
  end