marcspec 0.8.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGES CHANGED
@@ -1,3 +1,8 @@
1
+ 1.0.0
2
+ * Added constant specs (:constantValue=>'val' or :constantValue=>['array', 'of', 'values'])
3
+ * Arbitrarily decided this is version 1.0
4
+ 0.9.0
5
+ * Added ability to benchmark by calling ss.doc_from_marc(r, true) instead of just ss.doc_from_marc(r)
1
6
  0.8.1
2
7
  * Added some specs, squashed some bugs. In particular, make sure the Range passed to a ControlField
3
8
  makes sense (x..y, where x<=y and x > 0)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.1
1
+ 1.0.0
@@ -0,0 +1,50 @@
1
+ require 'marcspec/solrfieldspec'
2
+
3
+
4
+ module MARCSpec
5
+ class ConstantSolrSpec < SolrFieldSpec
6
+ attr_accessor :constantValue
7
+
8
+ #attr_accessor :solrField, :first, :map, :noMapKeyDefault, :marcfieldspecs, :default, :arity
9
+
10
+ def initialize opts = {}
11
+ @solrField = opts[:solrField]
12
+ @constantValue = opts[:constantValue]
13
+ @arity = 1
14
+
15
+ # Check to make sure we didn't get any options that don't apply to a constant spec; raise an ArgumentError if we did
16
+ [:firstOnly, :mapname, :noMapKeyDefault, :specs, :default, :module, :functionSymbol].each do |s|
17
+ if opts[s]
18
+ raise ArgumentError, "#{s} is not a valid option for Constant spec (one with :constantValue defined)"
19
+ end
20
+ end
21
+ end
22
+
23
+ def marc_values r, doc = {}
24
+ return @constantValue
25
+ end
26
+
27
+ def == other
28
+ return @constantValue == other.constantValue
29
+ end
30
+
31
+ def self.fromHash h
32
+ return self.new(h)
33
+ end
34
+
35
+ def asPPString
36
+ s = StringIO.new
37
+ s.print "{\n :solrField=> "
38
+ PP.singleline_pp(@solrField, s)
39
+ s.print(",\n ")
40
+ s.print ":constantValue => "
41
+ PP.singleline_pp(@constantValue, s)
42
+ s.print "\n}"
43
+ return s.string
44
+ end
45
+
46
+ end
47
+ end
48
+
49
+
50
+
@@ -1,6 +1,6 @@
1
1
  require 'jruby_streaming_update_solr_server'
2
2
  require 'marc4j4r'
3
-
3
+ require 'benchmark'
4
4
 
5
5
 
6
6
  module MARCSpec
@@ -26,11 +26,12 @@ module MARCSpec
26
26
  end
27
27
 
28
28
  class SpecSet
29
- attr_accessor :tmaps, :solrfieldspecs
29
+ attr_accessor :tmaps, :solrfieldspecs, :benchmarks
30
30
 
31
31
  def initialize
32
32
  @tmaps = {}
33
33
  @solrfieldspecs = []
34
+ @benchmarks = {}
34
35
  end
35
36
 
36
37
  def map name
@@ -57,6 +58,8 @@ module MARCSpec
57
58
  speclist.each do |spechash|
58
59
  if spechash[:module]
59
60
  solrspec = MARCSpec::CustomSolrSpec.fromHash(spechash)
61
+ elsif spechash[:constantValue]
62
+ solrspec = MARCSpec::ConstantSolrSpec.fromHash(spechash)
60
63
  else
61
64
  solrspec = MARCSpec::SolrFieldSpec.fromHash(spechash)
62
65
  end
@@ -77,6 +80,7 @@ module MARCSpec
77
80
 
78
81
  def add_spec solrfieldspec
79
82
  self.solrfieldspecs << solrfieldspec
83
+ @benchmarks[solrfieldspec.solrField] = Benchmark::Tms.new(0,0,0,0, 0, solrfieldspec.solrField)
80
84
  end
81
85
 
82
86
  alias_method :<<, :add_spec
@@ -100,15 +104,39 @@ module MARCSpec
100
104
  end
101
105
  end
102
106
 
103
- def doc_from_marc r
107
+ def fill_hashlike_from_marc_benchmark r, hashlike
108
+ @solrfieldspecs.each do |sfs|
109
+ @benchmarks[sfs.solrField] += Benchmark.measure do
110
+ if sfs.arity == 1
111
+ hashlike.add(sfs.solrField,sfs.marc_values(r, hashlike))
112
+ else
113
+ vals = sfs.marc_values(r, hashlike)
114
+ (0..(sfs.arity - 1)).each do |i|
115
+ hashlike.add(sfs.solrField[i], vals[i])
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
121
+
122
+
123
+ def doc_from_marc r, timeit = false
104
124
  doc = SolrInputDocument.new
105
- fill_hashlike_from_marc r, doc
125
+ if timeit
126
+ fill_hashlike_from_marc_benchmark r, doc
127
+ else
128
+ fill_hashlike_from_marc r, doc
129
+ end
106
130
  return doc
107
131
  end
108
132
 
109
- def hash_from_marc r
133
+ def hash_from_marc r, timeit = false
110
134
  h = MARCSpec::MockSolrDoc.new
111
- fill_hashlike_from_marc r, h
135
+ if timeit
136
+ fill_hashlike_from_marc_benchmark r, h
137
+ else
138
+ fill_hashlike_from_marc r, h
139
+ end
112
140
  return h
113
141
  end
114
142
  end
data/lib/marcspec.rb CHANGED
@@ -6,6 +6,7 @@ $LOG.level = Logger::WARN
6
6
 
7
7
  require "marcspec/customspec"
8
8
  require "marcspec/solrfieldspec"
9
+ require "marcspec/constantspec"
9
10
  require "marcspec/kvmap"
10
11
  require "marcspec/multivaluemap"
11
12
  require "marcspec/specset"
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "Cachespot" do
22
+ before do
23
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
24
+ end
25
+
26
+ it "should set a value and get it back" do
27
+ @one.cachespot['one'] = 1
28
+ @one.cachespot['one'].should.equal 1
29
+ end
30
+ end
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "ControlFieldSpec" do
22
+
23
+ before do
24
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
25
+ # @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
26
+ end
27
+
28
+ # afc99990058366 # data
29
+ # 01234567890123 # index
30
+ it "gets a single full value" do
31
+ cfs = MARCSpec::ControlFieldSpec.new('001')
32
+ cfs.marc_values(@one).should.equal ["afc99990058366"]
33
+ end
34
+
35
+ it "gets a single character" do
36
+ cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
37
+ cfs.marc_values(@one).should.equal ['8']
38
+ end
39
+
40
+ it "gets a range of characters" do
41
+ cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
42
+ cfs.marc_values(@one).should.equal ['90058']
43
+ end
44
+
45
+ it "should round trip" do
46
+ cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
47
+ cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
48
+ cfs.should.equal cfs2
49
+ end
50
+ end
51
+
52
+
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "LeaderSpec" do
22
+ before do
23
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
24
+ end
25
+
26
+ it "Works with full leader" do
27
+ cfs = MARCSpec::LeaderSpec.new('LDR')
28
+ cfs.marc_values(@one).should.equal @one.leader
29
+ end
30
+
31
+ it "Works with substring of leader" do
32
+ cfs = MARCSpec::LeaderSpec.new('LDR', 3..5)
33
+ cfs.marc_values(@one).should.equal @one.leader[3..5]
34
+ end
35
+ end
@@ -213,4 +213,43 @@ describe "CustomSolrSpec" do
213
213
  end
214
214
 
215
215
  end
216
-
216
+
217
+ describe "ConstantSolrSpec" do
218
+ it "sets correct fields" do
219
+ c = MARCSpec::ConstantSolrSpec.new(:solrField=>"test", :constantValue=>"value")
220
+ c.solrField.should.equal 'test'
221
+ c.constantValue.should.equal 'value'
222
+ end
223
+
224
+ it "allows array of values" do
225
+ value = ['a', 'b', 'c']
226
+ c = MARCSpec::ConstantSolrSpec.new(:solrField=>"test", :constantValue=>value)
227
+ c.constantValue.should.equal value
228
+ end
229
+
230
+ bad = {
231
+ :firstOnly => true,
232
+ :default => 'default',
233
+ :noMapKeyDefault => 'nmd',
234
+ :mapname => 'map',
235
+ :specs => [],
236
+ :module => MARCSpec,
237
+ :functionSymbol => :test
238
+ }
239
+
240
+ bad.each do |k,v|
241
+ opts = {:solrField=>'test'}
242
+ opts[k] = v
243
+ it "raises ArgumentError if given invalid option #{k}" do
244
+ lambda{c = MARCSpec::ConstantSolrSpec.new(opts)}.should.raise ArgumentError
245
+ end
246
+ end
247
+
248
+ it "should round trip" do
249
+ c = MARCSpec::ConstantSolrSpec.new(:solrField=>"test", :constantValue=>"value")
250
+ s = StringIO.new
251
+ s.puts(c.asPPString)
252
+ d = MARCSpec::ConstantSolrSpec.fromPPString(s.string)
253
+ c.should.equal d
254
+ end
255
+ end
data/spec/specset_spec.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
+ positive = lambda{|x| x > 0}
4
+
3
5
  module A
4
6
  module B
5
7
  def self.titleUp doc, r, codes=nil
@@ -46,22 +48,43 @@ describe "SpecSet Basics" do
46
48
  ["651", "z"],
47
49
  ]
48
50
  },
49
- {:solrField=>'title', :specs=>[['245']]},
51
+ {
52
+ :solrField=>'title',
53
+ :specs=>[['245']]
54
+ },
50
55
  {
51
56
  :solrField => 'titleA',
52
57
  :specs => [['245', 'a']]
58
+ },
59
+ {
60
+ :solrField => 'constantField',
61
+ :constantValue => ['A', 'B']
53
62
  }
54
63
  ]
64
+
65
+ @ss = MARCSpec::SpecSet.new
66
+ @ss.buildSpecsFromList(@speclist)
67
+ @h = @ss.hash_from_marc @one
68
+
55
69
  end
56
70
 
57
- it "should build from a list" do
58
- ss = MARCSpec::SpecSet.new
59
- ss.buildSpecsFromList(@speclist)
60
- ss.solrfieldspecs.size.should.equal 3
61
- h = ss.hash_from_marc @one
62
- h['places'].sort.should.equal @places.sort
63
- h['title'].should.equal @title
64
- h['titleA'].should.equal @titleA
71
+ it "should get all the specs" do
72
+ @ss.solrfieldspecs.size.should.equal 4
73
+ end
74
+
75
+ it "gets the places field" do
76
+ @h['places'].sort.should.equal @places.sort
77
+ end
78
+
79
+ correct = {
80
+ 'title' => @title,
81
+ 'titleA' => @titleA,
82
+ 'constantField' => ['A', 'B']
83
+ }
84
+ correct.each do |k,v|
85
+ it "gets correct value for #{k}" do
86
+ @h[k].should.equal v
87
+ end
65
88
  end
66
89
 
67
90
  it "allows customs that reference previous work" do
@@ -96,7 +119,49 @@ describe "SpecSet Basics" do
96
119
  h['letters'].should.equal ['a', 'b']
97
120
  end
98
121
  end
99
-
122
+
123
+
124
+
125
+ describe "Specset Benchmarking" do
126
+ before do
127
+ @reader = MARC4J4R::Reader.new("#{DIR}/data/batch.dat")
128
+ @speclist = [
129
+ {
130
+ :solrField=>'title',
131
+ :specs=>[['245']]
132
+ },
133
+ {
134
+ :solrField=> "places",
135
+ :specs => [
136
+ ["260", "a"],
137
+ ["651", "a"],
138
+ ["651", "z"],
139
+ ]
140
+ },
141
+ {
142
+ :solrField => 'titleA',
143
+ :specs => [['245', 'a']]
144
+ }
145
+ ]
146
+
147
+ @ss = MARCSpec::SpecSet.new
148
+ @ss.buildSpecsFromList(@speclist)
149
+ end
150
+
151
+ it "should benchmark" do
152
+ @reader.each do |r|
153
+ h = @ss.hash_from_marc(r, true)
154
+ end
155
+ @ss.solrfieldspecs.each do |sfs|
156
+ @ss.benchmarks[sfs.solrField].real.should.be positive
157
+ end
158
+
159
+ # @ss.benchmarks.each do |k,v|
160
+ # puts "%-10s %s" % [k + ':', v.to_s]
161
+ # end
162
+ end
163
+ end
164
+
100
165
 
101
166
 
102
167
 
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "VariableFieldSpec" do
22
+ before do
23
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
24
+ # @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
25
+ end
26
+
27
+ it "Should get a whole field separated by spaces" do
28
+ dfs = MARCSpec::VariableFieldSpec.new('260')
29
+ dfs.marc_values(@one).should.equal ["Medina, Texas, 1939."]
30
+ end
31
+
32
+ it "Should get just the $a" do
33
+ dfs = MARCSpec::VariableFieldSpec.new('260', 'a')
34
+ dfs.marc_values(@one).should.equal ["Medina, Texas,"]
35
+ end
36
+
37
+ it "should return separate values for repeated subfields if only one code is specified" do
38
+ dfs = MARCSpec::VariableFieldSpec.new('651', 'z')
39
+ dfs.marc_values(@one).sort.should.equal ['Texas', 'United States of America.']
40
+ end
41
+
42
+ it "Should get all fields via several equal routes" do
43
+ a = MARCSpec::VariableFieldSpec.new('260').marc_values(@one)
44
+ ac = MARCSpec::VariableFieldSpec.new('260', ['a', 'c']).marc_values(@one)
45
+ ca = MARCSpec::VariableFieldSpec.new('260', ['c', 'a']).marc_values(@one)
46
+ ca2 = MARCSpec::VariableFieldSpec.new('260', 'ca').marc_values(@one)
47
+ allrange = MARCSpec::VariableFieldSpec.new('260', 'a'..'z').marc_values(@one)
48
+ a.should.equal ac
49
+ ac.should.equal ca
50
+ ca.should.equal allrange
51
+ end
52
+
53
+ it "should get all three 700a's" do
54
+ a = MARCSpec::VariableFieldSpec.new('700', 'a').marc_values(@one)
55
+ a.should.equal ["Lomax, John Avery, 1867-1948", "Lomax, Ruby T. (Ruby Terrill)", "Taylor, Beale D."]
56
+ end
57
+
58
+ it "should round trip" do
59
+ ac = MARCSpec::VariableFieldSpec.new('260', ['a', 'c'])
60
+ ac2 = MARCSpec::VariableFieldSpec.fromPPString(ac.asPPString)
61
+ ac.should.equal ac2
62
+ end
63
+
64
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marcspec
3
3
  version: !ruby/object:Gem::Version
4
- hash: 61
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
- - 0
8
- - 8
9
7
  - 1
10
- version: 0.8.1
8
+ - 0
9
+ - 0
10
+ version: 1.0.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - BillDueber
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-04 00:00:00 -04:00
18
+ date: 2010-09-14 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -96,6 +96,7 @@ files:
96
96
  - Rakefile
97
97
  - VERSION
98
98
  - lib/marcspec.rb
99
+ - lib/marcspec/constantspec.rb
99
100
  - lib/marcspec/controlfieldspec.rb
100
101
  - lib/marcspec/customspec.rb
101
102
  - lib/marcspec/kvmap.rb
@@ -106,6 +107,8 @@ files:
106
107
  - lib/marcspec/solrfieldspec.rb
107
108
  - lib/marcspec/specset.rb
108
109
  - lib/marcspec/variablefieldspec.rb
110
+ - spec/cachespot_spec.rb
111
+ - spec/controlfieldspec_spec.rb
109
112
  - spec/data/batch.dat
110
113
  - spec/data/one.dat
111
114
  - spec/data/umich/translation_maps/area_map.properties
@@ -122,11 +125,13 @@ files:
122
125
  - spec/data/umich/translation_maps/library_map.properties
123
126
  - spec/data/umich/translation_maps/location_map.properties
124
127
  - spec/data/umich/umich_index.properties
128
+ - spec/leaderspec_spec.rb
125
129
  - spec/maps_spec.rb
126
130
  - spec/marcfieldspecs_spec.rb
127
131
  - spec/solrfieldspec_spec.rb
128
132
  - spec/spec_helper.rb
129
133
  - spec/specset_spec.rb
134
+ - spec/variablefieldspec_spec.rb
130
135
  has_rdoc: true
131
136
  homepage: http://github.com/billdueber/marcspec
132
137
  licenses: []
@@ -162,8 +167,12 @@ signing_key:
162
167
  specification_version: 3
163
168
  summary: Extract data from MARC records and send to Solr
164
169
  test_files:
170
+ - spec/cachespot_spec.rb
171
+ - spec/controlfieldspec_spec.rb
172
+ - spec/leaderspec_spec.rb
165
173
  - spec/maps_spec.rb
166
174
  - spec/marcfieldspecs_spec.rb
167
175
  - spec/solrfieldspec_spec.rb
168
176
  - spec/spec_helper.rb
169
177
  - spec/specset_spec.rb
178
+ - spec/variablefieldspec_spec.rb