marcspec 0.8.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGES CHANGED
@@ -1,3 +1,8 @@
1
+ 1.0.0
2
+ * Added constant specs (:constantValue=>'val' or :constantValue=>['array', 'of', 'values'])
3
+ * Arbitrarily decided this is version 1.0
4
+ 0.9.0
5
+ * Added ability to benchmark by calling ss.doc_from_marc(r, true) instead of just ss.doc_from_marc(r)
1
6
  0.8.1
2
7
  * Added some specs, squashed some bugs. In particular, make sure the Range passed to a ControlField
3
8
  makes sense (x..y, where x<=y and x > 0)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.1
1
+ 1.0.0
@@ -0,0 +1,50 @@
1
+ require 'marcspec/solrfieldspec'
2
+
3
+
4
+ module MARCSpec
5
+ class ConstantSolrSpec < SolrFieldSpec
6
+ attr_accessor :constantValue
7
+
8
+ #attr_accessor :solrField, :first, :map, :noMapKeyDefault, :marcfieldspecs, :default, :arity
9
+
10
+ def initialize opts = {}
11
+ @solrField = opts[:solrField]
12
+ @constantValue = opts[:constantValue]
13
+ @arity = 1
14
+
15
+ # Check to make sure we didn't get any options that don't apply to a constant spec, and raise ArgumentError if we did
16
+ [:firstOnly, :mapname, :noMapKeyDefault, :specs, :default, :module, :functionSymbol].each do |s|
17
+ if opts[s]
18
+ raise ArgumentError, "#{s} is not a valid option for Constant spec (one with :constantValue defined)"
19
+ end
20
+ end
21
+ end
22
+
23
+ def marc_values r, doc = {}
24
+ return @constantValue
25
+ end
26
+
27
+ def == other
28
+ return @constantValue == other.constantValue
29
+ end
30
+
31
+ def self.fromHash h
32
+ return self.new(h)
33
+ end
34
+
35
+ def asPPString
36
+ s = StringIO.new
37
+ s.print "{\n :solrField=> "
38
+ PP.singleline_pp(@solrField, s)
39
+ s.print(",\n ")
40
+ s.print ":constantValue => "
41
+ PP.singleline_pp(@constantValue, s)
42
+ s.print "\n}"
43
+ return s.string
44
+ end
45
+
46
+ end
47
+ end
48
+
49
+
50
+
@@ -1,6 +1,6 @@
1
1
  require 'jruby_streaming_update_solr_server'
2
2
  require 'marc4j4r'
3
-
3
+ require 'benchmark'
4
4
 
5
5
 
6
6
  module MARCSpec
@@ -26,11 +26,12 @@ module MARCSpec
26
26
  end
27
27
 
28
28
  class SpecSet
29
- attr_accessor :tmaps, :solrfieldspecs
29
+ attr_accessor :tmaps, :solrfieldspecs, :benchmarks
30
30
 
31
31
  def initialize
32
32
  @tmaps = {}
33
33
  @solrfieldspecs = []
34
+ @benchmarks = {}
34
35
  end
35
36
 
36
37
  def map name
@@ -57,6 +58,8 @@ module MARCSpec
57
58
  speclist.each do |spechash|
58
59
  if spechash[:module]
59
60
  solrspec = MARCSpec::CustomSolrSpec.fromHash(spechash)
61
+ elsif spechash[:constantValue]
62
+ solrspec = MARCSpec::ConstantSolrSpec.fromHash(spechash)
60
63
  else
61
64
  solrspec = MARCSpec::SolrFieldSpec.fromHash(spechash)
62
65
  end
@@ -77,6 +80,7 @@ module MARCSpec
77
80
 
78
81
  def add_spec solrfieldspec
79
82
  self.solrfieldspecs << solrfieldspec
83
+ @benchmarks[solrfieldspec.solrField] = Benchmark::Tms.new(0,0,0,0, 0, solrfieldspec.solrField)
80
84
  end
81
85
 
82
86
  alias_method :<<, :add_spec
@@ -100,15 +104,39 @@ module MARCSpec
100
104
  end
101
105
  end
102
106
 
103
- def doc_from_marc r
107
+ def fill_hashlike_from_marc_benchmark r, hashlike
108
+ @solrfieldspecs.each do |sfs|
109
+ @benchmarks[sfs.solrField] += Benchmark.measure do
110
+ if sfs.arity == 1
111
+ hashlike.add(sfs.solrField,sfs.marc_values(r, hashlike))
112
+ else
113
+ vals = sfs.marc_values(r, hashlike)
114
+ (0..(sfs.arity - 1)).each do |i|
115
+ hashlike.add(sfs.solrField[i], vals[i])
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
121
+
122
+
123
+ def doc_from_marc r, timeit = false
104
124
  doc = SolrInputDocument.new
105
- fill_hashlike_from_marc r, doc
125
+ if timeit
126
+ fill_hashlike_from_marc_benchmark r, doc
127
+ else
128
+ fill_hashlike_from_marc r, doc
129
+ end
106
130
  return doc
107
131
  end
108
132
 
109
- def hash_from_marc r
133
+ def hash_from_marc r, timeit = false
110
134
  h = MARCSpec::MockSolrDoc.new
111
- fill_hashlike_from_marc r, h
135
+ if timeit
136
+ fill_hashlike_from_marc_benchmark r, h
137
+ else
138
+ fill_hashlike_from_marc r, h
139
+ end
112
140
  return h
113
141
  end
114
142
  end
data/lib/marcspec.rb CHANGED
@@ -6,6 +6,7 @@ $LOG.level = Logger::WARN
6
6
 
7
7
  require "marcspec/customspec"
8
8
  require "marcspec/solrfieldspec"
9
+ require "marcspec/constantspec"
9
10
  require "marcspec/kvmap"
10
11
  require "marcspec/multivaluemap"
11
12
  require "marcspec/specset"
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "Cachespot" do
22
+ before do
23
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
24
+ end
25
+
26
+ it "should set a value and get it back" do
27
+ @one.cachespot['one'] = 1
28
+ @one.cachespot['one'].should.equal 1
29
+ end
30
+ end
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "ControlFieldSpec" do
22
+
23
+ before do
24
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
25
+ # @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
26
+ end
27
+
28
+ # afc99990058366 # data
29
+ # 01234567890123 # index
30
+ it "gets a single full value" do
31
+ cfs = MARCSpec::ControlFieldSpec.new('001')
32
+ cfs.marc_values(@one).should.equal ["afc99990058366"]
33
+ end
34
+
35
+ it "gets a single character" do
36
+ cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
37
+ cfs.marc_values(@one).should.equal ['8']
38
+ end
39
+
40
+ it "gets a range of characters" do
41
+ cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
42
+ cfs.marc_values(@one).should.equal ['90058']
43
+ end
44
+
45
+ it "should round trip" do
46
+ cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
47
+ cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
48
+ cfs.should.equal cfs2
49
+ end
50
+ end
51
+
52
+
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "LeaderSpec" do
22
+ before do
23
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
24
+ end
25
+
26
+ it "Works with full leader" do
27
+ cfs = MARCSpec::LeaderSpec.new('LDR')
28
+ cfs.marc_values(@one).should.equal @one.leader
29
+ end
30
+
31
+ it "Works with substring of leader" do
32
+ cfs = MARCSpec::LeaderSpec.new('LDR', 3..5)
33
+ cfs.marc_values(@one).should.equal @one.leader[3..5]
34
+ end
35
+ end
@@ -213,4 +213,43 @@ describe "CustomSolrSpec" do
213
213
  end
214
214
 
215
215
  end
216
-
216
+
217
+ describe "ConstantSolrSpec" do
218
+ it "sets correct fields" do
219
+ c = MARCSpec::ConstantSolrSpec.new(:solrField=>"test", :constantValue=>"value")
220
+ c.solrField.should.equal 'test'
221
+ c.constantValue.should.equal 'value'
222
+ end
223
+
224
+ it "allows array of values" do
225
+ value = ['a', 'b', 'c']
226
+ c = MARCSpec::ConstantSolrSpec.new(:solrField=>"test", :constantValue=>value)
227
+ c.constantValue.should.equal value
228
+ end
229
+
230
+ bad = {
231
+ :firstOnly => true,
232
+ :default => 'default',
233
+ :noMapKeyDefault => 'nmd',
234
+ :mapname => 'map',
235
+ :specs => [],
236
+ :module => MARCSpec,
237
+ :functionSymbol => :test
238
+ }
239
+
240
+ bad.each do |k,v|
241
+ opts = {:solrField=>'test'}
242
+ opts[k] = v
243
+ it "raises ArgumentError if given invalid option #{k}" do
244
+ lambda{c = MARCSpec::ConstantSolrSpec.new(opts)}.should.raise ArgumentError
245
+ end
246
+ end
247
+
248
+ it "should round trip" do
249
+ c = MARCSpec::ConstantSolrSpec.new(:solrField=>"test", :constantValue=>"value")
250
+ s = StringIO.new
251
+ s.puts(c.asPPString)
252
+ d = MARCSpec::ConstantSolrSpec.fromPPString(s.string)
253
+ c.should.equal d
254
+ end
255
+ end
data/spec/specset_spec.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
+ positive = lambda{|x| x > 0}
4
+
3
5
  module A
4
6
  module B
5
7
  def self.titleUp doc, r, codes=nil
@@ -46,22 +48,43 @@ describe "SpecSet Basics" do
46
48
  ["651", "z"],
47
49
  ]
48
50
  },
49
- {:solrField=>'title', :specs=>[['245']]},
51
+ {
52
+ :solrField=>'title',
53
+ :specs=>[['245']]
54
+ },
50
55
  {
51
56
  :solrField => 'titleA',
52
57
  :specs => [['245', 'a']]
58
+ },
59
+ {
60
+ :solrField => 'constantField',
61
+ :constantValue => ['A', 'B']
53
62
  }
54
63
  ]
64
+
65
+ @ss = MARCSpec::SpecSet.new
66
+ @ss.buildSpecsFromList(@speclist)
67
+ @h = @ss.hash_from_marc @one
68
+
55
69
  end
56
70
 
57
- it "should build from a list" do
58
- ss = MARCSpec::SpecSet.new
59
- ss.buildSpecsFromList(@speclist)
60
- ss.solrfieldspecs.size.should.equal 3
61
- h = ss.hash_from_marc @one
62
- h['places'].sort.should.equal @places.sort
63
- h['title'].should.equal @title
64
- h['titleA'].should.equal @titleA
71
+ it "should get all the specs" do
72
+ @ss.solrfieldspecs.size.should.equal 4
73
+ end
74
+
75
+ it "gets the places field" do
76
+ @h['places'].sort.should.equal @places.sort
77
+ end
78
+
79
+ correct = {
80
+ 'title' => @title,
81
+ 'titleA' => @titleA,
82
+ 'constantField' => ['A', 'B']
83
+ }
84
+ correct.each do |k,v|
85
+ it "gets correct value for #{k}" do
86
+ @h[k].should.equal v
87
+ end
65
88
  end
66
89
 
67
90
  it "allows customs that reference previous work" do
@@ -96,7 +119,49 @@ describe "SpecSet Basics" do
96
119
  h['letters'].should.equal ['a', 'b']
97
120
  end
98
121
  end
99
-
122
+
123
+
124
+
125
+ describe "Specset Benchmarking" do
126
+ before do
127
+ @reader = MARC4J4R::Reader.new("#{DIR}/data/batch.dat")
128
+ @speclist = [
129
+ {
130
+ :solrField=>'title',
131
+ :specs=>[['245']]
132
+ },
133
+ {
134
+ :solrField=> "places",
135
+ :specs => [
136
+ ["260", "a"],
137
+ ["651", "a"],
138
+ ["651", "z"],
139
+ ]
140
+ },
141
+ {
142
+ :solrField => 'titleA',
143
+ :specs => [['245', 'a']]
144
+ }
145
+ ]
146
+
147
+ @ss = MARCSpec::SpecSet.new
148
+ @ss.buildSpecsFromList(@speclist)
149
+ end
150
+
151
+ it "should benchmark" do
152
+ @reader.each do |r|
153
+ h = @ss.hash_from_marc(r, true)
154
+ end
155
+ @ss.solrfieldspecs.each do |sfs|
156
+ @ss.benchmarks[sfs.solrField].real.should.be positive
157
+ end
158
+
159
+ # @ss.benchmarks.each do |k,v|
160
+ # puts "%-10s %s" % [k + ':', v.to_s]
161
+ # end
162
+ end
163
+ end
164
+
100
165
 
101
166
 
102
167
 
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "VariableFieldSpec" do
22
+ before do
23
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
24
+ # @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
25
+ end
26
+
27
+ it "Should get a whole field separated by spaces" do
28
+ dfs = MARCSpec::VariableFieldSpec.new('260')
29
+ dfs.marc_values(@one).should.equal ["Medina, Texas, 1939."]
30
+ end
31
+
32
+ it "Should get just the $a" do
33
+ dfs = MARCSpec::VariableFieldSpec.new('260', 'a')
34
+ dfs.marc_values(@one).should.equal ["Medina, Texas,"]
35
+ end
36
+
37
+ it "should return separate values for repeated subfields if only one code is specified" do
38
+ dfs = MARCSpec::VariableFieldSpec.new('651', 'z')
39
+ dfs.marc_values(@one).sort.should.equal ['Texas', 'United States of America.']
40
+ end
41
+
42
+ it "Should get all fields via several equal routes" do
43
+ a = MARCSpec::VariableFieldSpec.new('260').marc_values(@one)
44
+ ac = MARCSpec::VariableFieldSpec.new('260', ['a', 'c']).marc_values(@one)
45
+ ca = MARCSpec::VariableFieldSpec.new('260', ['c', 'a']).marc_values(@one)
46
+ ca2 = MARCSpec::VariableFieldSpec.new('260', 'ca').marc_values(@one)
47
+ allrange = MARCSpec::VariableFieldSpec.new('260', 'a'..'z').marc_values(@one)
48
+ a.should.equal ac
49
+ ac.should.equal ca
50
+ ca.should.equal allrange
51
+ end
52
+
53
+ it "should get all three 700a's" do
54
+ a = MARCSpec::VariableFieldSpec.new('700', 'a').marc_values(@one)
55
+ a.should.equal ["Lomax, John Avery, 1867-1948", "Lomax, Ruby T. (Ruby Terrill)", "Taylor, Beale D."]
56
+ end
57
+
58
+ it "should round trip" do
59
+ ac = MARCSpec::VariableFieldSpec.new('260', ['a', 'c'])
60
+ ac2 = MARCSpec::VariableFieldSpec.fromPPString(ac.asPPString)
61
+ ac.should.equal ac2
62
+ end
63
+
64
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marcspec
3
3
  version: !ruby/object:Gem::Version
4
- hash: 61
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
- - 0
8
- - 8
9
7
  - 1
10
- version: 0.8.1
8
+ - 0
9
+ - 0
10
+ version: 1.0.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - BillDueber
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-04 00:00:00 -04:00
18
+ date: 2010-09-14 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -96,6 +96,7 @@ files:
96
96
  - Rakefile
97
97
  - VERSION
98
98
  - lib/marcspec.rb
99
+ - lib/marcspec/constantspec.rb
99
100
  - lib/marcspec/controlfieldspec.rb
100
101
  - lib/marcspec/customspec.rb
101
102
  - lib/marcspec/kvmap.rb
@@ -106,6 +107,8 @@ files:
106
107
  - lib/marcspec/solrfieldspec.rb
107
108
  - lib/marcspec/specset.rb
108
109
  - lib/marcspec/variablefieldspec.rb
110
+ - spec/cachespot_spec.rb
111
+ - spec/controlfieldspec_spec.rb
109
112
  - spec/data/batch.dat
110
113
  - spec/data/one.dat
111
114
  - spec/data/umich/translation_maps/area_map.properties
@@ -122,11 +125,13 @@ files:
122
125
  - spec/data/umich/translation_maps/library_map.properties
123
126
  - spec/data/umich/translation_maps/location_map.properties
124
127
  - spec/data/umich/umich_index.properties
128
+ - spec/leaderspec_spec.rb
125
129
  - spec/maps_spec.rb
126
130
  - spec/marcfieldspecs_spec.rb
127
131
  - spec/solrfieldspec_spec.rb
128
132
  - spec/spec_helper.rb
129
133
  - spec/specset_spec.rb
134
+ - spec/variablefieldspec_spec.rb
130
135
  has_rdoc: true
131
136
  homepage: http://github.com/billdueber/marcspec
132
137
  licenses: []
@@ -162,8 +167,12 @@ signing_key:
162
167
  specification_version: 3
163
168
  summary: Extract data from MARC records and send to Solr
164
169
  test_files:
170
+ - spec/cachespot_spec.rb
171
+ - spec/controlfieldspec_spec.rb
172
+ - spec/leaderspec_spec.rb
165
173
  - spec/maps_spec.rb
166
174
  - spec/marcfieldspecs_spec.rb
167
175
  - spec/solrfieldspec_spec.rb
168
176
  - spec/spec_helper.rb
169
177
  - spec/specset_spec.rb
178
+ - spec/variablefieldspec_spec.rb