marcspec 1.5.0 → 1.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +32 -0
- data/Rakefile +21 -11
- data/VERSION +1 -1
- data/lib/marcspec.rb +9 -7
- data/lib/marcspec/constantspec.rb +16 -1
- data/lib/marcspec/controlfieldspec.rb +22 -1
- data/lib/marcspec/customspec.rb +37 -1
- data/lib/marcspec/dsl.rb +20 -5
- data/lib/marcspec/map.rb +8 -7
- data/lib/marcspec/multivaluemap.rb +1 -1
- data/lib/marcspec/solrfieldspec.rb +66 -2
- data/lib/marcspec/specset.rb +79 -11
- data/lib/marcspec/variablefieldspec.rb +52 -4
- data/spec/cachespot_spec.rb +1 -1
- data/spec/controlfieldspec_spec.rb +4 -4
- data/spec/dsl_spec.rb +52 -30
- data/spec/leaderspec_spec.rb +2 -2
- data/spec/maps_spec.rb +42 -42
- data/spec/marcfieldspecs_spec.rb +22 -22
- data/spec/solrfieldspec_spec.rb +24 -24
- data/spec/spec.opts +7 -0
- data/spec/spec_helper.rb +3 -6
- data/spec/specset_spec.rb +20 -19
- data/spec/variablefieldspec_spec.rb +9 -9
- metadata +156 -144
@@ -2,10 +2,19 @@ require 'stringio'
|
|
2
2
|
require 'marc4j4r/controlfield'
|
3
3
|
|
4
4
|
module MARCSpec
|
5
|
+
|
6
|
+
|
7
|
+
# The basic Solr Field spec -- a specification object that knows how to extract data
|
8
|
+
# from a MARC record.
|
9
|
+
|
5
10
|
class SolrFieldSpec
|
11
|
+
include Logback::Simple
|
12
|
+
|
6
13
|
attr_accessor :solrField, :first, :map, :noMapKeyDefault, :marcfieldspecs, :defaultValue, :_mapname
|
7
14
|
attr_reader :arity
|
8
15
|
|
16
|
+
# Get a new object
|
17
|
+
|
9
18
|
def initialize(opts)
|
10
19
|
@solrField = opts[:solrField]
|
11
20
|
@first = opts[:firstOnly] || false
|
@@ -16,11 +25,19 @@ module MARCSpec
|
|
16
25
|
@marcfieldspecs = []
|
17
26
|
end
|
18
27
|
|
28
|
+
# Add a new tag specification
|
29
|
+
# @param [MARCSpec::ControlFieldSpec, MARCSpec::VariableFieldSpec] tagspec The spec to add
|
30
|
+
|
19
31
|
def << tagspec
|
20
32
|
@marcfieldspecs << tagspec
|
21
33
|
end
|
22
34
|
|
23
|
-
|
35
|
+
# Get raw (not translated by a map or anything) values from the MARC
|
36
|
+
#
|
37
|
+
# @param [MARC4J4R::Record] r The record
|
38
|
+
# @param [Hash, SolrInputDocument] doc The hash-like object that contains previously-generated content
|
39
|
+
# @return [Array] an array of values from the MARC record
|
40
|
+
|
24
41
|
def raw_marc_values r, doc
|
25
42
|
vals = []
|
26
43
|
@marcfieldspecs.each do |ts|
|
@@ -28,7 +45,13 @@ module MARCSpec
|
|
28
45
|
end
|
29
46
|
return vals
|
30
47
|
end
|
31
|
-
|
48
|
+
|
49
|
+
# Get the values from the MARC, provide a default or mapping as necessary
|
50
|
+
#
|
51
|
+
# @param [MARC4J4R::Record] r The record
|
52
|
+
# @param [Hash, SolrInputDocument] doc The hash-like object that contains previously-generated content
|
53
|
+
# @return [Array] an array of values from the MARC record after mapping/default/mapMissDefault/firstOnly
|
54
|
+
|
32
55
|
def marc_values r, doc = {}
|
33
56
|
vals = raw_marc_values r, doc
|
34
57
|
return vals if @arity > 1
|
@@ -64,6 +87,9 @@ module MARCSpec
|
|
64
87
|
return vals
|
65
88
|
end
|
66
89
|
|
90
|
+
# Basic equality
|
91
|
+
# @param [MARCSpec::SolrFieldSpec] other The other object to compare to
|
92
|
+
# @return [Boolean] whether it's the same
|
67
93
|
|
68
94
|
def == other
|
69
95
|
return ((other.solrField == self.solrField) and
|
@@ -74,10 +100,15 @@ module MARCSpec
|
|
74
100
|
(other.marcfieldspecs == self.marcfieldspecs))
|
75
101
|
end
|
76
102
|
|
103
|
+
# Build an object from an asPPString string
|
104
|
+
# @deprecated Use the DSL
|
77
105
|
def self.fromPPString str
|
78
106
|
return self.fromHash eval(str)
|
79
107
|
end
|
80
108
|
|
109
|
+
# Build an object from an eval'd asPPString string
|
110
|
+
# @deprecated Use the DSL
|
111
|
+
|
81
112
|
def self.fromHash h
|
82
113
|
sfs = self.new(h)
|
83
114
|
h[:specs].each do |s|
|
@@ -90,10 +121,43 @@ module MARCSpec
|
|
90
121
|
return sfs
|
91
122
|
end
|
92
123
|
|
124
|
+
# Output as a ruby hash
|
125
|
+
# @deprecated Use the DSL
|
126
|
+
|
93
127
|
def pretty_print pp
|
94
128
|
pp.pp eval(self.asPPString)
|
95
129
|
end
|
96
130
|
|
131
|
+
# Create a string representation suitable for inclusion in a DSL file
|
132
|
+
# @return [String] a DSL snippet
|
133
|
+
def asDSLString
|
134
|
+
s = StringIO.new
|
135
|
+
s.puts "field('#{@solrField}') do"
|
136
|
+
s.puts " firstOnly" if @first
|
137
|
+
if @defaultValue
|
138
|
+
s.puts " default " +
|
139
|
+
PP.singleline_pp(@defaultValue + "\n", s)
|
140
|
+
end
|
141
|
+
if @map
|
142
|
+
s.print " mapname "
|
143
|
+
PP.pp(@map.mapname, s)
|
144
|
+
end
|
145
|
+
if @noMapKeyDefault
|
146
|
+
s.print(" mapMissDefault ")
|
147
|
+
PP.singleline_pp(@noMapKeyDefault, s)
|
148
|
+
s.print("\n ")
|
149
|
+
end
|
150
|
+
@marcfieldspecs.each do |spec|
|
151
|
+
s.puts " " + spec.asDSLString
|
152
|
+
end
|
153
|
+
s.puts "end"
|
154
|
+
return s.string
|
155
|
+
end
|
156
|
+
|
157
|
+
|
158
|
+
# Output as a string representation of a ruby hash
|
159
|
+
# @deprecated Use the DSL
|
160
|
+
|
97
161
|
def asPPString
|
98
162
|
s = StringIO.new
|
99
163
|
s.print "{\n :solrField=> "
|
data/lib/marcspec/specset.rb
CHANGED
@@ -26,20 +26,37 @@ module MARCSpec
|
|
26
26
|
end
|
27
27
|
|
28
28
|
|
29
|
-
|
29
|
+
# A collection of the solr field specifications and maps necessary to turn a MARC record
|
30
|
+
# into a set of key=>value pairs suitable for sending to Solr
|
31
|
+
|
30
32
|
class SpecSet
|
33
|
+
include Logback::Simple
|
34
|
+
|
31
35
|
attr_accessor :tmaps, :solrfieldspecs, :benchmarks
|
32
36
|
|
37
|
+
# Generic new
|
33
38
|
def initialize
|
34
39
|
@tmaps = {}
|
35
40
|
@solrfieldspecs = []
|
36
41
|
@benchmarks = {}
|
37
42
|
end
|
38
43
|
|
44
|
+
# Get the map object associated with the given name
|
45
|
+
# @param [String] name The name of the map you want
|
46
|
+
# @return [MARCSpec::Map, nil] Either the map or nil (if not found)
|
47
|
+
|
39
48
|
def map name
|
40
49
|
return self.tmaps[name]
|
41
50
|
end
|
42
51
|
|
52
|
+
# Get all the *.rb files in a directory, assume they're maps, and create entries for
|
53
|
+
# them in self
|
54
|
+
#
|
55
|
+
# Simple wrapper around MARCSpec::Map#fromFile. Note that if a mapname is not found
|
56
|
+
# in the map structure, the name of the file (without the trailing '.rb') will be used.
|
57
|
+
#
|
58
|
+
# @param [String] dir The directory to look in. Not recursive.
|
59
|
+
|
43
60
|
def loadMapsFromDir dir
|
44
61
|
unless File.exist? dir
|
45
62
|
raise ArgumentError, "Cannot load maps from #{dir}: does not exist"
|
@@ -50,15 +67,26 @@ module MARCSpec
|
|
50
67
|
end
|
51
68
|
|
52
69
|
|
53
|
-
|
70
|
+
# Add a map to self, using its name (map#mapname) as a key
|
71
|
+
# @param [MARCSpec::Map] map the map to add.
|
54
72
|
def add_map map
|
55
73
|
self.tmaps[map.mapname] = map
|
56
74
|
end
|
57
75
|
|
76
|
+
# Build up a specset from the configuration in the given DSL file
|
77
|
+
# Note that this assumes that the maps have already been loaded!!
|
78
|
+
#
|
79
|
+
# @param [String, IO] f The name of the file, or an open IO object
|
80
|
+
# @return [MARCSpec::SpecSet] the new object
|
81
|
+
|
58
82
|
|
59
|
-
def buildSpecsFromDSLFile
|
60
|
-
f = File.open(
|
61
|
-
|
83
|
+
def buildSpecsFromDSLFile f
|
84
|
+
f = File.open(f) if f.is_a? String
|
85
|
+
|
86
|
+
unless f
|
87
|
+
log.fatal("Can't open file #{file}")
|
88
|
+
Process.exit
|
89
|
+
end
|
62
90
|
self.instance_eval(f.read)
|
63
91
|
self.check_and_fill_maps
|
64
92
|
end
|
@@ -68,11 +96,10 @@ module MARCSpec
|
|
68
96
|
if sfs._mapname
|
69
97
|
map = self.map(sfs._mapname)
|
70
98
|
if map
|
71
|
-
|
99
|
+
log.debug " Found map #{map.mapname} for solr field #{sfs.solrField}"
|
72
100
|
sfs.map = map
|
73
101
|
else
|
74
|
-
|
75
|
-
STDERR.puts "FATAL Cannot find map #{sfs._mapname} for solr field #{sfs.solrField}"
|
102
|
+
log.fatal " Cannot find map #{sfs._mapname} for solr field #{sfs.solrField}"
|
76
103
|
Process.exit
|
77
104
|
end
|
78
105
|
end
|
@@ -80,6 +107,8 @@ module MARCSpec
|
|
80
107
|
end
|
81
108
|
|
82
109
|
|
110
|
+
# Build a specset from the result of eval'ing an old-style pp hash.
|
111
|
+
# @deprecated Use the DSL
|
83
112
|
|
84
113
|
def buildSpecsFromList speclist
|
85
114
|
speclist.each do |spechash|
|
@@ -93,19 +122,23 @@ module MARCSpec
|
|
93
122
|
if spechash[:mapname]
|
94
123
|
map = self.map(spechash[:mapname])
|
95
124
|
unless map
|
96
|
-
|
125
|
+
log.fatal "Cannot find map #{spechash[:mapname]} for field #{spechash[:solrField]}"
|
97
126
|
Process.exit
|
98
127
|
else
|
99
|
-
|
128
|
+
log.debug " Found map #{spechash[:mapname]} for field #{spechash[:solrField]}"
|
100
129
|
solrspec.map = map
|
101
130
|
end
|
102
131
|
end
|
103
132
|
self.add_spec solrspec
|
104
|
-
|
133
|
+
log.debug "Added spec #{solrspec.solrField}"
|
105
134
|
end
|
106
135
|
end
|
107
136
|
|
108
137
|
|
138
|
+
# Add a spec, making sure there's a slot in the benchmarking stats to keep track of it
|
139
|
+
#
|
140
|
+
# @param [MARCSpec::SolrFieldSpec] solrfieldspec The spec to add
|
141
|
+
|
109
142
|
def add_spec solrfieldspec
|
110
143
|
self.solrfieldspecs << solrfieldspec
|
111
144
|
@benchmarks[solrfieldspec.solrField.to_s] = Benchmark::Tms.new(0,0,0,0, 0, solrfieldspec.solrField)
|
@@ -113,12 +146,24 @@ module MARCSpec
|
|
113
146
|
|
114
147
|
alias_method :<<, :add_spec
|
115
148
|
|
149
|
+
|
150
|
+
# Iterate over each of the solr field specs
|
116
151
|
def each
|
117
152
|
@solrfieldspecs.each do |fs|
|
118
153
|
yield fs
|
119
154
|
end
|
120
155
|
end
|
121
156
|
|
157
|
+
# Fill a hashlike (either a hash or a SolrInputDocument) based on
|
158
|
+
# the specs, maps, and passed-in record.
|
159
|
+
#
|
160
|
+
# Result is the hashlike getting new data added to it. Nothing is returned; it's all
|
161
|
+
# side-effects.
|
162
|
+
#
|
163
|
+
# @param [MARC4J4R::Record] r The record
|
164
|
+
# @param [Hash, SolrInputDocument] hashlike The hash-like object that contains previously-generated content
|
165
|
+
|
166
|
+
|
122
167
|
def fill_hashlike_from_marc r, hashlike
|
123
168
|
@solrfieldspecs.each do |sfs|
|
124
169
|
if sfs.arity == 1
|
@@ -131,6 +176,14 @@ module MARCSpec
|
|
131
176
|
end
|
132
177
|
end
|
133
178
|
end
|
179
|
+
|
180
|
+
# Same as #fill_hashlike_from_marc, but keeps track of how
|
181
|
+
# long each solr field takes (cumulative; it's added to every
|
182
|
+
# time you get data from a record).
|
183
|
+
#
|
184
|
+
# @param [MARC4J4R::Record] r The record
|
185
|
+
# @param [Hash, SolrInputDocument] hashlike The hash-like object that contains previously-generated content
|
186
|
+
|
134
187
|
|
135
188
|
def fill_hashlike_from_marc_benchmark r, hashlike
|
136
189
|
@solrfieldspecs.each do |sfs|
|
@@ -148,6 +201,14 @@ module MARCSpec
|
|
148
201
|
end
|
149
202
|
|
150
203
|
|
204
|
+
# Get a new SolrInputDocument based on the record passed in.
|
205
|
+
# Statistics will optionally be kept, and can be accessed
|
206
|
+
# via the @benchmarks instance variable later on.
|
207
|
+
#
|
208
|
+
# @param [MARC4J4R::Record] r The record
|
209
|
+
# @param [Boolean] timeit Whether to keep cumulative benchmarking statistics or not
|
210
|
+
# @return [SolrInputDocument] The new, filled SolrInputDocument
|
211
|
+
|
151
212
|
def doc_from_marc r, timeit = false
|
152
213
|
doc = SolrInputDocument.new
|
153
214
|
if timeit
|
@@ -158,6 +219,13 @@ module MARCSpec
|
|
158
219
|
return doc
|
159
220
|
end
|
160
221
|
|
222
|
+
# Exactly the same as #doc_from_marc, but the return object is a
|
223
|
+
# subclass of Hash
|
224
|
+
#
|
225
|
+
# @param [MARC4J4R::Record] r The record
|
226
|
+
# @param [Boolean] timeit Whether to keep cumulative benchmarking statistics or not
|
227
|
+
# @return [MockSolrDoc] The new, filled Hash
|
228
|
+
|
161
229
|
def hash_from_marc r, timeit = false
|
162
230
|
h = MARCSpec::MockSolrDoc.new
|
163
231
|
if timeit
|
@@ -14,22 +14,43 @@ module MARCSpec
|
|
14
14
|
# vfs = MARCSpec::VariableFieldSpec.new('245', 'a'..'b')
|
15
15
|
|
16
16
|
class VariableFieldSpec
|
17
|
-
|
17
|
+
include Logback::Simple
|
18
|
+
|
18
19
|
attr_accessor :tag, :codes, :joiner, :ind1, :ind2, :codehistory
|
19
20
|
|
21
|
+
# Get a new object
|
22
|
+
#
|
23
|
+
# @param [String] tag The MARC field tag
|
24
|
+
# @param [String, Array] codes The list of subfield codes (as 'abc' or ['a', 'b', 'c']) whose values we want
|
25
|
+
# @param [String] joiner What string to use to join the subfield values
|
26
|
+
# @return [VariableFieldSpec] the new object
|
27
|
+
|
20
28
|
def initialize tag, codes=nil, joiner=' '
|
21
29
|
@tag = tag
|
22
30
|
@joiner = joiner || ' '
|
23
31
|
self.codes = codes
|
24
32
|
@codehistory = []
|
25
33
|
end
|
26
|
-
|
34
|
+
|
35
|
+
# Basic equality
|
36
|
+
# @param [VariableFieldSpec] other The other spec
|
37
|
+
# @return [Boolean] whether or not it matches in all values
|
38
|
+
|
27
39
|
def == other
|
28
40
|
return ((self.tag == other.tag) and
|
29
41
|
(self.codes = other.codes) and
|
30
42
|
(self.joiner = other.joiner))
|
31
43
|
end
|
32
44
|
|
45
|
+
|
46
|
+
# Set the list of subfield codes we're concerned with.
|
47
|
+
# Internally, we always store this as an array. For input, accept
|
48
|
+
# an array of single-letter codes, a string of codes like 'abjk09',
|
49
|
+
# or a range like 'a'..'z'. nil means to use all the subfields
|
50
|
+
#
|
51
|
+
# @param [String, Array<String>, Range<String>, nil] c The code(s) to use
|
52
|
+
# @return [Array] the new set of codes
|
53
|
+
|
33
54
|
def codes= c
|
34
55
|
@codehistory << @codes if @codes
|
35
56
|
if c.nil?
|
@@ -37,7 +58,7 @@ module MARCSpec
|
|
37
58
|
return nil
|
38
59
|
end
|
39
60
|
|
40
|
-
if( c.is_a? Array) or (c.is_a?
|
61
|
+
if( c.is_a? Array) or (c.is_a? Range)
|
41
62
|
@codes = c.to_a
|
42
63
|
else
|
43
64
|
@codes = c.split(//)
|
@@ -46,6 +67,11 @@ module MARCSpec
|
|
46
67
|
return @codes
|
47
68
|
end
|
48
69
|
|
70
|
+
# Get the values associated with the tag (and optional subfield codes) for the given record
|
71
|
+
#
|
72
|
+
# @param [MARC4J4R::Record] r The record you want to extract values from
|
73
|
+
# @return [Array<String>] the extracted values, if any
|
74
|
+
|
49
75
|
def marc_values r
|
50
76
|
fields = r.find_by_tag(@tag)
|
51
77
|
vals = []
|
@@ -58,10 +84,30 @@ module MARCSpec
|
|
58
84
|
return vals
|
59
85
|
end
|
60
86
|
|
87
|
+
|
88
|
+
# Get a DSL snippet representing this object
|
89
|
+
# @return [String] the DSL string
|
90
|
+
|
91
|
+
def asDSLString
|
92
|
+
subs = @codes.join('')
|
93
|
+
if subs.size > 0
|
94
|
+
# return "spec('#{@tag}') {subs '#{subs}'}"
|
95
|
+
return "spec('#{tag}#{subs}')"
|
96
|
+
else
|
97
|
+
return "spec('#{@tag}')"
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
|
102
|
+
# Print out hash version of this object
|
103
|
+
# @deprecated Use the DSL
|
61
104
|
def pretty_print pp
|
62
105
|
pp.pp eval(self.asPPString)
|
63
106
|
end
|
64
|
-
|
107
|
+
|
108
|
+
# Create an eval'able string of a hash version of this object
|
109
|
+
# @deprecated Use the DSL
|
110
|
+
|
65
111
|
def asPPString
|
66
112
|
s = StringIO.new
|
67
113
|
if @joiner and @joiner != ' '
|
@@ -72,6 +118,8 @@ module MARCSpec
|
|
72
118
|
return s.string
|
73
119
|
end
|
74
120
|
|
121
|
+
# Create an object from an asPPString string
|
122
|
+
# @deprecated Use the DSL
|
75
123
|
def self.fromPPString str
|
76
124
|
a = eval(str)
|
77
125
|
return self.new(a[0], a[1], a[2])
|
data/spec/cachespot_spec.rb
CHANGED
@@ -29,23 +29,23 @@ describe "ControlFieldSpec" do
|
|
29
29
|
# 01234567890123 # index
|
30
30
|
it "gets a single full value" do
|
31
31
|
cfs = MARCSpec::ControlFieldSpec.new('001')
|
32
|
-
cfs.marc_values(@one).should
|
32
|
+
cfs.marc_values(@one).should == ["afc99990058366"]
|
33
33
|
end
|
34
34
|
|
35
35
|
it "gets a single character" do
|
36
36
|
cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
|
37
|
-
cfs.marc_values(@one).should
|
37
|
+
cfs.marc_values(@one).should == ['8']
|
38
38
|
end
|
39
39
|
|
40
40
|
it "gets a range of characters" do
|
41
41
|
cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
|
42
|
-
cfs.marc_values(@one).should
|
42
|
+
cfs.marc_values(@one).should == ['90058']
|
43
43
|
end
|
44
44
|
|
45
45
|
it "should round trip" do
|
46
46
|
cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
|
47
47
|
cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
|
48
|
-
cfs.should
|
48
|
+
cfs.should == cfs2
|
49
49
|
end
|
50
50
|
end
|
51
51
|
|