marcspec 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +32 -0
- data/Rakefile +21 -11
- data/VERSION +1 -1
- data/lib/marcspec.rb +9 -7
- data/lib/marcspec/constantspec.rb +16 -1
- data/lib/marcspec/controlfieldspec.rb +22 -1
- data/lib/marcspec/customspec.rb +37 -1
- data/lib/marcspec/dsl.rb +20 -5
- data/lib/marcspec/map.rb +8 -7
- data/lib/marcspec/multivaluemap.rb +1 -1
- data/lib/marcspec/solrfieldspec.rb +66 -2
- data/lib/marcspec/specset.rb +79 -11
- data/lib/marcspec/variablefieldspec.rb +52 -4
- data/spec/cachespot_spec.rb +1 -1
- data/spec/controlfieldspec_spec.rb +4 -4
- data/spec/dsl_spec.rb +52 -30
- data/spec/leaderspec_spec.rb +2 -2
- data/spec/maps_spec.rb +42 -42
- data/spec/marcfieldspecs_spec.rb +22 -22
- data/spec/solrfieldspec_spec.rb +24 -24
- data/spec/spec.opts +7 -0
- data/spec/spec_helper.rb +3 -6
- data/spec/specset_spec.rb +20 -19
- data/spec/variablefieldspec_spec.rb +9 -9
- metadata +156 -144
@@ -2,10 +2,19 @@ require 'stringio'
|
|
2
2
|
require 'marc4j4r/controlfield'
|
3
3
|
|
4
4
|
module MARCSpec
|
5
|
+
|
6
|
+
|
7
|
+
# The basic Solr Field spec -- a specification object that knows how to extract data
|
8
|
+
# from a MARC record.
|
9
|
+
|
5
10
|
class SolrFieldSpec
|
11
|
+
include Logback::Simple
|
12
|
+
|
6
13
|
attr_accessor :solrField, :first, :map, :noMapKeyDefault, :marcfieldspecs, :defaultValue, :_mapname
|
7
14
|
attr_reader :arity
|
8
15
|
|
16
|
+
# Get a new object
|
17
|
+
|
9
18
|
def initialize(opts)
|
10
19
|
@solrField = opts[:solrField]
|
11
20
|
@first = opts[:firstOnly] || false
|
@@ -16,11 +25,19 @@ module MARCSpec
|
|
16
25
|
@marcfieldspecs = []
|
17
26
|
end
|
18
27
|
|
28
|
+
# Add a new tag specification
|
29
|
+
# @param [MARCSpec::ControlFieldSpec, MARCSpec::VariableFieldSpec] tagspec The spec to add
|
30
|
+
|
19
31
|
# Append one tag-level spec to this field's list of MARC field specs.
#
# @param tagspec the spec object to append (stored as given)
# @return [Array] the spec list after the append
def <<(tagspec)
  @marcfieldspecs.push(tagspec)
end
|
22
34
|
|
23
|
-
|
35
|
+
# Get raw (not translated by a map or anything) values from the MARC
|
36
|
+
#
|
37
|
+
# @param [MARC4J4R::Record] r The record
|
38
|
+
# @param [Hash, SolrInputDocument] doc The hash-like object that contains previously-generated content
|
39
|
+
# @return [Array] an array of values from the MARC record
|
40
|
+
|
24
41
|
def raw_marc_values r, doc
|
25
42
|
vals = []
|
26
43
|
@marcfieldspecs.each do |ts|
|
@@ -28,7 +45,13 @@ module MARCSpec
|
|
28
45
|
end
|
29
46
|
return vals
|
30
47
|
end
|
31
|
-
|
48
|
+
|
49
|
+
# Get the values from the MARC, provide a default or mapping as necessary
|
50
|
+
#
|
51
|
+
# @param [MARC4J4R::Record] r The record
|
52
|
+
# @param [Hash, SolrInputDocument] doc The hash-like object that contains previously-generated content
|
53
|
+
# @return [Array] an array of values from the MARC record after mapping/default/mapMissDefault/firstOnly
|
54
|
+
|
32
55
|
def marc_values r, doc = {}
|
33
56
|
vals = raw_marc_values r, doc
|
34
57
|
return vals if @arity > 1
|
@@ -64,6 +87,9 @@ module MARCSpec
|
|
64
87
|
return vals
|
65
88
|
end
|
66
89
|
|
90
|
+
# Basic equality
|
91
|
+
# @param [MARCSpec::SolrFieldSpec] other The other object to compare to
|
92
|
+
# @return [Boolean] whether it's the same
|
67
93
|
|
68
94
|
def == other
|
69
95
|
return ((other.solrField == self.solrField) and
|
@@ -74,10 +100,15 @@ module MARCSpec
|
|
74
100
|
(other.marcfieldspecs == self.marcfieldspecs))
|
75
101
|
end
|
76
102
|
|
103
|
+
# Build an object from an asPPString string
|
104
|
+
# @deprecated Use the DSL
|
77
105
|
# Rebuild a spec from the string form of its hash representation.
# NOTE(review): the string is handed straight to eval, so it must come
# from a trusted source.
#
# @param [String] str ruby source that evaluates to the spec hash
# @return whatever fromHash builds from the evaluated hash
def self.fromPPString(str)
  fromHash(eval(str))
end
|
80
108
|
|
109
|
+
# Build an object from an eval'd asPPString string
|
110
|
+
# @deprecated Use the DSL
|
111
|
+
|
81
112
|
def self.fromHash h
|
82
113
|
sfs = self.new(h)
|
83
114
|
h[:specs].each do |s|
|
@@ -90,10 +121,43 @@ module MARCSpec
|
|
90
121
|
return sfs
|
91
122
|
end
|
92
123
|
|
124
|
+
# Output as a ruby hash
|
125
|
+
# @deprecated Use the DSL
|
126
|
+
|
93
127
|
# Pretty-print hook: evaluates this object's asPPString output and hands
# the resulting value to the printer.
#
# @param pp the pretty-printer object; only its #pp method is called
def pretty_print(pp)
  pp.pp(eval(asPPString))
end
|
96
130
|
|
131
|
+
# Create a string representation suitable for inclusion in a DSL file
|
132
|
+
# @return [String] a DSL snippet
|
133
|
+
# Create a string representation suitable for inclusion in a DSL file.
#
# Emits a "field('<name>') do ... end" block containing, when set, the
# firstOnly flag, the default value, the map name and the map-miss
# default, followed by one line per tag spec (each spec's own
# asDSLString).
#
# @return [String] a DSL snippet
def asDSLString
  s = StringIO.new
  s.puts "field('#{@solrField}') do"
  s.puts " firstOnly" if @first
  if @defaultValue
    # PP.singleline_pp writes into s and returns s, so its result must not
    # be concatenated onto a String; print the pieces separately instead.
    s.print " default "
    PP.singleline_pp(@defaultValue, s)
    s.print "\n"
  end
  if @map
    s.print " mapname "
    PP.pp(@map.mapname, s) # PP.pp terminates the line itself
  end
  if @noMapKeyDefault
    s.print(" mapMissDefault ")
    PP.singleline_pp(@noMapKeyDefault, s)
    s.print("\n ")
  end
  @marcfieldspecs.each do |spec|
    s.puts " " + spec.asDSLString
  end
  s.puts "end"
  return s.string
end
|
156
|
+
|
157
|
+
|
158
|
+
# Output as a string representation of a ruby hash
|
159
|
+
# @deprecated Use the DSL
|
160
|
+
|
97
161
|
def asPPString
|
98
162
|
s = StringIO.new
|
99
163
|
s.print "{\n :solrField=> "
|
data/lib/marcspec/specset.rb
CHANGED
@@ -26,20 +26,37 @@ module MARCSpec
|
|
26
26
|
end
|
27
27
|
|
28
28
|
|
29
|
-
|
29
|
+
# A collection of the solr field specifications and maps necessary to turn a MARC record
|
30
|
+
# into a set of key=>value pairs suitable for sending to Solr
|
31
|
+
|
30
32
|
class SpecSet
|
33
|
+
include Logback::Simple
|
34
|
+
|
31
35
|
attr_accessor :tmaps, :solrfieldspecs, :benchmarks
|
32
36
|
|
37
|
+
# Generic new
|
33
38
|
# Set up an empty spec set: no maps, no solr field specs, no benchmark entries.
def initialize
  @benchmarks = {}
  @solrfieldspecs = []
  @tmaps = {}
end
|
38
43
|
|
44
|
+
# Get the map object associated with the given name
|
45
|
+
# @param [String] name The name of the map you want
|
46
|
+
# @return [MARCSpec::Map, nil] Either the map or nil (if not found)
|
47
|
+
|
39
48
|
# Look up a map by name in tmaps.
#
# @param [String] name the key the map was stored under
# @return the stored map, or nil when tmaps has no entry for name
def map(name)
  tmaps[name]
end
|
42
51
|
|
52
|
+
# Get all the *.rb files in a directory, assume they're maps, and create entries for
|
53
|
+
# them in self
|
54
|
+
#
|
55
|
+
# Simple wrapper around MARCSpec::Map#fromFile. Note that if a mapname is not found
|
56
|
+
# in the map structure, the name of the file (without the trailing '.rb') will be used.
|
57
|
+
#
|
58
|
+
# @param [String] dir The directory to look in. Not recursive.
|
59
|
+
|
43
60
|
def loadMapsFromDir dir
|
44
61
|
unless File.exist? dir
|
45
62
|
raise ArgumentError, "Cannot load maps from #{dir}: does not exist"
|
@@ -50,15 +67,26 @@ module MARCSpec
|
|
50
67
|
end
|
51
68
|
|
52
69
|
|
53
|
-
|
70
|
+
# Add a map to self, using its name (map#mapname) as a key
|
71
|
+
# @param [MARCSpec::Map] map the map to add.
|
54
72
|
# Register a map in tmaps under the name it reports via #mapname.
# An existing entry with the same name is replaced.
#
# @param map the map to store; must respond to #mapname
# @return the map that was stored
def add_map(map)
  tmaps.store(map.mapname, map)
end
|
57
75
|
|
76
|
+
# Build up a specset from the configuration in the given DSL file
|
77
|
+
# Note that this assumes that the maps have already been loaded!!
|
78
|
+
#
|
79
|
+
# @param [String, IO] f The name of the file, or an open IO object
|
80
|
+
# @return [MARCSpec::SpecSet] the new object
|
81
|
+
|
58
82
|
|
59
|
-
def buildSpecsFromDSLFile
|
60
|
-
f = File.open(
|
61
|
-
|
83
|
+
# Build up this specset from the configuration in the given DSL source.
#
# The DSL text is instance_eval'd against self, then check_and_fill_maps
# is called. Because the text is evaluated as ruby code, it must come
# from a trusted source.
#
# @param [String, IO] f The name of the file, or an already-open object
#   that responds to #read
# @return the result of check_and_fill_maps
def buildSpecsFromDSLFile f
  source = f            # keep what the caller passed, for the log message
  opened_here = false
  if f.is_a? String
    f = File.open(f)
    opened_here = true
  end

  unless f
    log.fatal("Can't open file #{source.inspect}")
    Process.exit
  end

  begin
    dsl = f.read
  ensure
    # Only close handles this method opened; a caller-supplied IO stays open.
    f.close if opened_here
  end

  self.instance_eval(dsl)
  self.check_and_fill_maps
end
|
@@ -68,11 +96,10 @@ module MARCSpec
|
|
68
96
|
if sfs._mapname
|
69
97
|
map = self.map(sfs._mapname)
|
70
98
|
if map
|
71
|
-
|
99
|
+
log.debug " Found map #{map.mapname} for solr field #{sfs.solrField}"
|
72
100
|
sfs.map = map
|
73
101
|
else
|
74
|
-
|
75
|
-
STDERR.puts "FATAL Cannot find map #{sfs._mapname} for solr field #{sfs.solrField}"
|
102
|
+
log.fatal " Cannot find map #{sfs._mapname} for solr field #{sfs.solrField}"
|
76
103
|
Process.exit
|
77
104
|
end
|
78
105
|
end
|
@@ -80,6 +107,8 @@ module MARCSpec
|
|
80
107
|
end
|
81
108
|
|
82
109
|
|
110
|
+
# Build a specset from the result of eval'ing an old-style pp hash.
|
111
|
+
# @deprecated Use the DSL
|
83
112
|
|
84
113
|
def buildSpecsFromList speclist
|
85
114
|
speclist.each do |spechash|
|
@@ -93,19 +122,23 @@ module MARCSpec
|
|
93
122
|
if spechash[:mapname]
|
94
123
|
map = self.map(spechash[:mapname])
|
95
124
|
unless map
|
96
|
-
|
125
|
+
log.fatal "Cannot find map #{spechash[:mapname]} for field #{spechash[:solrField]}"
|
97
126
|
Process.exit
|
98
127
|
else
|
99
|
-
|
128
|
+
log.debug " Found map #{spechash[:mapname]} for field #{spechash[:solrField]}"
|
100
129
|
solrspec.map = map
|
101
130
|
end
|
102
131
|
end
|
103
132
|
self.add_spec solrspec
|
104
|
-
|
133
|
+
log.debug "Added spec #{solrspec.solrField}"
|
105
134
|
end
|
106
135
|
end
|
107
136
|
|
108
137
|
|
138
|
+
# Add a spec, making sure there's a slot in the benchmarking stats to keep track of it
|
139
|
+
#
|
140
|
+
# @param [MARCSpec::SolrFieldSpec] solrfieldspec The spec to add
|
141
|
+
|
109
142
|
def add_spec solrfieldspec
|
110
143
|
self.solrfieldspecs << solrfieldspec
|
111
144
|
@benchmarks[solrfieldspec.solrField.to_s] = Benchmark::Tms.new(0,0,0,0, 0, solrfieldspec.solrField)
|
@@ -113,12 +146,24 @@ module MARCSpec
|
|
113
146
|
|
114
147
|
alias_method :<<, :add_spec
|
115
148
|
|
149
|
+
|
150
|
+
# Iterate over each of the solr field specs
|
116
151
|
# Iterate over each of the solr field specs, in list order.
#
# @yield [fs] each element of @solrfieldspecs in turn
# @return [Array, Enumerator] the underlying list when a block is given,
#   otherwise an Enumerator over the same elements
def each(&block)
  return enum_for(:each) unless block
  @solrfieldspecs.each(&block)
end
|
121
156
|
|
157
|
+
# Fill a hashlike (either a hash or a SolrInputDocument) based on
|
158
|
+
# the specs, maps, and passed-in record.
|
159
|
+
#
|
160
|
+
# Result is the hashlike getting new data added to it. Nothing is returned; it's all
|
161
|
+
# side-effects.
|
162
|
+
#
|
163
|
+
# @param [MARC4J4R::Record] r The record
|
164
|
+
# @param [Hash, SolrInputDocument] hashlike The hash-like object that contains previously-generated content
|
165
|
+
|
166
|
+
|
122
167
|
def fill_hashlike_from_marc r, hashlike
|
123
168
|
@solrfieldspecs.each do |sfs|
|
124
169
|
if sfs.arity == 1
|
@@ -131,6 +176,14 @@ module MARCSpec
|
|
131
176
|
end
|
132
177
|
end
|
133
178
|
end
|
179
|
+
|
180
|
+
# Same as #fill_hashlike_from_marc, but keeps track of how
|
181
|
+
# long each solr field takes (cumulative; it's added to every
|
182
|
+
# time you get data from a record).
|
183
|
+
#
|
184
|
+
# @param [MARC4J4R::Record] r The record
|
185
|
+
# @param [Hash, SolrInputDocument] hashlike The hash-like object that contains previously-generated content
|
186
|
+
|
134
187
|
|
135
188
|
def fill_hashlike_from_marc_benchmark r, hashlike
|
136
189
|
@solrfieldspecs.each do |sfs|
|
@@ -148,6 +201,14 @@ module MARCSpec
|
|
148
201
|
end
|
149
202
|
|
150
203
|
|
204
|
+
# Get a new SolrInputDocument based on the record passed in.
|
205
|
+
# Statistics will optionally be kept, and can be accessed
|
206
|
+
# via the @benchmarks instance variable later on.
|
207
|
+
#
|
208
|
+
# @param [MARC4J4R::Record] r The record
|
209
|
+
# @param [Boolean] timeit Whether to keep cumulative benchmarking statistics or not
|
210
|
+
# @return [SolrInputDocument] The new, filled SolrInputDocument
|
211
|
+
|
151
212
|
def doc_from_marc r, timeit = false
|
152
213
|
doc = SolrInputDocument.new
|
153
214
|
if timeit
|
@@ -158,6 +219,13 @@ module MARCSpec
|
|
158
219
|
return doc
|
159
220
|
end
|
160
221
|
|
222
|
+
# Exactly the same as #doc_from_marc, but the return object is a
|
223
|
+
# subclass of Hash
|
224
|
+
#
|
225
|
+
# @param [MARC4J4R::Record] r The record
|
226
|
+
# @param [Boolean] timeit Whether to keep cumulative benchmarking statistics or not
|
227
|
+
# @return [MockSolrDoc] The new, filled Hash
|
228
|
+
|
161
229
|
def hash_from_marc r, timeit = false
|
162
230
|
h = MARCSpec::MockSolrDoc.new
|
163
231
|
if timeit
|
@@ -14,22 +14,43 @@ module MARCSpec
|
|
14
14
|
# vfs = MARCSpec::VariableFieldSpec.new('245', 'a'..'b')
|
15
15
|
|
16
16
|
class VariableFieldSpec
|
17
|
-
|
17
|
+
include Logback::Simple
|
18
|
+
|
18
19
|
attr_accessor :tag, :codes, :joiner, :ind1, :ind2, :codehistory
|
19
20
|
|
21
|
+
# Get a new object
|
22
|
+
#
|
23
|
+
# @param [String] tag The MARC field tag
|
24
|
+
# @param [String, Array] codes The list of subfield codes (as 'abc' or ['a', 'b', 'c']) whose values we want
|
25
|
+
# @param [String] joiner What string to use to join the subfield values
|
26
|
+
# @return [VariableFieldSpec] the new object
|
27
|
+
|
20
28
|
# Build a spec for one MARC tag.
#
# @param tag the MARC field tag, stored as given
# @param codes subfield codes, handed to the codes= setter (nil allowed)
# @param joiner separator string; a nil/false value falls back to a single space
def initialize(tag, codes = nil, joiner = ' ')
  @tag = tag
  @joiner = joiner ? joiner : ' '
  # The setter runs before @codehistory is reset, matching the original order.
  self.codes = codes
  @codehistory = []
end
|
26
|
-
|
34
|
+
|
35
|
+
# Basic equality
|
36
|
+
# @param [VariableFieldSpec] other The other spec
|
37
|
+
# @return [Boolean] whether or not it matches in all values
|
38
|
+
|
27
39
|
def == other
|
28
40
|
return ((self.tag == other.tag) and
|
29
41
|
(self.codes = other.codes) and
|
30
42
|
(self.joiner = other.joiner))
|
31
43
|
end
|
32
44
|
|
45
|
+
|
46
|
+
# Set the list of subfield codes we're concerned with.
|
47
|
+
# Internally, we always store this as an array. For input, accept
|
48
|
+
# an array of single-letter codes, a string of codes like 'abjk09',
|
49
|
+
# or a range like 'a'..'z'. nil means to use all the subfields
|
50
|
+
#
|
51
|
+
# @param [String, Array<String>, Range<String>, nil] c The code(s) to use
|
52
|
+
# @return [Array] the new set of codes
|
53
|
+
|
33
54
|
def codes= c
|
34
55
|
@codehistory << @codes if @codes
|
35
56
|
if c.nil?
|
@@ -37,7 +58,7 @@ module MARCSpec
|
|
37
58
|
return nil
|
38
59
|
end
|
39
60
|
|
40
|
-
if( c.is_a? Array) or (c.is_a?
|
61
|
+
if( c.is_a? Array) or (c.is_a? Range)
|
41
62
|
@codes = c.to_a
|
42
63
|
else
|
43
64
|
@codes = c.split(//)
|
@@ -46,6 +67,11 @@ module MARCSpec
|
|
46
67
|
return @codes
|
47
68
|
end
|
48
69
|
|
70
|
+
# Get the values associated with the tag (and optional subfield codes) for the given record
|
71
|
+
#
|
72
|
+
# @param [MARC4J4R::Record] r The record you want to extract values from
|
73
|
+
# @return [Array<String>] the extracted values, if any
|
74
|
+
|
49
75
|
def marc_values r
|
50
76
|
fields = r.find_by_tag(@tag)
|
51
77
|
vals = []
|
@@ -58,10 +84,30 @@ module MARCSpec
|
|
58
84
|
return vals
|
59
85
|
end
|
60
86
|
|
87
|
+
|
88
|
+
# Get a DSL snippet representing this object
|
89
|
+
# @return [String] the DSL string
|
90
|
+
|
91
|
+
# Get a DSL snippet representing this object.
#
# The subfield codes are appended directly to the tag, e.g. spec('245ab').
# With no codes (an empty list, or nil) only the tag is emitted.
#
# @return [String] the DSL string
def asDSLString
  # Array() turns a nil @codes into [] so the join cannot raise.
  subs = Array(@codes).join('')
  if subs.empty?
    "spec('#{@tag}')"
  else
    "spec('#{@tag}#{subs}')"
  end
end
|
100
|
+
|
101
|
+
|
102
|
+
# Print out hash version of this object
|
103
|
+
# @deprecated Use the DSL
|
61
104
|
# Pretty-print hook: evaluates this object's asPPString output and passes
# the resulting value to the printer.
#
# @param pp the pretty-printer object; only its #pp method is called
def pretty_print(pp)
  pp.pp(eval(asPPString))
end
|
64
|
-
|
107
|
+
|
108
|
+
# Create an eval'able string of a hash version of this object
|
109
|
+
# @deprecated Use the DSL
|
110
|
+
|
65
111
|
def asPPString
|
66
112
|
s = StringIO.new
|
67
113
|
if @joiner and @joiner != ' '
|
@@ -72,6 +118,8 @@ module MARCSpec
|
|
72
118
|
return s.string
|
73
119
|
end
|
74
120
|
|
121
|
+
# Create an object from an asPPString string
|
122
|
+
# @deprecated Use the DSL
|
75
123
|
def self.fromPPString str
|
76
124
|
a = eval(str)
|
77
125
|
return self.new(a[0], a[1], a[2])
|
data/spec/cachespot_spec.rb
CHANGED
@@ -29,23 +29,23 @@ describe "ControlFieldSpec" do
|
|
29
29
|
# 01234567890123 # index
|
30
30
|
it "gets a single full value" do
|
31
31
|
cfs = MARCSpec::ControlFieldSpec.new('001')
|
32
|
-
cfs.marc_values(@one).should
|
32
|
+
cfs.marc_values(@one).should == ["afc99990058366"]
|
33
33
|
end
|
34
34
|
|
35
35
|
it "gets a single character" do
|
36
36
|
cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
|
37
|
-
cfs.marc_values(@one).should
|
37
|
+
cfs.marc_values(@one).should == ['8']
|
38
38
|
end
|
39
39
|
|
40
40
|
it "gets a range of characters" do
|
41
41
|
cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
|
42
|
-
cfs.marc_values(@one).should
|
42
|
+
cfs.marc_values(@one).should == ['90058']
|
43
43
|
end
|
44
44
|
|
45
45
|
it "should round trip" do
|
46
46
|
cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
|
47
47
|
cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
|
48
|
-
cfs.should
|
48
|
+
cfs.should == cfs2
|
49
49
|
end
|
50
50
|
end
|
51
51
|
|