marcspec 1.5.0 → 1.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,10 +2,19 @@ require 'stringio'
2
2
  require 'marc4j4r/controlfield'
3
3
 
4
4
  module MARCSpec
5
+
6
+
7
+ # The basic Solr Field spec -- a specification object that knows how to extract data
8
+ # from a MARC record.
9
+
5
10
  class SolrFieldSpec
11
+ include Logback::Simple
12
+
6
13
  attr_accessor :solrField, :first, :map, :noMapKeyDefault, :marcfieldspecs, :defaultValue, :_mapname
7
14
  attr_reader :arity
8
15
 
16
+ # Get a new object
17
+
9
18
  def initialize(opts)
10
19
  @solrField = opts[:solrField]
11
20
  @first = opts[:firstOnly] || false
@@ -16,11 +25,19 @@ module MARCSpec
16
25
  @marcfieldspecs = []
17
26
  end
18
27
 
28
+ # Add a new tag specification
29
+ # @param [MARCSpec::ControlFieldSpec, MARCSpec::VariableFieldSpec] tagspec The spec to add
30
+
19
31
  def << tagspec
20
32
  @marcfieldspecs << tagspec
21
33
  end
22
34
 
23
-
35
+ # Get raw (not translated by a map or anything) values from the MARC
36
+ #
37
+ # @param [MARC4J4R::Record] r The record
38
+ # @param [Hash, SolrInputDocument] doc The hash-like object that contains previously-generated content
39
+ # @return [Array] an array of values from the MARC record
40
+
24
41
  def raw_marc_values r, doc
25
42
  vals = []
26
43
  @marcfieldspecs.each do |ts|
@@ -28,7 +45,13 @@ module MARCSpec
28
45
  end
29
46
  return vals
30
47
  end
31
-
48
+
49
+ # Get the values from the MARC, provide a default or mapping as necessary
50
+ #
51
+ # @param [MARC4J4R::Record] r The record
52
+ # @param [Hash, SolrInputDocument] doc The hash-like object that contains previously-generated content
53
+ # @return [Array] an array of values from the MARC record after mapping/default/mapMissDefault/firstOnly
54
+
32
55
  def marc_values r, doc = {}
33
56
  vals = raw_marc_values r, doc
34
57
  return vals if @arity > 1
@@ -64,6 +87,9 @@ module MARCSpec
64
87
  return vals
65
88
  end
66
89
 
90
+ # Basic equality
91
+ # @param [MARCSpec::SolrFieldSpec] other The other object to compare to
92
+ # @return [Boolean] whether it's the same
67
93
 
68
94
  def == other
69
95
  return ((other.solrField == self.solrField) and
@@ -74,10 +100,15 @@ module MARCSpec
74
100
  (other.marcfieldspecs == self.marcfieldspecs))
75
101
  end
76
102
 
103
+ # Build an object from an asPPString string
104
+ # @deprecated Use the DSL
77
105
  def self.fromPPString str
78
106
  return self.fromHash eval(str)
79
107
  end
80
108
 
109
+ # Build an object from an eval'd asPPString string
110
+ # @deprecated Use the DSL
111
+
81
112
  def self.fromHash h
82
113
  sfs = self.new(h)
83
114
  h[:specs].each do |s|
@@ -90,10 +121,43 @@ module MARCSpec
90
121
  return sfs
91
122
  end
92
123
 
124
+ # Output as a ruby hash
125
+ # @deprecated Use the DSL
126
+
93
127
  def pretty_print pp
94
128
  pp.pp eval(self.asPPString)
95
129
  end
96
130
 
131
+ # Create a string representation suitable for inclusion in a DSL file
132
+ # @return [String] a DSL snippet
133
+ def asDSLString
134
+ s = StringIO.new
135
+ s.puts "field('#{@solrField}') do"
136
+ s.puts " firstOnly" if @first
137
+ if @defaultValue
138
+ s.puts " default " +
139
+ PP.singleline_pp(@defaultValue + "\n", s)
140
+ end
141
+ if @map
142
+ s.print " mapname "
143
+ PP.pp(@map.mapname, s)
144
+ end
145
+ if @noMapKeyDefault
146
+ s.print(" mapMissDefault ")
147
+ PP.singleline_pp(@noMapKeyDefault, s)
148
+ s.print("\n ")
149
+ end
150
+ @marcfieldspecs.each do |spec|
151
+ s.puts " " + spec.asDSLString
152
+ end
153
+ s.puts "end"
154
+ return s.string
155
+ end
156
+
157
+
158
+ # Output as a string representation of a ruby hash
159
+ # @deprecated Use the DSL
160
+
97
161
  def asPPString
98
162
  s = StringIO.new
99
163
  s.print "{\n :solrField=> "
@@ -26,20 +26,37 @@ module MARCSpec
26
26
  end
27
27
 
28
28
 
29
-
29
+ # A collection of the solr field specifications and maps necessary to turn a MARC record
30
+ # into a set of key=>value pairs suitable for sending to Solr
31
+
30
32
  class SpecSet
33
+ include Logback::Simple
34
+
31
35
  attr_accessor :tmaps, :solrfieldspecs, :benchmarks
32
36
 
37
+ # Generic new
33
38
  def initialize
34
39
  @tmaps = {}
35
40
  @solrfieldspecs = []
36
41
  @benchmarks = {}
37
42
  end
38
43
 
44
+ # Get the map object associated with the given name
45
+ # @param [String] name The name of the map you want
46
+ # @return [MARCSpec::Map, nil] Either the map or nil (if not found)
47
+
39
48
  def map name
40
49
  return self.tmaps[name]
41
50
  end
42
51
 
52
+ # Get all the *.rb files in a directory, assume they're maps, and create entries for
53
+ # them in self
54
+ #
55
+ # Simple wrapper around MARCSpec::Map#fromFile. Note that if a mapname is not found
56
+ # in the map structure, the name of the file (without the trailing '.rb') will be used.
57
+ #
58
+ # @param [String] dir The directory to look in. Not recursive.
59
+
43
60
  def loadMapsFromDir dir
44
61
  unless File.exist? dir
45
62
  raise ArgumentError, "Cannot load maps from #{dir}: does not exist"
@@ -50,15 +67,26 @@ module MARCSpec
50
67
  end
51
68
 
52
69
 
53
-
70
+ # Add a map to self, using its name (map#mapname) as a key
71
+ # @param [MARCSpec::Map] map the map to add.
54
72
  def add_map map
55
73
  self.tmaps[map.mapname] = map
56
74
  end
57
75
 
76
+ # Build up a specset from the configuration in the given DSL file
77
+ # Note that this assumes that the maps have already been loaded!!
78
+ #
79
+ # @param [String, IO] f The name of the file, or an open IO object
80
+ # @return [MARCSpec::SpecSet] the new object
81
+
58
82
 
59
- def buildSpecsFromDSLFile file
60
- f = File.open(file)
61
- $LOG.fatal("Can't open file #{file}") unless f
83
+ def buildSpecsFromDSLFile f
84
+ f = File.open(f) if f.is_a? String
85
+
86
+ unless f
87
+ log.fatal("Can't open file #{file}")
88
+ Process.exit
89
+ end
62
90
  self.instance_eval(f.read)
63
91
  self.check_and_fill_maps
64
92
  end
@@ -68,11 +96,10 @@ module MARCSpec
68
96
  if sfs._mapname
69
97
  map = self.map(sfs._mapname)
70
98
  if map
71
- $LOG.debug " Found map #{map.mapname} for solr field #{sfs.solrField}"
99
+ log.debug " Found map #{map.mapname} for solr field #{sfs.solrField}"
72
100
  sfs.map = map
73
101
  else
74
- $LOG.error " Cannot find map #{sfs._mapname} for solr field #{sfs.solrField}"
75
- STDERR.puts "FATAL Cannot find map #{sfs._mapname} for solr field #{sfs.solrField}"
102
+ log.fatal " Cannot find map #{sfs._mapname} for solr field #{sfs.solrField}"
76
103
  Process.exit
77
104
  end
78
105
  end
@@ -80,6 +107,8 @@ module MARCSpec
80
107
  end
81
108
 
82
109
 
110
+ # Build a specset from the result of eval'ing an old-style pp hash.
111
+ # @deprecated Use the DSL
83
112
 
84
113
  def buildSpecsFromList speclist
85
114
  speclist.each do |spechash|
@@ -93,19 +122,23 @@ module MARCSpec
93
122
  if spechash[:mapname]
94
123
  map = self.map(spechash[:mapname])
95
124
  unless map
96
- $LOG.error " Cannot find map #{spechash[:mapname]} for field #{spechash[:solrField]}"
125
+ log.fatal "Cannot find map #{spechash[:mapname]} for field #{spechash[:solrField]}"
97
126
  Process.exit
98
127
  else
99
- $LOG.debug " Found map #{spechash[:mapname]} for field #{spechash[:solrField]}"
128
+ log.debug " Found map #{spechash[:mapname]} for field #{spechash[:solrField]}"
100
129
  solrspec.map = map
101
130
  end
102
131
  end
103
132
  self.add_spec solrspec
104
- $LOG.debug "Added spec #{solrspec.solrField}"
133
+ log.debug "Added spec #{solrspec.solrField}"
105
134
  end
106
135
  end
107
136
 
108
137
 
138
+ # Add a spec, making sure there's a slot in the benchmarking stats to keep track of it
139
+ #
140
+ # @param [MARCSpec::SolrFieldSpec] solrfieldspec The spec to add
141
+
109
142
  def add_spec solrfieldspec
110
143
  self.solrfieldspecs << solrfieldspec
111
144
  @benchmarks[solrfieldspec.solrField.to_s] = Benchmark::Tms.new(0,0,0,0, 0, solrfieldspec.solrField)
@@ -113,12 +146,24 @@ module MARCSpec
113
146
 
114
147
  alias_method :<<, :add_spec
115
148
 
149
+
150
+ # Iterate over each of the solr field specs
116
151
  def each
117
152
  @solrfieldspecs.each do |fs|
118
153
  yield fs
119
154
  end
120
155
  end
121
156
 
157
+ # Fill a hashlike (either a hash or a SolrInputDocument) based on
158
+ # the specs, maps, and passed-in record.
159
+ #
160
+ # Result is the hashlike getting new data added to it. Nothing is returned; it's all
161
+ # side-effects.
162
+ #
163
+ # @param [MARC4J4R::Record] r The record
164
+ # @param [Hash, SolrInputDocument] hashlike The hash-like object that contains previously-generated content
165
+
166
+
122
167
  def fill_hashlike_from_marc r, hashlike
123
168
  @solrfieldspecs.each do |sfs|
124
169
  if sfs.arity == 1
@@ -131,6 +176,14 @@ module MARCSpec
131
176
  end
132
177
  end
133
178
  end
179
+
180
+ # Same as #fill_hashlike_from_marc, but keeps track of how
181
+ # long each solr field takes (cumulative; it's added to every
182
+ # time you get data from a record).
183
+ #
184
+ # @param [MARC4J4R::Record] r The record
185
+ # @param [Hash, SolrInputDocument] hashlike The hash-like object that contains previously-generated content
186
+
134
187
 
135
188
  def fill_hashlike_from_marc_benchmark r, hashlike
136
189
  @solrfieldspecs.each do |sfs|
@@ -148,6 +201,14 @@ module MARCSpec
148
201
  end
149
202
 
150
203
 
204
+ # Get a new SolrInputDocument based on the record passed in.
205
+ # Statistics will optionally be kept, and can be accessed
206
+ # via the @benchmarks instance variable later on.
207
+ #
208
+ # @param [MARC4J4R::Record] r The record
209
+ # @param [Boolean] timeit Whether to keep cumulative benchmarking statistics or not
210
+ # @return [SolrInputDocument] The new, filled SolrInputDocument
211
+
151
212
  def doc_from_marc r, timeit = false
152
213
  doc = SolrInputDocument.new
153
214
  if timeit
@@ -158,6 +219,13 @@ module MARCSpec
158
219
  return doc
159
220
  end
160
221
 
222
+ # Exactly the same as #doc_from_marc, but the return object is a
223
+ # subclass of Hash
224
+ #
225
+ # @param [MARC4J4R::Record] r The record
226
+ # @param [Boolean] timeit Whether to keep cumulative benchmarking statistics or not
227
+ # @return [MockSolrDoc] The new, filled Hash
228
+
161
229
  def hash_from_marc r, timeit = false
162
230
  h = MARCSpec::MockSolrDoc.new
163
231
  if timeit
@@ -14,22 +14,43 @@ module MARCSpec
14
14
  # vfs = MARCSpec::VariableFieldSpec.new('245', 'a'..'b')
15
15
 
16
16
  class VariableFieldSpec
17
-
17
+ include Logback::Simple
18
+
18
19
  attr_accessor :tag, :codes, :joiner, :ind1, :ind2, :codehistory
19
20
 
21
+ # Get a new object
22
+ #
23
+ # @param [String] tag The MARC field tag
24
+ # @param [String, Array] codes The list of subfield codes (as 'abc' or ['a', 'b', 'c']) whose values we want
25
+ # @param [String] joiner What string to use to join the subfield values
26
+ # @return [VariableFieldSpec] the new object
27
+
20
28
  def initialize tag, codes=nil, joiner=' '
21
29
  @tag = tag
22
30
  @joiner = joiner || ' '
23
31
  self.codes = codes
24
32
  @codehistory = []
25
33
  end
26
-
34
+
35
+ # Basic equality
36
+ # @param [VariableFieldSpec] other The other spec
37
+ # @return [Boolean] whether or not it matches in all values
38
+
27
39
  def == other
28
40
  return ((self.tag == other.tag) and
29
41
  (self.codes = other.codes) and
30
42
  (self.joiner = other.joiner))
31
43
  end
32
44
 
45
+
46
+ # Set the list of subfield codes we're concerned with.
47
+ # Internally, we always store this as an array. For input, accept
48
+ # an array of single-letter codes, a string of codes like 'abjk09',
49
+ # or a range like 'a'..'z'. nil means to use all the subfields
50
+ #
51
+ # @param [String, Array<String>, Range<String>, nil] c The code(s) to use
52
+ # @return [Array] the new set of codes
53
+
33
54
  def codes= c
34
55
  @codehistory << @codes if @codes
35
56
  if c.nil?
@@ -37,7 +58,7 @@ module MARCSpec
37
58
  return nil
38
59
  end
39
60
 
40
- if( c.is_a? Array) or (c.is_a? Set) or (c.is_a? Range)
61
+ if( c.is_a? Array) or (c.is_a? Range)
41
62
  @codes = c.to_a
42
63
  else
43
64
  @codes = c.split(//)
@@ -46,6 +67,11 @@ module MARCSpec
46
67
  return @codes
47
68
  end
48
69
 
70
+ # Get the values associated with the tag (and optional subfield codes) for the given record
71
+ #
72
+ # @param [MARC4J4R::Record] r The record you want to extract values from
73
+ # @return [Array<String>] the extracted values, if any
74
+
49
75
  def marc_values r
50
76
  fields = r.find_by_tag(@tag)
51
77
  vals = []
@@ -58,10 +84,30 @@ module MARCSpec
58
84
  return vals
59
85
  end
60
86
 
87
+
88
+ # Get a DSL snippet representing this object
89
+ # @return [String] the DSL string
90
+
91
+ def asDSLString
92
+ subs = @codes.join('')
93
+ if subs.size > 0
94
+ # return "spec('#{@tag}') {subs '#{subs}'}"
95
+ return "spec('#{tag}#{subs}')"
96
+ else
97
+ return "spec('#{@tag}')"
98
+ end
99
+ end
100
+
101
+
102
+ # Print out hash version of this object
103
+ # @deprecated Use the DSL
61
104
  def pretty_print pp
62
105
  pp.pp eval(self.asPPString)
63
106
  end
64
-
107
+
108
+ # Create an eval'able string of a hash version of this object
109
+ # @deprecated Use the DSL
110
+
65
111
  def asPPString
66
112
  s = StringIO.new
67
113
  if @joiner and @joiner != ' '
@@ -72,6 +118,8 @@ module MARCSpec
72
118
  return s.string
73
119
  end
74
120
 
121
+ # Create an object from an asPPString string
122
+ # @deprecated Use the DSL
75
123
  def self.fromPPString str
76
124
  a = eval(str)
77
125
  return self.new(a[0], a[1], a[2])
@@ -25,6 +25,6 @@ describe "Cachespot" do
25
25
 
26
26
  it "should set a value and get it back" do
27
27
  @one.cachespot['one'] = 1
28
- @one.cachespot['one'].should.equal 1
28
+ @one.cachespot['one'].should == 1
29
29
  end
30
30
  end
@@ -29,23 +29,23 @@ describe "ControlFieldSpec" do
29
29
  # 01234567890123 # index
30
30
  it "gets a single full value" do
31
31
  cfs = MARCSpec::ControlFieldSpec.new('001')
32
- cfs.marc_values(@one).should.equal ["afc99990058366"]
32
+ cfs.marc_values(@one).should == ["afc99990058366"]
33
33
  end
34
34
 
35
35
  it "gets a single character" do
36
36
  cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
37
- cfs.marc_values(@one).should.equal ['8']
37
+ cfs.marc_values(@one).should == ['8']
38
38
  end
39
39
 
40
40
  it "gets a range of characters" do
41
41
  cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
42
- cfs.marc_values(@one).should.equal ['90058']
42
+ cfs.marc_values(@one).should == ['90058']
43
43
  end
44
44
 
45
45
  it "should round trip" do
46
46
  cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
47
47
  cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
48
- cfs.should.equal cfs2
48
+ cfs.should == cfs2
49
49
  end
50
50
  end
51
51