marcspec 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,10 +2,19 @@ require 'stringio'
2
2
  require 'marc4j4r/controlfield'
3
3
 
4
4
  module MARCSpec
5
+
6
+
7
+ # The basic Solr Field spec -- a specification object that knows how to extract data
8
+ # from a MARC record.
9
+
5
10
  class SolrFieldSpec
11
+ include Logback::Simple
12
+
6
13
  attr_accessor :solrField, :first, :map, :noMapKeyDefault, :marcfieldspecs, :defaultValue, :_mapname
7
14
  attr_reader :arity
8
15
 
16
+ # Get a new object
17
+
9
18
  def initialize(opts)
10
19
  @solrField = opts[:solrField]
11
20
  @first = opts[:firstOnly] || false
@@ -16,11 +25,19 @@ module MARCSpec
16
25
  @marcfieldspecs = []
17
26
  end
18
27
 
28
+ # Add a new tag specification
29
+ # @param [MARCSpec::ControlFieldSpec, MARCSpec::VariableFieldSpec] tagspec The spec to add
30
+
19
31
  def << tagspec
20
32
  @marcfieldspecs << tagspec
21
33
  end
22
34
 
23
-
35
+ # Get raw (not translated by a map or anything) values from the MARC
36
+ #
37
+ # @param [MARC4J4R::Record] r The record
38
+ # @param [Hash, SolrInputDocument] doc The hash-like object that contains previously-generated content
39
+ # @return [Array] an array of values from the MARC record
40
+
24
41
  def raw_marc_values r, doc
25
42
  vals = []
26
43
  @marcfieldspecs.each do |ts|
@@ -28,7 +45,13 @@ module MARCSpec
28
45
  end
29
46
  return vals
30
47
  end
31
-
48
+
49
+ # Get the values from the MARC, provide a default or mapping as necessary
50
+ #
51
+ # @param [MARC4J4R::Record] r The record
52
+ # @param [Hash, SolrInputDocument] doc The hash-like object that contains previously-generated content
53
+ # @return [Array] an array of values from the MARC record after mapping/default/mapMissDefault/firstOnly
54
+
32
55
  def marc_values r, doc = {}
33
56
  vals = raw_marc_values r, doc
34
57
  return vals if @arity > 1
@@ -64,6 +87,9 @@ module MARCSpec
64
87
  return vals
65
88
  end
66
89
 
90
+ # Basic equality
91
+ # @param [MARCSpec::SolrFieldSpec] other The other object to compare to
92
+ # @return [Boolean] whether it's the same
67
93
 
68
94
  def == other
69
95
  return ((other.solrField == self.solrField) and
@@ -74,10 +100,15 @@ module MARCSpec
74
100
  (other.marcfieldspecs == self.marcfieldspecs))
75
101
  end
76
102
 
103
+ # Build an object from an asPPString string
104
+ # @deprecated Use the DSL
77
105
  def self.fromPPString str
78
106
  return self.fromHash eval(str)
79
107
  end
80
108
 
109
+ # Build an object from an eval'd asPPString string
110
+ # @deprecated Use the DSL
111
+
81
112
  def self.fromHash h
82
113
  sfs = self.new(h)
83
114
  h[:specs].each do |s|
@@ -90,10 +121,43 @@ module MARCSpec
90
121
  return sfs
91
122
  end
92
123
 
124
+ # Output as a ruby hash
125
+ # @deprecated Use the DSL
126
+
93
127
  def pretty_print pp
94
128
  pp.pp eval(self.asPPString)
95
129
  end
96
130
 
131
+ # Create a string representation suitable for inclusion in a DSL file
132
+ # @return [String] a DSL snippet
133
+ def asDSLString
134
+ s = StringIO.new
135
+ s.puts "field('#{@solrField}') do"
136
+ s.puts " firstOnly" if @first
137
+ if @defaultValue
138
+ s.puts " default " +
139
+ PP.singleline_pp(@defaultValue + "\n", s)
140
+ end
141
+ if @map
142
+ s.print " mapname "
143
+ PP.pp(@map.mapname, s)
144
+ end
145
+ if @noMapKeyDefault
146
+ s.print(" mapMissDefault ")
147
+ PP.singleline_pp(@noMapKeyDefault, s)
148
+ s.print("\n ")
149
+ end
150
+ @marcfieldspecs.each do |spec|
151
+ s.puts " " + spec.asDSLString
152
+ end
153
+ s.puts "end"
154
+ return s.string
155
+ end
156
+
157
+
158
+ # Output as a string representation of a ruby hash
159
+ # @deprecated Use the DSL
160
+
97
161
  def asPPString
98
162
  s = StringIO.new
99
163
  s.print "{\n :solrField=> "
@@ -26,20 +26,37 @@ module MARCSpec
26
26
  end
27
27
 
28
28
 
29
-
29
+ # A collection of the solr field specifications and maps necessary to turn a MARC record
30
+ # into a set of key=>value pairs suitable for sending to Solr
31
+
30
32
  class SpecSet
33
+ include Logback::Simple
34
+
31
35
  attr_accessor :tmaps, :solrfieldspecs, :benchmarks
32
36
 
37
+ # Generic new
33
38
  def initialize
34
39
  @tmaps = {}
35
40
  @solrfieldspecs = []
36
41
  @benchmarks = {}
37
42
  end
38
43
 
44
+ # Get the map object associated with the given name
45
+ # @param [String] name The name of the map you want
46
+ # @return [MARCSpec::Map, nil] Either the map or nil (if not found)
47
+
39
48
  def map name
40
49
  return self.tmaps[name]
41
50
  end
42
51
 
52
+ # Get all the *.rb files in a directory, assume they're maps, and create entries for
53
+ # them in self
54
+ #
55
+ # Simple wrapper around MARCSpec::Map#fromFile. Note that if a mapname is not found
56
+ # in the map structure, the name of the file (without the trailing '.rb') will be used.
57
+ #
58
+ # @param [String] dir The directory to look in. Not recursive.
59
+
43
60
  def loadMapsFromDir dir
44
61
  unless File.exist? dir
45
62
  raise ArgumentError, "Cannot load maps from #{dir}: does not exist"
@@ -50,15 +67,26 @@ module MARCSpec
50
67
  end
51
68
 
52
69
 
53
-
70
+ # Add a map to self, using its name (map#mapname) as a key
71
+ # @param [MARCSpec::Map] map the map to add.
54
72
  def add_map map
55
73
  self.tmaps[map.mapname] = map
56
74
  end
57
75
 
76
+ # Build up a specset from the configuration in the given DSL file
77
+ # Note that this assumes that the maps have already been loaded!!
78
+ #
79
+ # @param [String, IO] f The name of the file, or an open IO object
80
+ # @return [MARCSpec::SpecSet] the new object
81
+
58
82
 
59
- def buildSpecsFromDSLFile file
60
- f = File.open(file)
61
- $LOG.fatal("Can't open file #{file}") unless f
83
+ def buildSpecsFromDSLFile f
84
+ f = File.open(f) if f.is_a? String
85
+
86
+ unless f
87
+ log.fatal("Can't open file #{file}")
88
+ Process.exit
89
+ end
62
90
  self.instance_eval(f.read)
63
91
  self.check_and_fill_maps
64
92
  end
@@ -68,11 +96,10 @@ module MARCSpec
68
96
  if sfs._mapname
69
97
  map = self.map(sfs._mapname)
70
98
  if map
71
- $LOG.debug " Found map #{map.mapname} for solr field #{sfs.solrField}"
99
+ log.debug " Found map #{map.mapname} for solr field #{sfs.solrField}"
72
100
  sfs.map = map
73
101
  else
74
- $LOG.error " Cannot find map #{sfs._mapname} for solr field #{sfs.solrField}"
75
- STDERR.puts "FATAL Cannot find map #{sfs._mapname} for solr field #{sfs.solrField}"
102
+ log.fatal " Cannot find map #{sfs._mapname} for solr field #{sfs.solrField}"
76
103
  Process.exit
77
104
  end
78
105
  end
@@ -80,6 +107,8 @@ module MARCSpec
80
107
  end
81
108
 
82
109
 
110
+ # Build a specset from the result of eval'ing an old-style pp hash.
111
+ # @deprecated Use the DSL
83
112
 
84
113
  def buildSpecsFromList speclist
85
114
  speclist.each do |spechash|
@@ -93,19 +122,23 @@ module MARCSpec
93
122
  if spechash[:mapname]
94
123
  map = self.map(spechash[:mapname])
95
124
  unless map
96
- $LOG.error " Cannot find map #{spechash[:mapname]} for field #{spechash[:solrField]}"
125
+ log.fatal "Cannot find map #{spechash[:mapname]} for field #{spechash[:solrField]}"
97
126
  Process.exit
98
127
  else
99
- $LOG.debug " Found map #{spechash[:mapname]} for field #{spechash[:solrField]}"
128
+ log.debug " Found map #{spechash[:mapname]} for field #{spechash[:solrField]}"
100
129
  solrspec.map = map
101
130
  end
102
131
  end
103
132
  self.add_spec solrspec
104
- $LOG.debug "Added spec #{solrspec.solrField}"
133
+ log.debug "Added spec #{solrspec.solrField}"
105
134
  end
106
135
  end
107
136
 
108
137
 
138
+ # Add a spec, making sure there's a slot in the benchmarking stats to keep track of it
139
+ #
140
+ # @param [MARCSpec::SolrFieldSpec] solrfieldspec The spec to add
141
+
109
142
  def add_spec solrfieldspec
110
143
  self.solrfieldspecs << solrfieldspec
111
144
  @benchmarks[solrfieldspec.solrField.to_s] = Benchmark::Tms.new(0,0,0,0, 0, solrfieldspec.solrField)
@@ -113,12 +146,24 @@ module MARCSpec
113
146
 
114
147
  alias_method :<<, :add_spec
115
148
 
149
+
150
+ # Iterate over each of the solr field specs
116
151
  def each
117
152
  @solrfieldspecs.each do |fs|
118
153
  yield fs
119
154
  end
120
155
  end
121
156
 
157
+ # Fill a hashlike (either a hash or a SolrInputDocument) based on
158
+ # the specs, maps, and passed-in record.
159
+ #
160
+ # Result is the hashlike getting new data added to it. Nothing is returned; it's all
161
+ # side-effects.
162
+ #
163
+ # @param [MARC4J4R::Record] r The record
164
+ # @param [Hash, SolrInputDocument] hashlike The hash-like object that contains previously-generated content
165
+
166
+
122
167
  def fill_hashlike_from_marc r, hashlike
123
168
  @solrfieldspecs.each do |sfs|
124
169
  if sfs.arity == 1
@@ -131,6 +176,14 @@ module MARCSpec
131
176
  end
132
177
  end
133
178
  end
179
+
180
+ # Same as #fill_hashlike_from_marc, but keeps track of how
181
+ # long each solr field takes (cumulative; it's added to every
182
+ # time you get data from a record).
183
+ #
184
+ # @param [MARC4J4R::Record] r The record
185
+ # @param [Hash, SolrInputDocument] hashlike The hash-like object that contains previously-generated content
186
+
134
187
 
135
188
  def fill_hashlike_from_marc_benchmark r, hashlike
136
189
  @solrfieldspecs.each do |sfs|
@@ -148,6 +201,14 @@ module MARCSpec
148
201
  end
149
202
 
150
203
 
204
+ # Get a new SolrInputDocument based on the record passed in.
205
+ # Statistics will optionally be kept, and can be accessed
206
+ # via the @benchmarks instance variable later on.
207
+ #
208
+ # @param [MARC4J4R::Record] r The record
209
+ # @param [Boolean] timeit Whether to keep cumulative benchmarking statistics or not
210
+ # @return [SolrInputDocument] The new, filled SolrInputDocument
211
+
151
212
  def doc_from_marc r, timeit = false
152
213
  doc = SolrInputDocument.new
153
214
  if timeit
@@ -158,6 +219,13 @@ module MARCSpec
158
219
  return doc
159
220
  end
160
221
 
222
+ # Exactly the same as #doc_from_marc, but the return object is a
223
+ # subclass of Hash
224
+ #
225
+ # @param [MARC4J4R::Record] r The record
226
+ # @param [Boolean] timeit Whether to keep cumulative benchmarking statistics or not
227
+ # @return [MockSolrDoc] The new, filled Hash
228
+
161
229
  def hash_from_marc r, timeit = false
162
230
  h = MARCSpec::MockSolrDoc.new
163
231
  if timeit
@@ -14,22 +14,43 @@ module MARCSpec
14
14
  # vfs = MARCSpec::VariableFieldSpec.new('245', 'a'..'b')
15
15
 
16
16
  class VariableFieldSpec
17
-
17
+ include Logback::Simple
18
+
18
19
  attr_accessor :tag, :codes, :joiner, :ind1, :ind2, :codehistory
19
20
 
21
+ # Get a new object
22
+ #
23
+ # @param [String] tag The MARC field tag
24
+ # @param [String, Array] codes The list of subfield codes (as 'abc' or ['a', 'b', 'c']) whose values we want
25
+ # @param [String] joiner What string to use to join the subfield values
26
+ # @return [VariableFieldSpec] the new object
27
+
20
28
  def initialize tag, codes=nil, joiner=' '
21
29
  @tag = tag
22
30
  @joiner = joiner || ' '
23
31
  self.codes = codes
24
32
  @codehistory = []
25
33
  end
26
-
34
+
35
+ # Basic equality
36
+ # @param [VariableFieldSpec] other The other spec
37
+ # @return [Boolean] whether or not it matches in all values
38
+
27
39
  def == other
28
40
  return ((self.tag == other.tag) and
29
41
  (self.codes = other.codes) and
30
42
  (self.joiner = other.joiner))
31
43
  end
32
44
 
45
+
46
+ # Set the list of subfield codes we're concerned with.
47
+ # Internally, we always store this as an array. For input, accept
48
+ # an array of single-letter codes, a string of codes like 'abjk09',
49
+ # or a range like 'a'..'z'. nil means to use all the subfields
50
+ #
51
+ # @param [String, Array<String>, Range<String>, nil] c The code(s) to use
52
+ # @return [Array] the new set of codes
53
+
33
54
  def codes= c
34
55
  @codehistory << @codes if @codes
35
56
  if c.nil?
@@ -37,7 +58,7 @@ module MARCSpec
37
58
  return nil
38
59
  end
39
60
 
40
- if( c.is_a? Array) or (c.is_a? Set) or (c.is_a? Range)
61
+ if( c.is_a? Array) or (c.is_a? Range)
41
62
  @codes = c.to_a
42
63
  else
43
64
  @codes = c.split(//)
@@ -46,6 +67,11 @@ module MARCSpec
46
67
  return @codes
47
68
  end
48
69
 
70
+ # Get the values associated with the tag (and optional subfield codes) for the given record
71
+ #
72
+ # @param [MARC4J4R::Record] r The record you want to extract values from
73
+ # @return [Array<String>] the extracted values, if any
74
+
49
75
  def marc_values r
50
76
  fields = r.find_by_tag(@tag)
51
77
  vals = []
@@ -58,10 +84,30 @@ module MARCSpec
58
84
  return vals
59
85
  end
60
86
 
87
+
88
+ # Get a DSL snippet representing this object
89
+ # @return [String] the DSL string
90
+
91
+ def asDSLString
92
+ subs = @codes.join('')
93
+ if subs.size > 0
94
+ # return "spec('#{@tag}') {subs '#{subs}'}"
95
+ return "spec('#{tag}#{subs}')"
96
+ else
97
+ return "spec('#{@tag}')"
98
+ end
99
+ end
100
+
101
+
102
+ # Print out hash version of this object
103
+ # @deprecated Use the DSL
61
104
  def pretty_print pp
62
105
  pp.pp eval(self.asPPString)
63
106
  end
64
-
107
+
108
+ # Create an eval'able string of a hash version of this object
109
+ # @deprecated Use the DSL
110
+
65
111
  def asPPString
66
112
  s = StringIO.new
67
113
  if @joiner and @joiner != ' '
@@ -72,6 +118,8 @@ module MARCSpec
72
118
  return s.string
73
119
  end
74
120
 
121
+ # Create an object from an asPPString string
122
+ # @deprecated Use the DSL
75
123
  def self.fromPPString str
76
124
  a = eval(str)
77
125
  return self.new(a[0], a[1], a[2])
@@ -25,6 +25,6 @@ describe "Cachespot" do
25
25
 
26
26
  it "should set a value and get it back" do
27
27
  @one.cachespot['one'] = 1
28
- @one.cachespot['one'].should.equal 1
28
+ @one.cachespot['one'].should == 1
29
29
  end
30
30
  end
@@ -29,23 +29,23 @@ describe "ControlFieldSpec" do
29
29
  # 01234567890123 # index
30
30
  it "gets a single full value" do
31
31
  cfs = MARCSpec::ControlFieldSpec.new('001')
32
- cfs.marc_values(@one).should.equal ["afc99990058366"]
32
+ cfs.marc_values(@one).should == ["afc99990058366"]
33
33
  end
34
34
 
35
35
  it "gets a single character" do
36
36
  cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
37
- cfs.marc_values(@one).should.equal ['8']
37
+ cfs.marc_values(@one).should == ['8']
38
38
  end
39
39
 
40
40
  it "gets a range of characters" do
41
41
  cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
42
- cfs.marc_values(@one).should.equal ['90058']
42
+ cfs.marc_values(@one).should == ['90058']
43
43
  end
44
44
 
45
45
  it "should round trip" do
46
46
  cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
47
47
  cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
48
- cfs.should.equal cfs2
48
+ cfs.should == cfs2
49
49
  end
50
50
  end
51
51