marcspec 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.6.0
1
+ 0.7.0
@@ -0,0 +1,93 @@
1
+ require 'marc4j4r'
2
+ require 'set'
3
+ require 'pp'
4
+ module MARCSpec
5
+ # A ControlFieldSpec takes a control tag (generally 001..009) and an optional zero-based range
6
+ # When called with marc_values(record), it returns either the complete value of all
7
+ # occurrences of the field in question (in the order they appear in the record), or
8
+ # the zero-based substrings based on the passed range.
9
+ #
10
+ # @example Get the whole 001
11
+ # cfs = MARCSpec::ControlFieldSpec.new('001')
12
+ #
13
+ # @example Get the first three characters of the 008
14
+ # cfs = MARCSpec::ControlFieldSpec.new('008', 0..2)
15
+ #
16
+ # Note that the use of the zero-based range in this manner conforms to the way MARC
17
+ # substrings are specified.
18
+
19
class ControlFieldSpec
  attr_accessor :tag
  attr_reader :range

  # Build a spec for a single control field.
  #
  # @param [String] tag A tag accepted by MARC4J4R::ControlField.control_tag?
  # @param [nil, Integer, Range] range Optional zero-based character position or substring range
  # @raise [ArgumentError] if the tag is not a control tag, or the range is invalid
  def initialize(tag, range = nil)
    unless MARC4J4R::ControlField.control_tag? tag
      raise ArgumentError, "Tag must be a control tag"
    end
    @tag = tag
    self.range = range
  end

  # Two specs are equal when they address the same tag and the same range.
  # The comparison never modifies either object.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def ==(other)
    other.is_a?(ControlFieldSpec) && tag == other.tag && range == other.range
  end

  # Always force a real range, since in Ruby 1.8 a string subscript with a single integer
  # returns the character code of that character (e.g., "Bill"[0] => 66), whereas
  # "Bill"[0..0] gives the expected 'B' on every Ruby version.
  #
  # @param [nil, Integer, Range] range A zero-based substring range or character position
  # @return [MARCSpec::ControlFieldSpec] self
  # @raise [ArgumentError] if range is a negative integer or an unsupported type
  def range=(range)
    case range
    when nil, Range
      @range = range
    when Integer
      if range < 0
        raise ArgumentError, "Range must be nil, an integer offset (zero-based), or a Range, not #{range}"
      end
      # Normalize a single position to a one-character range
      @range = range..range
    else
      raise ArgumentError, "Range must be nil, an integer offset (zero-based), or a Range, not #{range.inspect}"
    end
    self
  end

  # Pull the values of every occurrence of the control field out of a record.
  #
  # @param [#find_by_tag] r A MARC record; each field returned must respond to #value
  # @return [Array<String>] whole field values, or the substrings selected by the range
  def marc_values(r)
    vals = r.find_by_tag(@tag).map { |f| f.value }
    @range ? vals.map { |v| v[@range] } : vals
  end

  # Pretty-print this spec as the array form used in configuration files.
  def pretty_print(pp)
    pp.pp(spec_array)
  end

  # @return [String] Ruby source for an array ([tag] or [tag, range]) that
  #   fromPPString can turn back into an equivalent spec
  def asPPString
    s = StringIO.new
    PP.pp(spec_array, s)
    s.string
  end

  # Rebuild a spec from the string produced by asPPString.
  #
  # NOTE: the string is eval'd as Ruby code, so only pass trusted configuration text.
  #
  # @param [String] str Ruby source evaluating to [tag] or [tag, range]
  # @return [MARCSpec::ControlFieldSpec]
  def self.fromPPString(str)
    a = eval(str)
    new(*a)
  end

  private

  # The array representation shared by pretty_print and asPPString.
  def spec_array
    @range ? [@tag, @range] : [@tag]
  end
end
93
+ end
@@ -5,10 +5,12 @@ require 'marcspec/solrfieldspec'
5
5
  module MARCSpec
6
6
 
7
7
  # A CustomSolrSpec is a SolrFieldSpec that derives all its values from a custom function. The custom function
8
- # must me a module function that takes a record and an array of other arguments and returns a
8
+ # must be a module function that takes a hash-like document object, a MARC4J4R record, and an array of other arguments and returns a
9
9
  # (possibly empty) list of resulting values.
10
10
  #
11
- # @example
11
+ # See the example file simple_sample/index.rb in the marc2solr project for configuration examples.
12
+ #
13
+ # @example A sample custom function, to be placed in the configuration directory's lib/ subdir
12
14
  # module MARC2Solr
13
15
  # module MyCustomStuff
14
16
  # def self.uppercaseTitle r, args=[]
@@ -19,25 +21,40 @@ module MARCSpec
19
21
  # end
20
22
  # end
21
23
  #
24
+ # @example A simple custom spec made by hand
22
25
  # css = MARCSpec::CustomSolrSpec.new(:module => MARC2Solr::MyCustomStuff,
23
- # :methodSymbol => :uppercaseTitle,
26
+ # :functionSymbol => :uppercaseTitle,
24
27
  # :map => ss.map('mapname')
25
28
  # )
26
- # ss.add_spec(css)
27
29
  #
28
30
  #
29
31
 
30
32
 
31
33
  class CustomSolrSpec < SolrFieldSpec
32
34
 
33
- attr_accessor :module, :methodSymbol, :methodArgs
35
+ attr_accessor :module, :functionSymbol, :methodArgs
36
+
37
+ # Get a new Custom Solr Spec based on the passed in options.
38
+ # @param [Hash] opts Initialization options
39
+ # @option opts [String, Array<String>] :solrField the name(s) of the Solr field(s) that will receive the data derived from this spec
40
+ # @option opts [Module] :module the actual module constant (not a string or symbol representation) which holds the
41
+ # custom function we'll be calling
42
+ # @option opts [Symbol] :functionSymbol A symbol of the name of the custom function
43
+ # @option opts [Boolean] :firstOnly (false) Whether we should return the first found value
44
+ # @option opts [String] :default (nil) The value to return if the custom function returns no values
45
+ # @option opts [MARCSpec::Map] :map (nil) An optional Map used to translate resulting values
46
+ # @option opts [String] :noMapKeyDefault (nil) The value to return if (a) a value is found, (b) a map is defined, but (c) there's
47
+ # no key in the map that matches the value.
48
+ #
49
+ # Note that the last four options don't make sense if multiple :solrFields are given, and are illegal in that case.
50
+
34
51
  def initialize(opts)
35
52
  @solrField = opts[:solrField]
36
53
  @module = opts[:module]
37
- @methodSymbol = opts[:methodSymbol]
54
+ @functionSymbol = opts[:functionSymbol]
38
55
 
39
- unless @solrField and @module and @methodSymbol
40
- raise ArgumentError, "Custom solr spec must have a field name in :solrField, module in :module, and the method name as a symbol in :methodSymbol"
56
+ unless @solrField and @module and @functionSymbol
57
+ raise ArgumentError, "Custom solr spec must have a field name in :solrField, module in :module, and the method name as a symbol in :functionSymbol"
41
58
  end
42
59
 
43
60
 
@@ -60,11 +77,19 @@ module MARCSpec
60
77
 
61
78
  end
62
79
 
80
+ # Get values from a MARC object and/or the previously-filled document object.
81
+ #
82
+ # Note that the doc is read-write here, but for the love of god, just leave it alone.
83
+ #
84
+ # @param [MARC4J4R::Record] r A marc record
85
+ # @param [SolrInputDocument, Hash] doc The document we're constructing.
86
+ # @return [Array<String>] An array of values returned by the custom method
63
87
 
64
88
  def raw_marc_values r, doc
65
- return @module.send(@methodSymbol, doc, r, *@methodArgs)
89
+ return @module.send(@functionSymbol, doc, r, *@methodArgs)
66
90
  end
67
91
 
92
+
68
93
  def self.fromHash h
69
94
  return self.new(h)
70
95
  end
@@ -94,8 +119,8 @@ module MARCSpec
94
119
 
95
120
  s.print(":module => ")
96
121
  PP.singleline_pp(@module, s)
97
- s.print(",\n :methodSymbol => ")
98
- PP.singleline_pp(@methodSymbol, s)
122
+ s.print(",\n :functionSymbol => ")
123
+ PP.singleline_pp(@functionSymbol, s)
99
124
  if @methodArgs
100
125
  s.print(",\n :methodArgs => ")
101
126
  PP.singleline_pp(@methodArgs, s)
@@ -8,6 +8,8 @@ module MARCSpec
8
8
 
9
9
  # A KVMap is, when push comes to shove, just a hash with a name, and the
10
10
  # option of adding a default value for each lookup.
11
+ #
12
+ # The map portion of a kvmap is simply a hash.
11
13
 
12
14
  class KVMap < Map
13
15
 
@@ -38,12 +40,18 @@ module MARCSpec
38
40
  end
39
41
  end
40
42
 
43
+ # Set an element in the map, just like for a regular hash
41
44
  def []= key, value
42
45
  @map[key] = value
43
46
  end
44
47
 
45
48
  alias_method :add, :[]=
46
49
 
50
+
51
+ # Produce a configuration file that will round-trip to this object.
52
+ #
53
+ # @return [String] A string representation of valid ruby code that can be turned back into
54
+ # this object using MARCSpec::Map#fromFile
47
55
  def asPPString
48
56
  s = StringIO.new
49
57
  s.print "{\n :maptype=>:kv,\n :mapname=>"
@@ -54,7 +62,11 @@ module MARCSpec
54
62
  return s.string
55
63
  end
56
64
 
57
-
65
+
66
+ # Translate from a solrmarc map file that has *already been determined* to be a KV map
67
+ # @param [String] filename The path to the solrmarc kv map file
68
+ # @return [MARCSpec::KVMap] a KVMap
69
+
58
70
  def self.from_solrmarc_file filename
59
71
  mapname = File.basename(filename).sub(/\..+?$/, '')
60
72
  map = {}
@@ -0,0 +1,37 @@
1
+ require 'marcspec/controlfieldspec'
2
+
3
+ module MARCSpec
4
+ # A LeaderSpec deals only with the leader. It's basically the same as a controlfield spec, but
5
+ # using the string 'LDR' to identify itself
6
+
7
class LeaderSpec < ControlFieldSpec

  # Built to be syntax-compatible with ControlFieldSpec, the tag must always
  # be 'LDR' (case matters)
  #
  # @param ['LDR'] tag The 'tag'; in this case, always 'LDR'
  # @param [nil, Integer, Range<Integer>] range substring specification (either one character or a range) to return
  #   instead of the whole leader.
  # @raise [ArgumentError] if the tag is anything other than 'LDR'
  def initialize(tag, range = nil)
    unless tag == 'LDR'
      raise ArgumentError, "Tag must be 'LDR' for a LeaderSpec"
    end
    @tag = 'LDR'
    # Goes through the inherited range= writer, which normalizes/validates the value
    self.range = range
  end

  # Return the appropriate value (either the leader or a subset of it) from the
  # given record
  #
  # @param [MARC4J4R::Record] r A MARC4J4R Record
  # @return [String] the leader or substring of the leader
  def marc_values(r)
    @range ? r.leader[@range] : r.leader
  end
end
37
+ end
data/lib/marcspec/map.rb CHANGED
@@ -2,7 +2,12 @@ module MARCSpec
2
2
 
3
3
  # A Map is just a named lookup table. The access
4
4
 # (via []) takes, in addition to a key, an optional
5
- # default value to return
5
+ # default value to return (e.g., val = map[key, defaultIfNotFound])
6
+ #
7
+ # We don't have the default be a part of the map because it might be used
8
+ # in several different contexts.
9
+ #
10
+ # NOTE: THIS IS AN ABSTRACT SUPERCLASS. DO NOT INSTANTIATE IT DIRECTLY
6
11
 
7
12
  class Map
8
13
  attr_accessor :mapname, :map
@@ -10,12 +15,21 @@ module MARCSpec
10
15
  # Create a new map. The passed map is either
11
16
  # a standard hash or a list of duples
12
17
  #
13
- # @param
18
+ # @param [String] mapname The name of this map; can be used to find it later on.
19
+ # @param [Hash, Array<2-value-arrays>] map Either a normal key-value hash (for a KV Map) or an
20
+ # array of duples (2-value arrays) for a MultiValueMap.
14
21
  def initialize(mapname, map)
15
22
  @mapname = mapname
16
23
  @map = map
17
24
  end
18
25
 
26
+ # Load a map from a file, determining what kind it is along the way.
27
+ #
28
+ # The file is valid ruby code; see the subclasses KVMap and MultiValueMap for examples.
29
+ #
30
+ # @param [String] filename The name of the map file to be eval'd
31
+ # @return [MARCSpec::Map] An instance of a subclass of MARCSpec::Map
32
+
19
33
  def self.fromFile filename
20
34
  begin
21
35
  str = File.open(filename).read
@@ -44,14 +58,18 @@ module MARCSpec
44
58
  end
45
59
 
46
60
 
61
+ # Check for map equality
47
62
  def == other
48
63
  return ((other.mapname == self.mapname) and (other.map = self.map))
49
64
  end
50
65
 
66
+ # Generic pretty_print; used mostly for translating from solrmarc
51
67
  def pretty_print pp
52
68
  pp.pp eval(self.asPPString)
53
69
  end
54
70
 
71
+ # Produce a map from the data structure produced by asPPString
72
+ # @param [Hash] rawmap A hash with two keys; :mapname and :map
55
73
  def self.fromHash rawmap
56
74
  return self.new(rawmap[:mapname], rawmap[:map])
57
75
  end
@@ -1,207 +1,3 @@
1
- require 'marc4j4r'
2
- require 'set'
3
- require 'pp'
4
- module MARCSpec
5
-
6
- # A ControlFieldSpec takes a control tag (generally 001..009) and an optional zero-based range
7
- # When called with marc_values(record), it returns either the complete value of all
8
- # occurances of the field in question (in the order they appear in the record), or
9
- # the zero-based substrings based on the passed range.
10
- #
11
- # @example Get the whole 001
12
- # cfs = MARCSpec::ControlTagSpec.new('001')
13
- #
14
- # @example Get the first three characters of the 008
15
- # cfs = MARCSpec::ControlTagSpec.new('001', 0..2)
16
- #
17
- # Note that the use of the zero-based range in this manner conforms to the way MARC
18
- # substrings are specified.
19
-
20
- class ControlFieldSpec
21
- attr_accessor :tag, :range
22
-
23
- def initialize (tag, range=nil)
24
- unless MARC4J4R::ControlField.control_tag? tag
25
- raise ArgumentError "Tag must be a control tag"
26
- end
27
- @tag = tag
28
- self.range = range
29
- end
30
-
31
- def == other
32
- return ((self.tag == other.tag) and
33
- (self.range = other.range))
34
- end
35
-
36
-
37
- # Always force a real range, since in Ruby 1.9 a string subscript with a single fixnum
38
- # will return the character code of that character (e.g., "Bill"[0] => 66, wherease
39
- # "Bill"[0..0] gives the expected 'B'
40
- #
41
- # @param [nil, Fixnum, Range] range A zero-based substring range or character position
42
- # @return [MARCSpec::ControlFieldSpec] self
43
-
44
- def range= range
45
- if range.nil?
46
- @range = nil
47
- return self
48
- end
49
- if range.is_a? Fixnum
50
- if range < 0
51
- raise ArgumentError, "Range must be nil, an integer offset (1-based), or a Range, not #{range}"
52
- end
53
-
54
- @range = range..range
55
-
56
- elsif range.is_a? Range
57
- @range = range
58
- else
59
- raise ArgumentError, "Range must be nil, an integer offset (1-based), or a Range, not #{range.inspect}"
60
- end
61
- return self
62
- end
63
-
64
-
65
- def marc_values r
66
- vals = r.find_by_tag(@tag).map {|f| f.value}
67
- if @range
68
- return vals.map {|v| v[@range]}
69
- else
70
- return vals
71
- end
72
- end
73
-
74
- def pretty_print pp
75
- pp.pp eval(self.asPPString)
76
- end
77
-
78
- def asPPString
79
- s = StringIO.new
80
- if @range
81
- PP.pp([@tag, @range], s)
82
- else
83
- PP.pp([@tag], s)
84
- end
85
- return s.string
86
- end
87
-
88
- def self.fromPPString str
89
- a = eval(str)
90
- return self.new(*a)
91
- end
92
-
93
- end
94
-
95
-
96
- # A LeaderSpec deals only with the leader. It's basically the same as a controlfield spec, but
97
- # using the string 'LDR' to identify itself
98
-
99
- class LeaderSpec < ControlFieldSpec
100
-
101
- # Built to be syntax-compatible with ControlFieldSpec, the tag must always
102
- # be 'LDR' (case matters)
103
- #
104
- # @param ['LDR'] tag The 'tag'; in this case, always 'LDR'
105
- # @param [Fixnum, Range<Fixnum>] range substring specification (either one character or a range) to return
106
- # instead of the whole leader.
107
-
108
- def initialize (tag, range=nil)
109
- unless tag == 'LDR'
110
- raise ArgumentError "Tag must be 'LDR'"
111
- end
112
- @tag = 'LDR'
113
- self.range = range
114
- end
115
-
116
- # Return the appropriate value (either the leader or a subset of it) from the
117
- # given record
118
- #
119
- # @param [MARC4J4R::Record] r A MARC4J4R Record
120
- # @return [String] the leader or substring of the leader
121
- def marc_values r
122
- if @range
123
- return r.leader[@range]
124
- else
125
- return r.leader
126
- end
127
- end
128
- end
129
-
130
-
131
- # A VariableFieldSpec has a tag (three chars) and a set of codes. Its #marc_values(r) method will return
132
- # all the values for the subfields for the given codes joined by the optional joiner (space by default)
133
- #
134
- # The subfield values are presented in the order they appear in the document, *not* the order the subfield
135
- # codes are specified
136
- #
137
- # @example Get the $a from the 245s
138
- # vfs = MARCSpec::VariableFieldSpec.new('245', 'a')
139
- #
140
- # vfs = MARCSpec::VariableFieldSpec.new('245', 'ab')
141
-
142
-
143
- class VariableFieldSpec
144
-
145
- attr_accessor :tag, :codes, :joiner
146
-
147
- def initialize tag, codes=nil, joiner=' '
148
- @tag = tag
149
- @joiner = joiner || ' '
150
- self.codes = codes
151
- end
152
-
153
- def == other
154
- return ((self.tag == other.tag) and
155
- (self.codes = other.codes) and
156
- (self.joiner = other.joiner))
157
- end
158
-
159
- def codes= c
160
- if c.nil?
161
- @codes = nil
162
- return nil
163
- end
164
-
165
- if( c.is_a? Array) or (c.is_a? Set) or (c.is_a? Range)
166
- @codes = c.to_a
167
- else
168
- @codes = c.split(//)
169
- end
170
-
171
- return @codes
172
- end
173
-
174
- def marc_values r
175
- fields = r.find_by_tag(@tag)
176
- vals = []
177
- fields.each do |f|
178
- subvals = f.sub_values(@codes)
179
- subvals = subvals.join(@joiner) if subvals.size > 0 and (@codes.nil? or @codes.size > 1)
180
- vals << subvals
181
- end
182
- vals.flatten!
183
- return vals
184
- end
185
-
186
- def pretty_print pp
187
- pp.pp eval(self.asPPString)
188
- end
189
-
190
- def asPPString
191
- s = StringIO.new
192
- if @joiner and @joiner != ' '
193
- PP.pp([@tag, '*', '*', @codes.join(''), @joiner], s)
194
- else
195
- PP.pp([@tag, '*', '*', @codes.join('')], s)
196
- end
197
- return s.string
198
- end
199
-
200
- def self.fromPPString str
201
- a = eval(str)
202
- return self.new(a[0], a[3], a[4])
203
- end
204
-
205
- end
206
-
207
- end
1
+ require 'marcspec/controlfieldspec';
2
+ require 'marcspec/variablefieldspec';
3
+ require 'marcspec/leaderspec';
@@ -0,0 +1,80 @@
1
+ require 'set'
2
+ require 'pp'
3
+ module MARCSpec
4
+ # A VariableFieldSpec has a tag (three chars) and a set of codes. Its #marc_values(r) method will return
5
+ # all the values for the subfields for the given codes joined by the optional joiner (space by default)
6
+ #
7
+ # The subfield values are presented in the order they appear in the document, *not* the order the subfield
8
+ # codes are specified
9
+ #
10
+ # @example Get the $a from the 245s
11
+ # vfs = MARCSpec::VariableFieldSpec.new('245', 'a')
12
+ # vfs = MARCSpec::VariableFieldSpec.new('245', 'ab')
13
+ # vfs = MARCSpec::VariableFieldSpec.new('245', ['a', 'b'])
14
+ # vfs = MARCSpec::VariableFieldSpec.new('245', 'a'..'b')
15
+
16
class VariableFieldSpec

  attr_accessor :tag, :joiner
  attr_reader :codes

  # @param [String] tag The tag of the variable field
  # @param [nil, String, Array<String>, Set<String>, Range] codes The subfield codes to extract;
  #   a String is split into one code per character, nil is passed through to the field as-is
  # @param [String] joiner String used to join multiple subfield values (a space if nil)
  def initialize(tag, codes = nil, joiner = ' ')
    @tag = tag
    @joiner = joiner || ' '
    self.codes = codes
  end

  # Two specs are equal when tag, codes and joiner all match.
  # The comparison never modifies either object.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def ==(other)
    other.is_a?(VariableFieldSpec) &&
      tag == other.tag &&
      codes == other.codes &&
      joiner == other.joiner
  end

  # Normalize the subfield codes into an array of single codes (or nil).
  #
  # @param [nil, String, Array<String>, Set<String>, Range] c The codes
  # @return [Array<String>, nil] the normalized codes
  def codes=(c)
    @codes =
      if c.nil?
        nil
      elsif c.is_a?(Array) || c.is_a?(Set) || c.is_a?(Range)
        c.to_a
      else
        c.split(//)
      end
  end

  # Extract the subfield values for every occurrence of the tag.
  #
  # When more than one code was requested (or codes is nil), the values found in a single
  # field are joined with the joiner; with exactly one code each value is returned separately.
  # Fields that yield no values contribute nothing.
  #
  # @param [#find_by_tag] r A MARC record; each field returned must respond to #sub_values
  # @return [Array<String>] the extracted values
  def marc_values(r)
    vals = r.find_by_tag(@tag).map do |f|
      subvals = f.sub_values(@codes)
      join_them = subvals.size > 0 && (@codes.nil? || @codes.size > 1)
      join_them ? subvals.join(@joiner) : subvals
    end
    # Un-joined (single-code) and empty results are arrays; flatten them away
    vals.flatten
  end

  # Pretty-print this spec as the array form used in configuration files.
  def pretty_print(pp)
    pp.pp(spec_array)
  end

  # @return [String] Ruby source for an array [tag, '*', '*', codes(, joiner)] that
  #   fromPPString can turn back into an equivalent spec. The two '*' entries are fixed
  #   placeholders; the joiner is only included when it is not the default space.
  def asPPString
    s = StringIO.new
    PP.pp(spec_array, s)
    s.string
  end

  # Rebuild a spec from the string produced by asPPString.
  #
  # NOTE: the string is eval'd as Ruby code, so only pass trusted configuration text.
  #
  # @param [String] str Ruby source evaluating to [tag, ind1, ind2, codes(, joiner)]
  # @return [MARCSpec::VariableFieldSpec]
  def self.fromPPString(str)
    a = eval(str)
    new(a[0], a[3], a[4])
  end

  private

  # The array representation shared by pretty_print and asPPString.
  # nil codes are emitted as nil (rather than crashing on nil.join) so they round-trip.
  def spec_array
    code_string = @codes ? @codes.join('') : nil
    arr = [@tag, '*', '*', code_string]
    arr << @joiner if @joiner && @joiner != ' '
    arr
  end
end
79
+
80
+ end
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "ControlFieldSpec" do
22
+
23
+ before do
24
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
25
+ # @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
26
+ end
27
+
28
+ # afc99990058366 # data
29
+ # 01234567890123 # index
30
+ it "gets a single full value" do
31
+ cfs = MARCSpec::ControlFieldSpec.new('001')
32
+ cfs.marc_values(@one).should.equal ["afc99990058366"]
33
+ end
34
+
35
+ it "gets a single character" do
36
+ cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
37
+ cfs.marc_values(@one).should.equal ['8']
38
+ end
39
+
40
+ it "gets a range of characters" do
41
+ cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
42
+ cfs.marc_values(@one).should.equal ['90058']
43
+ end
44
+
45
+ it "should round trip" do
46
+ cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
47
+ cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
48
+ cfs.should.equal cfs2
49
+ end
50
+ end
51
+
52
+
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "LeaderSpec" do
22
+ before do
23
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
24
+ end
25
+
26
+ it "Works with full leader" do
27
+ cfs = MARCSpec::LeaderSpec.new('LDR')
28
+ cfs.marc_values(@one).should.equal @one.leader
29
+ end
30
+
31
+ it "Works with substring of leader" do
32
+ cfs = MARCSpec::LeaderSpec.new('LDR', 3..5)
33
+ cfs.marc_values(@one).should.equal @one.leader[3..5]
34
+ end
35
+ end
@@ -165,29 +165,29 @@ describe "CustomSolrSpec" do
165
165
  end
166
166
 
167
167
  it "works with no args or map" do
168
- css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :methodSymbol=>:titleUp)
168
+ css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp)
169
169
  css.marc_values(@one).should.equal [@one['245'].value.upcase]
170
170
  end
171
171
 
172
172
  it "accepts nil for no args" do
173
- css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :methodSymbol=>:titleUp, :methodArgs=>nil)
173
+ css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>nil)
174
174
  css.marc_values(@one).should.equal [@one['245'].value.upcase]
175
175
  end
176
176
 
177
177
 
178
178
  it "uses a custom method with args but no map" do
179
- css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :methodSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
179
+ css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
180
180
  css.marc_values(@one).should.equal [@titleACValue.upcase]
181
181
  end
182
182
 
183
183
  it "works with a map" do
184
- css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :methodSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
184
+ css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
185
185
  css.marc_values(@one).should.equal [@mapValue]
186
186
  end
187
187
 
188
188
  it "works with a map that has multiple return values" do
189
189
  @map[@titleACValue.upcase] = ['two', 'one']
190
- css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :methodSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
190
+ css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
191
191
  css.marc_values(@one).should.equal ['two', 'one']
192
192
  end
193
193
 
data/spec/specset_spec.rb CHANGED
@@ -65,7 +65,7 @@ describe "SpecSet Basics" do
65
65
  end
66
66
 
67
67
  it "allows customs that reference previous work" do
68
- @speclist << {:solrField=>'titleSort', :module=>A::B, :methodSymbol=>:sortable, :methodArgs=>['title']}
68
+ @speclist << {:solrField=>'titleSort', :module=>A::B, :functionSymbol=>:sortable, :methodArgs=>['title']}
69
69
  ss = MARCSpec::SpecSet.new
70
70
  ss.buildSpecsFromList(@speclist)
71
71
  h = ss.hash_from_marc @one
@@ -86,7 +86,7 @@ describe "SpecSet Basics" do
86
86
  it "should allow multi-headed custom fields" do
87
87
  @speclist << {:solrField => ['one', 'two', 'letters'],
88
88
  :module => A::B,
89
- :methodSymbol => :three_value_custom,
89
+ :functionSymbol => :three_value_custom,
90
90
  }
91
91
  ss = MARCSpec::SpecSet.new
92
92
  ss.buildSpecsFromList(@speclist)
@@ -18,56 +18,6 @@ require 'spec_helper'
18
18
  # 852 $a American Folklife Center, Library of Congress
19
19
  # 852 $a DLC
20
20
 
21
- describe "ControlFieldSpec" do
22
-
23
- before do
24
- @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
25
- # @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
26
- end
27
-
28
- # afc99990058366 # data
29
- # 01234567890123 # index
30
- it "gets a single full value" do
31
- cfs = MARCSpec::ControlFieldSpec.new('001')
32
- cfs.marc_values(@one).should.equal ["afc99990058366"]
33
- end
34
-
35
- it "gets a single character" do
36
- cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
37
- cfs.marc_values(@one).should.equal ['8']
38
- end
39
-
40
- it "gets a range of characters" do
41
- cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
42
- cfs.marc_values(@one).should.equal ['90058']
43
- end
44
-
45
- it "should round trip" do
46
- cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
47
- cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
48
- cfs.should.equal cfs2
49
- end
50
- end
51
-
52
-
53
- describe "LeaderSpec" do
54
- before do
55
- @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
56
- end
57
-
58
- it "Works with full leader" do
59
- cfs = MARCSpec::LeaderSpec.new('LDR')
60
- cfs.marc_values(@one).should.equal @one.leader
61
- end
62
-
63
- it "Works with substring of leader" do
64
- cfs = MARCSpec::LeaderSpec.new('LDR', 3..5)
65
- cfs.marc_values(@one).should.equal @one.leader[3..5]
66
- end
67
- end
68
-
69
-
70
-
71
21
  describe "VariableFieldSpec" do
72
22
  before do
73
23
  @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marcspec
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 3
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 6
8
+ - 7
9
9
  - 0
10
- version: 0.6.0
10
+ version: 0.7.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - BillDueber
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-08-17 00:00:00 -04:00
18
+ date: 2010-08-19 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -96,14 +96,16 @@ files:
96
96
  - Rakefile
97
97
  - VERSION
98
98
  - lib/marcspec.rb
99
+ - lib/marcspec/controlfieldspec.rb
99
100
  - lib/marcspec/customspec.rb
100
101
  - lib/marcspec/kvmap.rb
102
+ - lib/marcspec/leaderspec.rb
101
103
  - lib/marcspec/map.rb
102
104
  - lib/marcspec/marcfieldspec.rb
103
105
  - lib/marcspec/multivaluemap.rb
104
106
  - lib/marcspec/solrfieldspec.rb
105
107
  - lib/marcspec/specset.rb
106
- - lib/orig.rb
108
+ - lib/marcspec/variablefieldspec.rb
107
109
  - spec/data/batch.dat
108
110
  - spec/data/one.dat
109
111
  - spec/data/umich/translation_maps/area_map.properties
@@ -121,11 +123,12 @@ files:
121
123
  - spec/data/umich/translation_maps/location_map.properties
122
124
  - spec/data/umich/umich_index.properties
123
125
  - spec/maps_spec.rb
124
- - spec/marcfieldspecs_spec.rb
125
- - spec/marcspec_spec.rb
126
126
  - spec/solrfieldspec_spec.rb
127
127
  - spec/spec_helper.rb
128
128
  - spec/specset_spec.rb
129
+ - spec/controlfieldspec_spec.rb
130
+ - spec/leaderspec_spec.rb
131
+ - spec/variablefieldspec_spec.rb
129
132
  has_rdoc: true
130
133
  homepage: http://github.com/billdueber/marcspec
131
134
  licenses: []
@@ -161,9 +164,10 @@ signing_key:
161
164
  specification_version: 3
162
165
  summary: Extract data from MARC records and send to Solr
163
166
  test_files:
167
+ - spec/controlfieldspec_spec.rb
168
+ - spec/leaderspec_spec.rb
164
169
  - spec/maps_spec.rb
165
- - spec/marcfieldspecs_spec.rb
166
- - spec/marcspec_spec.rb
167
170
  - spec/solrfieldspec_spec.rb
168
171
  - spec/spec_helper.rb
169
172
  - spec/specset_spec.rb
173
+ - spec/variablefieldspec_spec.rb
data/lib/orig.rb DELETED
@@ -1,288 +0,0 @@
1
- require 'set'
2
- require 'pp'
3
- require 'logger'
4
-
5
- $LOG ||= Logger.new(STDERR)
6
-
7
- module MARCSpec
8
-
9
- class MapSpec
10
- attr_accessor :map, :type, :default
11
-
12
- def initialize(type, map, default=nil)
13
- @type = type
14
- @default = default
15
- @map = map
16
- end
17
-
18
- def [] key
19
- if (@type == :kv)
20
- if @map.has_key? key
21
- return @map[key]
22
- else
23
- return @default
24
- end
25
- end
26
-
27
- # For a pattern, we find all that match.
28
-
29
- if (@type == :pattern)
30
- rv = []
31
- @map.each do |pv|
32
- pat = pv[0]
33
- val = pv[1]
34
- # puts "Trying pattern #{pat} against #{key}"
35
- if pat.match(key)
36
- rv << val
37
- # puts "Matched: adding #{val}"
38
- end
39
- end
40
- rv.uniq!
41
- if rv.size > 0
42
- return rv
43
- else
44
- return @default
45
- end
46
- end
47
- end
48
- end
49
-
50
- class CustomSpec
51
- def initialize(proc, args)
52
- @proc = proc
53
- @args = args
54
- end
55
-
56
- def marc_values_hash fieldnames, r
57
- a = @proc(r, args)
58
- rv = {}
59
- fieldnames.each_with_index do |fn, i|
60
- rv[fn] = a[i]
61
- end
62
- return rv
63
- end
64
- end
65
-
66
-
67
- class TagSpec
68
- attr_accessor :tag, :codes, :joiner, :parent, :ind1, :ind2, :range, :is_control
69
-
70
- def initialize(tag, codes=nil)
71
- @codes = Set.new
72
- @tag = tag
73
- @joiner = ' '
74
- @substr = nil
75
- tagint = tag.to_i
76
- @is_control = (tagint != 0 and tagint < 10)
77
- if (codes)
78
- self.codes = codes
79
- end
80
- end
81
-
82
- def range= newrange
83
- if newrange =~ /\s*(\d+)-(\d+)/
84
- start = $1.to_i
85
- last = $2.to_i
86
- @range = start..last
87
- else
88
- se = newrange.to_i
89
- @range = se..se
90
- end
91
- end
92
-
93
- def codes= newcodes
94
- if newcodes.is_a? Range
95
- @codes = newcodes.to_a
96
- elsif newcodes !~ /\S/
97
- @codes = nil
98
- # Otherwise, just split into individual characters
99
- else
100
- @codes = newcodes.split(//)
101
- end
102
- end
103
-
104
- def marc_values r
105
- if @is_control
106
- vals = r.find_by_tag(@tag).map {|f| f.value}
107
- # puts "Start with #{vals.join(', ')}"
108
- if @range
109
- vals.map! {|v| v[@range]}
110
- end
111
- # puts "End with #{vals.join(', ')}"
112
-
113
- else
114
- fields = r.find_by_tag(@tag)
115
- vals = []
116
- fields.each do |f|
117
- subvals = f.sub_values(@codes)
118
- vals << subvals.join(@joiner) if subvals.size > 0
119
- end
120
- end
121
- # puts vals.join(', ')
122
- return vals
123
- end
124
-
125
- end
126
-
127
- class FieldSpec
128
- attr_accessor :field, :first, :map, :tagspecs
129
-
130
- def initialize(opts)
131
- @field = opts[:field]
132
- @first = opts[:first] || false
133
- @map = opts[:map] || nil
134
- @tagspecs = []
135
- end
136
-
137
- def << tagspec
138
- tagspec.parent = self
139
- @tagspecs << tagspec
140
- end
141
-
142
- def marc_values r
143
- vals = []
144
- # puts "Tagspecs has #{@tagspecs.size} items"
145
- @tagspecs.each do |ts|
146
- vals.concat ts.marc_values(r)
147
- # puts vals.join(', ')
148
- break if @first and vals.size > 0
149
- end
150
-
151
- if (@map)
152
- vals.map! {|v| @map[v]}
153
- # vals.each do |v|
154
- # puts "Map: #{v} => #{@map[v].to_s}"
155
- # end
156
- end
157
- vals.flatten!
158
- vals.uniq!
159
- vals.compact!
160
- return vals
161
- end
162
-
163
- end
164
-
165
-
166
- class SpecSet
167
- attr_accessor :tmaps, :fieldspecs
168
- def initialize(*args)
169
- tmapdir = args.pop!
170
- unless File.directory? tmapdir
171
- $LOG.error "Directory #{tmapdir} not found"
172
- raise LoadError, "Directory #{tmapdir} not found"
173
- end
174
-
175
- @tmaps = {}
176
- Dir.glob(tmapdir + '/*.rb') do |fn|
177
- basename = File.basename(fn).sub(/\.rb$/, '')
178
- $LOG.info "Loading translation map #{basename}"
179
-
180
- begin
181
- rawmap = eval(File.open(fn).read)
182
- @tmaps[basename] = MapSpec.new(rawmap[:type], rawmap[:map], rawmap[:default])
183
- rescue SyntaxError
184
- $LOG.error "Error processing translation map file #{fn}: #{$!}"
185
- raise SyntaxError, $!
186
- end
187
-
188
- end
189
-
190
- @fieldspecs = []
191
-
192
- # Get the index files
193
- args.each do |indexfile|
194
- begin
195
- unless File.exists? indexfile
196
- $LOG.error "File #{indexfile} does not exist"
197
- raise LoadError, "File #{indexfile} does not exist"
198
- end
199
- $LOG.info "Loading index file #{indexfile}"
200
- rawindex = eval(File.open(indexfile).read)
201
- rawindex.each do |entry|
202
- fs = FieldSpec.new(:field => entry[:solrField], :first=>entry[:firstOnly])
203
- mapname = entry[:map]
204
- if mapname
205
- if @tmaps.has_key? mapname
206
- fs.map = @tmaps[mapname]
207
- else
208
- $LOG.error "Can't find map #{mapname}"
209
- end
210
- end
211
- entry[:specs].each do |entryts|
212
-
213
- # A one- or two-element entry is a control field
214
- # A three element entry (tag, ind1, ind2) is all subs of a field (need to implement)
215
- # A four element field is tag, ind1, ind2, subs
216
- # A five element field is tag, ind1, ind2, subs, joiner
217
-
218
-
219
- tag = entryts[0]
220
-
221
- # Is tag the symbol :custom? Then make it a custom item
222
-
223
- if tag == :custom
224
- ts = CustomSpec.new(entryts[1], entryts[2..-1])
225
- fs << ts
226
- next
227
- end
228
-
229
- # If it's not custom, the solrField better be a scale
230
- if entry[:solrField].is_a? Array
231
- # log an error and bail out
232
- end
233
-
234
- # Otherwise, it's a tag spec
235
- if tag.is_a? Fixnum
236
- tag = '%03d' % tag
237
- end
238
-
239
-
240
- ts = TagSpec.new(tag)
241
- if entryts.size < 3
242
- ts.is_control = true
243
- ts.range = entryts[1] if entryts[1]
244
- else
245
- ts.ind1 = entryts[1]
246
- ts.ind2 = entryts[2]
247
- ts.codes = entryts[3]
248
- ts.joiner = entryts[4] if entryts[4]
249
- end
250
- fs << ts
251
- end
252
- self << fs
253
- end
254
- rescue SyntaxError
255
- $LOG.error "Error processing index file #{indexfile}: #{$!}"
256
- raise SyntaxError
257
- end
258
- end
259
- end
260
-
261
- def each
262
- @fieldspecs.each do |fs|
263
- yield fs
264
- end
265
- end
266
-
267
- def << fieldspec
268
- @fieldspecs << fieldspec
269
- end
270
-
271
- def doc_from_marc r
272
- doc = SolrInputDocument.new
273
- @fieldspecs.each do |fs|
274
- doc[fs.field] = fs.marc_values(r)
275
- end
276
- return doc
277
- end
278
-
279
- def hash_from_marc r
280
- h = {}
281
- @fieldspecs.each do |fs|
282
- h[fs.field] = fs.marc_values(r)
283
- end
284
- return h
285
- end
286
-
287
- end
288
- end
@@ -1,10 +0,0 @@
1
-
2
-
3
-
4
-
5
-
6
-
7
-
8
-
9
-
10
-