marcspec 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.6.0
1
+ 0.7.0
@@ -0,0 +1,93 @@
1
+ require 'marc4j4r'
2
+ require 'set'
3
+ require 'pp'
4
+ module MARCSpec
5
# A ControlFieldSpec takes a control tag (generally 001..009) and an optional zero-based range.
# When called with marc_values(record), it returns either the complete value of all
# occurrences of the field in question (in the order they appear in the record), or
# the zero-based substrings based on the passed range.
#
# @example Get the whole 001
#   cfs = MARCSpec::ControlFieldSpec.new('001')
#
# @example Get the first three characters of the 008
#   cfs = MARCSpec::ControlFieldSpec.new('008', 0..2)
#
# Note that the use of the zero-based range in this manner conforms to the way MARC
# substrings are specified.
class ControlFieldSpec
  # :range has a hand-written writer below, so only the reader is generated here.
  attr_accessor :tag
  attr_reader :range

  # @param [String] tag A control tag, as judged by MARC4J4R::ControlField.control_tag?
  # @param [nil, Integer, Range] range Optional zero-based character position or substring range
  # @raise [ArgumentError] if the tag is not a control tag or the range is unusable
  def initialize(tag, range = nil)
    unless MARC4J4R::ControlField.control_tag? tag
      raise ArgumentError, "Tag must be a control tag"
    end
    @tag = tag
    self.range = range
  end

  # Two specs are equal when both tag and range match. This is a pure
  # comparison; neither operand is modified.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def ==(other)
    return false unless other.respond_to?(:tag) && other.respond_to?(:range)
    tag == other.tag && range == other.range
  end

  # Always store a real Range. On Ruby 1.8 a string subscript with a single integer
  # returns the character code (e.g., "Bill"[0] => 66), whereas "Bill"[0..0] gives
  # the expected 'B' on every Ruby version.
  #
  # @param [nil, Integer, Range] range A zero-based substring range or character position
  # @return [MARCSpec::ControlFieldSpec] self
  # @raise [ArgumentError] if range is a negative integer or some other type
  def range=(range)
    if range.nil?
      @range = nil
    elsif range.is_a? Integer
      # Integer (not Fixnum) so this works on Rubies where Fixnum no longer exists.
      if range < 0
        raise ArgumentError, "Range must be nil, a non-negative integer offset (0-based), or a Range, not #{range}"
      end
      @range = range..range
    elsif range.is_a? Range
      @range = range
    else
      raise ArgumentError, "Range must be nil, a non-negative integer offset (0-based), or a Range, not #{range.inspect}"
    end
    self
  end

  # Pull the values for this spec out of a record.
  #
  # @param [#find_by_tag] r A record whose find_by_tag(tag) yields fields responding to #value
  # @return [Array<String>] The full field values, or the @range substring of each
  def marc_values(r)
    vals = r.find_by_tag(@tag).map { |f| f.value }
    @range ? vals.map { |v| v[@range] } : vals
  end

  # Pretty-print as the same array literal that asPPString produces.
  def pretty_print(pp)
    pp.pp spec_array
  end

  # @return [String] Ruby source for an array ([tag] or [tag, range]) that
  #   fromPPString can turn back into an equivalent spec
  def asPPString
    # A plain String is a valid PP output target (it only needs #<<).
    PP.pp(spec_array, ''.dup)
  end

  # Rebuild a spec from the output of asPPString.
  #
  # NOTE(review): this evals its argument; only feed it trusted configuration text.
  #
  # @param [String] str Ruby source for an array of constructor arguments
  # @return [MARCSpec::ControlFieldSpec]
  def self.fromPPString(str)
    a = eval(str)
    new(*a)
  end

  private

  # The constructor arguments that reproduce this spec.
  def spec_array
    @range ? [@tag, @range] : [@tag]
  end
end
93
+ end
@@ -5,10 +5,12 @@ require 'marcspec/solrfieldspec'
5
5
  module MARCSpec
6
6
 
7
7
  # A CustomSolrSpec is a SolrFieldSpec that derives all its values from a custom function. The custom function
8
- # must me a module function that takes a record and an array of other arguments and returns a
8
+ # must be a module function that takes a hash-like document object, a MARC4J4R record, and an array of other arguments and returns a
9
9
  # (possibly empty) list of resulting values.
10
10
  #
11
- # @example
11
+ # See the example file simple_sample/index.rb in the marc2solr project for configuration examples.
12
+ #
13
+ # @example A sample custom function, to be placed in the configuration directory's lib/ subdir
12
14
  # module MARC2Solr
13
15
  # module MyCustomStuff
14
16
  # def self.uppercaseTitle r, args=[]
@@ -19,25 +21,40 @@ module MARCSpec
19
21
  # end
20
22
  # end
21
23
  #
24
+ # @example A simple custom spec made by hand
22
25
  # css = MARCSpec::CustomSolrSpec.new(:module => MARC2Solr::MyCustomStuff,
23
- # :methodSymbol => :uppercaseTitle,
26
+ # :functionSymbol => :uppercaseTitle,
24
27
  # :map => ss.map('mapname')
25
28
  # )
26
- # ss.add_spec(css)
27
29
  #
28
30
  #
29
31
 
30
32
 
31
33
  class CustomSolrSpec < SolrFieldSpec
32
34
 
33
- attr_accessor :module, :methodSymbol, :methodArgs
35
+ attr_accessor :module, :functionSymbol, :methodArgs
36
+
37
+ # Get a new Custom Solr Spec based on the passed in options.
38
+ # @param [Hash] opts Initialization options
39
+ # @option opts [String, Array<String>] :solrField the name(s) of the Solr field(s) that will receive the data derived from this spec
40
+ # @option opts [Module] :module the actual module constant (not a string or symbol representation) which holds the
41
+ # custom function we'll be calling
42
+ # @option opts [Symbol] :functionSymbol A symbol of the name of the custom function
43
+ # @option opts [Boolean] :firstOnly (false) Whether we should return the first found value
44
+ # @option opts [String] :default (nil) The value to return if the custom function returns no values
45
+ # @option opts [MARCSpec::Map] :map (nil) An optional Map used to translate resulting values
46
+ # @option opts [String] :noMapKeyDefault (nil) The value to return if (a) a value is found, (b) a map is defined, but (c) there's
47
+ # no key in the map that matches the value.
48
+ #
49
+ # Note that the last four options don't make sense if multiple :solrFields are given, and are illegal in that case.
50
+
34
51
  def initialize(opts)
35
52
  @solrField = opts[:solrField]
36
53
  @module = opts[:module]
37
- @methodSymbol = opts[:methodSymbol]
54
+ @functionSymbol = opts[:functionSymbol]
38
55
 
39
- unless @solrField and @module and @methodSymbol
40
- raise ArgumentError, "Custom solr spec must have a field name in :solrField, module in :module, and the method name as a symbol in :methodSymbol"
56
+ unless @solrField and @module and @functionSymbol
57
+ raise ArgumentError, "Custom solr spec must have a field name in :solrField, module in :module, and the method name as a symbol in :functionSymbol"
41
58
  end
42
59
 
43
60
 
@@ -60,11 +77,19 @@ module MARCSpec
60
77
 
61
78
  end
62
79
 
80
+ # Get values from a MARC object and/or the previously-filled document object.
81
+ #
82
+ # Note that the doc is read-write here, but for the love of god, just leave it alone.
83
+ #
84
+ # @param [MARC4J4R::Record] r A marc record
85
+ # @param [SolrInputDocument, Hash] doc The document we're constructing.
86
+ # @return [Array<String>] An array of values returned by the custom method
63
87
 
64
88
  def raw_marc_values r, doc
65
- return @module.send(@methodSymbol, doc, r, *@methodArgs)
89
+ return @module.send(@functionSymbol, doc, r, *@methodArgs)
66
90
  end
67
91
 
92
+
68
93
  def self.fromHash h
69
94
  return self.new(h)
70
95
  end
@@ -94,8 +119,8 @@ module MARCSpec
94
119
 
95
120
  s.print(":module => ")
96
121
  PP.singleline_pp(@module, s)
97
- s.print(",\n :methodSymbol => ")
98
- PP.singleline_pp(@methodSymbol, s)
122
+ s.print(",\n :functionSymbol => ")
123
+ PP.singleline_pp(@functionSymbol, s)
99
124
  if @methodArgs
100
125
  s.print(",\n :methodArgs => ")
101
126
  PP.singleline_pp(@methodArgs, s)
@@ -8,6 +8,8 @@ module MARCSpec
8
8
 
9
9
  # A KVMap is, when push comes to shove, just a hash with a name, and the
10
10
  # option of adding a default value for each lookup.
11
+ #
12
+ # The map portion of a kvmap is simply a hash.
11
13
 
12
14
  class KVMap < Map
13
15
 
@@ -38,12 +40,18 @@ module MARCSpec
38
40
  end
39
41
  end
40
42
 
43
+ # Set an element in the map, just like for a regular hash
41
44
  def []= key, value
42
45
  @map[key] = value
43
46
  end
44
47
 
45
48
  alias_method :add, :[]=
46
49
 
50
+
51
+ # Produce a configuration file that will round-trip to this object.
52
+ #
53
+ # @return [String] A string representation of valid ruby code that can be turned back into
54
+ # this object using MARCSpec::Map#fromFile
47
55
  def asPPString
48
56
  s = StringIO.new
49
57
  s.print "{\n :maptype=>:kv,\n :mapname=>"
@@ -54,7 +62,11 @@ module MARCSpec
54
62
  return s.string
55
63
  end
56
64
 
57
-
65
+
66
+ # Translate from a solrmarc map file that has *already been determined* to be a KV map
67
+ # @param [String] filename The path to the solrmarc kv map file
68
+ # @return [MARCSpec::KVMap] a KVMap
69
+
58
70
  def self.from_solrmarc_file filename
59
71
  mapname = File.basename(filename).sub(/\..+?$/, '')
60
72
  map = {}
@@ -0,0 +1,37 @@
1
+ require 'marcspec/controlfieldspec'
2
+
3
+ module MARCSpec
4
# A LeaderSpec deals only with the leader. It's basically the same as a controlfield spec, but
# using the string 'LDR' to identify itself
class LeaderSpec < ControlFieldSpec

  # Built to be syntax-compatible with ControlFieldSpec, the tag must always
  # be 'LDR' (case matters)
  #
  # @param ['LDR'] tag The 'tag'; in this case, always 'LDR'
  # @param [Integer, Range<Integer>] range substring specification (either one character or a range) to return
  #   instead of the whole leader.
  # @raise [ArgumentError] if the tag is anything other than 'LDR'
  def initialize(tag, range = nil)
    unless tag == 'LDR'
      raise ArgumentError, "Tag must be 'LDR' for a LeaderSpec"
    end
    @tag = 'LDR'
    # Goes through the inherited range= writer.
    self.range = range
  end

  # Return the appropriate value (either the leader or a subset of it) from the
  # given record
  #
  # @param [MARC4J4R::Record] r A MARC4J4R Record
  # @return [String] the leader or substring of the leader
  def marc_values(r)
    @range ? r.leader[@range] : r.leader
  end
end
37
+ end
data/lib/marcspec/map.rb CHANGED
@@ -2,7 +2,12 @@ module MARCSpec
2
2
 
3
3
  # A Map is just a named lookup table. The access
4
4
  # (via []) takes, in addition to a key, an optional
5
- # default value to return
5
+ # default value to return (e.g., val = map[key, defaultIfNotFound])
6
+ #
7
+ # We don't have the default be a part of the map because it might be used
8
+ # in several different contexts.
9
+ #
10
+ # NOTE: THIS IS AN ABSTRACT SUPERCLASS. DO NOT INSTANTIATE IT DIRECTLY
6
11
 
7
12
  class Map
8
13
  attr_accessor :mapname, :map
@@ -10,12 +15,21 @@ module MARCSpec
10
15
  # Create a new map. The passed map is either
11
16
  # a standard hash or a list of duples
12
17
  #
13
- # @param
18
+ # @param [String] mapname The name of this map; can be used to find it later on.
19
+ # @param [Hash, Array<2-value-arrays>] map Either a normal key-value hash (for a KV Map) or an
20
+ # array of duples (2-value arrays) for a MultiValueMap.
14
21
  def initialize(mapname, map)
15
22
  @mapname = mapname
16
23
  @map = map
17
24
  end
18
25
 
26
+ # Load a map from a file, determining what kind it is along the way.
27
+ #
28
+ # The file is valid ruby code; see the subclasses KVMap and MultiValueMap for examples.
29
+ #
30
+ # @param [String] filename The name of the map file to be eval'd
31
+ # @return [MARCSpec::Map] An instance of a subclass of MARCSpec::Map
32
+
19
33
  def self.fromFile filename
20
34
  begin
21
35
  str = File.open(filename).read
@@ -44,14 +58,18 @@ module MARCSpec
44
58
  end
45
59
 
46
60
 
61
+ # Check for map equality
47
62
  def == other
48
63
  return ((other.mapname == self.mapname) and (other.map = self.map))
49
64
  end
50
65
 
66
+ # Generic pretty_print; used mostly for translating from solrmarc
51
67
  def pretty_print pp
52
68
  pp.pp eval(self.asPPString)
53
69
  end
54
70
 
71
+ # Produce a map from the data structure produced by asPPString
72
+ # @param [Hash] rawmap A hash with two keys; :mapname and :map
55
73
  def self.fromHash rawmap
56
74
  return self.new(rawmap[:mapname], rawmap[:map])
57
75
  end
@@ -1,207 +1,3 @@
1
- require 'marc4j4r'
2
- require 'set'
3
- require 'pp'
4
- module MARCSpec
5
-
6
- # A ControlFieldSpec takes a control tag (generally 001..009) and an optional zero-based range
7
- # When called with marc_values(record), it returns either the complete value of all
8
- # occurances of the field in question (in the order they appear in the record), or
9
- # the zero-based substrings based on the passed range.
10
- #
11
- # @example Get the whole 001
12
- # cfs = MARCSpec::ControlTagSpec.new('001')
13
- #
14
- # @example Get the first three characters of the 008
15
- # cfs = MARCSpec::ControlTagSpec.new('001', 0..2)
16
- #
17
- # Note that the use of the zero-based range in this manner conforms to the way MARC
18
- # substrings are specified.
19
-
20
- class ControlFieldSpec
21
- attr_accessor :tag, :range
22
-
23
- def initialize (tag, range=nil)
24
- unless MARC4J4R::ControlField.control_tag? tag
25
- raise ArgumentError "Tag must be a control tag"
26
- end
27
- @tag = tag
28
- self.range = range
29
- end
30
-
31
- def == other
32
- return ((self.tag == other.tag) and
33
- (self.range = other.range))
34
- end
35
-
36
-
37
- # Always force a real range, since in Ruby 1.9 a string subscript with a single fixnum
38
- # will return the character code of that character (e.g., "Bill"[0] => 66, wherease
39
- # "Bill"[0..0] gives the expected 'B'
40
- #
41
- # @param [nil, Fixnum, Range] range A zero-based substring range or character position
42
- # @return [MARCSpec::ControlFieldSpec] self
43
-
44
- def range= range
45
- if range.nil?
46
- @range = nil
47
- return self
48
- end
49
- if range.is_a? Fixnum
50
- if range < 0
51
- raise ArgumentError, "Range must be nil, an integer offset (1-based), or a Range, not #{range}"
52
- end
53
-
54
- @range = range..range
55
-
56
- elsif range.is_a? Range
57
- @range = range
58
- else
59
- raise ArgumentError, "Range must be nil, an integer offset (1-based), or a Range, not #{range.inspect}"
60
- end
61
- return self
62
- end
63
-
64
-
65
- def marc_values r
66
- vals = r.find_by_tag(@tag).map {|f| f.value}
67
- if @range
68
- return vals.map {|v| v[@range]}
69
- else
70
- return vals
71
- end
72
- end
73
-
74
- def pretty_print pp
75
- pp.pp eval(self.asPPString)
76
- end
77
-
78
- def asPPString
79
- s = StringIO.new
80
- if @range
81
- PP.pp([@tag, @range], s)
82
- else
83
- PP.pp([@tag], s)
84
- end
85
- return s.string
86
- end
87
-
88
- def self.fromPPString str
89
- a = eval(str)
90
- return self.new(*a)
91
- end
92
-
93
- end
94
-
95
-
96
- # A LeaderSpec deals only with the leader. It's basically the same as a controlfield spec, but
97
- # using the string 'LDR' to identify itself
98
-
99
- class LeaderSpec < ControlFieldSpec
100
-
101
- # Built to be syntax-compatible with ControlFieldSpec, the tag must always
102
- # be 'LDR' (case matters)
103
- #
104
- # @param ['LDR'] tag The 'tag'; in this case, always 'LDR'
105
- # @param [Fixnum, Range<Fixnum>] range substring specification (either one character or a range) to return
106
- # instead of the whole leader.
107
-
108
- def initialize (tag, range=nil)
109
- unless tag == 'LDR'
110
- raise ArgumentError "Tag must be 'LDR'"
111
- end
112
- @tag = 'LDR'
113
- self.range = range
114
- end
115
-
116
- # Return the appropriate value (either the leader or a subset of it) from the
117
- # given record
118
- #
119
- # @param [MARC4J4R::Record] r A MARC4J4R Record
120
- # @return [String] the leader or substring of the leader
121
- def marc_values r
122
- if @range
123
- return r.leader[@range]
124
- else
125
- return r.leader
126
- end
127
- end
128
- end
129
-
130
-
131
- # A VariableFieldSpec has a tag (three chars) and a set of codes. Its #marc_values(r) method will return
132
- # all the values for the subfields for the given codes joined by the optional joiner (space by default)
133
- #
134
- # The subfield values are presented in the order they appear in the document, *not* the order the subfield
135
- # codes are specified
136
- #
137
- # @example Get the $a from the 245s
138
- # vfs = MARCSpec::VariableFieldSpec.new('245', 'a')
139
- #
140
- # vfs = MARCSpec::VariableFieldSpec.new('245', 'ab')
141
-
142
-
143
- class VariableFieldSpec
144
-
145
- attr_accessor :tag, :codes, :joiner
146
-
147
- def initialize tag, codes=nil, joiner=' '
148
- @tag = tag
149
- @joiner = joiner || ' '
150
- self.codes = codes
151
- end
152
-
153
- def == other
154
- return ((self.tag == other.tag) and
155
- (self.codes = other.codes) and
156
- (self.joiner = other.joiner))
157
- end
158
-
159
- def codes= c
160
- if c.nil?
161
- @codes = nil
162
- return nil
163
- end
164
-
165
- if( c.is_a? Array) or (c.is_a? Set) or (c.is_a? Range)
166
- @codes = c.to_a
167
- else
168
- @codes = c.split(//)
169
- end
170
-
171
- return @codes
172
- end
173
-
174
- def marc_values r
175
- fields = r.find_by_tag(@tag)
176
- vals = []
177
- fields.each do |f|
178
- subvals = f.sub_values(@codes)
179
- subvals = subvals.join(@joiner) if subvals.size > 0 and (@codes.nil? or @codes.size > 1)
180
- vals << subvals
181
- end
182
- vals.flatten!
183
- return vals
184
- end
185
-
186
- def pretty_print pp
187
- pp.pp eval(self.asPPString)
188
- end
189
-
190
- def asPPString
191
- s = StringIO.new
192
- if @joiner and @joiner != ' '
193
- PP.pp([@tag, '*', '*', @codes.join(''), @joiner], s)
194
- else
195
- PP.pp([@tag, '*', '*', @codes.join('')], s)
196
- end
197
- return s.string
198
- end
199
-
200
- def self.fromPPString str
201
- a = eval(str)
202
- return self.new(a[0], a[3], a[4])
203
- end
204
-
205
- end
206
-
207
- end
1
+ require 'marcspec/controlfieldspec';
2
+ require 'marcspec/variablefieldspec';
3
+ require 'marcspec/leaderspec';
@@ -0,0 +1,80 @@
1
+ require 'set'
2
+ require 'pp'
3
+ module MARCSpec
4
# A VariableFieldSpec has a tag (three chars) and a set of codes. Its #marc_values(r) method will return
# all the values for the subfields for the given codes joined by the optional joiner (space by default)
#
# The subfield values are presented in the order they appear in the document, *not* the order the subfield
# codes are specified
#
# @example Get the $a from the 245s
#   vfs = MARCSpec::VariableFieldSpec.new('245', 'a')
#
# @example Three equivalent ways to ask for the $a and $b of the 245s
#   vfs = MARCSpec::VariableFieldSpec.new('245', 'ab')
#   vfs = MARCSpec::VariableFieldSpec.new('245', ['a', 'b'])
#   vfs = MARCSpec::VariableFieldSpec.new('245', 'a'..'b')
class VariableFieldSpec

  # :codes has a hand-written writer below, so only the reader is generated here.
  attr_accessor :tag, :joiner
  attr_reader :codes

  # @param [String] tag The field tag
  # @param [nil, String, Array<String>, Set<String>, Range] codes Subfield codes; nil is
  #   handed to the field's sub_values as-is
  # @param [String, nil] joiner String used to join multiple subfield values (nil means ' ')
  def initialize(tag, codes = nil, joiner = ' ')
    @tag = tag
    @joiner = joiner || ' '
    self.codes = codes
  end

  # Two specs are equal when tag, codes and joiner all match. This is a pure
  # comparison; neither operand is modified.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def ==(other)
    return false unless [:tag, :codes, :joiner].all? { |m| other.respond_to?(m) }
    tag == other.tag && codes == other.codes && joiner == other.joiner
  end

  # Normalize the codes to an array of single codes (or nil).
  #
  # @param [nil, String, Array, Set, Range] c Codes; a string is split into characters
  # @return [Array<String>, nil] the stored codes
  def codes=(c)
    @codes =
      if c.nil?
        nil
      elsif c.is_a?(Array) || c.is_a?(Set) || c.is_a?(Range)
        c.to_a
      else
        c.split(//)
      end
  end

  # Pull the values for this spec out of a record.
  #
  # With no codes or with several codes, each field contributes one string: its
  # subfield values joined by the joiner. With exactly one code, every matching
  # subfield value is returned as its own entry. Fields with no matching subfields
  # contribute nothing.
  #
  # @param [#find_by_tag] r A record whose find_by_tag(tag) yields fields responding
  #   to #sub_values(codes)
  # @return [Array<String>]
  def marc_values(r)
    join_values = @codes.nil? || @codes.size > 1
    per_field = r.find_by_tag(@tag).map do |f|
      subvals = f.sub_values(@codes)
      (join_values && subvals.size > 0) ? subvals.join(@joiner) : subvals
    end
    # Un-joined (and empty) per-field arrays are folded into one flat list.
    per_field.flatten
  end

  # Pretty-print as the same array literal that asPPString produces.
  def pretty_print(pp)
    pp.pp spec_array
  end

  # @return [String] Ruby source for an array of the form [tag, '*', '*', codes, joiner]
  #   (trailing default/absent elements omitted) that fromPPString can read back
  def asPPString
    # A plain String is a valid PP output target (it only needs #<<).
    PP.pp(spec_array, ''.dup)
  end

  # Rebuild a spec from the output of asPPString.
  #
  # NOTE(review): this evals its argument; only feed it trusted configuration text.
  #
  # @param [String] str Ruby source for an array [tag, ind1, ind2, codes, joiner]
  # @return [MARCSpec::VariableFieldSpec]
  def self.fromPPString(str)
    a = eval(str)
    new(a[0], a[3], a[4])
  end

  private

  # Array form of this spec. The two '*' entries sit in the positions that
  # fromPPString skips over (a[1] and a[2]). Nil codes are tolerated so a spec
  # built without codes can still be serialized.
  def spec_array
    code_string = @codes && @codes.join('')
    if @joiner && @joiner != ' '
      [@tag, '*', '*', code_string, @joiner]
    elsif code_string
      [@tag, '*', '*', code_string]
    else
      [@tag, '*', '*']
    end
  end
end
79
+
80
+ end
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "ControlFieldSpec" do
22
+
23
+ before do
24
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
25
+ # @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
26
+ end
27
+
28
+ # afc99990058366 # data
29
+ # 01234567890123 # index
30
+ it "gets a single full value" do
31
+ cfs = MARCSpec::ControlFieldSpec.new('001')
32
+ cfs.marc_values(@one).should.equal ["afc99990058366"]
33
+ end
34
+
35
+ it "gets a single character" do
36
+ cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
37
+ cfs.marc_values(@one).should.equal ['8']
38
+ end
39
+
40
+ it "gets a range of characters" do
41
+ cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
42
+ cfs.marc_values(@one).should.equal ['90058']
43
+ end
44
+
45
+ it "should round trip" do
46
+ cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
47
+ cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
48
+ cfs.should.equal cfs2
49
+ end
50
+ end
51
+
52
+
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+ # LEADER 00734njm a2200217uu 4500
4
+ # 001 afc99990058366
5
+ # 003 DLC
6
+ # 005 20071104155141.9
7
+ # 007 sd ummunniauub
8
+ # 008 071103s1939 xxufmnne||||||||| u eng||
9
+ # 010 $a afc99990058366
10
+ # 040 $a DLC $c DLC
11
+ # 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
12
+ # 260 $a Medina, Texas, $c 1939.
13
+ # 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
14
+ # 651 0 $a Medina $z Texas $z United States of America.
15
+ # 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
16
+ # 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
17
+ # 700 1 $a Taylor, Beale D. $e Singer.
18
+ # 852 $a American Folklife Center, Library of Congress
19
+ # 852 $a DLC
20
+
21
+ describe "LeaderSpec" do
22
+ before do
23
+ @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
24
+ end
25
+
26
+ it "Works with full leader" do
27
+ cfs = MARCSpec::LeaderSpec.new('LDR')
28
+ cfs.marc_values(@one).should.equal @one.leader
29
+ end
30
+
31
+ it "Works with substring of leader" do
32
+ cfs = MARCSpec::LeaderSpec.new('LDR', 3..5)
33
+ cfs.marc_values(@one).should.equal @one.leader[3..5]
34
+ end
35
+ end
@@ -165,29 +165,29 @@ describe "CustomSolrSpec" do
165
165
  end
166
166
 
167
167
  it "works with no args or map" do
168
- css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :methodSymbol=>:titleUp)
168
+ css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp)
169
169
  css.marc_values(@one).should.equal [@one['245'].value.upcase]
170
170
  end
171
171
 
172
172
  it "accepts nil for no args" do
173
- css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :methodSymbol=>:titleUp, :methodArgs=>nil)
173
+ css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>nil)
174
174
  css.marc_values(@one).should.equal [@one['245'].value.upcase]
175
175
  end
176
176
 
177
177
 
178
178
  it "uses a custom method with args but no map" do
179
- css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :methodSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
179
+ css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
180
180
  css.marc_values(@one).should.equal [@titleACValue.upcase]
181
181
  end
182
182
 
183
183
  it "works with a map" do
184
- css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :methodSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
184
+ css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
185
185
  css.marc_values(@one).should.equal [@mapValue]
186
186
  end
187
187
 
188
188
  it "works with a map that has multiple return values" do
189
189
  @map[@titleACValue.upcase] = ['two', 'one']
190
- css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :methodSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
190
+ css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
191
191
  css.marc_values(@one).should.equal ['two', 'one']
192
192
  end
193
193
 
data/spec/specset_spec.rb CHANGED
@@ -65,7 +65,7 @@ describe "SpecSet Basics" do
65
65
  end
66
66
 
67
67
  it "allows customs that reference previous work" do
68
- @speclist << {:solrField=>'titleSort', :module=>A::B, :methodSymbol=>:sortable, :methodArgs=>['title']}
68
+ @speclist << {:solrField=>'titleSort', :module=>A::B, :functionSymbol=>:sortable, :methodArgs=>['title']}
69
69
  ss = MARCSpec::SpecSet.new
70
70
  ss.buildSpecsFromList(@speclist)
71
71
  h = ss.hash_from_marc @one
@@ -86,7 +86,7 @@ describe "SpecSet Basics" do
86
86
  it "should allow multi-headed custom fields" do
87
87
  @speclist << {:solrField => ['one', 'two', 'letters'],
88
88
  :module => A::B,
89
- :methodSymbol => :three_value_custom,
89
+ :functionSymbol => :three_value_custom,
90
90
  }
91
91
  ss = MARCSpec::SpecSet.new
92
92
  ss.buildSpecsFromList(@speclist)
@@ -18,56 +18,6 @@ require 'spec_helper'
18
18
  # 852 $a American Folklife Center, Library of Congress
19
19
  # 852 $a DLC
20
20
 
21
- describe "ControlFieldSpec" do
22
-
23
- before do
24
- @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
25
- # @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
26
- end
27
-
28
- # afc99990058366 # data
29
- # 01234567890123 # index
30
- it "gets a single full value" do
31
- cfs = MARCSpec::ControlFieldSpec.new('001')
32
- cfs.marc_values(@one).should.equal ["afc99990058366"]
33
- end
34
-
35
- it "gets a single character" do
36
- cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
37
- cfs.marc_values(@one).should.equal ['8']
38
- end
39
-
40
- it "gets a range of characters" do
41
- cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
42
- cfs.marc_values(@one).should.equal ['90058']
43
- end
44
-
45
- it "should round trip" do
46
- cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
47
- cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
48
- cfs.should.equal cfs2
49
- end
50
- end
51
-
52
-
53
- describe "LeaderSpec" do
54
- before do
55
- @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
56
- end
57
-
58
- it "Works with full leader" do
59
- cfs = MARCSpec::LeaderSpec.new('LDR')
60
- cfs.marc_values(@one).should.equal @one.leader
61
- end
62
-
63
- it "Works with substring of leader" do
64
- cfs = MARCSpec::LeaderSpec.new('LDR', 3..5)
65
- cfs.marc_values(@one).should.equal @one.leader[3..5]
66
- end
67
- end
68
-
69
-
70
-
71
21
  describe "VariableFieldSpec" do
72
22
  before do
73
23
  @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marcspec
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 3
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 6
8
+ - 7
9
9
  - 0
10
- version: 0.6.0
10
+ version: 0.7.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - BillDueber
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-08-17 00:00:00 -04:00
18
+ date: 2010-08-19 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -96,14 +96,16 @@ files:
96
96
  - Rakefile
97
97
  - VERSION
98
98
  - lib/marcspec.rb
99
+ - lib/marcspec/controlfieldspec.rb
99
100
  - lib/marcspec/customspec.rb
100
101
  - lib/marcspec/kvmap.rb
102
+ - lib/marcspec/leaderspec.rb
101
103
  - lib/marcspec/map.rb
102
104
  - lib/marcspec/marcfieldspec.rb
103
105
  - lib/marcspec/multivaluemap.rb
104
106
  - lib/marcspec/solrfieldspec.rb
105
107
  - lib/marcspec/specset.rb
106
- - lib/orig.rb
108
+ - lib/marcspec/variablefieldspec.rb
107
109
  - spec/data/batch.dat
108
110
  - spec/data/one.dat
109
111
  - spec/data/umich/translation_maps/area_map.properties
@@ -121,11 +123,12 @@ files:
121
123
  - spec/data/umich/translation_maps/location_map.properties
122
124
  - spec/data/umich/umich_index.properties
123
125
  - spec/maps_spec.rb
124
- - spec/marcfieldspecs_spec.rb
125
- - spec/marcspec_spec.rb
126
126
  - spec/solrfieldspec_spec.rb
127
127
  - spec/spec_helper.rb
128
128
  - spec/specset_spec.rb
129
+ - spec/controlfieldspec_spec.rb
130
+ - spec/leaderspec_spec.rb
131
+ - spec/variablefieldspec_spec.rb
129
132
  has_rdoc: true
130
133
  homepage: http://github.com/billdueber/marcspec
131
134
  licenses: []
@@ -161,9 +164,10 @@ signing_key:
161
164
  specification_version: 3
162
165
  summary: Extract data from MARC records and send to Solr
163
166
  test_files:
167
+ - spec/controlfieldspec_spec.rb
168
+ - spec/leaderspec_spec.rb
164
169
  - spec/maps_spec.rb
165
- - spec/marcfieldspecs_spec.rb
166
- - spec/marcspec_spec.rb
167
170
  - spec/solrfieldspec_spec.rb
168
171
  - spec/spec_helper.rb
169
172
  - spec/specset_spec.rb
173
+ - spec/variablefieldspec_spec.rb
data/lib/orig.rb DELETED
@@ -1,288 +0,0 @@
1
- require 'set'
2
- require 'pp'
3
- require 'logger'
4
-
5
- $LOG ||= Logger.new(STDERR)
6
-
7
- module MARCSpec
8
-
9
- class MapSpec
10
- attr_accessor :map, :type, :default
11
-
12
- def initialize(type, map, default=nil)
13
- @type = type
14
- @default = default
15
- @map = map
16
- end
17
-
18
- def [] key
19
- if (@type == :kv)
20
- if @map.has_key? key
21
- return @map[key]
22
- else
23
- return @default
24
- end
25
- end
26
-
27
- # For a pattern, we find all that match.
28
-
29
- if (@type == :pattern)
30
- rv = []
31
- @map.each do |pv|
32
- pat = pv[0]
33
- val = pv[1]
34
- # puts "Trying pattern #{pat} against #{key}"
35
- if pat.match(key)
36
- rv << val
37
- # puts "Matched: adding #{val}"
38
- end
39
- end
40
- rv.uniq!
41
- if rv.size > 0
42
- return rv
43
- else
44
- return @default
45
- end
46
- end
47
- end
48
- end
49
-
50
- class CustomSpec
51
- def initialize(proc, args)
52
- @proc = proc
53
- @args = args
54
- end
55
-
56
- def marc_values_hash fieldnames, r
57
- a = @proc(r, args)
58
- rv = {}
59
- fieldnames.each_with_index do |fn, i|
60
- rv[fn] = a[i]
61
- end
62
- return rv
63
- end
64
- end
65
-
66
-
67
- class TagSpec
68
- attr_accessor :tag, :codes, :joiner, :parent, :ind1, :ind2, :range, :is_control
69
-
70
- def initialize(tag, codes=nil)
71
- @codes = Set.new
72
- @tag = tag
73
- @joiner = ' '
74
- @substr = nil
75
- tagint = tag.to_i
76
- @is_control = (tagint != 0 and tagint < 10)
77
- if (codes)
78
- self.codes = codes
79
- end
80
- end
81
-
82
- def range= newrange
83
- if newrange =~ /\s*(\d+)-(\d+)/
84
- start = $1.to_i
85
- last = $2.to_i
86
- @range = start..last
87
- else
88
- se = newrange.to_i
89
- @range = se..se
90
- end
91
- end
92
-
93
- def codes= newcodes
94
- if newcodes.is_a? Range
95
- @codes = newcodes.to_a
96
- elsif newcodes !~ /\S/
97
- @codes = nil
98
- # Otherwise, just split into individual characters
99
- else
100
- @codes = newcodes.split(//)
101
- end
102
- end
103
-
104
- def marc_values r
105
- if @is_control
106
- vals = r.find_by_tag(@tag).map {|f| f.value}
107
- # puts "Start with #{vals.join(', ')}"
108
- if @range
109
- vals.map! {|v| v[@range]}
110
- end
111
- # puts "End with #{vals.join(', ')}"
112
-
113
- else
114
- fields = r.find_by_tag(@tag)
115
- vals = []
116
- fields.each do |f|
117
- subvals = f.sub_values(@codes)
118
- vals << subvals.join(@joiner) if subvals.size > 0
119
- end
120
- end
121
- # puts vals.join(', ')
122
- return vals
123
- end
124
-
125
- end
126
-
127
- class FieldSpec
128
- attr_accessor :field, :first, :map, :tagspecs
129
-
130
- def initialize(opts)
131
- @field = opts[:field]
132
- @first = opts[:first] || false
133
- @map = opts[:map] || nil
134
- @tagspecs = []
135
- end
136
-
137
- def << tagspec
138
- tagspec.parent = self
139
- @tagspecs << tagspec
140
- end
141
-
142
- def marc_values r
143
- vals = []
144
- # puts "Tagspecs has #{@tagspecs.size} items"
145
- @tagspecs.each do |ts|
146
- vals.concat ts.marc_values(r)
147
- # puts vals.join(', ')
148
- break if @first and vals.size > 0
149
- end
150
-
151
- if (@map)
152
- vals.map! {|v| @map[v]}
153
- # vals.each do |v|
154
- # puts "Map: #{v} => #{@map[v].to_s}"
155
- # end
156
- end
157
- vals.flatten!
158
- vals.uniq!
159
- vals.compact!
160
- return vals
161
- end
162
-
163
- end
164
-
165
-
166
- class SpecSet
167
- attr_accessor :tmaps, :fieldspecs
168
- def initialize(*args)
169
- tmapdir = args.pop!
170
- unless File.directory? tmapdir
171
- $LOG.error "Directory #{tmapdir} not found"
172
- raise LoadError, "Directory #{tmapdir} not found"
173
- end
174
-
175
- @tmaps = {}
176
- Dir.glob(tmapdir + '/*.rb') do |fn|
177
- basename = File.basename(fn).sub(/\.rb$/, '')
178
- $LOG.info "Loading translation map #{basename}"
179
-
180
- begin
181
- rawmap = eval(File.open(fn).read)
182
- @tmaps[basename] = MapSpec.new(rawmap[:type], rawmap[:map], rawmap[:default])
183
- rescue SyntaxError
184
- $LOG.error "Error processing translation map file #{fn}: #{$!}"
185
- raise SyntaxError, $!
186
- end
187
-
188
- end
189
-
190
- @fieldspecs = []
191
-
192
- # Get the index files
193
- args.each do |indexfile|
194
- begin
195
- unless File.exists? indexfile
196
- $LOG.error "File #{indexfile} does not exist"
197
- raise LoadError, "File #{indexfile} does not exist"
198
- end
199
- $LOG.info "Loading index file #{indexfile}"
200
- rawindex = eval(File.open(indexfile).read)
201
- rawindex.each do |entry|
202
- fs = FieldSpec.new(:field => entry[:solrField], :first=>entry[:firstOnly])
203
- mapname = entry[:map]
204
- if mapname
205
- if @tmaps.has_key? mapname
206
- fs.map = @tmaps[mapname]
207
- else
208
- $LOG.error "Can't find map #{mapname}"
209
- end
210
- end
211
- entry[:specs].each do |entryts|
212
-
213
- # A one- or two-element entry is a control field
214
- # A three element entry (tag, ind1, ind2) is all subs of a field (need to implement)
215
- # A four element field is tag, ind1, ind2, subs
216
- # A five element field is tag, ind1, ind2, subs, joiner
217
-
218
-
219
- tag = entryts[0]
220
-
221
- # Is tag the symbol :custom? Then make it a custom item
222
-
223
- if tag == :custom
224
- ts = CustomSpec.new(entryts[1], entryts[2..-1])
225
- fs << ts
226
- next
227
- end
228
-
229
- # If it's not custom, the solrField better be a scale
230
- if entry[:solrField].is_a? Array
231
- # log an error and bail out
232
- end
233
-
234
- # Otherwise, it's a tag spec
235
- if tag.is_a? Fixnum
236
- tag = '%03d' % tag
237
- end
238
-
239
-
240
- ts = TagSpec.new(tag)
241
- if entryts.size < 3
242
- ts.is_control = true
243
- ts.range = entryts[1] if entryts[1]
244
- else
245
- ts.ind1 = entryts[1]
246
- ts.ind2 = entryts[2]
247
- ts.codes = entryts[3]
248
- ts.joiner = entryts[4] if entryts[4]
249
- end
250
- fs << ts
251
- end
252
- self << fs
253
- end
254
- rescue SyntaxError
255
- $LOG.error "Error processing index file #{indexfile}: #{$!}"
256
- raise SyntaxError
257
- end
258
- end
259
- end
260
-
261
- def each
262
- @fieldspecs.each do |fs|
263
- yield fs
264
- end
265
- end
266
-
267
- def << fieldspec
268
- @fieldspecs << fieldspec
269
- end
270
-
271
- def doc_from_marc r
272
- doc = SolrInputDocument.new
273
- @fieldspecs.each do |fs|
274
- doc[fs.field] = fs.marc_values(r)
275
- end
276
- return doc
277
- end
278
-
279
- def hash_from_marc r
280
- h = {}
281
- @fieldspecs.each do |fs|
282
- h[fs.field] = fs.marc_values(r)
283
- end
284
- return h
285
- end
286
-
287
- end
288
- end
@@ -1,10 +0,0 @@
1
-
2
-
3
-
4
-
5
-
6
-
7
-
8
-
9
-
10
-