marcspec 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/marcspec/controlfieldspec.rb +93 -0
- data/lib/marcspec/customspec.rb +36 -11
- data/lib/marcspec/kvmap.rb +13 -1
- data/lib/marcspec/leaderspec.rb +37 -0
- data/lib/marcspec/map.rb +20 -2
- data/lib/marcspec/marcfieldspec.rb +3 -207
- data/lib/marcspec/variablefieldspec.rb +80 -0
- data/spec/controlfieldspec_spec.rb +52 -0
- data/spec/leaderspec_spec.rb +35 -0
- data/spec/solrfieldspec_spec.rb +5 -5
- data/spec/specset_spec.rb +2 -2
- data/spec/{marcfieldspecs_spec.rb → variablefieldspec_spec.rb} +0 -50
- metadata +13 -9
- data/lib/orig.rb +0 -288
- data/spec/marcspec_spec.rb +0 -10
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.7.0
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'marc4j4r'
|
2
|
+
require 'set'
|
3
|
+
require 'pp'
|
4
|
+
module MARCSpec
|
5
|
+
# A ControlFieldSpec takes a control tag (generally 001..009) and an optional zero-based range.
# When called with marc_values(record), it returns either the complete value of all
# occurrences of the field in question (in the order they appear in the record), or
# the zero-based substrings based on the passed range.
#
# @example Get the whole 001
#   cfs = MARCSpec::ControlFieldSpec.new('001')
#
# @example Get the first three characters of the 008
#   cfs = MARCSpec::ControlFieldSpec.new('008', 0..2)
#
# Note that the use of the zero-based range in this manner conforms to the way MARC
# substrings are specified.
class ControlFieldSpec
  attr_accessor :tag, :range

  # Build a spec for one control tag.
  #
  # @param [String] tag A tag accepted by MARC4J4R::ControlField.control_tag?
  # @param [nil, Integer, Range] range Optional zero-based character position or substring range
  # @raise [ArgumentError] if the tag is not a control tag, or the range is not nil,
  #   a non-negative Integer, or a Range
  def initialize(tag, range = nil)
    unless MARC4J4R::ControlField.control_tag? tag
      # The comma is required: without it Ruby tries to call a *method* named
      # ArgumentError and fails with NoMethodError instead.
      raise ArgumentError, "Tag must be a control tag"
    end
    @tag = tag
    self.range = range
  end

  # Two specs are equal when they are both control field specs with the same tag
  # and the same range. The comparison never modifies either object.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def ==(other)
    other.is_a?(ControlFieldSpec) && tag == other.tag && range == other.range
  end

  # Always force a real range, since in Ruby 1.8 a string subscript with a single integer
  # will return the character code of that character (e.g., "Bill"[0] => 66), whereas
  # "Bill"[0..0] gives the expected 'B'.
  #
  # @param [nil, Integer, Range] range A zero-based substring range or character position
  # @return [MARCSpec::ControlFieldSpec] self
  # @raise [ArgumentError] if range is a negative Integer or of an unsupported type
  def range=(range)
    case range
    when nil
      @range = nil
    when Integer
      if range < 0
        raise ArgumentError, "Range must be nil, an integer offset (zero-based), or a Range, not #{range}"
      end
      # Store a one-character range so that subscripting always yields a String
      @range = range..range
    when Range
      @range = range
    else
      raise ArgumentError, "Range must be nil, an integer offset (zero-based), or a Range, not #{range.inspect}"
    end
    self
  end

  # Pull the values for this spec out of a record.
  #
  # @param [#find_by_tag] r A record whose find_by_tag(tag) yields fields responding to #value
  # @return [Array<String>] one entry per matching field: the whole value, or the
  #   substring selected by the range when one is set
  def marc_values(r)
    vals = r.find_by_tag(@tag).map { |f| f.value }
    return vals unless @range
    vals.map { |v| v[@range] }
  end

  # Pretty-print the same array that asPPString serializes.
  def pretty_print(pp)
    pp.pp pp_data
  end

  # Serialize this spec as pretty-printed Ruby source for an array: [tag] or [tag, range].
  #
  # @return [String] source text that fromPPString can turn back into an equal spec
  def asPPString
    # PP.pp appends to (and returns) any object supporting <<; a plain String is enough
    PP.pp(pp_data, ''.dup)
  end

  # Rebuild a spec from the text produced by asPPString.
  #
  # NOTE(review): the string is passed to eval, so it is executed as Ruby code. Only
  # feed this configuration text you trust.
  #
  # @param [String] str Ruby source for an array of the form [tag] or [tag, range]
  # @return [MARCSpec::ControlFieldSpec]
  def self.fromPPString(str)
    new(*eval(str))
  end

  private

  # The array form of this spec; the range is left out entirely when it is not set.
  def pp_data
    @range ? [@tag, @range] : [@tag]
  end
end
|
93
|
+
end
|
data/lib/marcspec/customspec.rb
CHANGED
@@ -5,10 +5,12 @@ require 'marcspec/solrfieldspec'
|
|
5
5
|
module MARCSpec
|
6
6
|
|
7
7
|
# A CustomSolrSpec is a SolrFieldSpec that derives all its values from a custom function. The custom function
|
8
|
-
# must me a module function that takes a record and an array of other arguments and returns a
|
8
|
+
# must be a module function that takes a hash-like document object, a MARC4J4R record, and an array of other arguments and returns a
|
9
9
|
# (possibly empty) list of resulting values.
|
10
10
|
#
|
11
|
-
#
|
11
|
+
# See the example file simple_sample/index.rb in the marc2solr project for configuration examples.
|
12
|
+
#
|
13
|
+
# @example A sample custom function, to be placed in the configuration directory's lib/ subdir
|
12
14
|
# module MARC2Solr
|
13
15
|
# module MyCustomStuff
|
14
16
|
# def self.uppercaseTitle doc, r, args=[]
|
@@ -19,25 +21,40 @@ module MARCSpec
|
|
19
21
|
# end
|
20
22
|
# end
|
21
23
|
#
|
24
|
+
# @example A simple custom spec made by hand
|
22
25
|
# css = MARCSpec::CustomSolrSpec.new(:module => MARC2Solr::MyCustomStuff,
|
23
|
-
# :
|
26
|
+
# :functionSymbol => :uppercaseTitle,
|
24
27
|
# :map => ss.map('mapname')
|
25
28
|
# )
|
26
|
-
# ss.add_spec(css)
|
27
29
|
#
|
28
30
|
#
|
29
31
|
|
30
32
|
|
31
33
|
class CustomSolrSpec < SolrFieldSpec
|
32
34
|
|
33
|
-
attr_accessor :module, :
|
35
|
+
attr_accessor :module, :functionSymbol, :methodArgs
|
36
|
+
|
37
|
+
# Get a new Custom Solr Spec based on the passed in options.
|
38
|
+
# @param [Hash] opts Initialization options
|
39
|
+
# @option opts [String, Array<String>] :solrField the name(s) of the Solr field(s) that will receive the data derived from this spec
|
40
|
+
# @option opts [Module] :module the actual module constant (not a string or symbol representation) which holds the
|
41
|
+
# custom function we'll be calling
|
42
|
+
# @option opts [Symbol] :functionSymbol A symbol of the name of the custom function
|
43
|
+
# @option opts [Boolean] :firstOnly (false) Whether we should return the first found value
|
44
|
+
# @option opts [String] :default (nil) The value to return if the custom function returns no values
|
45
|
+
# @option opts [MARCSpec::Map] :map (nil) An optional Map used to translate resulting values
|
46
|
+
# @option opts [String] :noMapKeyDefault (nil) The value to return if (a) a value is found, (b) a map is defined, but (c) there's
|
47
|
+
# no key in the map that matches the value.
|
48
|
+
#
|
49
|
+
# Note that the last four options don't make sense if multiple :solrFields are given, and are illegal in that case.
|
50
|
+
|
34
51
|
def initialize(opts)
|
35
52
|
@solrField = opts[:solrField]
|
36
53
|
@module = opts[:module]
|
37
|
-
@
|
54
|
+
@functionSymbol = opts[:functionSymbol]
|
38
55
|
|
39
|
-
unless @solrField and @module and @
|
40
|
-
raise ArgumentError, "Custom solr spec must have a field name in :solrField, module in :module, and the method name as a symbol in :
|
56
|
+
unless @solrField and @module and @functionSymbol
|
57
|
+
raise ArgumentError, "Custom solr spec must have a field name in :solrField, module in :module, and the method name as a symbol in :functionSymbol"
|
41
58
|
end
|
42
59
|
|
43
60
|
|
@@ -60,11 +77,19 @@ module MARCSpec
|
|
60
77
|
|
61
78
|
end
|
62
79
|
|
80
|
+
# Get values from a MARC object and/or the previously-filled document object.
|
81
|
+
#
|
82
|
+
# Note that the doc is read-write here, but for the love of god, just leave it alone.
|
83
|
+
#
|
84
|
+
# @param [MARC4J4R::Record] r A marc record
|
85
|
+
# @param [SolrInputDocument, Hash] doc The document we're constructing.
|
86
|
+
# @return [Array<String>] An array of values returned by the custom method
|
63
87
|
|
64
88
|
def raw_marc_values r, doc
|
65
|
-
return @module.send(@
|
89
|
+
return @module.send(@functionSymbol, doc, r, *@methodArgs)
|
66
90
|
end
|
67
91
|
|
92
|
+
|
68
93
|
def self.fromHash h
|
69
94
|
return self.new(h)
|
70
95
|
end
|
@@ -94,8 +119,8 @@ module MARCSpec
|
|
94
119
|
|
95
120
|
s.print(":module => ")
|
96
121
|
PP.singleline_pp(@module, s)
|
97
|
-
s.print(",\n :
|
98
|
-
PP.singleline_pp(@
|
122
|
+
s.print(",\n :functionSymbol => ")
|
123
|
+
PP.singleline_pp(@functionSymbol, s)
|
99
124
|
if @methodArgs
|
100
125
|
s.print(",\n :methodArgs => ")
|
101
126
|
PP.singleline_pp(@methodArgs, s)
|
data/lib/marcspec/kvmap.rb
CHANGED
@@ -8,6 +8,8 @@ module MARCSpec
|
|
8
8
|
|
9
9
|
# A KVMap is, when push comes to shove, just a hash with a name, and the
|
10
10
|
# option of adding a default value for each lookup.
|
11
|
+
#
|
12
|
+
# The map portion of a kvmap is simply a hash.
|
11
13
|
|
12
14
|
class KVMap < Map
|
13
15
|
|
@@ -38,12 +40,18 @@ module MARCSpec
|
|
38
40
|
end
|
39
41
|
end
|
40
42
|
|
43
|
+
# Set an element in the map, just like for a regular hash
|
41
44
|
def []= key, value
|
42
45
|
@map[key] = value
|
43
46
|
end
|
44
47
|
|
45
48
|
alias_method :add, :[]=
|
46
49
|
|
50
|
+
|
51
|
+
# Produce a configuration file that will round-trip to this object.
|
52
|
+
#
|
53
|
+
# @return [String] A string representation of valid ruby code that can be turned back into
|
54
|
+
# this object using MARCSpec::Map#fromFile
|
47
55
|
def asPPString
|
48
56
|
s = StringIO.new
|
49
57
|
s.print "{\n :maptype=>:kv,\n :mapname=>"
|
@@ -54,7 +62,11 @@ module MARCSpec
|
|
54
62
|
return s.string
|
55
63
|
end
|
56
64
|
|
57
|
-
|
65
|
+
|
66
|
+
# Translate from a solrmarc map file that has *already been determined* to be a KV map
|
67
|
+
# @param [String] filename The path to the solrmarc kv map file
|
68
|
+
# @return [MARCSpec::KVMap] a KVMap
|
69
|
+
|
58
70
|
def self.from_solrmarc_file filename
|
59
71
|
mapname = File.basename(filename).sub(/\..+?$/, '')
|
60
72
|
map = {}
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'marcspec/controlfieldspec'
|
2
|
+
|
3
|
+
module MARCSpec
|
4
|
+
# A LeaderSpec deals only with the leader. It's basically the same as a ControlFieldSpec, but
# uses the string 'LDR' to identify itself.
class LeaderSpec < ControlFieldSpec

  # Built to be syntax-compatible with ControlFieldSpec; the tag must always
  # be 'LDR' (case matters).
  #
  # super is intentionally not called, so the control-tag check performed by
  # ControlFieldSpec#initialize does not run for 'LDR'.
  #
  # @param ['LDR'] tag The 'tag'; in this case, always 'LDR'
  # @param [nil, Integer, Range<Integer>] range substring specification (either one character
  #   or a range) to return instead of the whole leader.
  # @raise [ArgumentError] if tag is anything other than 'LDR'
  def initialize(tag, range = nil)
    unless tag == 'LDR'
      # The comma is required: without it Ruby tries to call a *method* named
      # ArgumentError and fails with NoMethodError instead.
      raise ArgumentError, "Tag must be 'LDR' for a LeaderSpec"
    end
    @tag = 'LDR'
    self.range = range
  end

  # Return the appropriate value (either the leader or a subset of it) from the
  # given record. Unlike ControlFieldSpec#marc_values, this returns a bare String
  # rather than an Array.
  #
  # @param [MARC4J4R::Record] r A MARC4J4R Record
  # @return [String] the leader or substring of the leader
  def marc_values(r)
    @range ? r.leader[@range] : r.leader
  end
end
|
37
|
+
end
|
data/lib/marcspec/map.rb
CHANGED
@@ -2,7 +2,12 @@ module MARCSpec
|
|
2
2
|
|
3
3
|
# A Map is just a named lookup table. The access
|
4
4
|
# (via []) takes, in addition to a key, an optional
|
5
|
-
# default value to return
|
5
|
+
# default value to return (e.g., val = map[key, defaultIfNotFound])
|
6
|
+
#
|
7
|
+
# We don't have the default be a part of the map because it might be used
|
8
|
+
# in several different contexts.
|
9
|
+
#
|
10
|
+
# NOTE: THIS IS AN ABSTRACT SUPERCLASS. DO NOT INSTANTIATE IT DIRECTLY
|
6
11
|
|
7
12
|
class Map
|
8
13
|
attr_accessor :mapname, :map
|
@@ -10,12 +15,21 @@ module MARCSpec
|
|
10
15
|
# Create a new map. The passed map is either
|
11
16
|
# a standard hash or a list of duples
|
12
17
|
#
|
13
|
-
# @param
|
18
|
+
# @param [String] mapname The name of this map; can be used to find it later on.
|
19
|
+
# @param [Hash, Array<2-value-arrays>] map Either a normal key-value hash (for a KV Map) or an
|
20
|
+
# array of duples (2-value arrays) for a MultiValueMap.
|
14
21
|
def initialize(mapname, map)
|
15
22
|
@mapname = mapname
|
16
23
|
@map = map
|
17
24
|
end
|
18
25
|
|
26
|
+
# Load a map from a file, determining what kind it is along the way.
|
27
|
+
#
|
28
|
+
# The file is valid ruby code; see the subclasses KVMap and MultiValueMap for examples.
|
29
|
+
#
|
30
|
+
# @param [String] filename The name of the map file to be eval'd
|
31
|
+
# @return [MARCSpec::Map] An instance of a subclass of MARCSpec::Map
|
32
|
+
|
19
33
|
def self.fromFile filename
|
20
34
|
begin
|
21
35
|
str = File.open(filename).read
|
@@ -44,14 +58,18 @@ module MARCSpec
|
|
44
58
|
end
|
45
59
|
|
46
60
|
|
61
|
+
# Check for map equality: two maps are equal when both their names and their
# underlying lookup data compare equal. Neither map is modified by the comparison.
#
# @param [#mapname, #map] other The map to compare against
# @return [Boolean]
def ==(other)
  # Must be ==, not =: assigning here would overwrite other's data with ours
  (other.mapname == self.mapname) && (other.map == self.map)
end
|
50
65
|
|
66
|
+
# Generic pretty_print; used mostly for translating from solrmarc
|
51
67
|
def pretty_print pp
|
52
68
|
pp.pp eval(self.asPPString)
|
53
69
|
end
|
54
70
|
|
71
|
+
# Produce a map from the data structure produced by asPPString
|
72
|
+
# @param [Hash] rawmap A hash with two keys; :mapname and :map
|
55
73
|
def self.fromHash rawmap
|
56
74
|
return self.new(rawmap[:mapname], rawmap[:map])
|
57
75
|
end
|
@@ -1,207 +1,3 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
require '
|
4
|
-
module MARCSpec
|
5
|
-
|
6
|
-
# A ControlFieldSpec takes a control tag (generally 001..009) and an optional zero-based range
|
7
|
-
# When called with marc_values(record), it returns either the complete value of all
|
8
|
-
# occurances of the field in question (in the order they appear in the record), or
|
9
|
-
# the zero-based substrings based on the passed range.
|
10
|
-
#
|
11
|
-
# @example Get the whole 001
|
12
|
-
# cfs = MARCSpec::ControlTagSpec.new('001')
|
13
|
-
#
|
14
|
-
# @example Get the first three characters of the 008
|
15
|
-
# cfs = MARCSpec::ControlTagSpec.new('001', 0..2)
|
16
|
-
#
|
17
|
-
# Note that the use of the zero-based range in this manner conforms to the way MARC
|
18
|
-
# substrings are specified.
|
19
|
-
|
20
|
-
class ControlFieldSpec
|
21
|
-
attr_accessor :tag, :range
|
22
|
-
|
23
|
-
def initialize (tag, range=nil)
|
24
|
-
unless MARC4J4R::ControlField.control_tag? tag
|
25
|
-
raise ArgumentError "Tag must be a control tag"
|
26
|
-
end
|
27
|
-
@tag = tag
|
28
|
-
self.range = range
|
29
|
-
end
|
30
|
-
|
31
|
-
def == other
|
32
|
-
return ((self.tag == other.tag) and
|
33
|
-
(self.range = other.range))
|
34
|
-
end
|
35
|
-
|
36
|
-
|
37
|
-
# Always force a real range, since in Ruby 1.9 a string subscript with a single fixnum
|
38
|
-
# will return the character code of that character (e.g., "Bill"[0] => 66, wherease
|
39
|
-
# "Bill"[0..0] gives the expected 'B'
|
40
|
-
#
|
41
|
-
# @param [nil, Fixnum, Range] range A zero-based substring range or character position
|
42
|
-
# @return [MARCSpec::ControlFieldSpec] self
|
43
|
-
|
44
|
-
def range= range
|
45
|
-
if range.nil?
|
46
|
-
@range = nil
|
47
|
-
return self
|
48
|
-
end
|
49
|
-
if range.is_a? Fixnum
|
50
|
-
if range < 0
|
51
|
-
raise ArgumentError, "Range must be nil, an integer offset (1-based), or a Range, not #{range}"
|
52
|
-
end
|
53
|
-
|
54
|
-
@range = range..range
|
55
|
-
|
56
|
-
elsif range.is_a? Range
|
57
|
-
@range = range
|
58
|
-
else
|
59
|
-
raise ArgumentError, "Range must be nil, an integer offset (1-based), or a Range, not #{range.inspect}"
|
60
|
-
end
|
61
|
-
return self
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
|
-
def marc_values r
|
66
|
-
vals = r.find_by_tag(@tag).map {|f| f.value}
|
67
|
-
if @range
|
68
|
-
return vals.map {|v| v[@range]}
|
69
|
-
else
|
70
|
-
return vals
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def pretty_print pp
|
75
|
-
pp.pp eval(self.asPPString)
|
76
|
-
end
|
77
|
-
|
78
|
-
def asPPString
|
79
|
-
s = StringIO.new
|
80
|
-
if @range
|
81
|
-
PP.pp([@tag, @range], s)
|
82
|
-
else
|
83
|
-
PP.pp([@tag], s)
|
84
|
-
end
|
85
|
-
return s.string
|
86
|
-
end
|
87
|
-
|
88
|
-
def self.fromPPString str
|
89
|
-
a = eval(str)
|
90
|
-
return self.new(*a)
|
91
|
-
end
|
92
|
-
|
93
|
-
end
|
94
|
-
|
95
|
-
|
96
|
-
# A LeaderSpec deals only with the leader. It's basically the same as a controlfield spec, but
|
97
|
-
# using the string 'LDR' to identify itself
|
98
|
-
|
99
|
-
class LeaderSpec < ControlFieldSpec
|
100
|
-
|
101
|
-
# Built to be syntax-compatible with ControlFieldSpec, the tag must always
|
102
|
-
# be 'LDR' (case matters)
|
103
|
-
#
|
104
|
-
# @param ['LDR'] tag The 'tag'; in this case, always 'LDR'
|
105
|
-
# @param [Fixnum, Range<Fixnum>] range substring specification (either one character or a range) to return
|
106
|
-
# instead of the whole leader.
|
107
|
-
|
108
|
-
def initialize (tag, range=nil)
|
109
|
-
unless tag == 'LDR'
|
110
|
-
raise ArgumentError "Tag must be 'LDR'"
|
111
|
-
end
|
112
|
-
@tag = 'LDR'
|
113
|
-
self.range = range
|
114
|
-
end
|
115
|
-
|
116
|
-
# Return the appropriate value (either the leader or a subset of it) from the
|
117
|
-
# given record
|
118
|
-
#
|
119
|
-
# @param [MARC4J4R::Record] r A MARC4J4R Record
|
120
|
-
# @return [String] the leader or substring of the leader
|
121
|
-
def marc_values r
|
122
|
-
if @range
|
123
|
-
return r.leader[@range]
|
124
|
-
else
|
125
|
-
return r.leader
|
126
|
-
end
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
|
131
|
-
# A VariableFieldSpec has a tag (three chars) and a set of codes. Its #marc_values(r) method will return
|
132
|
-
# all the values for the subfields for the given codes joined by the optional joiner (space by default)
|
133
|
-
#
|
134
|
-
# The subfield values are presented in the order they appear in the document, *not* the order the subfield
|
135
|
-
# codes are specified
|
136
|
-
#
|
137
|
-
# @example Get the $a from the 245s
|
138
|
-
# vfs = MARCSpec::VariableFieldSpec.new('245', 'a')
|
139
|
-
#
|
140
|
-
# vfs = MARCSpec::VariableFieldSpec.new('245', 'ab')
|
141
|
-
|
142
|
-
|
143
|
-
class VariableFieldSpec
|
144
|
-
|
145
|
-
attr_accessor :tag, :codes, :joiner
|
146
|
-
|
147
|
-
def initialize tag, codes=nil, joiner=' '
|
148
|
-
@tag = tag
|
149
|
-
@joiner = joiner || ' '
|
150
|
-
self.codes = codes
|
151
|
-
end
|
152
|
-
|
153
|
-
def == other
|
154
|
-
return ((self.tag == other.tag) and
|
155
|
-
(self.codes = other.codes) and
|
156
|
-
(self.joiner = other.joiner))
|
157
|
-
end
|
158
|
-
|
159
|
-
def codes= c
|
160
|
-
if c.nil?
|
161
|
-
@codes = nil
|
162
|
-
return nil
|
163
|
-
end
|
164
|
-
|
165
|
-
if( c.is_a? Array) or (c.is_a? Set) or (c.is_a? Range)
|
166
|
-
@codes = c.to_a
|
167
|
-
else
|
168
|
-
@codes = c.split(//)
|
169
|
-
end
|
170
|
-
|
171
|
-
return @codes
|
172
|
-
end
|
173
|
-
|
174
|
-
def marc_values r
|
175
|
-
fields = r.find_by_tag(@tag)
|
176
|
-
vals = []
|
177
|
-
fields.each do |f|
|
178
|
-
subvals = f.sub_values(@codes)
|
179
|
-
subvals = subvals.join(@joiner) if subvals.size > 0 and (@codes.nil? or @codes.size > 1)
|
180
|
-
vals << subvals
|
181
|
-
end
|
182
|
-
vals.flatten!
|
183
|
-
return vals
|
184
|
-
end
|
185
|
-
|
186
|
-
def pretty_print pp
|
187
|
-
pp.pp eval(self.asPPString)
|
188
|
-
end
|
189
|
-
|
190
|
-
def asPPString
|
191
|
-
s = StringIO.new
|
192
|
-
if @joiner and @joiner != ' '
|
193
|
-
PP.pp([@tag, '*', '*', @codes.join(''), @joiner], s)
|
194
|
-
else
|
195
|
-
PP.pp([@tag, '*', '*', @codes.join('')], s)
|
196
|
-
end
|
197
|
-
return s.string
|
198
|
-
end
|
199
|
-
|
200
|
-
def self.fromPPString str
|
201
|
-
a = eval(str)
|
202
|
-
return self.new(a[0], a[3], a[4])
|
203
|
-
end
|
204
|
-
|
205
|
-
end
|
206
|
-
|
207
|
-
end
|
1
|
+
require 'marcspec/controlfieldspec';
|
2
|
+
require 'marcspec/variablefieldspec';
|
3
|
+
require 'marcspec/leaderspec';
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'pp'
|
3
|
+
module MARCSpec
|
4
|
+
# A VariableFieldSpec has a tag (three chars) and a set of codes. Its #marc_values(r) method will return
# all the values for the subfields for the given codes joined by the optional joiner (space by default)
#
# The subfield values are presented in the order they appear in the document, *not* the order the subfield
# codes are specified
#
# @example Get the $a from the 245s
#   vfs = MARCSpec::VariableFieldSpec.new('245', 'a')
#
# @example Three equivalent ways to ask for the $a and $b
#   vfs = MARCSpec::VariableFieldSpec.new('245', 'ab')
#   vfs = MARCSpec::VariableFieldSpec.new('245', ['a', 'b'])
#   vfs = MARCSpec::VariableFieldSpec.new('245', 'a'..'b')
class VariableFieldSpec

  attr_accessor :tag, :codes, :joiner

  # @param [String] tag The field tag
  # @param [nil, String, Array, Set, Range] codes The subfield codes; see #codes=
  # @param [String, nil] joiner Separator placed between subfield values (nil means a single space)
  def initialize(tag, codes = nil, joiner = ' ')
    @tag = tag
    @joiner = joiner || ' '
    self.codes = codes
  end

  # Two specs are equal when they are both variable field specs with the same tag,
  # codes and joiner. The comparison never modifies either object.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def ==(other)
    other.is_a?(VariableFieldSpec) &&
      tag == other.tag &&
      codes == other.codes &&
      joiner == other.joiner
  end

  # Normalize the codes to an array of single codes (or nil when no codes are given).
  #
  # @param [nil, String, Array, Set, Range] c An Array/Set/Range is converted with to_a;
  #   anything else is split into its individual characters
  # @return [Array, nil] the stored codes
  def codes=(c)
    @codes = case c
             when nil then nil
             when Array, Set, Range then c.to_a
             else c.split(//)
             end
  end

  # Collect the subfield values for this spec from a record.
  #
  # For each matching field, the values are joined into one string when no codes are
  # set or more than one code is set; with exactly one code each value stays separate.
  #
  # @param [#find_by_tag] r A record whose find_by_tag(tag) yields fields responding to #sub_values
  # @return [Array<String>] the flattened list of values
  def marc_values(r)
    vals = r.find_by_tag(@tag).map do |f|
      subvals = f.sub_values(@codes)
      if subvals.size > 0 && (@codes.nil? || @codes.size > 1)
        subvals.join(@joiner)
      else
        subvals
      end
    end
    vals.flatten
  end

  # Pretty-print the same array that asPPString serializes.
  def pretty_print(pp)
    pp.pp pp_data
  end

  # Serialize this spec as pretty-printed Ruby source for an array of the form
  # [tag, '*', '*', codes] or, with a non-default joiner, [tag, '*', '*', codes, joiner].
  #
  # @return [String] source text that fromPPString can turn back into an equal spec
  def asPPString
    # PP.pp appends to (and returns) any object supporting <<; a plain String is enough
    PP.pp(pp_data, ''.dup)
  end

  # Rebuild a spec from the text produced by asPPString. Elements 1 and 2 of the
  # array are ignored.
  #
  # NOTE(review): the string is passed to eval, so it is executed as Ruby code. Only
  # feed this configuration text you trust.
  #
  # @param [String] str Ruby source for an array as produced by asPPString
  # @return [MARCSpec::VariableFieldSpec]
  def self.fromPPString(str)
    a = eval(str)
    new(a[0], a[3], a[4])
  end

  private

  # The array form of this spec. Codes are written as one string, or nil when the
  # spec has no codes, so a code-less spec can be serialized and read back.
  def pp_data
    data = [@tag, '*', '*', @codes && @codes.join('')]
    data << @joiner if @joiner && @joiner != ' '
    data
  end
end
|
79
|
+
|
80
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# LEADER 00734njm a2200217uu 4500
|
4
|
+
# 001 afc99990058366
|
5
|
+
# 003 DLC
|
6
|
+
# 005 20071104155141.9
|
7
|
+
# 007 sd ummunniauub
|
8
|
+
# 008 071103s1939 xxufmnne||||||||| u eng||
|
9
|
+
# 010 $a afc99990058366
|
10
|
+
# 040 $a DLC $c DLC
|
11
|
+
# 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
|
12
|
+
# 260 $a Medina, Texas, $c 1939.
|
13
|
+
# 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
|
14
|
+
# 651 0 $a Medina $z Texas $z United States of America.
|
15
|
+
# 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
|
16
|
+
# 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
|
17
|
+
# 700 1 $a Taylor, Beale D. $e Singer.
|
18
|
+
# 852 $a American Folklife Center, Library of Congress
|
19
|
+
# 852 $a DLC
|
20
|
+
|
21
|
+
describe "ControlFieldSpec" do

  # The 001 of the sample record, with character offsets for reference:
  #   afc99990058366   # data
  #   01234567890123   # index
  before do
    @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
  end

  it "gets a single full value" do
    whole = MARCSpec::ControlFieldSpec.new('001').marc_values(@one)
    whole.should.equal ["afc99990058366"]
  end

  it "gets a single character" do
    one_char = MARCSpec::ControlFieldSpec.new('001', 10).marc_values(@one)
    one_char.should.equal ['8']
  end

  it "gets a range of characters" do
    slice = MARCSpec::ControlFieldSpec.new('001', 6..10).marc_values(@one)
    slice.should.equal ['90058']
  end

  # Serializing with asPPString and reading back must give an equal spec
  it "should round trip" do
    original = MARCSpec::ControlFieldSpec.new('001', 6..10)
    restored = MARCSpec::ControlFieldSpec.fromPPString(original.asPPString)
    original.should.equal restored
  end
end
|
51
|
+
|
52
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# LEADER 00734njm a2200217uu 4500
|
4
|
+
# 001 afc99990058366
|
5
|
+
# 003 DLC
|
6
|
+
# 005 20071104155141.9
|
7
|
+
# 007 sd ummunniauub
|
8
|
+
# 008 071103s1939 xxufmnne||||||||| u eng||
|
9
|
+
# 010 $a afc99990058366
|
10
|
+
# 040 $a DLC $c DLC
|
11
|
+
# 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
|
12
|
+
# 260 $a Medina, Texas, $c 1939.
|
13
|
+
# 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
|
14
|
+
# 651 0 $a Medina $z Texas $z United States of America.
|
15
|
+
# 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
|
16
|
+
# 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
|
17
|
+
# 700 1 $a Taylor, Beale D. $e Singer.
|
18
|
+
# 852 $a American Folklife Center, Library of Congress
|
19
|
+
# 852 $a DLC
|
20
|
+
|
21
|
+
describe "LeaderSpec" do
  # Every example reads the single record in data/one.dat
  before do
    @one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
  end

  it "Works with full leader" do
    leader = MARCSpec::LeaderSpec.new('LDR').marc_values(@one)
    leader.should.equal @one.leader
  end

  it "Works with substring of leader" do
    piece = MARCSpec::LeaderSpec.new('LDR', 3..5).marc_values(@one)
    piece.should.equal @one.leader[3..5]
  end
end
|
data/spec/solrfieldspec_spec.rb
CHANGED
@@ -165,29 +165,29 @@ describe "CustomSolrSpec" do
|
|
165
165
|
end
|
166
166
|
|
167
167
|
it "works with no args or map" do
|
168
|
-
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :
|
168
|
+
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp)
|
169
169
|
css.marc_values(@one).should.equal [@one['245'].value.upcase]
|
170
170
|
end
|
171
171
|
|
172
172
|
it "accepts nil for no args" do
|
173
|
-
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :
|
173
|
+
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>nil)
|
174
174
|
css.marc_values(@one).should.equal [@one['245'].value.upcase]
|
175
175
|
end
|
176
176
|
|
177
177
|
|
178
178
|
it "uses a custom method with args but no map" do
|
179
|
-
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :
|
179
|
+
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
|
180
180
|
css.marc_values(@one).should.equal [@titleACValue.upcase]
|
181
181
|
end
|
182
182
|
|
183
183
|
it "works with a map" do
|
184
|
-
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :
|
184
|
+
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
|
185
185
|
css.marc_values(@one).should.equal [@mapValue]
|
186
186
|
end
|
187
187
|
|
188
188
|
it "works with a map that has multiple return values" do
|
189
189
|
@map[@titleACValue.upcase] = ['two', 'one']
|
190
|
-
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :
|
190
|
+
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
|
191
191
|
css.marc_values(@one).should.equal ['two', 'one']
|
192
192
|
end
|
193
193
|
|
data/spec/specset_spec.rb
CHANGED
@@ -65,7 +65,7 @@ describe "SpecSet Basics" do
|
|
65
65
|
end
|
66
66
|
|
67
67
|
it "allows customs that reference previous work" do
|
68
|
-
@speclist << {:solrField=>'titleSort', :module=>A::B, :
|
68
|
+
@speclist << {:solrField=>'titleSort', :module=>A::B, :functionSymbol=>:sortable, :methodArgs=>['title']}
|
69
69
|
ss = MARCSpec::SpecSet.new
|
70
70
|
ss.buildSpecsFromList(@speclist)
|
71
71
|
h = ss.hash_from_marc @one
|
@@ -86,7 +86,7 @@ describe "SpecSet Basics" do
|
|
86
86
|
it "should allow multi-headed custom fields" do
|
87
87
|
@speclist << {:solrField => ['one', 'two', 'letters'],
|
88
88
|
:module => A::B,
|
89
|
-
:
|
89
|
+
:functionSymbol => :three_value_custom,
|
90
90
|
}
|
91
91
|
ss = MARCSpec::SpecSet.new
|
92
92
|
ss.buildSpecsFromList(@speclist)
|
@@ -18,56 +18,6 @@ require 'spec_helper'
|
|
18
18
|
# 852 $a American Folklife Center, Library of Congress
|
19
19
|
# 852 $a DLC
|
20
20
|
|
21
|
-
describe "ControlFieldSpec" do
|
22
|
-
|
23
|
-
before do
|
24
|
-
@one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
|
25
|
-
# @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
|
26
|
-
end
|
27
|
-
|
28
|
-
# afc99990058366 # data
|
29
|
-
# 01234567890123 # index
|
30
|
-
it "gets a single full value" do
|
31
|
-
cfs = MARCSpec::ControlFieldSpec.new('001')
|
32
|
-
cfs.marc_values(@one).should.equal ["afc99990058366"]
|
33
|
-
end
|
34
|
-
|
35
|
-
it "gets a single character" do
|
36
|
-
cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
|
37
|
-
cfs.marc_values(@one).should.equal ['8']
|
38
|
-
end
|
39
|
-
|
40
|
-
it "gets a range of characters" do
|
41
|
-
cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
|
42
|
-
cfs.marc_values(@one).should.equal ['90058']
|
43
|
-
end
|
44
|
-
|
45
|
-
it "should round trip" do
|
46
|
-
cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
|
47
|
-
cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
|
48
|
-
cfs.should.equal cfs2
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
|
53
|
-
describe "LeaderSpec" do
|
54
|
-
before do
|
55
|
-
@one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
|
56
|
-
end
|
57
|
-
|
58
|
-
it "Works with full leader" do
|
59
|
-
cfs = MARCSpec::LeaderSpec.new('LDR')
|
60
|
-
cfs.marc_values(@one).should.equal @one.leader
|
61
|
-
end
|
62
|
-
|
63
|
-
it "Works with substring of leader" do
|
64
|
-
cfs = MARCSpec::LeaderSpec.new('LDR', 3..5)
|
65
|
-
cfs.marc_values(@one).should.equal @one.leader[3..5]
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
21
|
describe "VariableFieldSpec" do
|
72
22
|
before do
|
73
23
|
@one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marcspec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
8
|
+
- 7
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 0.7.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- BillDueber
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-08-
|
18
|
+
date: 2010-08-19 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -96,14 +96,16 @@ files:
|
|
96
96
|
- Rakefile
|
97
97
|
- VERSION
|
98
98
|
- lib/marcspec.rb
|
99
|
+
- lib/marcspec/controlfieldspec.rb
|
99
100
|
- lib/marcspec/customspec.rb
|
100
101
|
- lib/marcspec/kvmap.rb
|
102
|
+
- lib/marcspec/leaderspec.rb
|
101
103
|
- lib/marcspec/map.rb
|
102
104
|
- lib/marcspec/marcfieldspec.rb
|
103
105
|
- lib/marcspec/multivaluemap.rb
|
104
106
|
- lib/marcspec/solrfieldspec.rb
|
105
107
|
- lib/marcspec/specset.rb
|
106
|
-
- lib/
|
108
|
+
- lib/marcspec/variablefieldspec.rb
|
107
109
|
- spec/data/batch.dat
|
108
110
|
- spec/data/one.dat
|
109
111
|
- spec/data/umich/translation_maps/area_map.properties
|
@@ -121,11 +123,12 @@ files:
|
|
121
123
|
- spec/data/umich/translation_maps/location_map.properties
|
122
124
|
- spec/data/umich/umich_index.properties
|
123
125
|
- spec/maps_spec.rb
|
124
|
-
- spec/marcfieldspecs_spec.rb
|
125
|
-
- spec/marcspec_spec.rb
|
126
126
|
- spec/solrfieldspec_spec.rb
|
127
127
|
- spec/spec_helper.rb
|
128
128
|
- spec/specset_spec.rb
|
129
|
+
- spec/controlfieldspec_spec.rb
|
130
|
+
- spec/leaderspec_spec.rb
|
131
|
+
- spec/variablefieldspec_spec.rb
|
129
132
|
has_rdoc: true
|
130
133
|
homepage: http://github.com/billdueber/marcspec
|
131
134
|
licenses: []
|
@@ -161,9 +164,10 @@ signing_key:
|
|
161
164
|
specification_version: 3
|
162
165
|
summary: Extract data from MARC records and send to Solr
|
163
166
|
test_files:
|
167
|
+
- spec/controlfieldspec_spec.rb
|
168
|
+
- spec/leaderspec_spec.rb
|
164
169
|
- spec/maps_spec.rb
|
165
|
-
- spec/marcfieldspecs_spec.rb
|
166
|
-
- spec/marcspec_spec.rb
|
167
170
|
- spec/solrfieldspec_spec.rb
|
168
171
|
- spec/spec_helper.rb
|
169
172
|
- spec/specset_spec.rb
|
173
|
+
- spec/variablefieldspec_spec.rb
|
data/lib/orig.rb
DELETED
@@ -1,288 +0,0 @@
|
|
1
|
-
require 'set'
|
2
|
-
require 'pp'
|
3
|
-
require 'logger'
|
4
|
-
|
5
|
-
$LOG ||= Logger.new(STDERR)
|
6
|
-
|
7
|
-
module MARCSpec
|
8
|
-
|
9
|
-
class MapSpec
|
10
|
-
attr_accessor :map, :type, :default
|
11
|
-
|
12
|
-
def initialize(type, map, default=nil)
|
13
|
-
@type = type
|
14
|
-
@default = default
|
15
|
-
@map = map
|
16
|
-
end
|
17
|
-
|
18
|
-
def [] key
|
19
|
-
if (@type == :kv)
|
20
|
-
if @map.has_key? key
|
21
|
-
return @map[key]
|
22
|
-
else
|
23
|
-
return @default
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
# For a pattern, we find all that match.
|
28
|
-
|
29
|
-
if (@type == :pattern)
|
30
|
-
rv = []
|
31
|
-
@map.each do |pv|
|
32
|
-
pat = pv[0]
|
33
|
-
val = pv[1]
|
34
|
-
# puts "Trying pattern #{pat} against #{key}"
|
35
|
-
if pat.match(key)
|
36
|
-
rv << val
|
37
|
-
# puts "Matched: adding #{val}"
|
38
|
-
end
|
39
|
-
end
|
40
|
-
rv.uniq!
|
41
|
-
if rv.size > 0
|
42
|
-
return rv
|
43
|
-
else
|
44
|
-
return @default
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
class CustomSpec
|
51
|
-
def initialize(proc, args)
|
52
|
-
@proc = proc
|
53
|
-
@args = args
|
54
|
-
end
|
55
|
-
|
56
|
-
def marc_values_hash fieldnames, r
|
57
|
-
a = @proc(r, args)
|
58
|
-
rv = {}
|
59
|
-
fieldnames.each_with_index do |fn, i|
|
60
|
-
rv[fn] = a[i]
|
61
|
-
end
|
62
|
-
return rv
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
|
67
|
-
class TagSpec
|
68
|
-
attr_accessor :tag, :codes, :joiner, :parent, :ind1, :ind2, :range, :is_control
|
69
|
-
|
70
|
-
def initialize(tag, codes=nil)
|
71
|
-
@codes = Set.new
|
72
|
-
@tag = tag
|
73
|
-
@joiner = ' '
|
74
|
-
@substr = nil
|
75
|
-
tagint = tag.to_i
|
76
|
-
@is_control = (tagint != 0 and tagint < 10)
|
77
|
-
if (codes)
|
78
|
-
self.codes = codes
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
def range= newrange
|
83
|
-
if newrange =~ /\s*(\d+)-(\d+)/
|
84
|
-
start = $1.to_i
|
85
|
-
last = $2.to_i
|
86
|
-
@range = start..last
|
87
|
-
else
|
88
|
-
se = newrange.to_i
|
89
|
-
@range = se..se
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
def codes= newcodes
|
94
|
-
if newcodes.is_a? Range
|
95
|
-
@codes = newcodes.to_a
|
96
|
-
elsif newcodes !~ /\S/
|
97
|
-
@codes = nil
|
98
|
-
# Otherwise, just split into individual characters
|
99
|
-
else
|
100
|
-
@codes = newcodes.split(//)
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
def marc_values r
|
105
|
-
if @is_control
|
106
|
-
vals = r.find_by_tag(@tag).map {|f| f.value}
|
107
|
-
# puts "Start with #{vals.join(', ')}"
|
108
|
-
if @range
|
109
|
-
vals.map! {|v| v[@range]}
|
110
|
-
end
|
111
|
-
# puts "End with #{vals.join(', ')}"
|
112
|
-
|
113
|
-
else
|
114
|
-
fields = r.find_by_tag(@tag)
|
115
|
-
vals = []
|
116
|
-
fields.each do |f|
|
117
|
-
subvals = f.sub_values(@codes)
|
118
|
-
vals << subvals.join(@joiner) if subvals.size > 0
|
119
|
-
end
|
120
|
-
end
|
121
|
-
# puts vals.join(', ')
|
122
|
-
return vals
|
123
|
-
end
|
124
|
-
|
125
|
-
end
|
126
|
-
|
127
|
-
class FieldSpec
|
128
|
-
attr_accessor :field, :first, :map, :tagspecs
|
129
|
-
|
130
|
-
def initialize(opts)
|
131
|
-
@field = opts[:field]
|
132
|
-
@first = opts[:first] || false
|
133
|
-
@map = opts[:map] || nil
|
134
|
-
@tagspecs = []
|
135
|
-
end
|
136
|
-
|
137
|
-
def << tagspec
|
138
|
-
tagspec.parent = self
|
139
|
-
@tagspecs << tagspec
|
140
|
-
end
|
141
|
-
|
142
|
-
def marc_values r
|
143
|
-
vals = []
|
144
|
-
# puts "Tagspecs has #{@tagspecs.size} items"
|
145
|
-
@tagspecs.each do |ts|
|
146
|
-
vals.concat ts.marc_values(r)
|
147
|
-
# puts vals.join(', ')
|
148
|
-
break if @first and vals.size > 0
|
149
|
-
end
|
150
|
-
|
151
|
-
if (@map)
|
152
|
-
vals.map! {|v| @map[v]}
|
153
|
-
# vals.each do |v|
|
154
|
-
# puts "Map: #{v} => #{@map[v].to_s}"
|
155
|
-
# end
|
156
|
-
end
|
157
|
-
vals.flatten!
|
158
|
-
vals.uniq!
|
159
|
-
vals.compact!
|
160
|
-
return vals
|
161
|
-
end
|
162
|
-
|
163
|
-
end
|
164
|
-
|
165
|
-
|
166
|
-
class SpecSet
|
167
|
-
attr_accessor :tmaps, :fieldspecs
|
168
|
-
def initialize(*args)
|
169
|
-
tmapdir = args.pop!
|
170
|
-
unless File.directory? tmapdir
|
171
|
-
$LOG.error "Directory #{tmapdir} not found"
|
172
|
-
raise LoadError, "Directory #{tmapdir} not found"
|
173
|
-
end
|
174
|
-
|
175
|
-
@tmaps = {}
|
176
|
-
Dir.glob(tmapdir + '/*.rb') do |fn|
|
177
|
-
basename = File.basename(fn).sub(/\.rb$/, '')
|
178
|
-
$LOG.info "Loading translation map #{basename}"
|
179
|
-
|
180
|
-
begin
|
181
|
-
rawmap = eval(File.open(fn).read)
|
182
|
-
@tmaps[basename] = MapSpec.new(rawmap[:type], rawmap[:map], rawmap[:default])
|
183
|
-
rescue SyntaxError
|
184
|
-
$LOG.error "Error processing translation map file #{fn}: #{$!}"
|
185
|
-
raise SyntaxError, $!
|
186
|
-
end
|
187
|
-
|
188
|
-
end
|
189
|
-
|
190
|
-
@fieldspecs = []
|
191
|
-
|
192
|
-
# Get the index files
|
193
|
-
args.each do |indexfile|
|
194
|
-
begin
|
195
|
-
unless File.exists? indexfile
|
196
|
-
$LOG.error "File #{indexfile} does not exist"
|
197
|
-
raise LoadError, "File #{indexfile} does not exist"
|
198
|
-
end
|
199
|
-
$LOG.info "Loading index file #{indexfile}"
|
200
|
-
rawindex = eval(File.open(indexfile).read)
|
201
|
-
rawindex.each do |entry|
|
202
|
-
fs = FieldSpec.new(:field => entry[:solrField], :first=>entry[:firstOnly])
|
203
|
-
mapname = entry[:map]
|
204
|
-
if mapname
|
205
|
-
if @tmaps.has_key? mapname
|
206
|
-
fs.map = @tmaps[mapname]
|
207
|
-
else
|
208
|
-
$LOG.error "Can't find map #{mapname}"
|
209
|
-
end
|
210
|
-
end
|
211
|
-
entry[:specs].each do |entryts|
|
212
|
-
|
213
|
-
# A one- or two-element entry is a control field
|
214
|
-
# A three element entry (tag, ind1, ind2) is all subs of a field (need to implement)
|
215
|
-
# A four element field is tag, ind1, ind2, subs
|
216
|
-
# A five element field is tag, ind1, ind2, subs, joiner
|
217
|
-
|
218
|
-
|
219
|
-
tag = entryts[0]
|
220
|
-
|
221
|
-
# Is tag the symbol :custom? Then make it a custom item
|
222
|
-
|
223
|
-
if tag == :custom
|
224
|
-
ts = CustomSpec.new(entryts[1], entryts[2..-1])
|
225
|
-
fs << ts
|
226
|
-
next
|
227
|
-
end
|
228
|
-
|
229
|
-
# If it's not custom, the solrField better be a scale
|
230
|
-
if entry[:solrField].is_a? Array
|
231
|
-
# log an error and bail out
|
232
|
-
end
|
233
|
-
|
234
|
-
# Otherwise, it's a tag spec
|
235
|
-
if tag.is_a? Fixnum
|
236
|
-
tag = '%03d' % tag
|
237
|
-
end
|
238
|
-
|
239
|
-
|
240
|
-
ts = TagSpec.new(tag)
|
241
|
-
if entryts.size < 3
|
242
|
-
ts.is_control = true
|
243
|
-
ts.range = entryts[1] if entryts[1]
|
244
|
-
else
|
245
|
-
ts.ind1 = entryts[1]
|
246
|
-
ts.ind2 = entryts[2]
|
247
|
-
ts.codes = entryts[3]
|
248
|
-
ts.joiner = entryts[4] if entryts[4]
|
249
|
-
end
|
250
|
-
fs << ts
|
251
|
-
end
|
252
|
-
self << fs
|
253
|
-
end
|
254
|
-
rescue SyntaxError
|
255
|
-
$LOG.error "Error processing index file #{indexfile}: #{$!}"
|
256
|
-
raise SyntaxError
|
257
|
-
end
|
258
|
-
end
|
259
|
-
end
|
260
|
-
|
261
|
-
def each
|
262
|
-
@fieldspecs.each do |fs|
|
263
|
-
yield fs
|
264
|
-
end
|
265
|
-
end
|
266
|
-
|
267
|
-
def << fieldspec
|
268
|
-
@fieldspecs << fieldspec
|
269
|
-
end
|
270
|
-
|
271
|
-
def doc_from_marc r
|
272
|
-
doc = SolrInputDocument.new
|
273
|
-
@fieldspecs.each do |fs|
|
274
|
-
doc[fs.field] = fs.marc_values(r)
|
275
|
-
end
|
276
|
-
return doc
|
277
|
-
end
|
278
|
-
|
279
|
-
def hash_from_marc r
|
280
|
-
h = {}
|
281
|
-
@fieldspecs.each do |fs|
|
282
|
-
h[fs.field] = fs.marc_values(r)
|
283
|
-
end
|
284
|
-
return h
|
285
|
-
end
|
286
|
-
|
287
|
-
end
|
288
|
-
end
|