marcspec 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/marcspec/controlfieldspec.rb +93 -0
- data/lib/marcspec/customspec.rb +36 -11
- data/lib/marcspec/kvmap.rb +13 -1
- data/lib/marcspec/leaderspec.rb +37 -0
- data/lib/marcspec/map.rb +20 -2
- data/lib/marcspec/marcfieldspec.rb +3 -207
- data/lib/marcspec/variablefieldspec.rb +80 -0
- data/spec/controlfieldspec_spec.rb +52 -0
- data/spec/leaderspec_spec.rb +35 -0
- data/spec/solrfieldspec_spec.rb +5 -5
- data/spec/specset_spec.rb +2 -2
- data/spec/{marcfieldspecs_spec.rb → variablefieldspec_spec.rb} +0 -50
- metadata +13 -9
- data/lib/orig.rb +0 -288
- data/spec/marcspec_spec.rb +0 -10
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.7.0
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'marc4j4r'
|
2
|
+
require 'set'
|
3
|
+
require 'pp'
|
4
|
+
module MARCSpec
|
5
|
+
# A ControlFieldSpec takes a control tag (generally 001..009) and an optional zero-based range.
# When called with marc_values(record), it returns either the complete value of all
# occurrences of the field in question (in the order they appear in the record), or
# the zero-based substrings based on the passed range.
#
# @example Get the whole 001
#   cfs = MARCSpec::ControlFieldSpec.new('001')
#
# @example Get the first three characters of the 008
#   cfs = MARCSpec::ControlFieldSpec.new('008', 0..2)
#
# Note that the use of the zero-based range in this manner conforms to the way MARC
# substrings are specified.

class ControlFieldSpec
  attr_accessor :tag
  attr_reader :range

  # Build a spec for one control tag.
  #
  # @param [String] tag A control tag, as judged by MARC4J4R::ControlField.control_tag?
  # @param [nil, Integer, Range] range Optional zero-based character position or substring range
  # @raise [ArgumentError] if the tag is not a control tag or the range is invalid
  def initialize(tag, range = nil)
    unless MARC4J4R::ControlField.control_tag? tag
      # The comma matters: without it Ruby parses a call to a method named ArgumentError.
      raise ArgumentError, "Tag must be a control tag"
    end
    @tag = tag
    self.range = range
  end

  # Two specs are equal when both tag and range compare equal. Neither side is modified.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def ==(other)
    other.respond_to?(:tag) && other.respond_to?(:range) &&
      tag == other.tag && range == other.range
  end

  # Always force a real range, since in Ruby 1.8 a string subscript with a single integer
  # will return the character code of that character (e.g., "Bill"[0] => 66), whereas
  # "Bill"[0..0] gives the expected 'B'
  #
  # @param [nil, Integer, Range] range A zero-based substring range or character position
  # @return [MARCSpec::ControlFieldSpec] self (only observable when invoked via send;
  #   a plain assignment expression always evaluates to its right-hand side)
  # @raise [ArgumentError] if range is a negative integer or of an unsupported type
  def range=(range)
    case range
    when nil
      @range = nil
    when Integer
      if range < 0
        raise ArgumentError, "Range must be nil, an integer offset (zero-based), or a Range, not #{range}"
      end
      # Store a one-character range so String#[] always yields a string.
      @range = range..range
    when Range
      @range = range
    else
      raise ArgumentError, "Range must be nil, an integer offset (zero-based), or a Range, not #{range.inspect}"
    end
    self
  end

  # Extract the values for this spec from a record.
  #
  # @param [#find_by_tag] r A record whose find_by_tag returns fields responding to #value
  # @return [Array<String>] One entry per matching field: the whole value, or the
  #   substring selected by the range when one is set
  def marc_values(r)
    vals = r.find_by_tag(@tag).map { |f| f.value }
    @range ? vals.map { |v| v[@range] } : vals
  end

  # Pretty-print this spec as the array that asPPString serializes.
  def pretty_print(pp)
    pp.pp spec_array
  end

  # @return [String] Ruby source for an array ([tag] or [tag, range]) that
  #   fromPPString can turn back into an equal spec
  def asPPString
    spec_array.pretty_inspect
  end

  # Rebuild a spec from the string produced by asPPString.
  #
  # NOTE(review): this evals its argument, so it must only ever be given trusted
  # configuration text, never external input.
  #
  # @param [String] str Ruby source evaluating to [tag] or [tag, range]
  # @return [ControlFieldSpec]
  def self.fromPPString(str)
    new(*eval(str))
  end

  private

  # The array form of this spec; the range is omitted when none is set.
  def spec_array
    @range ? [@tag, @range] : [@tag]
  end
end
|
93
|
+
end
|
data/lib/marcspec/customspec.rb
CHANGED
@@ -5,10 +5,12 @@ require 'marcspec/solrfieldspec'
|
|
5
5
|
module MARCSpec
|
6
6
|
|
7
7
|
# A CustomSolrSpec is a SolrFieldSpec that derives all its values from a custom function. The custom function
|
8
|
-
# must me a module function that takes a record and an array of other arguments and returns a
|
8
|
+
# must be a module function that takes a hash-like document object, a MARC4J4R record, and an array of other arguments and returns a
|
9
9
|
# (possibly empty) list of resulting values.
|
10
10
|
#
|
11
|
-
#
|
11
|
+
# See the example file simple_sample/index.rb in the marc2solr project for configuration examples.
|
12
|
+
#
|
13
|
+
# @example A sample custom function, to be placed in the configuration directory's lib/ subdir
|
12
14
|
# module MARC2Solr
|
13
15
|
# module MyCustomStuff
|
14
16
|
# def self.uppercaseTitle r, args=[]
|
@@ -19,25 +21,40 @@ module MARCSpec
|
|
19
21
|
# end
|
20
22
|
# end
|
21
23
|
#
|
24
|
+
# @example A simple custom spec made by hand
|
22
25
|
# css = MARCSpec::CustomSolrSpec.new(:module => MARC2Solr::MyCustomStuff,
|
23
|
-
# :
|
26
|
+
# :functionSymbol => :uppercaseTitle,
|
24
27
|
# :map => ss.map('mapname')
|
25
28
|
# )
|
26
|
-
# ss.add_spec(css)
|
27
29
|
#
|
28
30
|
#
|
29
31
|
|
30
32
|
|
31
33
|
class CustomSolrSpec < SolrFieldSpec
|
32
34
|
|
33
|
-
attr_accessor :module, :
|
35
|
+
attr_accessor :module, :functionSymbol, :methodArgs
|
36
|
+
|
37
|
+
# Get a new Custom Solr Spec based on the passed in options.
|
38
|
+
# @param [Hash] opts Initialization options
|
39
|
+
# @option opts [String, Array<String>] :solrField the name(s) of the Solr field(s) that will receive the data derived from this spec
|
40
|
+
# @option opts [Module] :module the actual module constant (not a string or symbol representation) which holds the
|
41
|
+
# custom function we'll be calling
|
42
|
+
# @option opts [Symbol] :functionSymbol A symbol of the name of the custom function
|
43
|
+
# @option opts [Boolean] :firstOnly (false) Whether we should return the first found value
|
44
|
+
# @option opts [String] :default (nil) The value to return if the custom function returns no values
|
45
|
+
# @option opts [MARC2Solr::Map] :map (nil) An optional Map used to translate resulting values
|
46
|
+
# @option opts [String] :noMapKeyDefault (nil) The value to return if (a) a value is found, (b) a map is defined, but (c) there's
|
47
|
+
# no key in the map that matches the value.
|
48
|
+
#
|
49
|
+
# Note that the last four options don't make sense if multiple :solrFields are given, and are illegal in that case.
|
50
|
+
|
34
51
|
def initialize(opts)
|
35
52
|
@solrField = opts[:solrField]
|
36
53
|
@module = opts[:module]
|
37
|
-
@
|
54
|
+
@functionSymbol = opts[:functionSymbol]
|
38
55
|
|
39
|
-
unless @solrField and @module and @
|
40
|
-
raise ArgumentError, "Custom solr spec must have a field name in :solrField, module in :module, and the method name as a symbol in :
|
56
|
+
unless @solrField and @module and @functionSymbol
|
57
|
+
raise ArgumentError, "Custom solr spec must have a field name in :solrField, module in :module, and the method name as a symbol in :functionSymbol"
|
41
58
|
end
|
42
59
|
|
43
60
|
|
@@ -60,11 +77,19 @@ module MARCSpec
|
|
60
77
|
|
61
78
|
end
|
62
79
|
|
80
|
+
# Get values from a MARC object and/or the previously-filled document object.
|
81
|
+
#
|
82
|
+
# Note that the doc is read-write here, but for the love of god, just leave it alone.
|
83
|
+
#
|
84
|
+
# @param [MARC4J4R::Record] r A marc record
|
85
|
+
# @param [SolrInputDocument, Hash] doc The document we're constructing.
|
86
|
+
# @return [Array<String>] An array of values returned by the custom method
|
63
87
|
|
64
88
|
def raw_marc_values r, doc
|
65
|
-
return @module.send(@
|
89
|
+
return @module.send(@functionSymbol, doc, r, *@methodArgs)
|
66
90
|
end
|
67
91
|
|
92
|
+
|
68
93
|
def self.fromHash h
|
69
94
|
return self.new(h)
|
70
95
|
end
|
@@ -94,8 +119,8 @@ module MARCSpec
|
|
94
119
|
|
95
120
|
s.print(":module => ")
|
96
121
|
PP.singleline_pp(@module, s)
|
97
|
-
s.print(",\n :
|
98
|
-
PP.singleline_pp(@
|
122
|
+
s.print(",\n :functionSymbol => ")
|
123
|
+
PP.singleline_pp(@functionSymbol, s)
|
99
124
|
if @methodArgs
|
100
125
|
s.print(",\n :methodArgs => ")
|
101
126
|
PP.singleline_pp(@methodArgs, s)
|
data/lib/marcspec/kvmap.rb
CHANGED
@@ -8,6 +8,8 @@ module MARCSpec
|
|
8
8
|
|
9
9
|
# A KVMap is, when push comes to shove, just a hash with a name, and the
|
10
10
|
# option of adding a default value for each lookup.
|
11
|
+
#
|
12
|
+
# The map portion of a kvmap is simply a hash.
|
11
13
|
|
12
14
|
class KVMap < Map
|
13
15
|
|
@@ -38,12 +40,18 @@ module MARCSpec
|
|
38
40
|
end
|
39
41
|
end
|
40
42
|
|
43
|
+
# Set an element in the map, just like for a regular hash
|
41
44
|
def []= key, value
|
42
45
|
@map[key] = value
|
43
46
|
end
|
44
47
|
|
45
48
|
alias_method :add, :[]=
|
46
49
|
|
50
|
+
|
51
|
+
# Produce a configuration file that will round-trip to this object.
|
52
|
+
#
|
53
|
+
# @return [String] A string representation of valid ruby code that can be turned back into
|
54
|
+
# this object using MARCSpec::Map#fromFile
|
47
55
|
def asPPString
|
48
56
|
s = StringIO.new
|
49
57
|
s.print "{\n :maptype=>:kv,\n :mapname=>"
|
@@ -54,7 +62,11 @@ module MARCSpec
|
|
54
62
|
return s.string
|
55
63
|
end
|
56
64
|
|
57
|
-
|
65
|
+
|
66
|
+
# Translate from a solrmarc map file that has *already been determined* to be a KV map
|
67
|
+
# @param [String] filename The path to the solrmarc kv map file
|
68
|
+
# @return [MARCSpec::KVMap] a KVMap
|
69
|
+
|
58
70
|
def self.from_solrmarc_file filename
|
59
71
|
mapname = File.basename(filename).sub(/\..+?$/, '')
|
60
72
|
map = {}
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'marcspec/controlfieldspec'
|
2
|
+
|
3
|
+
module MARCSpec
|
4
|
+
# A LeaderSpec deals only with the leader. It's basically the same as a controlfield spec, but
# using the string 'LDR' to identify itself

class LeaderSpec < ControlFieldSpec

  # Built to be syntax-compatible with ControlFieldSpec, the tag must always
  # be 'LDR' (case matters)
  #
  # @param ['LDR'] tag The 'tag'; in this case, always 'LDR'
  # @param [Integer, Range<Integer>] range substring specification (either one character or a range) to return
  #   instead of the whole leader.
  # @raise [ArgumentError] if the tag is anything other than 'LDR'
  def initialize(tag, range = nil)
    unless tag == 'LDR'
      # The comma matters: without it Ruby parses a call to a method named ArgumentError.
      raise ArgumentError, "Tag must be 'LDR' for a LeaderSpec"
    end
    @tag = 'LDR'
    # Goes through the inherited range= setter for validation/normalization.
    self.range = range
  end

  # Return the appropriate value (either the leader or a subset of it) from the
  # given record
  #
  # @param [MARC4J4R::Record] r A MARC4J4R Record
  # @return [String] the leader or substring of the leader
  def marc_values(r)
    @range ? r.leader[@range] : r.leader
  end
end
|
37
|
+
end
|
data/lib/marcspec/map.rb
CHANGED
@@ -2,7 +2,12 @@ module MARCSpec
|
|
2
2
|
|
3
3
|
# A Map is just a named lookup table. The access
|
4
4
|
# (via []) takes, in addition to a key, an optional
|
5
|
-
# default value to return
|
5
|
+
# default value to return (e.g., val = map[key, defaultIfNotFound])
|
6
|
+
#
|
7
|
+
# We don't have the default be a part of the map because it might be used
|
8
|
+
# in several different contexts.
|
9
|
+
#
|
10
|
+
# NOTE: THIS IS AN ABSTRACT SUPERCLASS. DO NOT INSTANTIATE IT DIRECTLY
|
6
11
|
|
7
12
|
class Map
|
8
13
|
attr_accessor :mapname, :map
|
@@ -10,12 +15,21 @@ module MARCSpec
|
|
10
15
|
# Create a new map. The passed map is either
|
11
16
|
# a standard hash or a list of duples
|
12
17
|
#
|
13
|
-
# @param
|
18
|
+
# @param [String] mapname The name of this map; can be used to find it later on.
|
19
|
+
# @param [Hash, Array<2-value-arrays>] map Either a normal key-value hash (for a KV Map) or an
|
20
|
+
# array of duples (2-value arrays) for a MultiValueMap.
|
14
21
|
def initialize(mapname, map)
|
15
22
|
@mapname = mapname
|
16
23
|
@map = map
|
17
24
|
end
|
18
25
|
|
26
|
+
# Load a map from a file, determining what kind it is along the way.
|
27
|
+
#
|
28
|
+
# The file is valid ruby code; see the subclasses KVMap and MultiValueMap for examples.
|
29
|
+
#
|
30
|
+
# @param [String] filename The name of the map file to be eval'd
|
31
|
+
# @return [MARCSpec::Map] An instance of a subclass of MARCSpec::Map
|
32
|
+
|
19
33
|
def self.fromFile filename
|
20
34
|
begin
|
21
35
|
str = File.open(filename).read
|
@@ -44,14 +58,18 @@ module MARCSpec
|
|
44
58
|
end
|
45
59
|
|
46
60
|
|
61
|
+
# Check for map equality: two maps are equal when both their names and their
# underlying map data compare equal. Neither object is modified.
#
# @param [#mapname, #map] other The map to compare against
# @return [Boolean] true when mapname and map both match
def ==(other)
  # Compare with ==; a single = here would overwrite other's map data.
  other.mapname == mapname && other.map == map
end
|
50
65
|
|
66
|
+
# Generic pretty_print; used mostly for translating from solrmarc
|
51
67
|
def pretty_print pp
|
52
68
|
pp.pp eval(self.asPPString)
|
53
69
|
end
|
54
70
|
|
71
|
+
# Produce a map from the data structure produced by asPPString
|
72
|
+
# @param [Hash] rawmap A hash with two keys; :mapname and :map
|
55
73
|
def self.fromHash rawmap
|
56
74
|
return self.new(rawmap[:mapname], rawmap[:map])
|
57
75
|
end
|
@@ -1,207 +1,3 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
require '
|
4
|
-
module MARCSpec
|
5
|
-
|
6
|
-
# A ControlFieldSpec takes a control tag (generally 001..009) and an optional zero-based range
|
7
|
-
# When called with marc_values(record), it returns either the complete value of all
|
8
|
-
# occurances of the field in question (in the order they appear in the record), or
|
9
|
-
# the zero-based substrings based on the passed range.
|
10
|
-
#
|
11
|
-
# @example Get the whole 001
|
12
|
-
# cfs = MARCSpec::ControlTagSpec.new('001')
|
13
|
-
#
|
14
|
-
# @example Get the first three characters of the 008
|
15
|
-
# cfs = MARCSpec::ControlTagSpec.new('001', 0..2)
|
16
|
-
#
|
17
|
-
# Note that the use of the zero-based range in this manner conforms to the way MARC
|
18
|
-
# substrings are specified.
|
19
|
-
|
20
|
-
class ControlFieldSpec
|
21
|
-
attr_accessor :tag, :range
|
22
|
-
|
23
|
-
def initialize (tag, range=nil)
|
24
|
-
unless MARC4J4R::ControlField.control_tag? tag
|
25
|
-
raise ArgumentError "Tag must be a control tag"
|
26
|
-
end
|
27
|
-
@tag = tag
|
28
|
-
self.range = range
|
29
|
-
end
|
30
|
-
|
31
|
-
def == other
|
32
|
-
return ((self.tag == other.tag) and
|
33
|
-
(self.range = other.range))
|
34
|
-
end
|
35
|
-
|
36
|
-
|
37
|
-
# Always force a real range, since in Ruby 1.9 a string subscript with a single fixnum
|
38
|
-
# will return the character code of that character (e.g., "Bill"[0] => 66, wherease
|
39
|
-
# "Bill"[0..0] gives the expected 'B'
|
40
|
-
#
|
41
|
-
# @param [nil, Fixnum, Range] range A zero-based substring range or character position
|
42
|
-
# @return [MARCSpec::ControlFieldSpec] self
|
43
|
-
|
44
|
-
def range= range
|
45
|
-
if range.nil?
|
46
|
-
@range = nil
|
47
|
-
return self
|
48
|
-
end
|
49
|
-
if range.is_a? Fixnum
|
50
|
-
if range < 0
|
51
|
-
raise ArgumentError, "Range must be nil, an integer offset (1-based), or a Range, not #{range}"
|
52
|
-
end
|
53
|
-
|
54
|
-
@range = range..range
|
55
|
-
|
56
|
-
elsif range.is_a? Range
|
57
|
-
@range = range
|
58
|
-
else
|
59
|
-
raise ArgumentError, "Range must be nil, an integer offset (1-based), or a Range, not #{range.inspect}"
|
60
|
-
end
|
61
|
-
return self
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
|
-
def marc_values r
|
66
|
-
vals = r.find_by_tag(@tag).map {|f| f.value}
|
67
|
-
if @range
|
68
|
-
return vals.map {|v| v[@range]}
|
69
|
-
else
|
70
|
-
return vals
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def pretty_print pp
|
75
|
-
pp.pp eval(self.asPPString)
|
76
|
-
end
|
77
|
-
|
78
|
-
def asPPString
|
79
|
-
s = StringIO.new
|
80
|
-
if @range
|
81
|
-
PP.pp([@tag, @range], s)
|
82
|
-
else
|
83
|
-
PP.pp([@tag], s)
|
84
|
-
end
|
85
|
-
return s.string
|
86
|
-
end
|
87
|
-
|
88
|
-
def self.fromPPString str
|
89
|
-
a = eval(str)
|
90
|
-
return self.new(*a)
|
91
|
-
end
|
92
|
-
|
93
|
-
end
|
94
|
-
|
95
|
-
|
96
|
-
# A LeaderSpec deals only with the leader. It's basically the same as a controlfield spec, but
|
97
|
-
# using the string 'LDR' to identify itself
|
98
|
-
|
99
|
-
class LeaderSpec < ControlFieldSpec
|
100
|
-
|
101
|
-
# Built to be syntax-compatible with ControlFieldSpec, the tag must always
|
102
|
-
# be 'LDR' (case matters)
|
103
|
-
#
|
104
|
-
# @param ['LDR'] tag The 'tag'; in this case, always 'LDR'
|
105
|
-
# @param [Fixnum, Range<Fixnum>] range substring specification (either one character or a range) to return
|
106
|
-
# instead of the whole leader.
|
107
|
-
|
108
|
-
def initialize (tag, range=nil)
|
109
|
-
unless tag == 'LDR'
|
110
|
-
raise ArgumentError "Tag must be 'LDR'"
|
111
|
-
end
|
112
|
-
@tag = 'LDR'
|
113
|
-
self.range = range
|
114
|
-
end
|
115
|
-
|
116
|
-
# Return the appropriate value (either the leader or a subset of it) from the
|
117
|
-
# given record
|
118
|
-
#
|
119
|
-
# @param [MARC4J4R::Record] r A MARC4J4R Record
|
120
|
-
# @return [String] the leader or substring of the leader
|
121
|
-
def marc_values r
|
122
|
-
if @range
|
123
|
-
return r.leader[@range]
|
124
|
-
else
|
125
|
-
return r.leader
|
126
|
-
end
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
|
131
|
-
# A VariableFieldSpec has a tag (three chars) and a set of codes. Its #marc_values(r) method will return
|
132
|
-
# all the values for the subfields for the given codes joined by the optional joiner (space by default)
|
133
|
-
#
|
134
|
-
# The subfield values are presented in the order they appear in the document, *not* the order the subfield
|
135
|
-
# codes are specified
|
136
|
-
#
|
137
|
-
# @example Get the $a from the 245s
|
138
|
-
# vfs = MARCSpec::VariableFieldSpec.new('245', 'a')
|
139
|
-
#
|
140
|
-
# vfs = MARCSpec::VariableFieldSpec.new('245', 'ab')
|
141
|
-
|
142
|
-
|
143
|
-
class VariableFieldSpec
|
144
|
-
|
145
|
-
attr_accessor :tag, :codes, :joiner
|
146
|
-
|
147
|
-
def initialize tag, codes=nil, joiner=' '
|
148
|
-
@tag = tag
|
149
|
-
@joiner = joiner || ' '
|
150
|
-
self.codes = codes
|
151
|
-
end
|
152
|
-
|
153
|
-
def == other
|
154
|
-
return ((self.tag == other.tag) and
|
155
|
-
(self.codes = other.codes) and
|
156
|
-
(self.joiner = other.joiner))
|
157
|
-
end
|
158
|
-
|
159
|
-
def codes= c
|
160
|
-
if c.nil?
|
161
|
-
@codes = nil
|
162
|
-
return nil
|
163
|
-
end
|
164
|
-
|
165
|
-
if( c.is_a? Array) or (c.is_a? Set) or (c.is_a? Range)
|
166
|
-
@codes = c.to_a
|
167
|
-
else
|
168
|
-
@codes = c.split(//)
|
169
|
-
end
|
170
|
-
|
171
|
-
return @codes
|
172
|
-
end
|
173
|
-
|
174
|
-
def marc_values r
|
175
|
-
fields = r.find_by_tag(@tag)
|
176
|
-
vals = []
|
177
|
-
fields.each do |f|
|
178
|
-
subvals = f.sub_values(@codes)
|
179
|
-
subvals = subvals.join(@joiner) if subvals.size > 0 and (@codes.nil? or @codes.size > 1)
|
180
|
-
vals << subvals
|
181
|
-
end
|
182
|
-
vals.flatten!
|
183
|
-
return vals
|
184
|
-
end
|
185
|
-
|
186
|
-
def pretty_print pp
|
187
|
-
pp.pp eval(self.asPPString)
|
188
|
-
end
|
189
|
-
|
190
|
-
def asPPString
|
191
|
-
s = StringIO.new
|
192
|
-
if @joiner and @joiner != ' '
|
193
|
-
PP.pp([@tag, '*', '*', @codes.join(''), @joiner], s)
|
194
|
-
else
|
195
|
-
PP.pp([@tag, '*', '*', @codes.join('')], s)
|
196
|
-
end
|
197
|
-
return s.string
|
198
|
-
end
|
199
|
-
|
200
|
-
def self.fromPPString str
|
201
|
-
a = eval(str)
|
202
|
-
return self.new(a[0], a[3], a[4])
|
203
|
-
end
|
204
|
-
|
205
|
-
end
|
206
|
-
|
207
|
-
end
|
1
|
+
require 'marcspec/controlfieldspec';
|
2
|
+
require 'marcspec/variablefieldspec';
|
3
|
+
require 'marcspec/leaderspec';
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'pp'
|
3
|
+
module MARCSpec
|
4
|
+
# A VariableFieldSpec has a tag (three chars) and a set of codes. Its #marc_values(r) method will return
# all the values for the subfields for the given codes joined by the optional joiner (space by default)
#
# The subfield values are presented in the order they appear in the document, *not* the order the subfield
# codes are specified
#
# @example Get the $a from the 245s
#   vfs = MARCSpec::VariableFieldSpec.new('245', 'a')
#
# @example Equivalent ways to ask for $a and $b
#   vfs = MARCSpec::VariableFieldSpec.new('245', 'ab')
#   vfs = MARCSpec::VariableFieldSpec.new('245', ['a', 'b'])
#   vfs = MARCSpec::VariableFieldSpec.new('245', 'a'..'b')

class VariableFieldSpec

  attr_accessor :tag, :joiner
  attr_reader :codes

  # @param [String] tag The field tag
  # @param [nil, String, Array<String>, Set<String>, Range] codes The subfield codes; nil is
  #   stored as nil and passed straight through to the field's sub_values
  # @param [String, nil] joiner String placed between joined subfield values; nil means a space
  def initialize(tag, codes = nil, joiner = ' ')
    @tag = tag
    @joiner = joiner || ' '
    self.codes = codes
  end

  # Two specs are equal when tag, codes and joiner all compare equal. Neither side is modified.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def ==(other)
    [:tag, :codes, :joiner].all? { |m| other.respond_to?(m) } &&
      tag == other.tag && codes == other.codes && joiner == other.joiner
  end

  # Normalize the codes to an array of single codes (or nil).
  #
  # @param [nil, String, Array, Set, Range] c Codes as a collection, or a string with one
  #   character per code
  # @return [Array<String>, nil] the stored codes
  def codes=(c)
    @codes =
      case c
      when nil then nil
      when Array, Set, Range then c.to_a
      else c.split(//) # a string such as 'abc' becomes ['a', 'b', 'c']
      end
  end

  # Extract the values for this spec from a record.
  #
  # With no codes or more than one code, the subfield values of each field are joined
  # into one string using the joiner; with exactly one code, every matching subfield
  # value is returned as its own entry. Fields yielding no values contribute nothing.
  #
  # @param [#find_by_tag] r A record whose find_by_tag returns fields responding to #sub_values
  # @return [Array<String>]
  def marc_values(r)
    join_values = @codes.nil? || @codes.size > 1
    vals = r.find_by_tag(@tag).map do |f|
      subvals = f.sub_values(@codes)
      (join_values && subvals.size > 0) ? subvals.join(@joiner) : subvals
    end
    vals.flatten
  end

  # Pretty-print this spec as the array that asPPString serializes.
  def pretty_print(pp)
    pp.pp spec_array
  end

  # @return [String] Ruby source for an array [tag, '*', '*', codes] (plus the joiner when
  #   it is not a single space) that fromPPString can turn back into an equal spec
  def asPPString
    spec_array.pretty_inspect
  end

  # Rebuild a spec from the string produced by asPPString. Only elements 0 (tag),
  # 3 (codes) and 4 (joiner) are used.
  #
  # NOTE(review): this evals its argument, so it must only ever be given trusted
  # configuration text, never external input.
  #
  # @param [String] str Ruby source evaluating to the array form of a spec
  # @return [VariableFieldSpec]
  def self.fromPPString(str)
    a = eval(str)
    new(a[0], a[3], a[4])
  end

  private

  # The array form of this spec. Nil codes are emitted as nil (instead of crashing on
  # nil.join) so that fromPPString restores them as nil.
  # NOTE(review): the two '*' entries look like indicator placeholders -- confirm
  # against the code that consumes this format.
  def spec_array
    a = [@tag, '*', '*', @codes && @codes.join('')]
    a << @joiner if @joiner && @joiner != ' '
    a
  end

end
|
79
|
+
|
80
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# LEADER 00734njm a2200217uu 4500
|
4
|
+
# 001 afc99990058366
|
5
|
+
# 003 DLC
|
6
|
+
# 005 20071104155141.9
|
7
|
+
# 007 sd ummunniauub
|
8
|
+
# 008 071103s1939 xxufmnne||||||||| u eng||
|
9
|
+
# 010 $a afc99990058366
|
10
|
+
# 040 $a DLC $c DLC
|
11
|
+
# 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
|
12
|
+
# 260 $a Medina, Texas, $c 1939.
|
13
|
+
# 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
|
14
|
+
# 651 0 $a Medina $z Texas $z United States of America.
|
15
|
+
# 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
|
16
|
+
# 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
|
17
|
+
# 700 1 $a Taylor, Beale D. $e Singer.
|
18
|
+
# 852 $a American Folklife Center, Library of Congress
|
19
|
+
# 852 $a DLC
|
20
|
+
|
21
|
+
describe "ControlFieldSpec" do
|
22
|
+
|
23
|
+
before do
|
24
|
+
@one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
|
25
|
+
# @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
|
26
|
+
end
|
27
|
+
|
28
|
+
# afc99990058366 # data
|
29
|
+
# 01234567890123 # index
|
30
|
+
it "gets a single full value" do
|
31
|
+
cfs = MARCSpec::ControlFieldSpec.new('001')
|
32
|
+
cfs.marc_values(@one).should.equal ["afc99990058366"]
|
33
|
+
end
|
34
|
+
|
35
|
+
it "gets a single character" do
|
36
|
+
cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
|
37
|
+
cfs.marc_values(@one).should.equal ['8']
|
38
|
+
end
|
39
|
+
|
40
|
+
it "gets a range of characters" do
|
41
|
+
cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
|
42
|
+
cfs.marc_values(@one).should.equal ['90058']
|
43
|
+
end
|
44
|
+
|
45
|
+
it "should round trip" do
|
46
|
+
cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
|
47
|
+
cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
|
48
|
+
cfs.should.equal cfs2
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# LEADER 00734njm a2200217uu 4500
|
4
|
+
# 001 afc99990058366
|
5
|
+
# 003 DLC
|
6
|
+
# 005 20071104155141.9
|
7
|
+
# 007 sd ummunniauub
|
8
|
+
# 008 071103s1939 xxufmnne||||||||| u eng||
|
9
|
+
# 010 $a afc99990058366
|
10
|
+
# 040 $a DLC $c DLC
|
11
|
+
# 245 04 $a The Texas ranger $h [sound recording] / $c Sung by Beale D. Taylor.
|
12
|
+
# 260 $a Medina, Texas, $c 1939.
|
13
|
+
# 300 $a 1 sound disc : $b analog, 33 1/3 rpm, mono. ; $c 12 in.
|
14
|
+
# 651 0 $a Medina $z Texas $z United States of America.
|
15
|
+
# 700 1 $a Lomax, John Avery, 1867-1948 $e Recording engineer.
|
16
|
+
# 700 1 $a Lomax, Ruby T. (Ruby Terrill) $e Recording engineer.
|
17
|
+
# 700 1 $a Taylor, Beale D. $e Singer.
|
18
|
+
# 852 $a American Folklife Center, Library of Congress
|
19
|
+
# 852 $a DLC
|
20
|
+
|
21
|
+
describe "LeaderSpec" do
|
22
|
+
before do
|
23
|
+
@one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
|
24
|
+
end
|
25
|
+
|
26
|
+
it "Works with full leader" do
|
27
|
+
cfs = MARCSpec::LeaderSpec.new('LDR')
|
28
|
+
cfs.marc_values(@one).should.equal @one.leader
|
29
|
+
end
|
30
|
+
|
31
|
+
it "Works with substring of leader" do
|
32
|
+
cfs = MARCSpec::LeaderSpec.new('LDR', 3..5)
|
33
|
+
cfs.marc_values(@one).should.equal @one.leader[3..5]
|
34
|
+
end
|
35
|
+
end
|
data/spec/solrfieldspec_spec.rb
CHANGED
@@ -165,29 +165,29 @@ describe "CustomSolrSpec" do
|
|
165
165
|
end
|
166
166
|
|
167
167
|
it "works with no args or map" do
|
168
|
-
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :
|
168
|
+
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp)
|
169
169
|
css.marc_values(@one).should.equal [@one['245'].value.upcase]
|
170
170
|
end
|
171
171
|
|
172
172
|
it "accepts nil for no args" do
|
173
|
-
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :
|
173
|
+
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>nil)
|
174
174
|
css.marc_values(@one).should.equal [@one['245'].value.upcase]
|
175
175
|
end
|
176
176
|
|
177
177
|
|
178
178
|
it "uses a custom method with args but no map" do
|
179
|
-
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :
|
179
|
+
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
|
180
180
|
css.marc_values(@one).should.equal [@titleACValue.upcase]
|
181
181
|
end
|
182
182
|
|
183
183
|
it "works with a map" do
|
184
|
-
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :
|
184
|
+
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
|
185
185
|
css.marc_values(@one).should.equal [@mapValue]
|
186
186
|
end
|
187
187
|
|
188
188
|
it "works with a map that has multiple return values" do
|
189
189
|
@map[@titleACValue.upcase] = ['two', 'one']
|
190
|
-
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :
|
190
|
+
css = MARCSpec::CustomSolrSpec.new(:solrField=>'solrField', :map=>@map, :module=>A::B, :functionSymbol=>:titleUp, :methodArgs=>[['a', 'c']])
|
191
191
|
css.marc_values(@one).should.equal ['two', 'one']
|
192
192
|
end
|
193
193
|
|
data/spec/specset_spec.rb
CHANGED
@@ -65,7 +65,7 @@ describe "SpecSet Basics" do
|
|
65
65
|
end
|
66
66
|
|
67
67
|
it "allows customs that reference previous work" do
|
68
|
-
@speclist << {:solrField=>'titleSort', :module=>A::B, :
|
68
|
+
@speclist << {:solrField=>'titleSort', :module=>A::B, :functionSymbol=>:sortable, :methodArgs=>['title']}
|
69
69
|
ss = MARCSpec::SpecSet.new
|
70
70
|
ss.buildSpecsFromList(@speclist)
|
71
71
|
h = ss.hash_from_marc @one
|
@@ -86,7 +86,7 @@ describe "SpecSet Basics" do
|
|
86
86
|
it "should allow multi-headed custom fields" do
|
87
87
|
@speclist << {:solrField => ['one', 'two', 'letters'],
|
88
88
|
:module => A::B,
|
89
|
-
:
|
89
|
+
:functionSymbol => :three_value_custom,
|
90
90
|
}
|
91
91
|
ss = MARCSpec::SpecSet.new
|
92
92
|
ss.buildSpecsFromList(@speclist)
|
@@ -18,56 +18,6 @@ require 'spec_helper'
|
|
18
18
|
# 852 $a American Folklife Center, Library of Congress
|
19
19
|
# 852 $a DLC
|
20
20
|
|
21
|
-
describe "ControlFieldSpec" do
|
22
|
-
|
23
|
-
before do
|
24
|
-
@one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
|
25
|
-
# @batch = MARC4J4R::Reader.new("#{DIR}/batch.dat").collect
|
26
|
-
end
|
27
|
-
|
28
|
-
# afc99990058366 # data
|
29
|
-
# 01234567890123 # index
|
30
|
-
it "gets a single full value" do
|
31
|
-
cfs = MARCSpec::ControlFieldSpec.new('001')
|
32
|
-
cfs.marc_values(@one).should.equal ["afc99990058366"]
|
33
|
-
end
|
34
|
-
|
35
|
-
it "gets a single character" do
|
36
|
-
cfs = MARCSpec::ControlFieldSpec.new('001', 10 )
|
37
|
-
cfs.marc_values(@one).should.equal ['8']
|
38
|
-
end
|
39
|
-
|
40
|
-
it "gets a range of characters" do
|
41
|
-
cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
|
42
|
-
cfs.marc_values(@one).should.equal ['90058']
|
43
|
-
end
|
44
|
-
|
45
|
-
it "should round trip" do
|
46
|
-
cfs = MARCSpec::ControlFieldSpec.new('001', 6..10 )
|
47
|
-
cfs2 = MARCSpec::ControlFieldSpec.fromPPString(cfs.asPPString)
|
48
|
-
cfs.should.equal cfs2
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
|
53
|
-
describe "LeaderSpec" do
|
54
|
-
before do
|
55
|
-
@one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
|
56
|
-
end
|
57
|
-
|
58
|
-
it "Works with full leader" do
|
59
|
-
cfs = MARCSpec::LeaderSpec.new('LDR')
|
60
|
-
cfs.marc_values(@one).should.equal @one.leader
|
61
|
-
end
|
62
|
-
|
63
|
-
it "Works with substring of leader" do
|
64
|
-
cfs = MARCSpec::LeaderSpec.new('LDR', 3..5)
|
65
|
-
cfs.marc_values(@one).should.equal @one.leader[3..5]
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
21
|
describe "VariableFieldSpec" do
|
72
22
|
before do
|
73
23
|
@one = MARC4J4R::Reader.new("#{DIR}/data/one.dat").first
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marcspec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
8
|
+
- 7
|
9
9
|
- 0
|
10
|
-
version: 0.
|
10
|
+
version: 0.7.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- BillDueber
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-08-
|
18
|
+
date: 2010-08-19 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -96,14 +96,16 @@ files:
|
|
96
96
|
- Rakefile
|
97
97
|
- VERSION
|
98
98
|
- lib/marcspec.rb
|
99
|
+
- lib/marcspec/controlfieldspec.rb
|
99
100
|
- lib/marcspec/customspec.rb
|
100
101
|
- lib/marcspec/kvmap.rb
|
102
|
+
- lib/marcspec/leaderspec.rb
|
101
103
|
- lib/marcspec/map.rb
|
102
104
|
- lib/marcspec/marcfieldspec.rb
|
103
105
|
- lib/marcspec/multivaluemap.rb
|
104
106
|
- lib/marcspec/solrfieldspec.rb
|
105
107
|
- lib/marcspec/specset.rb
|
106
|
-
- lib/
|
108
|
+
- lib/marcspec/variablefieldspec.rb
|
107
109
|
- spec/data/batch.dat
|
108
110
|
- spec/data/one.dat
|
109
111
|
- spec/data/umich/translation_maps/area_map.properties
|
@@ -121,11 +123,12 @@ files:
|
|
121
123
|
- spec/data/umich/translation_maps/location_map.properties
|
122
124
|
- spec/data/umich/umich_index.properties
|
123
125
|
- spec/maps_spec.rb
|
124
|
-
- spec/marcfieldspecs_spec.rb
|
125
|
-
- spec/marcspec_spec.rb
|
126
126
|
- spec/solrfieldspec_spec.rb
|
127
127
|
- spec/spec_helper.rb
|
128
128
|
- spec/specset_spec.rb
|
129
|
+
- spec/controlfieldspec_spec.rb
|
130
|
+
- spec/leaderspec_spec.rb
|
131
|
+
- spec/variablefieldspec_spec.rb
|
129
132
|
has_rdoc: true
|
130
133
|
homepage: http://github.com/billdueber/marcspec
|
131
134
|
licenses: []
|
@@ -161,9 +164,10 @@ signing_key:
|
|
161
164
|
specification_version: 3
|
162
165
|
summary: Extract data from MARC records and send to Solr
|
163
166
|
test_files:
|
167
|
+
- spec/controlfieldspec_spec.rb
|
168
|
+
- spec/leaderspec_spec.rb
|
164
169
|
- spec/maps_spec.rb
|
165
|
-
- spec/marcfieldspecs_spec.rb
|
166
|
-
- spec/marcspec_spec.rb
|
167
170
|
- spec/solrfieldspec_spec.rb
|
168
171
|
- spec/spec_helper.rb
|
169
172
|
- spec/specset_spec.rb
|
173
|
+
- spec/variablefieldspec_spec.rb
|
data/lib/orig.rb
DELETED
@@ -1,288 +0,0 @@
|
|
1
|
-
require 'set'
|
2
|
-
require 'pp'
|
3
|
-
require 'logger'
|
4
|
-
|
5
|
-
$LOG ||= Logger.new(STDERR)
|
6
|
-
|
7
|
-
module MARCSpec
|
8
|
-
|
9
|
-
class MapSpec
|
10
|
-
attr_accessor :map, :type, :default
|
11
|
-
|
12
|
-
def initialize(type, map, default=nil)
|
13
|
-
@type = type
|
14
|
-
@default = default
|
15
|
-
@map = map
|
16
|
-
end
|
17
|
-
|
18
|
-
def [] key
|
19
|
-
if (@type == :kv)
|
20
|
-
if @map.has_key? key
|
21
|
-
return @map[key]
|
22
|
-
else
|
23
|
-
return @default
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
# For a pattern, we find all that match.
|
28
|
-
|
29
|
-
if (@type == :pattern)
|
30
|
-
rv = []
|
31
|
-
@map.each do |pv|
|
32
|
-
pat = pv[0]
|
33
|
-
val = pv[1]
|
34
|
-
# puts "Trying pattern #{pat} against #{key}"
|
35
|
-
if pat.match(key)
|
36
|
-
rv << val
|
37
|
-
# puts "Matched: adding #{val}"
|
38
|
-
end
|
39
|
-
end
|
40
|
-
rv.uniq!
|
41
|
-
if rv.size > 0
|
42
|
-
return rv
|
43
|
-
else
|
44
|
-
return @default
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
class CustomSpec
|
51
|
-
def initialize(proc, args)
|
52
|
-
@proc = proc
|
53
|
-
@args = args
|
54
|
-
end
|
55
|
-
|
56
|
-
def marc_values_hash fieldnames, r
|
57
|
-
a = @proc(r, args)
|
58
|
-
rv = {}
|
59
|
-
fieldnames.each_with_index do |fn, i|
|
60
|
-
rv[fn] = a[i]
|
61
|
-
end
|
62
|
-
return rv
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
|
67
|
-
class TagSpec
|
68
|
-
attr_accessor :tag, :codes, :joiner, :parent, :ind1, :ind2, :range, :is_control
|
69
|
-
|
70
|
-
def initialize(tag, codes=nil)
|
71
|
-
@codes = Set.new
|
72
|
-
@tag = tag
|
73
|
-
@joiner = ' '
|
74
|
-
@substr = nil
|
75
|
-
tagint = tag.to_i
|
76
|
-
@is_control = (tagint != 0 and tagint < 10)
|
77
|
-
if (codes)
|
78
|
-
self.codes = codes
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
def range= newrange
|
83
|
-
if newrange =~ /\s*(\d+)-(\d+)/
|
84
|
-
start = $1.to_i
|
85
|
-
last = $2.to_i
|
86
|
-
@range = start..last
|
87
|
-
else
|
88
|
-
se = newrange.to_i
|
89
|
-
@range = se..se
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
def codes= newcodes
|
94
|
-
if newcodes.is_a? Range
|
95
|
-
@codes = newcodes.to_a
|
96
|
-
elsif newcodes !~ /\S/
|
97
|
-
@codes = nil
|
98
|
-
# Otherwise, just split into individual characters
|
99
|
-
else
|
100
|
-
@codes = newcodes.split(//)
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
def marc_values r
|
105
|
-
if @is_control
|
106
|
-
vals = r.find_by_tag(@tag).map {|f| f.value}
|
107
|
-
# puts "Start with #{vals.join(', ')}"
|
108
|
-
if @range
|
109
|
-
vals.map! {|v| v[@range]}
|
110
|
-
end
|
111
|
-
# puts "End with #{vals.join(', ')}"
|
112
|
-
|
113
|
-
else
|
114
|
-
fields = r.find_by_tag(@tag)
|
115
|
-
vals = []
|
116
|
-
fields.each do |f|
|
117
|
-
subvals = f.sub_values(@codes)
|
118
|
-
vals << subvals.join(@joiner) if subvals.size > 0
|
119
|
-
end
|
120
|
-
end
|
121
|
-
# puts vals.join(', ')
|
122
|
-
return vals
|
123
|
-
end
|
124
|
-
|
125
|
-
end
|
126
|
-
|
127
|
-
class FieldSpec
|
128
|
-
attr_accessor :field, :first, :map, :tagspecs
|
129
|
-
|
130
|
-
def initialize(opts)
|
131
|
-
@field = opts[:field]
|
132
|
-
@first = opts[:first] || false
|
133
|
-
@map = opts[:map] || nil
|
134
|
-
@tagspecs = []
|
135
|
-
end
|
136
|
-
|
137
|
-
def << tagspec
|
138
|
-
tagspec.parent = self
|
139
|
-
@tagspecs << tagspec
|
140
|
-
end
|
141
|
-
|
142
|
-
def marc_values r
|
143
|
-
vals = []
|
144
|
-
# puts "Tagspecs has #{@tagspecs.size} items"
|
145
|
-
@tagspecs.each do |ts|
|
146
|
-
vals.concat ts.marc_values(r)
|
147
|
-
# puts vals.join(', ')
|
148
|
-
break if @first and vals.size > 0
|
149
|
-
end
|
150
|
-
|
151
|
-
if (@map)
|
152
|
-
vals.map! {|v| @map[v]}
|
153
|
-
# vals.each do |v|
|
154
|
-
# puts "Map: #{v} => #{@map[v].to_s}"
|
155
|
-
# end
|
156
|
-
end
|
157
|
-
vals.flatten!
|
158
|
-
vals.uniq!
|
159
|
-
vals.compact!
|
160
|
-
return vals
|
161
|
-
end
|
162
|
-
|
163
|
-
end
|
164
|
-
|
165
|
-
|
166
|
-
class SpecSet
|
167
|
-
attr_accessor :tmaps, :fieldspecs
|
168
|
-
def initialize(*args)
|
169
|
-
tmapdir = args.pop!
|
170
|
-
unless File.directory? tmapdir
|
171
|
-
$LOG.error "Directory #{tmapdir} not found"
|
172
|
-
raise LoadError, "Directory #{tmapdir} not found"
|
173
|
-
end
|
174
|
-
|
175
|
-
@tmaps = {}
|
176
|
-
Dir.glob(tmapdir + '/*.rb') do |fn|
|
177
|
-
basename = File.basename(fn).sub(/\.rb$/, '')
|
178
|
-
$LOG.info "Loading translation map #{basename}"
|
179
|
-
|
180
|
-
begin
|
181
|
-
rawmap = eval(File.open(fn).read)
|
182
|
-
@tmaps[basename] = MapSpec.new(rawmap[:type], rawmap[:map], rawmap[:default])
|
183
|
-
rescue SyntaxError
|
184
|
-
$LOG.error "Error processing translation map file #{fn}: #{$!}"
|
185
|
-
raise SyntaxError, $!
|
186
|
-
end
|
187
|
-
|
188
|
-
end
|
189
|
-
|
190
|
-
@fieldspecs = []
|
191
|
-
|
192
|
-
# Get the index files
|
193
|
-
args.each do |indexfile|
|
194
|
-
begin
|
195
|
-
unless File.exists? indexfile
|
196
|
-
$LOG.error "File #{indexfile} does not exist"
|
197
|
-
raise LoadError, "File #{indexfile} does not exist"
|
198
|
-
end
|
199
|
-
$LOG.info "Loading index file #{indexfile}"
|
200
|
-
rawindex = eval(File.open(indexfile).read)
|
201
|
-
rawindex.each do |entry|
|
202
|
-
fs = FieldSpec.new(:field => entry[:solrField], :first=>entry[:firstOnly])
|
203
|
-
mapname = entry[:map]
|
204
|
-
if mapname
|
205
|
-
if @tmaps.has_key? mapname
|
206
|
-
fs.map = @tmaps[mapname]
|
207
|
-
else
|
208
|
-
$LOG.error "Can't find map #{mapname}"
|
209
|
-
end
|
210
|
-
end
|
211
|
-
entry[:specs].each do |entryts|
|
212
|
-
|
213
|
-
# A one- or two-element entry is a control field
|
214
|
-
# A three element entry (tag, ind1, ind2) is all subs of a field (need to implement)
|
215
|
-
# A four element field is tag, ind1, ind2, subs
|
216
|
-
# A five element field is tag, ind1, ind2, subs, joiner
|
217
|
-
|
218
|
-
|
219
|
-
tag = entryts[0]
|
220
|
-
|
221
|
-
# Is tag the symbol :custom? Then make it a custom item
|
222
|
-
|
223
|
-
if tag == :custom
|
224
|
-
ts = CustomSpec.new(entryts[1], entryts[2..-1])
|
225
|
-
fs << ts
|
226
|
-
next
|
227
|
-
end
|
228
|
-
|
229
|
-
# If it's not custom, the solrField had better be a scalar (not an Array)
|
230
|
-
if entry[:solrField].is_a? Array
|
231
|
-
# log an error and bail out
|
232
|
-
end
|
233
|
-
|
234
|
-
# Otherwise, it's a tag spec
|
235
|
-
if tag.is_a? Fixnum
|
236
|
-
tag = '%03d' % tag
|
237
|
-
end
|
238
|
-
|
239
|
-
|
240
|
-
ts = TagSpec.new(tag)
|
241
|
-
if entryts.size < 3
|
242
|
-
ts.is_control = true
|
243
|
-
ts.range = entryts[1] if entryts[1]
|
244
|
-
else
|
245
|
-
ts.ind1 = entryts[1]
|
246
|
-
ts.ind2 = entryts[2]
|
247
|
-
ts.codes = entryts[3]
|
248
|
-
ts.joiner = entryts[4] if entryts[4]
|
249
|
-
end
|
250
|
-
fs << ts
|
251
|
-
end
|
252
|
-
self << fs
|
253
|
-
end
|
254
|
-
rescue SyntaxError
|
255
|
-
$LOG.error "Error processing index file #{indexfile}: #{$!}"
|
256
|
-
raise SyntaxError
|
257
|
-
end
|
258
|
-
end
|
259
|
-
end
|
260
|
-
|
261
|
-
def each
|
262
|
-
@fieldspecs.each do |fs|
|
263
|
-
yield fs
|
264
|
-
end
|
265
|
-
end
|
266
|
-
|
267
|
-
def << fieldspec
|
268
|
-
@fieldspecs << fieldspec
|
269
|
-
end
|
270
|
-
|
271
|
-
def doc_from_marc r
|
272
|
-
doc = SolrInputDocument.new
|
273
|
-
@fieldspecs.each do |fs|
|
274
|
-
doc[fs.field] = fs.marc_values(r)
|
275
|
-
end
|
276
|
-
return doc
|
277
|
-
end
|
278
|
-
|
279
|
-
def hash_from_marc r
|
280
|
-
h = {}
|
281
|
-
@fieldspecs.each do |fs|
|
282
|
-
h[fs.field] = fs.marc_values(r)
|
283
|
-
end
|
284
|
-
return h
|
285
|
-
end
|
286
|
-
|
287
|
-
end
|
288
|
-
end
|