marcspec 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +59 -0
- data/Rakefile +58 -0
- data/VERSION +1 -0
- data/lib/marcspec/customspec.rb +97 -0
- data/lib/marcspec/kvmap.rb +79 -0
- data/lib/marcspec/map.rb +67 -0
- data/lib/marcspec/marcfieldspec.rb +205 -0
- data/lib/marcspec/multivaluemap.rb +62 -0
- data/lib/marcspec/solrfieldspec.rb +123 -0
- data/lib/marcspec/specset.rb +58 -0
- data/lib/marcspec.rb +11 -0
- data/lib/orig.rb +288 -0
- data/spec/data/batch.dat +1 -0
- data/spec/data/one.dat +1 -0
- data/spec/data/umich/translation_maps/area_map.properties +1039 -0
- data/spec/data/umich/translation_maps/availability_map_ht.properties +9 -0
- data/spec/data/umich/translation_maps/availability_map_umich.properties +6 -0
- data/spec/data/umich/translation_maps/callnumber_map.properties +21 -0
- data/spec/data/umich/translation_maps/callnumber_subject_map.properties +214 -0
- data/spec/data/umich/translation_maps/country_map.properties +320 -0
- data/spec/data/umich/translation_maps/format_map.properties +47 -0
- data/spec/data/umich/translation_maps/format_map_umich.properties +35 -0
- data/spec/data/umich/translation_maps/ht_namespace_map.properties +10 -0
- data/spec/data/umich/translation_maps/institution_map.properties +11 -0
- data/spec/data/umich/translation_maps/language_map.properties +489 -0
- data/spec/data/umich/translation_maps/library_map.properties +48 -0
- data/spec/data/umich/translation_maps/location_map.properties +345 -0
- data/spec/data/umich/umich_index.properties +130 -0
- data/spec/maps_spec.rb +91 -0
- data/spec/marcfieldspecs_spec.rb +109 -0
- data/spec/marcspec_spec.rb +10 -0
- data/spec/solrfieldspec_spec.rb +177 -0
- data/spec/spec_helper.rb +16 -0
- metadata +166 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 BillDueber
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
= marcspec
|
2
|
+
|
3
|
+
The MARCSpec contains classes designed to make it (relatively) easy to specify
|
4
|
+
a data field (my use case specifically is solr) in terms of sets of MARC fields and subfields.
|
5
|
+
|
6
|
+
== A simple breakdown of the hierarchy
|
7
|
+
|
8
|
+
This is all based on how the excellent [Solrmarc](http://code.google.com/p/solrmarc/)
|
9
|
+
deals with configuration. MARCSpec supports a subset of Solrmarc's configuration
|
10
|
+
options.
|
11
|
+
|
12
|
+
* A {MARCSpec::SpecSet} consists of a (possibly empty) set of named {MARCSpec::Map}
|
13
|
+
and a list of {MARCSpec::SolrFieldSpec} and/or {MARCSpec::CustomSolrSpec}.
|
14
|
+
* A {MARCSpec::SolrFieldSpec} consists of a field name, a list of MARC field specs (see below),
|
15
|
+
an optional {MARCSpec::Map} for translating raw values to something else, and
|
16
|
+
a bunch of optional specials (e.g., a notation to only use the first value,
|
17
|
+
a default value if no appropriate data is in the MARC record, a default value
|
18
|
+
for when marc data is found, but nothing is in the map, etc.)
|
19
|
+
* A {MARCSpec::CustomSolrSpec} occupies the same niche as a {MARCSpec::SolrFieldSpec}, but
|
20
|
+
allows you to use a custom routine instead of the pre-packaged MARC specification syntax.
|
21
|
+
* A MARC Field Spec is one of the following
|
22
|
+
* A {MARCSpec::LeaderSpec} for dealing with the leader
|
23
|
+
* A {MARCSpec::ControlFieldSpec}, consisting of a tag (as a string, not a number) and
|
24
|
+
an optional zero-based index (e.g., "001[3]") or range (e.g., "001\[11..13\]")
|
25
|
+
* A {MARCSpec::VariableFieldSpec}, consisting of a tag, a couple indicator patterns
|
26
|
+
(currently ignored, but stay tuned), an optional list of subfields (default
|
27
|
+
is all), and an optional string used to join the subfields (default is
|
28
|
+
a single space)
|
29
|
+
* A *map* is one of:
|
30
|
+
* A {MARCSpec::KVMap}, which is just a Ruby hash. It will match at most one k-v pair, although
|
31
|
+
the "value" might actually be either a scalar or an array of scalars
|
32
|
+
* A {MARCSpec::MultiValueMap}, which is an array of key/value duples. A passed
|
33
|
+
potential key is compared (via ===) with every key, returning all
|
34
|
+
the associated values. Again, a single "key" can be associated with an array of
|
35
|
+
return values; the whole thing is flattened out before returning
|
36
|
+
|
37
|
+
Obviously, better descriptions and full documentation are available in each
|
38
|
+
individual class.
|
39
|
+
|
40
|
+
== Better examples in marc2solr
|
41
|
+
|
42
|
+
Better documented samples are available as part of the marc2solr project
|
43
|
+
at http://github.com/billdueber/marc2solr -- look in the simple_sample area.
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
== Note on Patches/Pull Requests
|
48
|
+
|
49
|
+
* Fork the project.
|
50
|
+
* Make your feature addition or bug fix.
|
51
|
+
* Add tests for it. This is important so I don't break it in a
|
52
|
+
future version unintentionally.
|
53
|
+
* Commit, do not mess with rakefile, version, or history.
|
54
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
55
|
+
* Send me a pull request. Bonus points for topic branches.
|
56
|
+
|
57
|
+
== Copyright
|
58
|
+
|
59
|
+
Copyright (c) 2010 BillDueber. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Rakefile for the marcspec gem: packaging (Jeweler), specs, coverage (RCov)
# and API docs (YARD). Every optional tool is loaded inside begin/rescue so
# the remaining tasks stay usable when that tool is not installed.

require 'rubygems'
require 'rake'

# Gem packaging / release tasks. Skipped (with a hint) if Jeweler is missing.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "marcspec"
    gem.summary = %Q{Extract data from MARC records and send to Solr}
    gem.description = %Q{Relies on marc4j4r, based on work in solrmarc}
    gem.email = "bill@dueber.com"
    gem.homepage = "http://github.com/billdueber/marcspec"
    gem.authors = ["BillDueber"]
    gem.add_development_dependency "bacon", ">= 0"
    gem.add_development_dependency "yard", ">= 0"

    gem.add_dependency 'marc4j4r', '>=0.9.0'
    gem.add_dependency 'jruby_streaming_update_solr_server', '>=0.2.0'

    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# `rake spec` runs every spec/**/*_spec.rb with lib/ and spec/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.verbose = true
end

# `rake rcov` runs the same specs under RCov, or aborts with install
# instructions when RCov cannot be loaded.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |spec|
    spec.libs << 'spec'
    spec.pattern = 'spec/**/*_spec.rb'
    spec.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably it
# comes from Jeweler::Tasks -- TODO confirm). Only hook it in when it exists;
# otherwise `rake spec` (and the default task) would abort on an unknown
# prerequisite whenever Jeweler failed to load above.
task(:spec => :check_dependencies) if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

# `rake yardoc` builds the API documentation, or aborts with install
# instructions when YARD cannot be loaded.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  task :yardoc do
    abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
  end
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.1
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'marcspec/map'
|
2
|
+
require 'marcspec/solrfieldspec'
|
3
|
+
|
4
|
+
|
5
|
+
module MARCSpec
|
6
|
+
|
7
|
+
# A CustomSolrSpec is a SolrFieldSpec that derives all its values from a custom function. The custom function
|
8
|
+
# must be a module function that takes a record and an array of other arguments and returns a
|
9
|
+
# (possibly empty) list of resulting values.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# module MARC2Solr
|
13
|
+
# module MyCustomStuff
|
14
|
+
# def self.uppercaseTitle r, args=[]
|
15
|
+
# vals = []
|
16
|
+
# vals.push r['245'].value.upcase
|
17
|
+
# return vals
|
18
|
+
# end
|
19
|
+
# end
|
20
|
+
# end
|
21
|
+
#
|
22
|
+
# css = MARCSpec::CustomSolrSpec.new(:module => MARC2Solr::MyCustomStuff,
|
23
|
+
# :methodSymbol => :uppercaseTitle,
|
24
|
+
# :map => ss.map('mapname')
|
25
|
+
# )
|
26
|
+
# ss.add_spec(css)
|
27
|
+
#
|
28
|
+
#
|
29
|
+
|
30
|
+
|
31
|
+
class CustomSolrSpec < SolrFieldSpec

  attr_accessor :module, :methodSymbol, :methodArgs

  # Build a custom spec from an options hash.
  #
  # Required keys: :solrField, :module, :methodSymbol.
  # Optional keys: :methodArgs (defaults to []), :firstOnly (defaults to
  # false), :default, :map, :noMapKeyDefault (each defaults to nil).
  #
  # @param [Hash] opts The options described above
  # @raise [ArgumentError] if any of the three required keys is missing
  def initialize(opts)
    @solrField, @module, @methodSymbol = opts[:solrField], opts[:module], opts[:methodSymbol]

    if !(@solrField && @module && @methodSymbol)
      raise ArgumentError, "Custom solr spec must have a field name in :solrField, module in :module, and the method name as a symbol in :methodSymbol"
    end

    @methodArgs      = opts[:methodArgs] || []
    @first           = opts[:firstOnly] || false
    @default         = opts[:default] || nil
    @map             = opts[:map] || nil
    @noMapKeyDefault = opts[:noMapKeyDefault] || nil
  end

  # Produce the raw values by calling the configured module function with
  # the record followed by the (splatted) extra arguments.
  #
  # @param r The record handed through to the custom function
  # @return Whatever the custom function returns
  def raw_marc_values r
    @module.send(@methodSymbol, r, *@methodArgs)
  end

  # Alternate constructor; the hash is passed straight to #initialize.
  def self.fromHash h
    new(h)
  end

  # Render this spec as the text of a Ruby hash literal. Optional settings
  # (:firstOnly, :default, :mapname, :noMapKeyDefault) are only written when
  # they are set; a map is written by name (:mapname), not by content.
  #
  # @return [String] the hash-literal text
  def asPPString
    out = StringIO.new
    sep = ",\n "

    # Write a label, the single-line pretty-printed value, then an optional trailer
    entry = lambda do |label, value, trailer|
      out.print(label)
      PP.singleline_pp(value, out)
      out.print(trailer) if trailer
    end

    entry.call("{\n :solrField=> ", @solrField, sep)
    out.print(":firstOnly => true,\n ") if @first
    entry.call(":default => ", @default, sep) if @default
    entry.call(":mapname => ", @map.mapname, sep) if @map
    entry.call(":noMapKeyDefault => ", @noMapKeyDefault, sep) if @noMapKeyDefault

    entry.call(":module => ", @module, nil)
    entry.call(",\n :methodSymbol => ", @methodSymbol, nil)
    entry.call(",\n :methodArgs => ", @methodArgs, nil) if @methodArgs
    out.print("\n}")
    out.string
  end

end
|
96
|
+
end
|
97
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'marcspec/map'
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
module MARCSpec
|
8
|
+
|
9
|
+
# A KVMap is, when push comes to shove, just a hash with a name, and the
|
10
|
+
# option of adding a default value for each lookup.
|
11
|
+
|
12
|
+
class KVMap < Map

  # Look up a key, falling back to a caller-supplied default when the map
  # has no such key. The special default :passthrough returns the key itself.
  #
  # @example
  #   kvmap = MARCSpec::KVMap.new("sample_map", {1=>'one'})
  #   kvmap[1] #=> 'one'
  #   kvmap[2] #=> nil
  #   kvmap[2, 'Not Found'] #=> 'Not Found'
  #   kvmap[2, :passthrough] #=> 2
  #
  # @param [Object] key The key to look up
  # @param [Object] default The value to return if the lookup fails
  # @return [Object] The value stored under the key, or the default
  #   (or the key itself for :passthrough)
  def [] key, default=nil
    return @map[key] if @map.has_key? key
    default == :passthrough ? key : default
  end

  # Store a value under a key in the underlying hash.
  def []= key, value
    @map[key] = value
  end

  alias_method :add, :[]=

  # Render the map as the text of a Ruby hash literal with the keys
  # :maptype (always :kv), :mapname and :map.
  #
  # @return [String] the hash-literal text
  def asPPString
    out = StringIO.new
    out.print "{\n :maptype=>:kv,\n :mapname=>"
    PP.singleline_pp(@mapname, out)
    out.print ",\n :map => "
    PP.pp(@map, out)
    out.puts "\n}"
    out.string
  end

  # Build a KVMap from a "key = value" properties file. The map name is the
  # file's basename with everything from the first dot removed. Blank lines
  # and lines starting with '#' are ignored; any other line that is not of
  # the form key = value is logged via $LOG.warn and skipped.
  #
  # @param [String] filename Path of the properties file
  # @return [KVMap] the populated map
  def self.from_solrmarc_file filename
    name = File.basename(filename).sub(/\..+?$/, '')
    pairs = {}
    File.open(filename) do |smf|
      smf.each_line do |raw|
        l = raw.chomp
        next if l !~ /\S/
        l = l.strip
        next if l.start_with?('#')
        m = /^(.+?)\s*=\s*(.+)$/.match(l)
        if m.nil?
          $LOG.warn "KVMap import skipping weird line in #{filename}\n #{l}"
          next
        end
        pairs[m[1]] = m[2]
      end
    end
    new(name, pairs)
  end

end
|
79
|
+
end
|
data/lib/marcspec/map.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
module MARCSpec
|
2
|
+
|
3
|
+
# A Map is just a named lookup table. The access
|
4
|
+
# (via []) takes, in addition to a key, an optional
|
5
|
+
# default value to return
|
6
|
+
|
7
|
+
# Base class for named lookup tables. It holds the name and the raw data and
# provides equality plus constructors that rebuild a map from a file, a hash
# or a pretty-printed string. The lookup itself and #asPPString are not
# defined here; #pretty_print relies on #asPPString being available.
class Map
  attr_accessor :mapname, :map

  # Create a new map. The passed map is either
  # a standard hash or a list of duples
  #
  # @param [String] mapname The name of the map
  # @param [Hash, Array] map The lookup data
  def initialize(mapname, map)
    @mapname = mapname
    @map = map
  end

  # Read a file containing a Ruby hash literal with the keys :maptype,
  # :mapname and :map, and build the matching map object.
  #
  # NOTE(review): the file content is run through +eval+, so only load map
  # files from a trusted location.
  #
  # @param [String] filename Path of the map file
  # @return [KVMap, MultiValueMap] depending on :maptype (:kv or :multi)
  # @raise [ArgumentError] if :maptype is neither :kv nor :multi
  # @raise [Exception] whatever reading or evaluating the file raised
  #   (logged to $LOG first, then re-raised)
  def self.fromFile filename
    begin
      # File.read closes the handle; File.open(...).read left it open
      str = File.read(filename)
    rescue Exception => e
      $LOG.error "Problem opening #{filename}: #{e.message}"
      raise e
    end

    begin
      rawmap = eval(str)
    rescue Exception => e
      # Exception (not StandardError) on purpose: a malformed file makes
      # eval raise SyntaxError, which is a ScriptError
      $LOG.error "Problem evaluating (with 'eval') file #{filename}: #{e.message}"
      raise e
    end

    case rawmap[:maptype]
    when :kv
      return KVMap.new(rawmap[:mapname], rawmap[:map])
    when :multi
      return MultiValueMap.new(rawmap[:mapname], rawmap[:map])
    else
      $LOG.error "Map file #{filename} doesn't seem to be either a KV map or a MuliValueMap according to :maptype (#{rawmap[:maptype]})"
      raise ArgumentError, "File #{filename} doesn't evaluate to a valid map"
    end

  end

  # Two maps are equal when both their names and their data are equal.
  # Anything that does not look like a map is simply not equal.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def == other
    return false unless other.respond_to?(:mapname) && other.respond_to?(:map)
    # Compare (==) the data; the old code assigned it (=), which was always
    # truthy and overwrote other's map
    return ((other.mapname == self.mapname) && (other.map == self.map))
  end

  # Hook for the pp library: print the structure produced by evaluating
  # this object's own #asPPString output.
  def pretty_print pp
    pp.pp eval(self.asPPString)
  end

  # Build a map from a hash with the keys :mapname and :map.
  #
  # @param [Hash] rawmap The raw hash
  # @return [Map] an instance of the class this is called on
  def self.fromHash rawmap
    return self.new(rawmap[:mapname], rawmap[:map])
  end

  # Take the output of pretty_print and eval it to get rawmap; pass it
  # to fromHash to get the map object
  #
  # NOTE(review): uses +eval+; only pass strings from a trusted source.
  def self.fromPPString str
    rawmap = eval(str)
    return self.fromHash rawmap
  end

end
|
67
|
+
end
|
@@ -0,0 +1,205 @@
|
|
1
|
+
require 'marc4j4r'
|
2
|
+
require 'set'
|
3
|
+
require 'pp'
|
4
|
+
module MARCSpec
|
5
|
+
|
6
|
+
# A ControlFieldSpec takes a control tag (generally 001..009) and an optional zero-based range
|
7
|
+
# When called with marc_values(record), it returns either the complete value of all
|
8
|
+
# occurrences of the field in question (in the order they appear in the record), or
|
9
|
+
# the zero-based substrings based on the passed range.
|
10
|
+
#
|
11
|
+
# @example Get the whole 001
|
12
|
+
#   cfs = MARCSpec::ControlFieldSpec.new('001')
|
13
|
+
#
|
14
|
+
# @example Get the first three characters of the 008
|
15
|
+
#   cfs = MARCSpec::ControlFieldSpec.new('008', 0..2)
|
16
|
+
#
|
17
|
+
# Note that the use of the zero-based range in this manner conforms to the way MARC
|
18
|
+
# substrings are specified.
|
19
|
+
|
20
|
+
class ControlFieldSpec
|
21
|
+
attr_accessor :tag, :range
|
22
|
+
|
23
|
+
def initialize (tag, range=nil)
|
24
|
+
unless MARC4J4R::ControlField.control_tag? tag
|
25
|
+
raise ArgumentError "Tag must be a control tag"
|
26
|
+
end
|
27
|
+
@tag = tag
|
28
|
+
self.range = range
|
29
|
+
end
|
30
|
+
|
31
|
+
def == other
|
32
|
+
return ((self.tag == other.tag) and
|
33
|
+
(self.range = other.range))
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
# Always force a real range, since in Ruby 1.9 a string subscript with a single fixnum
|
38
|
+
# will return the character code of that character (e.g., "Bill"[0] => 66, wherease
|
39
|
+
# "Bill"[0..0] gives the expected 'B'
|
40
|
+
#
|
41
|
+
# @param [nil, Fixnum, Range] range A zero-based substring range or character position
|
42
|
+
# @return [MARCSpec::ControlFieldSpec] self
|
43
|
+
|
44
|
+
def range= range
|
45
|
+
if range.nil?
|
46
|
+
@range = nil
|
47
|
+
return self
|
48
|
+
end
|
49
|
+
if range.is_a? Fixnum
|
50
|
+
if range < 0
|
51
|
+
raise ArgumentError, "Range must be nil, an integer offset (1-based), or a Range, not #{range}"
|
52
|
+
end
|
53
|
+
|
54
|
+
@range = range..range
|
55
|
+
|
56
|
+
elsif range.is_a? Range
|
57
|
+
@range = range
|
58
|
+
else
|
59
|
+
raise ArgumentError, "Range must be nil, an integer offset (1-based), or a Range, not #{range.inspect}"
|
60
|
+
end
|
61
|
+
return self
|
62
|
+
end
|
63
|
+
|
64
|
+
|
65
|
+
def marc_values r
|
66
|
+
vals = r.find_by_tag(@tag).map {|f| f.value}
|
67
|
+
if @range
|
68
|
+
return vals.map {|v| v[@range]}
|
69
|
+
else
|
70
|
+
return vals
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def pretty_print pp
|
75
|
+
pp.pp eval(self.asPPString)
|
76
|
+
end
|
77
|
+
|
78
|
+
def asPPString
|
79
|
+
s = StringIO.new
|
80
|
+
if @range
|
81
|
+
PP.pp([@tag, @range], s)
|
82
|
+
else
|
83
|
+
PP.pp([@tag], s)
|
84
|
+
end
|
85
|
+
return s.string
|
86
|
+
end
|
87
|
+
|
88
|
+
def self.fromPPString str
|
89
|
+
a = eval(str)
|
90
|
+
return self.new(*a)
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
|
96
|
+
# A LeaderSpec deals only with the leader. It's basically the same as a controlfield spec, but
|
97
|
+
# using the string 'LDR' to identify itself
|
98
|
+
|
99
|
+
# A ControlFieldSpec look-alike for the record leader, identified by the
# pseudo-tag 'LDR'.
class LeaderSpec < ControlFieldSpec

  # Built to be syntax-compatible with ControlFieldSpec, the tag must always
  # be 'LDR' (case matters)
  #
  # @param ['LDR'] tag The 'tag'; in this case, always 'LDR'
  # @param [Integer, Range<Integer>] range substring specification (either one character or a range) to return
  #   instead of the whole leader.
  # @raise [ArgumentError] if the tag is anything other than 'LDR'
  def initialize(tag, range=nil)
    unless tag == 'LDR'
      # The comma matters: without it Ruby parses a call to a method named
      # ArgumentError and raises NoMethodError instead
      raise ArgumentError, "Tag must be 'LDR'"
    end
    @tag = 'LDR'
    self.range = range
  end

  # Return the appropriate value (either the leader or a subset of it) from the
  # given record. Note that this is a single value, not an array.
  #
  # @param [MARC4J4R::Record] r A MARC4J4R Record
  # @return [String] the leader or substring of the leader
  def marc_values r
    if @range
      return r.leader[@range]
    else
      return r.leader
    end
  end
end
|
129
|
+
|
130
|
+
|
131
|
+
# A VariableFieldSpec has a tag (three chars) and a set of codes. Its #marc_values(r) method will return
|
132
|
+
# all the values for the subfields for the given codes joined by the optional joiner (space by default)
|
133
|
+
#
|
134
|
+
# The subfield values are presented in the order they appear in the document, *not* the order the subfield
|
135
|
+
# codes are specified
|
136
|
+
#
|
137
|
+
# @example Get the $a from the 245s
|
138
|
+
# vfs = MARCSpec::VariableFieldSpec.new('245', 'a')
|
139
|
+
#
|
140
|
+
# vfs = MARCSpec::VariableFieldSpec.new('245', 'ab')
|
141
|
+
|
142
|
+
|
143
|
+
# Extracts subfield values from every occurrence of one variable field,
# joining the values found in each field into a single string.
class VariableFieldSpec

  attr_accessor :tag, :joiner
  attr_reader :codes

  # @param [String] tag The field tag
  # @param [nil, String, Array, Set, Range] codes The subfield codes (see #codes=)
  # @param [String, nil] joiner String placed between subfield values;
  #   nil is treated as a single space
  def initialize tag, codes=nil, joiner=' '
    @tag = tag
    @joiner = joiner || ' '
    self.codes = codes
  end

  # Two specs are equal when tag, codes and joiner are all equal.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def == other
    return false unless other.respond_to?(:tag) && other.respond_to?(:codes) && other.respond_to?(:joiner)
    # Compare (==); the old code assigned other's codes and joiner to self,
    # which was always truthy and clobbered the receiver
    return ((self.tag == other.tag) &&
            (self.codes == other.codes) &&
            (self.joiner == other.joiner))
  end

  # Set the subfield codes. nil is stored as nil; an Array, Set or Range is
  # converted with to_a; anything else is treated as a string and split
  # into its individual characters.
  #
  # @param [nil, String, Array, Set, Range] c The codes
  # @return [Array, nil] the stored codes
  def codes= c
    if c.nil?
      @codes = nil
      return nil
    end

    if (c.is_a? Array) or (c.is_a? Set) or (c.is_a? Range)
      @codes = c.to_a
    else
      @codes = c.split(//)
    end

    return @codes
  end

  # For each field with this tag, ask the field for the values of the
  # configured subfields and join them; fields yielding no values are skipped.
  #
  # @param r A record responding to find_by_tag, whose fields respond to sub_values
  # @return [Array<String>] one joined string per contributing field
  def marc_values r
    fields = r.find_by_tag(@tag)
    vals = []
    fields.each do |f|
      subvals = f.sub_values(@codes)
      vals << subvals.join(@joiner) if subvals.size > 0
    end
    return vals
  end

  # Hook for the pp library: print the structure produced by evaluating
  # this object's own #asPPString output.
  def pretty_print pp
    pp.pp eval(self.asPPString)
  end

  # Render as the text of an array literal: [tag, '*', '*', codes] plus the
  # joiner when it is not a single space. Codes are written as one string,
  # or as nil when no codes are set so that the value survives a round trip
  # through fromPPString.
  #
  # @return [String]
  def asPPString
    s = StringIO.new
    # @codes is nil when the spec was built without codes; join would crash
    codestr = @codes ? @codes.join('') : nil
    if @joiner and @joiner != ' '
      PP.pp([@tag, '*', '*', codestr, @joiner], s)
    else
      PP.pp([@tag, '*', '*', codestr], s)
    end
    return s.string
  end

  # Rebuild a spec from the output of #asPPString (elements 0, 3 and 4 of
  # the array are tag, codes and joiner).
  #
  # NOTE(review): uses +eval+; only pass strings from a trusted source.
  def self.fromPPString str
    a = eval(str)
    return self.new(a[0], a[3], a[4])
  end

end
|
204
|
+
|
205
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'marcspec/map'
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
|
5
|
+
module MARCSpec
|
6
|
+
|
7
|
+
# A MultiValueMap (in this context) is an array of duples of the form
|
8
|
+
# [thingToMatch, 'Value'] (or [thingToMatch, [array,of,values]])
|
9
|
+
# along with an associated name.
|
10
|
+
#
|
11
|
+
# Accessing via [] will give you an array of non-nil values that match (via ===)
|
12
|
+
# the corresponding keys.
|
13
|
+
#
|
14
|
+
# Keys can be either strings or regular expressions (e.g., /^Bil/).
|
15
|
+
#
|
16
|
+
# Again, note that if several keys are === to the passed argument, all the values will be returned.
|
17
|
+
|
18
|
+
# A named list of [matcher, value(s)] duples. A lookup returns the values of
# every duple whose matcher is === to the key.
class MultiValueMap < Map

  attr_accessor :mapname,:map

  # Collect the values of every duple whose first element matches the key
  # via ===. Array values are flattened in, nils are dropped and duplicates
  # removed.
  #
  # @param [Object] key The value to test against each matcher
  # @param [Object] default Returned (wrapped in an array) when nothing matches
  # @return [Array] the matching values, or [default] if there are none
  def [] key, default=nil
    rv = @map.map {|pv| pv[0] === key ? pv[1] : nil}
    rv.flatten!
    rv.compact!
    rv.uniq!
    if rv.size > 0
      return rv
    else
      return [default]
    end
  end

  # Build a MultiValueMap from a file of lines shaped like
  #   pattern<anything> = <regexp> => <value>
  # The map name is the file's basename with everything from the first dot
  # removed. Each regexp source is compiled with Regexp.new. Lines that do
  # not have that shape are logged via $LOG.warn and skipped.
  #
  # @param [String] filename Path of the pattern file
  # @return [MultiValueMap] the populated map
  def self.from_solrmarc_file filename
    mapname = File.basename(filename).sub(/\..+?$/, '')
    kvlist = []
    File.open(filename) do |f|
      f.each_line do |line|
        match = /^pattern.*?=\s*(.*?)\s*=>\s*(.*?)\s*$/.match(line)
        unless match
          # The block variable is `line`; the old message interpolated an
          # undefined `l` and raised NameError instead of warning
          $LOG.warn "MultiValueMap import skipping weird line in #{filename}\n #{line.chomp}"
          next
        end
        kvlist << [Regexp.new(match[1]), match[2]]
      end
    end
    return self.new(mapname, kvlist)
  end

  # Render the map as the text of a Ruby hash literal with the keys
  # :maptype (always :multi), :mapname and :map.
  #
  # @return [String] the hash-literal text
  def asPPString
    s = StringIO.new
    s.print "{\n :maptype=>:multi,\n :mapname=>"
    PP.singleline_pp(@mapname, s)
    s.print ",\n :map => "
    PP.pp(@map, s)
    s.puts "\n}"
    return s.string
  end
end
|
61
|
+
end
|
62
|
+
|