marcspec 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +59 -0
- data/Rakefile +58 -0
- data/VERSION +1 -0
- data/lib/marcspec/customspec.rb +97 -0
- data/lib/marcspec/kvmap.rb +79 -0
- data/lib/marcspec/map.rb +67 -0
- data/lib/marcspec/marcfieldspec.rb +205 -0
- data/lib/marcspec/multivaluemap.rb +62 -0
- data/lib/marcspec/solrfieldspec.rb +123 -0
- data/lib/marcspec/specset.rb +58 -0
- data/lib/marcspec.rb +11 -0
- data/lib/orig.rb +288 -0
- data/spec/data/batch.dat +1 -0
- data/spec/data/one.dat +1 -0
- data/spec/data/umich/translation_maps/area_map.properties +1039 -0
- data/spec/data/umich/translation_maps/availability_map_ht.properties +9 -0
- data/spec/data/umich/translation_maps/availability_map_umich.properties +6 -0
- data/spec/data/umich/translation_maps/callnumber_map.properties +21 -0
- data/spec/data/umich/translation_maps/callnumber_subject_map.properties +214 -0
- data/spec/data/umich/translation_maps/country_map.properties +320 -0
- data/spec/data/umich/translation_maps/format_map.properties +47 -0
- data/spec/data/umich/translation_maps/format_map_umich.properties +35 -0
- data/spec/data/umich/translation_maps/ht_namespace_map.properties +10 -0
- data/spec/data/umich/translation_maps/institution_map.properties +11 -0
- data/spec/data/umich/translation_maps/language_map.properties +489 -0
- data/spec/data/umich/translation_maps/library_map.properties +48 -0
- data/spec/data/umich/translation_maps/location_map.properties +345 -0
- data/spec/data/umich/umich_index.properties +130 -0
- data/spec/maps_spec.rb +91 -0
- data/spec/marcfieldspecs_spec.rb +109 -0
- data/spec/marcspec_spec.rb +10 -0
- data/spec/solrfieldspec_spec.rb +177 -0
- data/spec/spec_helper.rb +16 -0
- metadata +166 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 BillDueber
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
= marcspec
|
2
|
+
|
3
|
+
The MARCSpec contains classes designed to make it (relatively) easy to specify
|
4
|
+
a data field (my use case specifically is solr) in terms of sets of MARC fields and subfields.
|
5
|
+
|
6
|
+
== A simple breakdown of the hierarchy
|
7
|
+
|
8
|
+
This is all based on how the excellent [Solrmarc](http://code.google.com/p/solrmarc/)
|
9
|
+
deals with configuration. MARCSpec supports a subset of Solrmarc's configuration
|
10
|
+
options.
|
11
|
+
|
12
|
+
* A {MARCSpec::SpecSet} consists of a (possibly empty) set of named {MARCSpec::Map}
|
13
|
+
and a list of {MARCSpec::SolrFieldSpec} and/or {MARCSpec::CustomSolrSpec}.
|
14
|
+
* A {MARCSpec::SolrFieldSpec} consists of a field name, a list of MARC field specs (see below),
|
15
|
+
an optional {MARCSpec::Map} for translating raw values to something else, and
|
16
|
+
a bunch of optional specials (e.g., a notation to only use the first value,
|
17
|
+
a default value if no appropriate data is in the MARC record, a default value
|
18
|
+
for when marc data is found, but nothing is in the map, etc.)
|
19
|
+
* A {MARCSpec::CustomSolrSpec} occupies the same niche as a {MARCSpec::SolrFieldSpec}, but
|
20
|
+
allows you to use a custom routine instead of the pre-packaged MARC specification syntax.
|
21
|
+
* A MARC Field Spec is one of the following
|
22
|
+
* A {MARCSpec::LeaderSpec} for dealing with the leader
|
23
|
+
* A {MARCSpec::ControlFieldSpec}, consisting of a tag (as a string, not a number) and
|
24
|
+
an optional zero-based index (e.g., "001[3]") or range (e.g., "001\[11..13\]")
|
25
|
+
* A {MARCSpec::VariableFieldSpec}, consisting of a tag, a couple indicator patterns
|
26
|
+
(currently ignored, but stay tuned), an optional list of subfields (default
|
27
|
+
is all), and an optional string used to join the subfields (default is
|
28
|
+
a single space)
|
29
|
+
* A *map* is one of:
|
30
|
+
* A {MARCSpec::KVMap}, which is just a Ruby hash. It will match at most one k-v pair, although
|
31
|
+
the "value" might actually be either a scalar or an array of scalars
|
32
|
+
* A {MARCSpec::MultiValueMap}, which is an array of key/value duples. A passed
|
33
|
+
potential map is compared (via ===) with every key, returning all
|
34
|
+
the associated values. Again, a single "key" can be associated with an array of
|
35
|
+
return values; the whole thing is flattened out before returning
|
36
|
+
|
37
|
+
Obviously, better descriptions and full documentation are available in each
|
38
|
+
individual class.
|
39
|
+
|
40
|
+
== Better examples in marc2solr
|
41
|
+
|
42
|
+
Better documented samples are available as part of the marc2solr project
|
43
|
+
at http://github.com/billdueber/marc2solr -- look in the simple_sample area.
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
== Note on Patches/Pull Requests
|
48
|
+
|
49
|
+
* Fork the project.
|
50
|
+
* Make your feature addition or bug fix.
|
51
|
+
* Add tests for it. This is important so I don't break it in a
|
52
|
+
future version unintentionally.
|
53
|
+
* Commit, do not mess with rakefile, version, or history.
|
54
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
55
|
+
* Send me a pull request. Bonus points for topic branches.
|
56
|
+
|
57
|
+
== Copyright
|
58
|
+
|
59
|
+
Copyright (c) 2010 BillDueber. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Rake tasks for the marcspec gem: packaging (Jeweler), specs, coverage
# (RCov) and API documentation (YARD). Each optional tool is loaded inside
# its own begin/rescue so a missing tool does not stop the file from loading.
require 'rubygems'
require 'rake'

# --- Packaging -------------------------------------------------------------
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name        = "marcspec"
    gemspec.summary     = "Extract data from MARC records and send to Solr"
    gemspec.description = "Relies on marc4j4r, based on work in solrmarc"
    gemspec.email       = "bill@dueber.com"
    gemspec.homepage    = "http://github.com/billdueber/marcspec"
    gemspec.authors     = ["BillDueber"]

    # Needed only to run the specs and build the docs.
    gemspec.add_development_dependency "bacon", ">= 0"
    gemspec.add_development_dependency "yard", ">= 0"

    # Needed at runtime.
    gemspec.add_dependency 'marc4j4r', '>=0.9.0'
    gemspec.add_dependency 'jruby_streaming_update_solr_server', '>=0.2.0'

    # gemspec is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# --- Specs -----------------------------------------------------------------
require 'rake/testtask'
Rake::TestTask.new(:spec) do |t|
  t.libs.push('lib', 'spec')
  t.pattern = 'spec/**/*_spec.rb'
  t.verbose = true
end

# --- Coverage --------------------------------------------------------------
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |t|
    t.libs.push('spec')
    t.pattern = 'spec/**/*_spec.rb'
    t.verbose = true
  end
rescue LoadError
  # Without RCov, keep the task name but make it explain what is missing.
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :spec => :check_dependencies

task :default => :spec

# --- Documentation ---------------------------------------------------------
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  # Without YARD, keep the task name but make it explain what is missing.
  task :yardoc do
    abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
  end
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.1
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'marcspec/map'
|
2
|
+
require 'marcspec/solrfieldspec'
|
3
|
+
|
4
|
+
|
5
|
+
module MARCSpec
|
6
|
+
|
7
|
+
# A CustomSolrSpec is a SolrFieldSpec that derives all its values from a custom function. The custom function
|
8
|
+
# must be a module function that takes a record and an array of other arguments and returns a
|
9
|
+
# (possibly empty) list of resulting values.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# module MARC2Solr
|
13
|
+
# module MyCustomStuff
|
14
|
+
# def self.uppercaseTitle r, args=[]
|
15
|
+
# vals = []
|
16
|
+
# vals.push r['245'].value.upcase
|
17
|
+
# return vals
|
18
|
+
# end
|
19
|
+
# end
|
20
|
+
# end
|
21
|
+
#
|
22
|
+
# css = MARCSpec::CustomSolrSpec.new(:solrField => 'title_upper', :module => MARC2Solr::MyCustomStuff,
|
23
|
+
# :methodSymbol => :uppercaseTitle,
|
24
|
+
# :map => ss.map('mapname')
|
25
|
+
# )
|
26
|
+
# ss.add_spec(css)
|
27
|
+
#
|
28
|
+
#
|
29
|
+
|
30
|
+
|
31
|
+
class CustomSolrSpec < SolrFieldSpec

  attr_accessor :module, :methodSymbol, :methodArgs

  # Build a custom spec from an options hash.
  #
  # Required keys: :solrField, :module, :methodSymbol.
  # Optional keys: :methodArgs (defaults to []), :firstOnly, :default,
  # :map and :noMapKeyDefault.
  #
  # @param [Hash] opts The options described above
  # @raise [ArgumentError] if any of the three required keys is missing or falsy
  def initialize(opts)
    @solrField    = opts[:solrField]
    @module       = opts[:module]
    @methodSymbol = opts[:methodSymbol]

    required = [@solrField, @module, @methodSymbol]
    raise ArgumentError, "Custom solr spec must have a field name in :solrField, module in :module, and the method name as a symbol in :methodSymbol" unless required.all?

    @methodArgs      = opts[:methodArgs] || []
    @first           = opts[:firstOnly] || false
    @default         = opts[:default] || nil
    @map             = opts[:map] || nil
    @noMapKeyDefault = opts[:noMapKeyDefault] || nil
  end

  # Call the configured module function with the record followed by the
  # stored extra arguments, and return whatever it returns.
  #
  # @param r The record handed to the custom function
  def raw_marc_values r
    @module.send(@methodSymbol, r, *@methodArgs)
  end

  # Alternate constructor; simply forwards the hash to .new
  def self.fromHash h
    new(h)
  end

  # Render this spec as the text of a Ruby hash literal.
  #
  # @return [String] the hash-literal text
  def asPPString
    out = StringIO.new

    # Print a label followed by the single-line pretty-printed value.
    emit = lambda do |label, value|
      out.print(label)
      PP.singleline_pp(value, out)
    end

    emit.call("{\n :solrField=> ", @solrField)
    out.print(",\n ")

    out.print(":firstOnly => true,\n ") if @first

    if @default
      emit.call(":default => ", @default)
      out.print(",\n ")
    end

    if @map
      emit.call(":mapname => ", @map.mapname)
      out.print(",\n ")
    end

    if @noMapKeyDefault
      emit.call(":noMapKeyDefault => ", @noMapKeyDefault)
      out.print(",\n ")
    end

    emit.call(":module => ", @module)
    emit.call(",\n :methodSymbol => ", @methodSymbol)
    emit.call(",\n :methodArgs => ", @methodArgs) if @methodArgs

    out.print("\n}")
    out.string
  end

end
|
96
|
+
end
|
97
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'marcspec/map'
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
module MARCSpec
|
8
|
+
|
9
|
+
# A KVMap is, when push comes to shove, just a hash with a name, and the
|
10
|
+
# option of adding a default value for each lookup.
|
11
|
+
|
12
|
+
class KVMap < Map

  # Look up a key, with an optional fallback used only when the key is
  # absent from the underlying hash. The special fallback :passthrough
  # returns the key itself.
  #
  # @example
  #   kvmap = MARCSpec::KVMap.new("sample_map", {1=>'one'})
  #   kvmap[1] #=> 'one'
  #   kvmap[2] #=> nil
  #   kvmap[2, 'Not Found'] #=> 'Not Found'
  #   kvmap[2, :passthrough] #=> 2
  #
  # @param [Object] key The key to look up
  # @param [Object] default The value to return if the lookup fails
  # @return [Object] The stored value, or the fallback described above
  def [] key, default=nil
    return @map[key] if @map.has_key? key
    default == :passthrough ? key : default
  end

  # Store (or overwrite) the value for a key. Also available as #add.
  def []= key, value
    @map[key] = value
  end

  alias_method :add, :[]=

  # Render the map as the text of a Ruby hash literal with :maptype,
  # :mapname and :map entries.
  #
  # @return [String] the hash-literal text
  def asPPString
    out = StringIO.new
    out.print "{\n :maptype=>:kv,\n :mapname=>"
    PP.singleline_pp(@mapname, out)
    out.print ",\n :map => "
    PP.pp(@map, out)
    out.puts "\n}"
    out.string
  end

  # Build a KVMap from a "key = value" properties file. The map name is
  # the file's basename with everything from the first dot removed.
  # Blank lines and lines starting with '#' are skipped; any other line
  # that is not "key = value" is logged via $LOG and skipped.
  #
  # @param [String] filename Path to the properties file
  # @return [KVMap] the populated map
  def self.from_solrmarc_file filename
    mapname = File.basename(filename).sub(/\..+?$/, '')
    table = {}
    File.foreach(filename) do |raw|
      line = raw.chomp
      next unless line =~ /\S/
      line = line.strip
      next if line =~ /^#/

      pair = /^(.+?)\s*=\s*(.+)$/.match(line)
      if pair.nil?
        $LOG.warn "KVMap import skipping weird line in #{filename}\n #{line}"
        next
      end
      table[pair[1]] = pair[2]
    end
    new(mapname, table)
  end

end
|
79
|
+
end
|
data/lib/marcspec/map.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
module MARCSpec
|
2
|
+
|
3
|
+
# A Map is just a named lookup table. The access
|
4
|
+
# (via []) takes, in addition to a key, an optional
|
5
|
+
# default value to return
|
6
|
+
|
7
|
+
class Map
  attr_accessor :mapname, :map

  # Create a new map.
  #
  # @param [String] mapname The name of the map
  # @param [Hash, Array] map Either a standard hash or a list of duples
  def initialize(mapname, map)
    @mapname = mapname
    @map = map
  end

  # Load a map from a file containing a Ruby hash literal with the keys
  # :maptype (:kv or :multi), :mapname and :map.
  #
  # NOTE(security): the file content is run through eval, so only load
  # files you trust.
  #
  # @param [String] filename Path to the map file
  # @return [KVMap, MultiValueMap] depending on :maptype
  # @raise [ArgumentError] if :maptype is neither :kv nor :multi
  # @raise whatever reading or evaluating the file raised (after logging it)
  def self.fromFile filename
    begin
      # File.read closes the handle; File.open(...).read left it open.
      str = File.read(filename)
    rescue StandardError => e
      # Guard on $LOG so a missing logger cannot mask the real error.
      $LOG.error "Problem opening #{filename}: #{e.message}" if $LOG
      raise
    end

    begin
      rawmap = eval(str)
    rescue StandardError, ScriptError => e
      # ScriptError is listed because eval raises SyntaxError for bad files.
      $LOG.error "Problem evaluating (with 'eval') file #{filename}: #{e.message}" if $LOG
      raise
    end

    case rawmap[:maptype]
    when :kv
      return KVMap.new(rawmap[:mapname], rawmap[:map])
    when :multi
      return MultiValueMap.new(rawmap[:mapname], rawmap[:map])
    else
      $LOG.error "Map file #{filename} doesn't seem to be either a KV map or a MuliValueMap according to :maptype (#{rawmap[:maptype]})" if $LOG
      raise ArgumentError, "File #{filename} doesn't evaluate to a valid map"
    end
  end

  # Two maps are equal when both their names and their contents are equal.
  # This compares only; it never modifies either object.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def == other
    other.respond_to?(:mapname) && other.respond_to?(:map) &&
      other.mapname == mapname && other.map == map
  end

  # Hook for the pp library: prints the hash that asPPString describes.
  # asPPString is not defined in this class; it is expected on the receiver.
  def pretty_print pp
    pp.pp eval(self.asPPString)
  end

  # Build a map from a hash with :mapname and :map entries.
  #
  # @param [Hash] rawmap The hash to read :mapname and :map from
  def self.fromHash rawmap
    return self.new(rawmap[:mapname], rawmap[:map])
  end

  # Take the output of asPPString and eval it to get rawmap; pass it
  # to fromHash to get the map object.
  #
  # NOTE(security): uses eval, so only pass strings you trust.
  #
  # @param [String] str Ruby hash-literal text
  def self.fromPPString str
    rawmap = eval(str)
    return self.fromHash rawmap
  end

end
|
67
|
+
end
|
@@ -0,0 +1,205 @@
|
|
1
|
+
require 'marc4j4r'
|
2
|
+
require 'set'
|
3
|
+
require 'pp'
|
4
|
+
module MARCSpec
|
5
|
+
|
6
|
+
# A ControlFieldSpec takes a control tag (generally 001..009) and an optional zero-based range
|
7
|
+
# When called with marc_values(record), it returns either the complete value of all
|
8
|
+
# occurrences of the field in question (in the order they appear in the record), or
|
9
|
+
# the zero-based substrings based on the passed range.
|
10
|
+
#
|
11
|
+
# @example Get the whole 001
|
12
|
+
# cfs = MARCSpec::ControlFieldSpec.new('001')
|
13
|
+
#
|
14
|
+
# @example Get the first three characters of the 008
|
15
|
+
# cfs = MARCSpec::ControlFieldSpec.new('008', 0..2)
|
16
|
+
#
|
17
|
+
# Note that the use of the zero-based range in this manner conforms to the way MARC
|
18
|
+
# substrings are specified.
|
19
|
+
|
20
|
+
class ControlFieldSpec
  attr_accessor :tag, :range

  # Create a spec for a control field.
  #
  # @param [String] tag A tag accepted by MARC4J4R::ControlField.control_tag?
  # @param [nil, Integer, Range] range Optional zero-based character position or range
  # @raise [ArgumentError] if the tag is not a control tag or the range is invalid
  def initialize(tag, range=nil)
    unless MARC4J4R::ControlField.control_tag? tag
      raise ArgumentError, "Tag must be a control tag"
    end
    @tag = tag
    self.range = range
  end

  # Two specs are equal when both tag and range are equal.
  # This compares only; it never modifies either object.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def == other
    other.respond_to?(:tag) && other.respond_to?(:range) &&
      tag == other.tag && range == other.range
  end

  # Always store a real Range, since in Ruby 1.8 a string subscript with a
  # single integer returns the character code (e.g., "Bill"[0] => 66),
  # whereas "Bill"[0..0] gives the expected 'B'.
  #
  # @param [nil, Integer, Range] range A zero-based substring range or character position
  # @return [MARCSpec::ControlFieldSpec] self
  # @raise [ArgumentError] for a negative integer or any other type
  def range= range
    case range
    when nil
      @range = nil
    when Integer
      # Integer rather than Fixnum: Fixnum is gone from current Ruby.
      if range < 0
        raise ArgumentError, "Range must be nil, an integer offset (zero-based), or a Range, not #{range}"
      end
      @range = range..range
    when Range
      @range = range
    else
      raise ArgumentError, "Range must be nil, an integer offset (zero-based), or a Range, not #{range.inspect}"
    end
    self
  end

  # Collect the value of every field with this spec's tag, in the order
  # find_by_tag returns them, each cut down to the range when one is set.
  #
  # @param r A record responding to find_by_tag
  # @return [Array<String>] one entry per matching field
  def marc_values r
    vals = r.find_by_tag(@tag).map {|f| f.value}
    return vals unless @range
    vals.map {|v| v[@range]}
  end

  # Hook for the pp library: prints the array that asPPString describes.
  def pretty_print pp
    pp.pp eval(self.asPPString)
  end

  # Render as the text of a Ruby array literal: [tag] or [tag, range].
  #
  # @return [String]
  def asPPString
    s = StringIO.new
    PP.pp(@range ? [@tag, @range] : [@tag], s)
    s.string
  end

  # Rebuild a spec from asPPString output.
  #
  # NOTE(security): uses eval, so only pass strings you trust.
  #
  # @param [String] str Ruby array-literal text
  def self.fromPPString str
    a = eval(str)
    return self.new(*a)
  end

end
|
94
|
+
|
95
|
+
|
96
|
+
# A LeaderSpec deals only with the leader. It's basically the same as a controlfield spec, but
|
97
|
+
# using the string 'LDR' to identify itself
|
98
|
+
|
99
|
+
class LeaderSpec < ControlFieldSpec

  # Built to be syntax-compatible with ControlFieldSpec, the tag must always
  # be 'LDR' (case matters)
  #
  # @param ['LDR'] tag The 'tag'; in this case, always 'LDR'
  # @param [Integer, Range<Integer>] range substring specification (either one character or a range) to return
  #   instead of the whole leader.
  # @raise [ArgumentError] if the tag is anything other than 'LDR'
  def initialize(tag, range=nil)
    # The comma matters: without it Ruby calls a method named ArgumentError
    # and raises NoMethodError instead.
    raise ArgumentError, "Tag must be 'LDR'" unless tag == 'LDR'
    @tag = 'LDR'
    self.range = range
  end

  # Return the appropriate value (either the leader or a subset of it) from the
  # given record
  #
  # @param [MARC4J4R::Record] r A MARC4J4R Record
  # @return [String] the leader or substring of the leader
  def marc_values r
    @range ? r.leader[@range] : r.leader
  end
end
|
129
|
+
|
130
|
+
|
131
|
+
# A VariableFieldSpec has a tag (three chars) and a set of codes. Its #marc_values(r) method will return
|
132
|
+
# all the values for the subfields for the given codes joined by the optional joiner (space by default)
|
133
|
+
#
|
134
|
+
# The subfield values are presented in the order they appear in the document, *not* the order the subfield
|
135
|
+
# codes are specified
|
136
|
+
#
|
137
|
+
# @example Get the $a from the 245s
|
138
|
+
# vfs = MARCSpec::VariableFieldSpec.new('245', 'a')
|
139
|
+
#
|
140
|
+
# vfs = MARCSpec::VariableFieldSpec.new('245', 'ab')
|
141
|
+
|
142
|
+
|
143
|
+
class VariableFieldSpec

  attr_accessor :tag, :codes, :joiner

  # Create a spec for a variable (data) field.
  #
  # @param [String] tag The field tag
  # @param [nil, String, Array, Set, Range] codes Subfield codes to extract; stored as nil when nil
  # @param [String, nil] joiner String used to join subfield values; nil means a single space
  def initialize tag, codes=nil, joiner=' '
    @tag = tag
    @joiner = joiner || ' '
    self.codes = codes
  end

  # Two specs are equal when tag, codes and joiner are all equal.
  # This compares only; it never modifies either object.
  #
  # @param [Object] other The object to compare against
  # @return [Boolean]
  def == other
    other.respond_to?(:tag) && other.respond_to?(:codes) && other.respond_to?(:joiner) &&
      tag == other.tag && codes == other.codes && joiner == other.joiner
  end

  # Set the subfield codes. Arrays, Sets and Ranges are converted with
  # to_a; anything else is split into single characters; nil stays nil.
  #
  # @param [nil, String, Array, Set, Range] c The codes
  def codes= c
    @codes = case c
             when nil               then nil
             when Array, Set, Range then c.to_a
             else                        c.split(//)
             end
  end

  # For every field with this tag, join the subfield values returned by
  # sub_values(codes) with the joiner. Fields that yield no subfield
  # values contribute nothing.
  #
  # @param r A record responding to find_by_tag
  # @return [Array<String>] one joined string per contributing field
  def marc_values r
    vals = []
    r.find_by_tag(@tag).each do |f|
      subvals = f.sub_values(@codes)
      vals << subvals.join(@joiner) if subvals.size > 0
    end
    vals
  end

  # Hook for the pp library: prints the array that asPPString describes.
  def pretty_print pp
    pp.pp eval(self.asPPString)
  end

  # Render as the text of a Ruby array literal:
  # [tag, '*', '*', codes] plus the joiner when it is not a single space.
  # The two '*' entries are fixed placeholders. When no codes are set the
  # codes slot holds nil, so fromPPString restores the same spec.
  #
  # @return [String]
  def asPPString
    parts = [@tag, '*', '*', @codes && @codes.join('')]
    parts << @joiner if @joiner and @joiner != ' '
    s = StringIO.new
    PP.pp(parts, s)
    s.string
  end

  # Rebuild a spec from asPPString output (elements 0, 3 and 4 are used).
  #
  # NOTE(security): uses eval, so only pass strings you trust.
  #
  # @param [String] str Ruby array-literal text
  def self.fromPPString str
    a = eval(str)
    return self.new(a[0], a[3], a[4])
  end

end
|
204
|
+
|
205
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'marcspec/map'
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
|
5
|
+
module MARCSpec
|
6
|
+
|
7
|
+
# A MultiValueMap (in this context) is an array of duples of the form
|
8
|
+
# [thingToMatch, 'Value'] (or [thingToMatch, [array,of,values]])
|
9
|
+
# along with an associated name.
|
10
|
+
#
|
11
|
+
# Accessing via [] will give you an array of non-nil values that match (via ===)
|
12
|
+
# the corresponding keys.
|
13
|
+
#
|
14
|
+
# Keys can be either strings or regular expressions (e.g., /^Bil/).
|
15
|
+
#
|
16
|
+
# Again, note that if several keys are === to the passed argument, all the values will be returned.
|
17
|
+
|
18
|
+
class MultiValueMap < Map

  attr_accessor :mapname,:map

  # Return the values of every duple whose key matches (via ===) the
  # passed key, flattened, with nils and duplicates removed. When nothing
  # matches, a one-element array holding the default is returned.
  #
  # @param [Object] key The value to test each stored key against
  # @param [Object] default Returned (wrapped in an array) when nothing matches
  # @return [Array]
  def [] key, default=nil
    hits = @map.map {|pv| pv[0] === key ? pv[1] : nil}
    hits = hits.flatten.compact.uniq
    hits.size > 0 ? hits : [default]
  end

  # Build a MultiValueMap from a solrmarc pattern file, whose lines look
  # like "pattern_N = regexp => value". The map name is the file's
  # basename with everything from the first dot removed. Blank lines and
  # '#' comment lines are skipped silently; any other non-matching line is
  # logged via $LOG (when set) and skipped.
  #
  # @param [String] filename Path to the pattern file
  # @return [MultiValueMap] the populated map
  def self.from_solrmarc_file filename
    mapname = File.basename(filename).sub(/\..+?$/, '')
    kvlist = []
    File.open(filename) do |f|
      f.each_line do |line|
        stripped = line.strip
        next if stripped.empty? or stripped[0, 1] == '#'

        match = /^pattern.*?=\s*(.*?)\s*=>\s*(.*?)\s*$/.match(line)
        unless match
          # The original interpolated an undefined `l` here, which raised
          # NameError on the first non-matching line.
          $LOG.warn "MultiValueMap import skipping weird line in #{filename}\n #{stripped}" if $LOG
          next
        end
        kvlist << [Regexp.new(match[1]), match[2]]
      end
    end
    return self.new(mapname, kvlist)
  end

  # Render the map as the text of a Ruby hash literal with :maptype,
  # :mapname and :map entries.
  #
  # @return [String] the hash-literal text
  def asPPString
    s = StringIO.new
    s.print "{\n :maptype=>:multi,\n :mapname=>"
    PP.singleline_pp(@mapname, s)
    s.print ",\n :map => "
    PP.pp(@map, s)
    s.puts "\n}"
    return s.string
  end
end
|
61
|
+
end
|
62
|
+
|