marc2solr 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.md +29 -0
- data/Rakefile +56 -0
- data/bin/marc2solr +247 -0
- data/bin/solrmarc_to_marc2solr +260 -0
- data/lib/marc2solr/marc2solr_custom.rb +194 -0
- data/lib/marc2solr.rb +452 -0
- data/spec/marc2solr_spec.rb +7 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- metadata +190 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 BillDueber
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# marc2solr -- get MARC data into Solr
|
2
|
+
|
3
|
+
`marc2solr` is a package wrapping up functionality in a variety of other gems, designed to make getting data from [MARC21](http://en.wikipedia.org/wiki/MARC_standards) files into [Solr](http://lucene.apache.org/solr/) as painless as possible.
|
4
|
+
|
5
|
+
`marc2solr` is based on [Solrmarc](http://code.google.com/p/solrmarc/), the excellent Java-based program that does more or less the same thing. `marc2solr` is *not* a drop-in replacement for Solrmarc, but can do most of the same things. A naive program to translate solrmarc config files to marc2solr config files is included. It's called -- wait for it -- `solrmarc_to_marc2solr`.
|
6
|
+
|
7
|
+
It relies on [jruby](http://jruby.org/) to pull it all together; this will not run under stock Ruby!
|
8
|
+
|
9
|
+
## Documentation
|
10
|
+
|
11
|
+
* [The marc2solr wiki](http://github.com/billdueber/marc2solr/wiki/) has documentation on how to install, configure, and use `marc2solr`, how it compares to `solrmarc`, etc.
|
12
|
+
* The [marc2solr_example](http://github.com/billdueber/marc2solr_example) git project has two examples: `simple_sample` has a very simple index and some translation maps that show off the major features with plenty of documentation. The `umich` subdirectory is the actual working code for the University of Michigan [mirlyn](http://mirlyn.lib.umich.edu/) install.
|
13
|
+
* The [marcspec wiki](http://github.com/billdueber/marcspec/wiki/) is the definitive source for how to construct your index file, translation maps, and custom functions.
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
## Note on Patches/Pull Requests
|
18
|
+
|
19
|
+
* Fork the project.
|
20
|
+
* Make your feature addition or bug fix.
|
21
|
+
* Add tests for it. This is important so I don't break it in a
|
22
|
+
future version unintentionally.
|
23
|
+
* Commit, do not mess with rakefile, version, or history.
|
24
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
25
|
+
* Send me a pull request. Bonus points for topic branches.
|
26
|
+
|
27
|
+
## Copyright
|
28
|
+
|
29
|
+
Copyright (c) 2010 BillDueber. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
# Define the gem's packaging tasks through Jeweler. If Jeweler (or one of its
# dependencies) cannot be loaded, print an installation hint instead of
# aborting the whole Rakefile.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "marc2solr"
    gem.summary = %Q{MARC2Solr: Get MARC into Solr via JRuby}
    # Spelling fix: the description previously said "Sorl".
    gem.description = %Q{Given a file of MARC records, send them to Solr for indexing based on a set of MARCSpecs}
    gem.email = "bill@dueber.com"
    gem.homepage = "http://github.com/billdueber/marc2solr"
    gem.authors = ["BillDueber"]

    # Runtime dependencies
    gem.add_dependency 'marc4j4r', '>= 1.2.0'
    gem.add_dependency 'jruby_streaming_update_solr_server', '>=0.5.2'
    gem.add_dependency 'marcspec', '>= 1.6.3'
    gem.add_dependency 'threach', '>= 0.2.0'
    gem.add_dependency 'logback-simple'

    # Development-only dependencies
    gem.add_development_dependency "rspec", ">= 1.2.9"
    gem.add_development_dependency "yard", ">= 0"

    # Command-line programs shipped with the gem
    gem.bindir = 'bin'
    gem.executables << 'solrmarc_to_marc2solr'
    gem.executables << 'marc2solr'

  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end
|
32
|
+
|
33
|
+
require 'spec/rake/spectask'
|
34
|
+
Spec::Rake::SpecTask.new(:spec) do |spec|
|
35
|
+
spec.libs << 'lib' << 'spec'
|
36
|
+
spec.spec_files = FileList['spec/**/*_spec.rb']
|
37
|
+
end
|
38
|
+
|
39
|
+
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
40
|
+
spec.libs << 'lib' << 'spec'
|
41
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
42
|
+
spec.rcov = true
|
43
|
+
end
|
44
|
+
|
45
|
+
task :spec => :check_dependencies
|
46
|
+
|
47
|
+
task :default => :spec
|
48
|
+
|
49
|
+
begin
|
50
|
+
require 'yard'
|
51
|
+
YARD::Rake::YardocTask.new
|
52
|
+
rescue LoadError
|
53
|
+
task :yardoc do
|
54
|
+
abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
|
55
|
+
end
|
56
|
+
end
|
data/bin/marc2solr
ADDED
@@ -0,0 +1,247 @@
|
|
1
|
+
#!/usr/bin/env jruby --server -J-Djruby.compile.frameless=true -J-Djruby.compile.positionless=true -J-Djruby.compile.peephole=true
|
2
|
+
require 'marc2solr'
|
3
|
+
require 'marc2solr/marc2solr_custom'
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'pp'
|
7
|
+
require 'logback-simple'
|
8
|
+
require 'marcspec'
|
9
|
+
require 'threach'
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
opts = MARC2Solr::Conf.new
|
15
|
+
|
16
|
+
# pp opts
|
17
|
+
|
18
|
+
|
19
|
+
###########################
|
20
|
+
# Get a master logger
|
21
|
+
###########################
|
22
|
+
|
23
|
+
|
24
|
+
$LOG = opts.masterLogger
|
25
|
+
|
26
|
+
# Perform the command
|
27
|
+
|
28
|
+
case opts.command
|
29
|
+
|
30
|
+
#################
|
31
|
+
# Commit -- just send a commit to the configured solr
|
32
|
+
#################
|
33
|
+
|
34
|
+
when "commit"
|
35
|
+
$LOG.info "Commit to #{opts.sussURL}"
|
36
|
+
if opts[:dryrun]
|
37
|
+
$LOG.debug "Using javabin" if opts[:javabin]
|
38
|
+
$LOG.info "DRY RUN. Stopping now."
|
39
|
+
else
|
40
|
+
opts.suss.commit
|
41
|
+
$LOG.info "Commit done"
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
#####################################################
|
46
|
+
# delete -- delete IDs listed in the given file(s)
|
47
|
+
#####################################################
|
48
|
+
|
49
|
+
when "delete"
|
50
|
+
delfiles = opts.rest
|
51
|
+
unless delfiles.size > 0
|
52
|
+
$LOG.error "command 'delete' needs at least one filename"
|
53
|
+
puts "\n\nERROR: command 'delete' needs at least one filename"
|
54
|
+
opts.print_command_help('delete')
|
55
|
+
end
|
56
|
+
|
57
|
+
# Get the suss
|
58
|
+
suss = opts.suss
|
59
|
+
|
60
|
+
# Make sure they can all be opened
|
61
|
+
# Verify up front that every file of IDs to delete can be read, so nothing is
# deleted if any of the inputs is unusable.
# Raises ArgumentError for the first unreadable file.
delfiles.each do |filename|
  unless File.readable? filename
    # These are lists of IDs, not configuration files; say so in the message.
    $LOG.error "Can't open file of IDs to delete `#{filename}`"
    raise ArgumentError, "Can't open file of IDs to delete `#{filename}`"
  end
end
|
67
|
+
|
68
|
+
# Now go ahead and process them
|
69
|
+
total = 0
|
70
|
+
|
71
|
+
# Process each file of IDs (one ID per line): send a delete-by-id for every ID
# unless this is a dry run, and keep both a per-file count and the running
# `total` declared before this loop.
delfiles.each do |filename|
  count = 0
  # Block form of File.open closes the handle even if a delete raises.
  File.open(filename) do |f|
    $LOG.info "Deleting IDs listed in #{filename}"
    $LOG.info "DRY RUN ONLY" if opts[:dryrun]
    f.each_line do |id|
      id.chomp!
      # A blank line is not an ID; don't send it or count it.
      next if id.empty?
      suss.deleteById(id) unless opts[:dryrun]
      $LOG.debug "Deleted id\t#{id}"
      count += 1
      total += 1
    end
  end
  $LOG.info "Tried to delete #{count} ids from file '#{filename}'"
end
|
86
|
+
$LOG.info "Tried to delete #{total} ids from all #{delfiles.size} files" if delfiles.size > 1
|
87
|
+
|
88
|
+
unless opts[:dryrun] or opts[:skipcommit]
|
89
|
+
$LOG.info "Sending final commit"
|
90
|
+
suss.commit
|
91
|
+
$LOG.info "Final commit finished"
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
#####################################################
|
96
|
+
# index -- index the given marc files
|
97
|
+
#####################################################
|
98
|
+
|
99
|
+
when "index"
|
100
|
+
marcfiles = opts.rest
|
101
|
+
|
102
|
+
# The 'index' command needs at least one MARC file argument; when none was
# given, log the problem, report it on standard output and print the help for
# the 'index' command.
unless marcfiles.size > 0
  # The log message previously named the 'delete' command by mistake.
  $LOG.error "command 'index' needs at least one marc file to index"
  puts "\n\nERROR: command 'index' needs at least one filename"
  opts.print_command_help('index')
end
|
107
|
+
|
108
|
+
|
109
|
+
# Make sure everything can be opened
|
110
|
+
unless File.readable? opts[:indexfile]
|
111
|
+
$LOG.error "Index configuration file '#{opts[:indexfile]}' (set with --indexfile) cannot be found/read"
|
112
|
+
raise ArgumentError, "Index configuration file '#{opts[:indexfile]}' (set with --indexfile) cannot be found/read"
|
113
|
+
else
|
114
|
+
$LOG.debug "Found index file #{opts[:indexfile]}"
|
115
|
+
end
|
116
|
+
|
117
|
+
unless File.readable? opts[:tmapdir]
|
118
|
+
$LOG.error "Translation Map directory '#{opts[:tmapdir]}' (set with --tmapdir) cannot be found/read"
|
119
|
+
raise ArgumentError, "Translation Map directory '#{opts[:tmapdir]}' (set with --tmapdir) cannot be found/read"
|
120
|
+
else
|
121
|
+
$LOG.debug "Found translation maps directory #{opts[:tmapdir]}"
|
122
|
+
end
|
123
|
+
|
124
|
+
marcfiles.each do |filename|
|
125
|
+
if filename == "STDIN"
|
126
|
+
$LOG.info "Using standard input as a marc file"
|
127
|
+
next
|
128
|
+
end
|
129
|
+
unless File.readable? filename
|
130
|
+
$LOG.error "Can't open MARC file `#{filename}`"
|
131
|
+
raise ArgumentError, "Can't open MARC file `#{filename}`"
|
132
|
+
else
|
133
|
+
$LOG.debug "Adding marc file #{filename} to queue"
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
|
138
|
+
# Load all the files in the customdir(s)
|
139
|
+
$LOG.debug "Loading custom routines"
|
140
|
+
# Require every .jar and then every .rb file found directly inside each
# configured custom directory. Directories that do not exist are reported,
# removed from opts[:customdir] and skipped.
if opts[:customdir].size > 0
  # Iterate over a copy: deleting from the list being iterated would make
  # `each` skip the entry that follows a missing directory.
  opts[:customdir].dup.each do |dir|
    unless File.exist? dir
      $LOG.warn "Skipping load directory '#{dir}': Not found"
      opts[:customdir].delete dir
      next
    end
    $LOG.info "Loading files in #{dir}"
    Dir.glob(["#{dir}/*.jar"]).each do |x|
      $LOG.debug "Loading #{x}"
      require x
    end
    Dir.glob(["#{dir}/*.rb"]).each do |x|
      $LOG.debug "Loading #{x}"
      require x
    end
  end
end
|
157
|
+
|
158
|
+
# Get the suss. Will throw an Argument error if there's not enough information
|
159
|
+
suss = opts.suss
|
160
|
+
|
161
|
+
# Create a specset
|
162
|
+
|
163
|
+
ss = MARCSpec::SpecSet.new
|
164
|
+
ss.loadMapsFromDir opts[:tmapdir]
|
165
|
+
ss.buildSpecsFromDSLFile opts[:indexfile]
|
166
|
+
|
167
|
+
# Set up if we're using threach or not
|
168
|
+
if opts[:threads] > 1
|
169
|
+
method = :threach
|
170
|
+
args = [opts[:threads], :each_with_index]
|
171
|
+
else
|
172
|
+
method = :each_with_index
|
173
|
+
args = []
|
174
|
+
end
|
175
|
+
|
176
|
+
|
177
|
+
if opts[:dryrun]
|
178
|
+
$LOG.info "Begin DRY RUN; nothing will be sent to Solr"
|
179
|
+
end
|
180
|
+
|
181
|
+
# Clear solr if so declared
|
182
|
+
if opts[:clearsolr]
|
183
|
+
if opts[:dryrun]
|
184
|
+
$LOG.info "Would have cleared out solr (but for dryrun)"
|
185
|
+
else
|
186
|
+
suss.deleteByQuery('*:*')
|
187
|
+
suss.commit
|
188
|
+
$LOG.info "Cleared out solr"
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
# Read each filename in turn, indexing records in each
|
193
|
+
|
194
|
+
i = 0 # Declare out here so we have a counter independent of the file being indexed
|
195
|
+
|
196
|
+
start = Time.new.to_f
|
197
|
+
marcfiles.each do |filename|
|
198
|
+
$LOG.info "Indexing file #{filename}"
|
199
|
+
|
200
|
+
reader = opts.reader(filename)
|
201
|
+
reader.send(method, *args) do |r, i|
|
202
|
+
Thread.current[:index] = i
|
203
|
+
|
204
|
+
$LOG.debug("Indexing record with 001 {}", r['001'])
|
205
|
+
|
206
|
+
doc = ss.doc_from_marc(r, opts[:benchmark])
|
207
|
+
|
208
|
+
# Send it to solr
|
209
|
+
unless opts[:dryrun]
|
210
|
+
suss << doc
|
211
|
+
end
|
212
|
+
|
213
|
+
# Print out the record and/or the document
|
214
|
+
opts[:debugfile].puts r if opts[:printmarc]
|
215
|
+
opts[:debugfile].puts doc if opts[:printdoc]
|
216
|
+
opts[:debugfile].puts "\n\n" if opts[:printdoc] or opts[:printmarc]
|
217
|
+
|
218
|
+
if Thread.current[:index] % opts[:logbatchsize] == 0
|
219
|
+
pace = Thread.current[:index] / (Time.new.to_f - start)
|
220
|
+
$LOG.info "%d indexed (overall pace: %.0f rec/sec)" % [Thread.current[:index], pace]
|
221
|
+
end
|
222
|
+
end # reader
|
223
|
+
end # marcfiles
|
224
|
+
|
225
|
+
# Commit
|
226
|
+
|
227
|
+
unless opts[:dryrun] or opts[:skipcommit]
|
228
|
+
$LOG.info "Sending final commit"
|
229
|
+
suss.commit
|
230
|
+
$LOG.info "Final commit finished"
|
231
|
+
end
|
232
|
+
|
233
|
+
# Be done
|
234
|
+
$LOG.info "Done indexing"
|
235
|
+
pace = i / (Time.new.to_f - start)
|
236
|
+
$LOG.info "%d indexed (overall pace: %.0f rec/sec)" % [i, pace]
|
237
|
+
|
238
|
+
|
239
|
+
# Log the benchmarking information if requested
|
240
|
+
if opts[:benchmark]
|
241
|
+
ss.benchmarks.keys.sort{|a,b| ss.benchmarks[b].real <=> ss.benchmarks[a].real}.each do |k|
|
242
|
+
$LOG.info("%-20s %s" % [k + ':', ss.benchmarks[k].real.to_s])
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
end # end of the case statement
|
247
|
+
|
@@ -0,0 +1,260 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
require 'marc2solr/marc2solr_custom'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'marc4j4r'
|
5
|
+
require 'logback-simple'
|
6
|
+
require 'pp'
|
7
|
+
require 'marcspec'
|
8
|
+
require 'fileutils'
|
9
|
+
|
10
|
+
|
11
|
+
# We'll take two arguments: a .properties index file, and a new directory
|
12
|
+
propfile = ARGV[0]
|
13
|
+
newdir = ARGV[1]
|
14
|
+
|
15
|
+
# Print the command-line help for solrmarc_to_marc2solr on standard output and
# then terminate the process via Process.exit (default exit status).
def usage
  puts %q{
solrmarc_to_marc2solr -- attempt to convert a solrmarc index file and
translation maps to marc2solr format

Usage:

  solrmarc_to_marc2solr /path/to/blah_index.properties /path/to/newdir

This will:
1. Translate all the translation maps found in the directory "translation_maps"
   located in the same directory as blah_index.properties and put them in
   /path/to/newdir/translation_maps
2. Translate blah_index.properties and put the resulting index.dsl file in
   /path/to/newdir/index.dsl
3. Create a logfile, fromsolrmarc.log, in /path/to/newdir/ that
   lists the lines it couldn't read and didn't attempt to translate (e.g., most
   custom functions)
}
  Process.exit

end
|
37
|
+
|
38
|
+
# Need help?
|
39
|
+
|
40
|
+
if ARGV.size != 2
|
41
|
+
usage
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
$LOG = Logback::Simple::Logger.singleton('fromsolrmarc')
|
46
|
+
Logback::Simple.loglevel = :debug
|
47
|
+
Logback::Simple.startFileLogger("#{newdir}/fromsolrmarc.log")
|
48
|
+
|
49
|
+
|
50
|
+
# First, try to create the new directory structure
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
# Stop early when the solrmarc index (.properties) file does not exist:
# tell the user on STDERR, record it in the log, and exit.
unless File.exist? propfile
  # Both messages previously lacked the closing quote after the file name.
  STDERR.puts "Can't find index file '#{propfile}'\nUse 'solrmarc_to_marc2solr' by itself for usage"
  $LOG.error "Can't find file '#{propfile}'"
  Process.exit
end
|
59
|
+
|
60
|
+
unless File.readable? propfile
|
61
|
+
STDERR.puts "Index file '#{propfile}' exists but cannot be read\nUse 'solrmarc_to_marc2solr' by itself for usage"
|
62
|
+
$LOG.error "File '#{propfile}' exists but cannot be read"
|
63
|
+
Process.exit
|
64
|
+
end
|
65
|
+
|
66
|
+
|
67
|
+
# Create the output layout under newdir: translation_maps/ and lib/.
# Failures are logged and execution continues (best effort, as before).
begin
  FileUtils.mkdir_p "#{newdir}/translation_maps"
  # mkdir_p (rather than mkdir) so an existing lib/ from an earlier run is
  # not treated as an error.
  FileUtils.mkdir_p "#{newdir}/lib"
rescue StandardError => e
  # StandardError instead of Exception so interrupts and exit requests are
  # not swallowed here.
  $LOG.warn "Could not create output directories under '#{newdir}': #{e.message}"
end
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
propfiledir = File.dirname(propfile)
|
78
|
+
trmapdir = propfiledir + '/translation_maps'
|
79
|
+
newpropfile = 'index.dsl'
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
ss = MARCSpec::SpecSet.new
|
86
|
+
Dir.glob(trmapdir + '/*.properties').each do |f|
|
87
|
+
File.open(f) do |fh|
|
88
|
+
fh.each_line do |line|
|
89
|
+
next if line =~ /^\s*#/
|
90
|
+
next unless line =~ /\S/
|
91
|
+
if line =~ /^\s*pattern/
|
92
|
+
$LOG.debug "Adding '#{File.basename f}' as a pattern map"
|
93
|
+
ss.add_map MARCSpec::MultiValueMap.from_solrmarc_file(f)
|
94
|
+
break
|
95
|
+
else
|
96
|
+
$LOG.debug "Adding '#{File.basename f}' as a key/value map"
|
97
|
+
ss.add_map MARCSpec::KVMap.from_solrmarc_file(f)
|
98
|
+
break
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
WHOLE = /^(\d{3})$/
|
107
|
+
CTRL = /^(\d{3})\[(.+?)\]/
|
108
|
+
VAR = /^(\d{3})(.+)/
|
109
|
+
|
110
|
+
File.open(propfile) do |fh|
|
111
|
+
fh.each_line do |line|
|
112
|
+
next unless line =~ /\S/
|
113
|
+
line.strip!
|
114
|
+
|
115
|
+
# Leave comments alone
|
116
|
+
if line =~ /^#/
|
117
|
+
# puts line
|
118
|
+
next
|
119
|
+
end
|
120
|
+
|
121
|
+
fieldname,spec = line.split(/\s*=\s*/)
|
122
|
+
|
123
|
+
# Deal with constants
|
124
|
+
if spec =~ /^"(.+)"\s*$/
|
125
|
+
constant = $1
|
126
|
+
csf = MARCSpec::ConstantSolrSpec.new(:solrField=>fieldname, :constantValue=>constant)
|
127
|
+
ss << csf
|
128
|
+
next
|
129
|
+
end
|
130
|
+
|
131
|
+
# Deal with built-in functions if we can
|
132
|
+
if spec == 'FullRecordAsXML'
|
133
|
+
csf = MARCSpec::CustomSolrSpec.new(:solrField=>fieldname,
|
134
|
+
:module => MARC2Solr::Custom,
|
135
|
+
:functionSymbol => :asXML)
|
136
|
+
ss << csf
|
137
|
+
next
|
138
|
+
end
|
139
|
+
|
140
|
+
if spec == 'FullRecordAsMARC'
|
141
|
+
csf = MARCSpec::CustomSolrSpec.new(:solrField=>fieldname,
|
142
|
+
:module => MARC2Solr::Custom,
|
143
|
+
:functionSymbol => :asMARC)
|
144
|
+
ss << csf
|
145
|
+
next
|
146
|
+
end
|
147
|
+
|
148
|
+
if spec == 'DateOfPublication'
|
149
|
+
csf = MARCSpec::CustomSolrSpec.new(:solrField=>fieldname,
|
150
|
+
:module => MARC2Solr::Custom,
|
151
|
+
:functionSymbol => :getDate)
|
152
|
+
ss << csf
|
153
|
+
next
|
154
|
+
end
|
155
|
+
|
156
|
+
|
157
|
+
if spec =~ /^custom,\s*getAllSearchableFields\((\d+),\s*(\d+)\)/
|
158
|
+
low = $1
|
159
|
+
high = $2
|
160
|
+
csf = MARCSpec::CustomSolrSpec.new(:solrField=>fieldname,
|
161
|
+
:module => MARC2Solr::Custom,
|
162
|
+
:functionSymbol => :getAllSearchableFields,
|
163
|
+
:functionArgs => [low, high])
|
164
|
+
ss << csf
|
165
|
+
next
|
166
|
+
end
|
167
|
+
|
168
|
+
# Log and ignore other custom fields
|
169
|
+
if spec =~ /^custom/
|
170
|
+
$LOG.warn "Skipping custom line #{line}"
|
171
|
+
next
|
172
|
+
end
|
173
|
+
|
174
|
+
|
175
|
+
|
176
|
+
#otherwise, build one from scratch
|
177
|
+
|
178
|
+
sfs = MARCSpec::SolrFieldSpec.new(:solrField => fieldname)
|
179
|
+
|
180
|
+
marcfields, *specials = spec.split(/\s*,\s*/)
|
181
|
+
|
182
|
+
marcfields.split(/\s*:\s*/).each do |ms|
|
183
|
+
if WHOLE.match ms
|
184
|
+
tag = $1
|
185
|
+
if MARC4J4R::ControlField.control_tag? tag
|
186
|
+
sfs << MARCSpec::ControlFieldSpec.new(tag)
|
187
|
+
else
|
188
|
+
sfs << MARCSpec::VariableFieldSpec.new(tag)
|
189
|
+
end
|
190
|
+
next
|
191
|
+
|
192
|
+
elsif CTRL.match ms
|
193
|
+
tag = $1
|
194
|
+
range = $2
|
195
|
+
first,last = range.split('-')
|
196
|
+
last ||= first
|
197
|
+
first = first.to_i
|
198
|
+
last = last.to_i
|
199
|
+
sfs << MARCSpec::ControlFieldSpec.new(tag, first..last)
|
200
|
+
next
|
201
|
+
elsif VAR.match ms
|
202
|
+
tag = $1
|
203
|
+
sfcodes = $2.split(//)
|
204
|
+
sfs << MARCSpec::VariableFieldSpec.new(tag, sfcodes)
|
205
|
+
else
|
206
|
+
$LOG.warn "Didn't recognize line '#{line}'"
|
207
|
+
end
|
208
|
+
end # marcfields.split
|
209
|
+
|
210
|
+
# Add in the specials -- "first", maps, etc.
|
211
|
+
# Apply the comma-separated modifiers that followed the MARC field list of the
# current line: the keyword 'first' sets sfs.first; anything else is treated
# as the name of a translation map to attach to the field spec.
specials.each do |special|
  case special
  when 'first'
    sfs.first = true
  else
    origmapname = special
    # Escape the dot so only a literal ".properties" is stripped.
    mapname = special.gsub(/\.properties/, '')
    tmap = ss.map(mapname)
    sfs.map = tmap
    # Check the lookup result, not the name: the name is never nil here, so
    # the old test could not fire.
    # NOTE(review): presumably ss.map returns nil for an unknown map name -- confirm.
    if tmap.nil?
      $LOG.warn "Map problem in #{fieldname}: Unrecognized map name '#{mapname}' (specified as '#{origmapname}')"
    end
    # A parenthesised suffix is a solrmarc property-key prefix, which this
    # translator does not handle.
    if mapname =~ /\((.*)\)/
      $LOG.warn "Map problem in #{fieldname}: Translator doesn't deal at all with property key prefixes ('#{$1}' in this case). Please break them into separate map files or complain to Bill."
    end
  end
end
|
227
|
+
ss << sfs if sfs.marcfieldspecs.size > 0
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
# Spit it out
|
232
|
+
|
233
|
+
# First, put the maps in newdir/translation_maps
|
234
|
+
|
235
|
+
ss.tmaps.each do |name, map|
|
236
|
+
filename = name + '.rb'
|
237
|
+
$LOG.debug "Writing out translation map #{filename}"
|
238
|
+
File.open("#{newdir}/translation_maps/#{filename}", 'w') do |f|
|
239
|
+
f.puts map.asPPString
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
# Now the solrspecs
|
244
|
+
# File.open("#{newdir}/#{newpropfile}", 'w') do |f|
|
245
|
+
# $LOG.debug "Writing out spec file #{newpropfile}"
|
246
|
+
# f.puts '['
|
247
|
+
# ss.solrfieldspecs.each do |sfs|
|
248
|
+
# f.puts sfs.asPPString + ','
|
249
|
+
# end
|
250
|
+
# f.puts ']'
|
251
|
+
# end
|
252
|
+
|
253
|
+
File.open("#{newdir}/#{newpropfile}", 'w') do |f|
|
254
|
+
$LOG.debug "Writing out spec file #{newpropfile}"
|
255
|
+
ss.solrfieldspecs.each do |sfs|
|
256
|
+
f.puts sfs.asDSLString
|
257
|
+
f.puts ''
|
258
|
+
end
|
259
|
+
end
|
260
|
+
|