marc2solr 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.md +29 -0
- data/Rakefile +56 -0
- data/bin/marc2solr +247 -0
- data/bin/solrmarc_to_marc2solr +260 -0
- data/lib/marc2solr/marc2solr_custom.rb +194 -0
- data/lib/marc2solr.rb +452 -0
- data/spec/marc2solr_spec.rb +7 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- metadata +190 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 BillDueber
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# marc2solr -- get MARC data into Solr
|
2
|
+
|
3
|
+
`marc2solr` is a package wrapping up functionality from a variety of other gems, designed to make getting data from [MARC21](http://en.wikipedia.org/wiki/MARC_standards) files into [Solr](http://lucene.apache.org/Solr/) as painless as possible.
|
4
|
+
|
5
|
+
`marc2solr` is based on [Solrmarc](http://code.google.com/p/solrmarc/), the excellent Java-based program that does more or less the same thing. `marc2solr` is *not* a drop-in replacement for Solrmarc, but can do most of the same things. A naive program to translate Solrmarc config files to marc2solr config files is included. It's called -- wait for it -- `solrmarc_to_marc2solr`.
|
6
|
+
|
7
|
+
It relies on [jruby](http://jruby.org/) to pull it all together; this will not run under stock Ruby!
|
8
|
+
|
9
|
+
## Documentation
|
10
|
+
|
11
|
+
* [The marc2solr wiki](http://github.com/billdueber/marc2solr/wiki/) has documentation on how to install, configure, and use `marc2solr`, how it compares to `solrmarc`, etc.
|
12
|
+
* The [marc2solr_example](http://github.com/billdueber/marc2solr_example) git project has two examples: `simple_sample` has a very simple index and some translation maps that show off the major features with plenty of documentation. The `umich` subdirectory is the actual working code for the University of Michigan [mirlyn](http://mirlyn.lib.umich.edu/) install.
|
13
|
+
* The [marcspec wiki](http://github.com/billdueber/marcspec/wiki/) is the definitive source for how to construct your index file, translation maps, and custom functions.
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
## Note on Patches/Pull Requests
|
18
|
+
|
19
|
+
* Fork the project.
|
20
|
+
* Make your feature addition or bug fix.
|
21
|
+
* Add tests for it. This is important so I don't break it in a
|
22
|
+
future version unintentionally.
|
23
|
+
* Commit, do not mess with rakefile, version, or history.
|
24
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
25
|
+
* Send me a pull request. Bonus points for topic branches.
|
26
|
+
|
27
|
+
## Copyright
|
28
|
+
|
29
|
+
Copyright (c) 2010 BillDueber. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
# Gem packaging tasks, defined through Jeweler. When jeweler (or one of its
# dependencies) cannot be loaded, an install hint is printed instead and the
# rest of the Rakefile is still evaluated.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "marc2solr"
    gem.summary = %Q{MARC2Solr: Get MARC into Solr via JRuby}
    # Fixed spelling: this text is published as the gem description ("Sorl" -> "Solr")
    gem.description = %Q{Given a file of MARC records, send them to Solr for indexing based on a set of MARCSpecs}
    gem.email = "bill@dueber.com"
    gem.homepage = "http://github.com/billdueber/marc2solr"
    gem.authors = ["BillDueber"]

    # Runtime dependencies
    gem.add_dependency 'marc4j4r', '>= 1.2.0'
    gem.add_dependency 'jruby_streaming_update_solr_server', '>=0.5.2'
    gem.add_dependency 'marcspec', '>= 1.6.3'
    gem.add_dependency 'threach', '>= 0.2.0'
    gem.add_dependency 'logback-simple'

    # Development-only dependencies
    gem.add_development_dependency "rspec", ">= 1.2.9"
    gem.add_development_dependency "yard", ">= 0"

    # Command-line programs shipped with the gem
    gem.bindir = 'bin'
    gem.executables << 'solrmarc_to_marc2solr'
    gem.executables << 'marc2solr'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end
|
32
|
+
|
33
|
+
require 'spec/rake/spectask'

# `rake spec` -- run every *_spec.rb file under spec/
Spec::Rake::SpecTask.new(:spec) do |t|
  t.libs << 'lib'
  t.libs << 'spec'
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# `rake rcov` -- the same specs, with rcov switched on
Spec::Rake::SpecTask.new(:rcov) do |t|
  t.libs << 'lib'
  t.libs << 'spec'
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

# The spec task first runs :check_dependencies; spec is the default task
task(:spec => :check_dependencies)

task(:default => :spec)
|
48
|
+
|
49
|
+
# Documentation task: use YARD's own rake task when YARD can be loaded,
# otherwise define a `yardoc` task that aborts with an install hint.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  task(:yardoc) do
    abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
  end
end
|
data/bin/marc2solr
ADDED
@@ -0,0 +1,247 @@
|
|
1
|
+
#!/usr/bin/env jruby --server -J-Djruby.compile.frameless=true -J-Djruby.compile.positionless=true -J-Djruby.compile.peephole=true
|
2
|
+
require 'marc2solr'
|
3
|
+
require 'marc2solr/marc2solr_custom'
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'pp'
|
7
|
+
require 'logback-simple'
|
8
|
+
require 'marcspec'
|
9
|
+
require 'threach'
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
opts = MARC2Solr::Conf.new
|
15
|
+
|
16
|
+
# pp opts
|
17
|
+
|
18
|
+
|
19
|
+
###########################
|
20
|
+
# Get a master logger
|
21
|
+
###########################
|
22
|
+
|
23
|
+
|
24
|
+
$LOG = opts.masterLogger
|
25
|
+
|
26
|
+
# Perform the command
|
27
|
+
|
28
|
+
case opts.command
|
29
|
+
|
30
|
+
#################
|
31
|
+
# Commit -- just send a commit to the configured solr
|
32
|
+
#################
|
33
|
+
|
34
|
+
when "commit"
|
35
|
+
$LOG.info "Commit to #{opts.sussURL}"
|
36
|
+
if opts[:dryrun]
|
37
|
+
$LOG.debug "Using javabin" if opts[:javabin]
|
38
|
+
$LOG.info "DRY RUN. Stopping now."
|
39
|
+
else
|
40
|
+
opts.suss.commit
|
41
|
+
$LOG.info "Commit done"
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
#####################################################
|
46
|
+
# delete -- delete IDs listed in the given file(s)
|
47
|
+
#####################################################
|
48
|
+
|
49
|
+
when "delete"
|
50
|
+
delfiles = opts.rest
|
51
|
+
unless delfiles.size > 0
|
52
|
+
$LOG.error "command 'delete' needs at least one filename"
|
53
|
+
puts "\n\nERROR: command 'delete' needs at least one filename"
|
54
|
+
opts.print_command_help('delete')
|
55
|
+
end
|
56
|
+
|
57
|
+
# Get the suss
|
58
|
+
suss = opts.suss
|
59
|
+
|
60
|
+
# Make sure they can all be opened
|
61
|
+
delfiles.each do |filename|
|
62
|
+
unless File.readable? filename
|
63
|
+
$LOG.error "Can't open configuration file `#{filename}`"
|
64
|
+
raise ArgumentError, "Can't open configuration file `#{filename}`"
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# Now go ahead and process them
|
69
|
+
total = 0
|
70
|
+
|
71
|
+
delfiles.each do |filename|
|
72
|
+
count = 0
|
73
|
+
f = File.open(filename)
|
74
|
+
$LOG.info "Deleting IDs listed in #{filename}"
|
75
|
+
$LOG.info "DRY RUN ONLY" if opts[:dryrun]
|
76
|
+
f.each_line do |id|
|
77
|
+
id.chomp!
|
78
|
+
suss.deleteById(id) unless opts[:dryrun]
|
79
|
+
$LOG.debug "Deleted id\t#{id}"
|
80
|
+
count += 1
|
81
|
+
total += 1
|
82
|
+
end
|
83
|
+
f.close
|
84
|
+
$LOG.info "Tried to delete #{count} ids from file '#{filename}'"
|
85
|
+
end
|
86
|
+
$LOG.info "Tried to delete #{total} ids from all #{delfiles.size} files" if delfiles.size > 1
|
87
|
+
|
88
|
+
unless opts[:dryrun] or opts[:skipcommit]
|
89
|
+
$LOG.info "Sending final commit"
|
90
|
+
suss.commit
|
91
|
+
$LOG.info "Final commit finished"
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
#####################################################
|
96
|
+
# index -- index the given marc files
|
97
|
+
#####################################################
|
98
|
+
|
99
|
+
when "index"
|
100
|
+
marcfiles = opts.rest
|
101
|
+
|
102
|
+
unless marcfiles.size > 0
|
103
|
+
$LOG.error "command 'delete' needs at least one marc file to index"
|
104
|
+
puts "\n\nERROR: command 'index' needs at least one filename"
|
105
|
+
opts.print_command_help('index')
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
# Make sure everything can be opened
|
110
|
+
unless File.readable? opts[:indexfile]
|
111
|
+
$LOG.error "Index configuration file '#{opts[:indexfile]}' (set with --indexfile) cannot be found/read"
|
112
|
+
raise ArgumentError, "Index configuration file '#{opts[:indexfile]}' (set with --indexfile) cannot be found/read"
|
113
|
+
else
|
114
|
+
$LOG.debug "Found index file #{opts[:indexfile]}"
|
115
|
+
end
|
116
|
+
|
117
|
+
unless File.readable? opts[:tmapdir]
|
118
|
+
$LOG.error "Translation Map directory '#{opts[:tmapdir]}' (set with --tmapdir) cannot be found/read"
|
119
|
+
raise ArgumentError, "Translation Map directory '#{opts[:tmapdir]}' (set with --tmapdir) cannot be found/read"
|
120
|
+
else
|
121
|
+
$LOG.debug "Found translation maps directory #{opts[:tmapdir]}"
|
122
|
+
end
|
123
|
+
|
124
|
+
marcfiles.each do |filename|
|
125
|
+
if filename == "STDIN"
|
126
|
+
$LOG.info "Using standard input as a marc file"
|
127
|
+
next
|
128
|
+
end
|
129
|
+
unless File.readable? filename
|
130
|
+
$LOG.error "Can't open MARC file `#{filename}`"
|
131
|
+
raise ArgumentError, "Can't open MARC file `#{filename}`"
|
132
|
+
else
|
133
|
+
$LOG.debug "Adding marc file #{filename} to queue"
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
|
138
|
+
# Load all the files in the customdir(s)
|
139
|
+
$LOG.debug "Loading custom routines"
|
140
|
+
if opts[:customdir].size > 0
|
141
|
+
opts[:customdir].each do |dir|
|
142
|
+
unless File.exist? dir
|
143
|
+
$LOG.warn "Skipping load directory '#{dir}': Not found"
|
144
|
+
opts[:customdir].delete dir
|
145
|
+
end
|
146
|
+
$LOG.info "Loading files in #{dir}"
|
147
|
+
Dir.glob(["#{dir}/*.jar"]).each do |x|
|
148
|
+
$LOG.debug "Loading #{x}"
|
149
|
+
require x
|
150
|
+
end
|
151
|
+
Dir.glob(["#{dir}/*.rb"]).each do |x|
|
152
|
+
$LOG.debug "Loading #{x}"
|
153
|
+
require x
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
# Get the suss. Will throw an Argument error if there's not enough information
|
159
|
+
suss = opts.suss
|
160
|
+
|
161
|
+
# Create a specset
|
162
|
+
|
163
|
+
ss = MARCSpec::SpecSet.new
|
164
|
+
ss.loadMapsFromDir opts[:tmapdir]
|
165
|
+
ss.buildSpecsFromDSLFile opts[:indexfile]
|
166
|
+
|
167
|
+
# Set up if we're using threach or not
|
168
|
+
if opts[:threads] > 1
|
169
|
+
method = :threach
|
170
|
+
args = [opts[:threads], :each_with_index]
|
171
|
+
else
|
172
|
+
method = :each_with_index
|
173
|
+
args = []
|
174
|
+
end
|
175
|
+
|
176
|
+
|
177
|
+
if opts[:dryrun]
|
178
|
+
$LOG.info "Begin DRY RUN; nothing will be sent to Solr"
|
179
|
+
end
|
180
|
+
|
181
|
+
# Clear solr if so declared
|
182
|
+
if opts[:clearsolr]
|
183
|
+
if opts[:dryrun]
|
184
|
+
$LOG.info "Would have cleared out solr (but for dryrun)"
|
185
|
+
else
|
186
|
+
suss.deleteByQuery('*:*')
|
187
|
+
suss.commit
|
188
|
+
$LOG.info "Cleared out solr"
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
# Read each filename in turn, indexing records in each
|
193
|
+
|
194
|
+
i = 0 # Declare out here so we have a counter independent of the file being indexed
|
195
|
+
|
196
|
+
start = Time.new.to_f
|
197
|
+
marcfiles.each do |filename|
|
198
|
+
$LOG.info "Indexing file #{filename}"
|
199
|
+
|
200
|
+
reader = opts.reader(filename)
|
201
|
+
reader.send(method, *args) do |r, i|
|
202
|
+
Thread.current[:index] = i
|
203
|
+
|
204
|
+
$LOG.debug("Indexing record with 001 {}", r['001'])
|
205
|
+
|
206
|
+
doc = ss.doc_from_marc(r, opts[:benchmark])
|
207
|
+
|
208
|
+
# Send it to solr
|
209
|
+
unless opts[:dryrun]
|
210
|
+
suss << doc
|
211
|
+
end
|
212
|
+
|
213
|
+
# Print out the record and/or the document
|
214
|
+
opts[:debugfile].puts r if opts[:printmarc]
|
215
|
+
opts[:debugfile].puts doc if opts[:printdoc]
|
216
|
+
opts[:debugfile].puts "\n\n" if opts[:printdoc] or opts[:printmarc]
|
217
|
+
|
218
|
+
if Thread.current[:index] % opts[:logbatchsize] == 0
|
219
|
+
pace = Thread.current[:index] / (Time.new.to_f - start)
|
220
|
+
$LOG.info "%d indexed (overall pace: %.0f rec/sec)" % [Thread.current[:index], pace]
|
221
|
+
end
|
222
|
+
end # reader
|
223
|
+
end # marcfiles
|
224
|
+
|
225
|
+
# Commit
|
226
|
+
|
227
|
+
unless opts[:dryrun] or opts[:skipcommit]
|
228
|
+
$LOG.info "Sending final commit"
|
229
|
+
suss.commit
|
230
|
+
$LOG.info "Final commit finished"
|
231
|
+
end
|
232
|
+
|
233
|
+
# Be done
|
234
|
+
$LOG.info "Done indexing"
|
235
|
+
pace = i / (Time.new.to_f - start)
|
236
|
+
$LOG.info "%d indexed (overall pace: %.0f rec/sec)" % [i, pace]
|
237
|
+
|
238
|
+
|
239
|
+
# Log the benchmarking information if requested
|
240
|
+
if opts[:benchmark]
|
241
|
+
ss.benchmarks.keys.sort{|a,b| ss.benchmarks[b].real <=> ss.benchmarks[a].real}.each do |k|
|
242
|
+
$LOG.info("%-20s %s" % [k + ':', ss.benchmarks[k].real.to_s])
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
end # end of the case statement
|
247
|
+
|
@@ -0,0 +1,260 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
require 'marc2solr/marc2solr_custom'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'marc4j4r'
|
5
|
+
require 'logback-simple'
|
6
|
+
require 'pp'
|
7
|
+
require 'marcspec'
|
8
|
+
require 'fileutils'
|
9
|
+
|
10
|
+
|
11
|
+
# Two positional arguments: the solrmarc .properties index file to read, and
# the directory the translated files are written to
propfile, newdir = ARGV.values_at(0, 1)
|
14
|
+
|
15
|
+
# Print the command-line help to standard output and terminate the process.
#
# The text names the log file as fromsolrmarc.log, which is the file this
# script actually opens in the target directory.
def usage
  puts %q{
solrmarc_to_marc2solr -- attempt to convert a solrmarc index file and
translation maps to marc2solr format

Usage:

  solrmarc_to_marc2solr /path/to/blah_index.properties /path/to/newdir

This will:
  1. Translate all the translation maps found in the directory "translation_maps"
     located in the same directory as blah_index.properties and put them in
     /path/to/newdir/translation_maps
  2. Translate blah_index.properties and put the resulting index.dsl file in
     /path/to/newdir/index.dsl
  3. Create a logfile, fromsolrmarc.log, in /path/to/newdir/ that
     lists the lines it couldn't read and didn't attempt to translate (e.g., most
     custom functions)
}
  Process.exit
end
|
37
|
+
|
38
|
+
# Need help?
|
39
|
+
|
40
|
+
# Anything other than exactly two arguments gets the help text (usage exits)
usage unless ARGV.size == 2


# Global logger for this script: debug level, written to a file in the target directory
$LOG = Logback::Simple::Logger.singleton('fromsolrmarc')
Logback::Simple.loglevel = :debug
Logback::Simple.startFileLogger("#{newdir}/fromsolrmarc.log")
|
48
|
+
|
49
|
+
|
50
|
+
# First, try to create the new directory structure
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
# The index file must exist and be readable. Each failure is reported on
# STDERR and in the log, and ends the run with a non-zero exit status so that
# calling scripts can detect it (a bare Process.exit reports success).
unless File.exist? propfile
  # Closing quote added after the file name in both messages
  STDERR.puts "Can't find index file '#{propfile}'\nUse 'solrmarc_to_marc2solr' by itself for usage"
  $LOG.error "Can't find file '#{propfile}'"
  Process.exit(1)
end

unless File.readable? propfile
  STDERR.puts "Index file '#{propfile}' exists but cannot be read\nUse 'solrmarc_to_marc2solr' by itself for usage"
  $LOG.error "File '#{propfile}' exists but cannot be read"
  Process.exit(1)
end
|
65
|
+
|
66
|
+
|
67
|
+
# Create the output directory structure. mkdir_p does nothing for a directory
# that already exists, so re-running into the same target no longer raises
# for "lib". A real failure (e.g. permissions) is reported and ends the run,
# because the files written further down cannot be created without these
# directories.
begin
  FileUtils.mkdir_p "#{newdir}/translation_maps"
  FileUtils.mkdir_p "#{newdir}/lib"
rescue SystemCallError => e
  # Only file-system errors are handled; interrupts and exit requests propagate
  STDERR.puts "Can't create output directories under '#{newdir}': #{e.message}"
  $LOG.error "Can't create output directories under '#{newdir}': #{e.message}"
  Process.exit(1)
end
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
# The solrmarc translation maps live in "translation_maps" beside the index
# file; the translated index is written under the name index.dsl
propfiledir = File.dirname(propfile)
trmapdir = "#{propfiledir}/translation_maps"
newpropfile = 'index.dsl'


# Read every solrmarc map into a fresh spec set. The first line that is
# neither a comment nor blank decides the kind of map: a line starting with
# "pattern" means a pattern (multi-value) map, anything else a key/value map.
# A file with no such line is not added.
ss = MARCSpec::SpecSet.new
Dir.glob("#{trmapdir}/*.properties").each do |mapfile|
  first_entry = File.open(mapfile) do |fh|
    fh.detect { |line| line !~ /^\s*#/ && line =~ /\S/ }
  end
  next if first_entry.nil?

  if first_entry =~ /^\s*pattern/
    $LOG.debug "Adding '#{File.basename mapfile}' as a pattern map"
    ss.add_map MARCSpec::MultiValueMap.from_solrmarc_file(mapfile)
  else
    $LOG.debug "Adding '#{File.basename mapfile}' as a key/value map"
    ss.add_map MARCSpec::KVMap.from_solrmarc_file(mapfile)
  end
end
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
# Patterns recognising a single field reference inside a solrmarc spec
WHOLE = /^(\d{3})$/         # a bare three-digit tag
CTRL = /^(\d{3})\[(.+?)\]/  # a tag plus a bracketed position or "first-last" range
VAR = /^(\d{3})(.+)/        # a tag plus further characters, passed on one code per character

# solrmarc built-in function names and the MARC2Solr::Custom function used for each
builtin_functions = {
  'FullRecordAsXML'   => :asXML,
  'FullRecordAsMARC'  => :asMARC,
  'DateOfPublication' => :getDate
}

# Translate the solrmarc index file line by line, adding one spec to `ss`
# for every line that can be understood and logging a warning for the rest.
File.open(propfile) do |fh|
  fh.each_line do |line|
    next unless line =~ /\S/
    line.strip!

    # Comment lines are skipped
    next if line =~ /^#/

    # Split on the FIRST '=' only, so a spec that itself contains '=' (for
    # instance inside a quoted constant) is not cut short
    fieldname, spec = line.split(/\s*=\s*/, 2)

    # A line with no '=' or nothing after it has no spec; warn instead of
    # failing later on a nil spec
    if spec.nil? || spec.empty?
      $LOG.warn "Didn't recognize line '#{line}'"
      next
    end

    # Deal with constants: a spec that is entirely a double-quoted string
    if spec =~ /^"(.+)"\s*$/
      ss << MARCSpec::ConstantSolrSpec.new(:solrField=>fieldname, :constantValue=>$1)
      next
    end

    # Deal with built-in functions if we can
    if builtin_functions.has_key? spec
      ss << MARCSpec::CustomSolrSpec.new(:solrField=>fieldname,
                                         :module => MARC2Solr::Custom,
                                         :functionSymbol => builtin_functions[spec])
      next
    end

    if spec =~ /^custom,\s*getAllSearchableFields\((\d+),\s*(\d+)\)/
      # NOTE(review): the two bounds are passed on as strings, exactly as
      # captured -- confirm that is what getAllSearchableFields expects
      ss << MARCSpec::CustomSolrSpec.new(:solrField=>fieldname,
                                         :module => MARC2Solr::Custom,
                                         :functionSymbol => :getAllSearchableFields,
                                         :functionArgs => [$1, $2])
      next
    end

    # Log and ignore other custom fields
    if spec =~ /^custom/
      $LOG.warn "Skipping custom line #{line}"
      next
    end

    # Otherwise, build one from scratch: the first comma-separated item is a
    # colon-separated list of field references, the rest are modifiers
    sfs = MARCSpec::SolrFieldSpec.new(:solrField => fieldname)

    marcfields, *specials = spec.split(/\s*,\s*/)

    marcfields.split(/\s*:\s*/).each do |ms|
      case ms
      when WHOLE
        tag = $1
        if MARC4J4R::ControlField.control_tag? tag
          sfs << MARCSpec::ControlFieldSpec.new(tag)
        else
          sfs << MARCSpec::VariableFieldSpec.new(tag)
        end
      when CTRL
        tag = $1
        # "7-10" becomes 7..10; a single position "7" becomes 7..7
        first, last = $2.split('-')
        last ||= first
        sfs << MARCSpec::ControlFieldSpec.new(tag, first.to_i..last.to_i)
      when VAR
        tag = $1
        sfcodes = $2.split(//)
        sfs << MARCSpec::VariableFieldSpec.new(tag, sfcodes)
      else
        $LOG.warn "Didn't recognize line '#{line}'"
      end
    end # marcfields.split

    # Add in the specials -- "first", maps, etc.
    specials.each do |special|
      if special == 'first'
        sfs.first = true
        next
      end

      # Anything else is taken as a translation map name; drop the literal
      # ".properties" extension (the dot used to match any character)
      mapname = special.gsub(/\.properties/, '')
      tmap = ss.map(mapname)
      sfs.map = tmap

      # Check the LOOKUP result. The old test was `mapname.nil?`, which can
      # never be true, so unknown maps were silently accepted.
      # NOTE(review): assumes ss.map returns nil for an unknown name -- confirm
      if tmap.nil?
        $LOG.warn "Map problem in #{fieldname}: Unrecognized map name '#{mapname}' (specified as '#{special}')"
      end
      if mapname =~ /\((.*)\)/
        $LOG.warn "Map problem in #{fieldname}: Translator doesn't deal at all with property key prefixes ('#{$1}' in this case). Please break them into separate map files or complain to Bill."
      end
    end

    # Only keep the field when at least one field reference was understood
    ss << sfs if sfs.marcfieldspecs.size > 0
  end
end
|
230
|
+
|
231
|
+
# Spit it out
|
232
|
+
|
233
|
+
# First, put the maps in newdir/translation_maps
|
234
|
+
|
235
|
+
# Write each translation map to newdir/translation_maps/<name>.rb
ss.tmaps.each do |name, map|
  filename = name + '.rb'
  $LOG.debug "Writing out translation map #{filename}"
  target = "#{newdir}/translation_maps/#{filename}"
  File.open(target, 'w') { |out| out.puts map.asPPString }
end
|
242
|
+
|
243
|
+
# Now the solrspecs
|
244
|
+
# File.open("#{newdir}/#{newpropfile}", 'w') do |f|
|
245
|
+
# $LOG.debug "Writing out spec file #{newpropfile}"
|
246
|
+
# f.puts '['
|
247
|
+
# ss.solrfieldspecs.each do |sfs|
|
248
|
+
# f.puts sfs.asPPString + ','
|
249
|
+
# end
|
250
|
+
# f.puts ']'
|
251
|
+
# end
|
252
|
+
|
253
|
+
# Write the translated index file: each spec in DSL form, followed by an empty line
File.open("#{newdir}/#{newpropfile}", 'w') do |out|
  $LOG.debug "Writing out spec file #{newpropfile}"
  ss.solrfieldspecs.each { |fieldspec| out.puts fieldspec.asDSLString, '' }
end
|
260
|
+
|