marc2solr 0.1.8 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +12 -0
- data/VERSION +1 -1
- data/bin/marc2solr +58 -10
- data/lib/marc2solr.rb +26 -5
- metadata +4 -4
data/CHANGES
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
0.2.0
|
2
|
+
Added ability to specify custom options in config file (custom "myKey", "myvalue") or
|
3
|
+
on the command line (--custom key="value")
|
4
|
+
|
5
|
+
Putting options hash in each record as it gets processed; allows you to access the options
|
6
|
+
in a custom function with, e.g., r.cachespot["myKey"]
|
7
|
+
|
8
|
+
0.1.9
|
9
|
+
Added 'ping' method
|
10
|
+
Changed commit/delete/index to internally call 'ping' to make sure machine is alive
|
11
|
+
unless dryrun is set.
|
12
|
+
|
1
13
|
0.1.8
|
2
14
|
Missed a ref to 'logback-simple'; removed.
|
3
15
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/bin/marc2solr
CHANGED
@@ -9,33 +9,65 @@ require 'threach'
|
|
9
9
|
|
10
10
|
|
11
11
|
|
12
|
-
|
13
12
|
opts = MARC2Solr::Conf.new
|
14
13
|
|
15
|
-
# pp opts
|
16
14
|
|
17
15
|
|
18
|
-
###########################
|
19
|
-
# Get a master logger
|
20
|
-
###########################
|
21
16
|
|
17
|
+
# Perform the command
|
22
18
|
|
23
|
-
|
19
|
+
# If we're going to be talking to Solr, make sure it's there!
|
20
|
+
HC = Java::org.apache.commons.httpclient
|
24
21
|
|
25
|
-
|
22
|
+
# Verify that the configured Solr server answers its ping handler.
#
# When opts[:dryrun] is set, nothing is sent over the network; a note is
# logged (if a logger exists) and the method returns. Otherwise an HTTP GET
# is issued against "<opts.sussURL>/admin/ping" and anything other than a
# 200 response is treated as failure.
#
# LOG is only assigned by the commit/delete/index commands, so every log
# call here is guarded with `defined? LOG`; the "ping" command calls this
# method without ever creating a logger.
#
# @param opts [MARC2Solr::Conf] configuration; must answer [:dryrun] and sussURL
# @return [nil]
# @raise [ArgumentError] if the request fails or the response code is not 200
def pingSolr(opts)
  if opts[:dryrun]
    LOG.info "Dry run: not bothering to check and see if solr is alive" if defined? LOG
    return
  end

  url = opts.sussURL + '/admin/ping'
  client = HC.HttpClient.new
  method = HC.methods.GetMethod.new(url)

  begin
    code = client.executeMethod(method)
  rescue => e
    LOG.error "Can't ping solr URL '#{url}'" if defined? LOG
    # Keep the ArgumentError type callers rescue, but carry the real cause along.
    raise ArgumentError, "(Not up?) Can't ping solr server at #{opts.sussURL} (#{e.message})"
  ensure
    # Commons HttpClient requires the connection to be released explicitly.
    method.releaseConnection
  end

  unless code == 200
    LOG.error "Can't ping solr URL '#{url}'" if defined? LOG
    raise ArgumentError, "Got response code #{code} from #{opts.sussURL}"
  end
end
|
26
41
|
|
27
|
-
susslog = RJack::Logback['org.apache.solr.client.solrj.impl.StreamingUpdateSolrServer']
|
28
|
-
susslog.level = RJack::Logback::WARN
|
29
42
|
|
30
|
-
# Perform the command
|
31
43
|
|
32
44
|
case opts.command
|
45
|
+
|
46
|
+
###################
|
47
|
+
# Ping -- see if your solr is alive
|
48
|
+
###################
|
49
|
+
|
50
|
+
when "ping"
|
51
|
+
begin
|
52
|
+
pingSolr(opts)
|
53
|
+
puts "\n#{opts.sussURL} is alive\n"
|
54
|
+
rescue ArgumentError => e
|
55
|
+
puts e.message
|
56
|
+
end
|
33
57
|
|
34
58
|
#################
|
35
59
|
# Commit -- just send a commit to the configured solr
|
36
60
|
#################
|
37
61
|
|
38
62
|
when "commit"
|
63
|
+
LOG = opts.masterLogger
|
64
|
+
|
65
|
+
# Crank down the SUSS logging
|
66
|
+
|
67
|
+
susslog = RJack::Logback['org.apache.solr.client.solrj.impl.StreamingUpdateSolrServer']
|
68
|
+
susslog.level = RJack::Logback::WARN
|
69
|
+
|
70
|
+
pingSolr(opts)
|
39
71
|
LOG.info "Commit to #{opts.sussURL}"
|
40
72
|
if opts[:dryrun]
|
41
73
|
LOG.debug "Using javabin" if opts[:javabin]
|
@@ -51,6 +83,13 @@ when "commit"
|
|
51
83
|
#####################################################
|
52
84
|
|
53
85
|
when "delete"
|
86
|
+
LOG = opts.masterLogger
|
87
|
+
|
88
|
+
# Crank down the SUSS logging
|
89
|
+
|
90
|
+
susslog = RJack::Logback['org.apache.solr.client.solrj.impl.StreamingUpdateSolrServer']
|
91
|
+
susslog.level = RJack::Logback::WARN
|
92
|
+
|
54
93
|
delfiles = opts.rest
|
55
94
|
unless delfiles.size > 0
|
56
95
|
LOG.error "command 'delete' needs at least one filename"
|
@@ -101,6 +140,14 @@ when "delete"
|
|
101
140
|
#####################################################
|
102
141
|
|
103
142
|
when "index"
|
143
|
+
|
144
|
+
LOG = opts.masterLogger
|
145
|
+
|
146
|
+
# Crank down the SUSS logging
|
147
|
+
|
148
|
+
susslog = RJack::Logback['org.apache.solr.client.solrj.impl.StreamingUpdateSolrServer']
|
149
|
+
susslog.level = RJack::Logback::WARN
|
150
|
+
|
104
151
|
marcfiles = opts.rest
|
105
152
|
|
106
153
|
unless marcfiles.size > 0
|
@@ -215,6 +262,7 @@ when "index"
|
|
215
262
|
reader = opts.reader(filename)
|
216
263
|
reader.send(method, *args) do |r, i|
|
217
264
|
Thread.current[:index] = i
|
265
|
+
r.cachespot[opts] = opts
|
218
266
|
|
219
267
|
LOG.debug "Record {} with 001 {}", i, r['001'].value
|
220
268
|
|
data/lib/marc2solr.rb
CHANGED
@@ -11,10 +11,16 @@ module MARC2Solr
|
|
11
11
|
class Conf
|
12
12
|
include JLogger::Simple
|
13
13
|
|
14
|
-
SUB_COMMANDS = %w(index delete commit help)
|
14
|
+
SUB_COMMANDS = %w(index delete commit help ping)
|
15
15
|
|
16
16
|
|
17
17
|
OPTIONSCONFIG = [
|
18
|
+
[:custom, {:desc => "Any custom value you want. In a config file, use two String arguments (custom key value); on the command line use (--custom key=value) or (--custom key=\"three word value\")",
|
19
|
+
:type=>String,
|
20
|
+
:multi => true,
|
21
|
+
:only => [:index],
|
22
|
+
:short => '-C'
|
23
|
+
}],
|
18
24
|
[:config, {:desc => "Configuation file specifying options. Repeatable. Command-line arguments always override the config file(s)",
|
19
25
|
:type => :io,
|
20
26
|
:multi => true}],
|
@@ -58,7 +64,7 @@ module MARC2Solr
|
|
58
64
|
:only => [:index]
|
59
65
|
}],
|
60
66
|
[:skipcommit, {:desc => "DON'T send solr a 'commit' afterwards",
|
61
|
-
:short => '-
|
67
|
+
:short => '-S',
|
62
68
|
:only => [:delete, :index],
|
63
69
|
}],
|
64
70
|
[:threads, {:desc => "Number of threads to use to process MARC records (>1 => use 'threach')",
|
@@ -70,7 +76,6 @@ module MARC2Solr
|
|
70
76
|
:type => :int,
|
71
77
|
:default => 1}],
|
72
78
|
[:susssize, {:desc => "Size of the documente queue for sending to Solr",
|
73
|
-
:short => '-S',
|
74
79
|
:default => 128}],
|
75
80
|
[:machine, {:desc => "Name of solr machine (e.g., solr.myplace.org)",
|
76
81
|
:short => '-m',
|
@@ -175,7 +180,7 @@ module MARC2Solr
|
|
175
180
|
|
176
181
|
@cmdline.each_pair do |k,v|
|
177
182
|
if @cmdline_given[k]
|
178
|
-
|
183
|
+
puts "Send override #{k} = #{v}"
|
179
184
|
self.send(k,v)
|
180
185
|
else
|
181
186
|
unless @config.has_key? k
|
@@ -274,6 +279,20 @@ module MARC2Solr
|
|
274
279
|
pp.pp @config
|
275
280
|
end
|
276
281
|
|
282
|
+
# Handle custom events specially
|
283
|
+
# Store user-defined ("custom") options in @config.
#
# Two calling conventions are supported:
#
# * Config file: custom "myKey", "myvalue" -- two arguments, stored as given.
# * Command line: a single argument holding the list of "key=value" strings
#   collected from repeated --custom options. Each entry is split on the
#   FIRST "=" only, so values may themselves contain "=" (URLs, queries...).
#   Quotes wrapping the value are removed.
#
# @param args [Array] either (key, value) or ([ "key=value", ... ])
# @raise [ArgumentError] if a command-line entry has no "=" separator
def custom(*args)
  if args.size == 2 # called in a config file
    @config[args[0]] = args[1]
  else # parse it out
    Array(args[0]).each do |str|
      # Limit of 2: everything after the first '=' belongs to the value.
      key, val = str.split(/\s*=\s*/, 2)
      if val.nil?
        raise ArgumentError, "Custom option '#{str}' must be of the form key=value"
      end
      # Non-destructive so the caller's argument strings are left untouched.
      @config[key] = val.gsub(/^["']*(.*?)['"]$/, '\1')
    end
  end
end
|
294
|
+
|
295
|
+
|
277
296
|
def method_missing(methodSymbol, arg=:notgiven, fromCmdline = false)
|
278
297
|
return @config[methodSymbol] if arg == :notgiven
|
279
298
|
methodSymbol = methodSymbol.to_s.gsub(/=$/, '').to_sym
|
@@ -362,8 +381,10 @@ module MARC2Solr
|
|
362
381
|
log.error "Need solr path (--solrpath)"
|
363
382
|
raise ArgumentError, "Need solr path (--solrpath)"
|
364
383
|
end
|
384
|
+
path.gsub! /^\/*(.*?)\/*$/, '\1' # remove any leading/trailing slashes
|
385
|
+
path.squeeze! '/' # make sure there are no double-slashses
|
365
386
|
|
366
|
-
url = 'http://' + machine + ':' + port.to_s + '/' + path
|
387
|
+
url = 'http://' + machine + ':' + port.to_s + '/' + path
|
367
388
|
end
|
368
389
|
|
369
390
|
def suss
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version: 0.
|
7
|
+
- 2
|
8
|
+
- 0
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- BillDueber
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-11-09 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|