traject 2.0.0.rc.1 → 2.0.0.rc.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -10
- data/lib/traject/indexer.rb +1 -5
- data/lib/traject/macros/marc21.rb +2 -3
- data/lib/traject/version.rb +1 -1
- data/test/indexer/macros_marc21_test.rb +3 -2
- data/traject.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6fbaf2787d85b83ff8df29436475a8366a6f89d3
|
4
|
+
data.tar.gz: 5487c8f6614e6875131ee821fd5ad85f45eaad06
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 20b254aedcae88a4f0392900677373b86f337b0ccb04e4ef27841c90621620a828e7572769a50049f402e30a81156a5f9988a2c144d5db513bcaa6f70d938af2
|
7
|
+
data.tar.gz: f8f9bd42c8c11e375d45a33e58341f54c5fbc8060d7eeff48abd0096615b993efb11ce78337ab131d6137f93ec0be039ac94f89bc611a4075752943ae114e27e
|
data/README.md
CHANGED
@@ -31,11 +31,10 @@ that can combine to deal with any of your local needs.
|
|
31
31
|
|
32
32
|
## Installation
|
33
33
|
|
34
|
-
Traject runs under
|
34
|
+
Traject runs under jruby (1.7.x or higher), MRI ruby (1.9.3 or higher), or probably any other ruby platform.
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
multiple cores for much better performance.
|
36
|
+
**Traject runs much faster on JRuby** where it can use multi-core parallelism, and the Java
|
37
|
+
Marc4J marc reader. If performance is a concern, you should run traject on JRuby.
|
39
38
|
|
40
39
|
Some options for installing a ruby other than your system-provided one are [chruby](https://github.com/postmodern/chruby) and [ruby-install](https://github.com/postmodern/ruby-install#readme).
|
41
40
|
|
@@ -150,7 +149,7 @@ to matched specifications, but you can turn that off, or extract *only* correspo
|
|
150
149
|
to_field "title_vernacular", extract_marc("245abc", :alternate_script => :only)
|
151
150
|
~~~
|
152
151
|
|
153
|
-
By default, specifications with multiple subfields (like "240abc") will produce one single string of output for each
|
152
|
+
By default, specifications with multiple subfields (like "240abc") will produce one single string of output per field (for each '240'), with the concatenation of each matched subfield. Specifications with single subfields (like "020a") will split subfields and produce an output string for each matching subfield.
|
154
153
|
|
155
154
|
For the syntax and complete possibilities of the specification
|
156
155
|
string argument to extract_marc, see docs at the [MarcExtractor class](./lib/traject/marc_extractor.rb) ([rdoc](http://rdoc.info/gems/traject/Traject/MarcExtractor)).
|
@@ -427,11 +426,6 @@ Run tests with `rake test` or just `rake`. Tests are written using Minitest (pl
|
|
427
426
|
list the tests -- but generally prefer unit-style "assert_*" methods
|
428
427
|
to make actual assertions, for clarity.
|
429
428
|
|
430
|
-
Some tests need to run against a solr instance. Currently no solr
|
431
|
-
instance is baked in. You can provide your own solr instance to test against and set shell ENV variable
|
432
|
-
"solr_url", and the tests will use it. Otherwise, tests will
|
433
|
-
use a mocked up Solr instance.
|
434
|
-
|
435
429
|
To make a pull request, please make a feature branch *created from the master branch*, not from an existing feature branch. (If you need to do a feature branch dependent on an existing not-yet merged feature branch... discuss
|
436
430
|
this with other developers first!)
|
437
431
|
|
@@ -442,6 +436,9 @@ and/or extra files in ./docs -- as appropriate for what needs to be docs.
|
|
442
436
|
online api docs has a `--markup markdown` specified -- inline class/method docs are in markdown, not rdoc.
|
443
437
|
|
444
438
|
Bundler rake tasks included for gem releases: `rake release`
|
439
|
+
* Every traject release needs to be done once when running MRI, and switch to JRuby
|
440
|
+
and do the same release again. The JRuby release is identical but for including
|
441
|
+
a gemspec dependency on the Marc4JReader gem.
|
445
442
|
|
446
443
|
## TODO
|
447
444
|
|
data/lib/traject/indexer.rb
CHANGED
@@ -324,13 +324,9 @@ class Traject::Indexer
|
|
324
324
|
|
325
325
|
|
326
326
|
processing_threads = settings["processing_thread_pool"].to_i
|
327
|
-
if processing_threads > 0 and !(defined? JRuby)
|
328
|
-
processing_threads = 0
|
329
|
-
logger.warn "Processing threads set to 0 because we're not running under JRuby"
|
330
|
-
end
|
331
327
|
thread_pool = Traject::ThreadPool.new(processing_threads)
|
332
328
|
|
333
|
-
logger.info " Indexer with reader: #{reader.class.name} and writer: #{writer.class.name}"
|
329
|
+
logger.info " Indexer with #{processing_threads} processing threads, reader: #{reader.class.name} and writer: #{writer.class.name}"
|
334
330
|
|
335
331
|
log_batch_size = settings["log.batch_size"] && settings["log.batch_size"].to_i
|
336
332
|
|
data/lib/traject/macros/marc21.rb
CHANGED
@@ -3,6 +3,7 @@ require 'traject/translation_map'
|
|
3
3
|
require 'traject/util'
|
4
4
|
require 'base64'
|
5
5
|
require 'json'
|
6
|
+
require 'marc/fastxmlwriter'
|
6
7
|
|
7
8
|
module Traject::Macros
|
8
9
|
# Some of these may be generic for any MARC, but we haven't done
|
@@ -140,9 +141,7 @@ module Traject::Macros
|
|
140
141
|
binary = Base64.encode64(binary) if binary_escape
|
141
142
|
accumulator << binary
|
142
143
|
when "xml"
|
143
|
-
|
144
|
-
# call #to_s on it. Hopefully that'll be forward compatible.
|
145
|
-
accumulator << record.to_xml.to_s
|
144
|
+
accumulator << MARC::FastXMLWriter.encode(record)
|
146
145
|
when "json"
|
147
146
|
accumulator << JSON.dump(record.to_hash)
|
148
147
|
end
|
data/lib/traject/version.rb
CHANGED
data/test/indexer/macros_marc21_test.rb
CHANGED
@@ -4,7 +4,7 @@ require 'traject/indexer'
|
|
4
4
|
require 'traject/macros/marc21'
|
5
5
|
|
6
6
|
require 'json'
|
7
|
-
require 'marc
|
7
|
+
require 'marc'
|
8
8
|
|
9
9
|
# See also marc_extractor_test.rb for more detailed tests on marc extraction,
|
10
10
|
# this is just a basic test to make sure our macro works passing through to there
|
@@ -127,7 +127,8 @@ describe "Traject::Macros::Marc21" do
|
|
127
127
|
|
128
128
|
assert_length 1, output["marc_record"]
|
129
129
|
assert_kind_of String, output["marc_record"].first
|
130
|
-
|
130
|
+
roundtrip_record = MARC::XMLReader.new(StringIO.new(output["marc_record"].first)).first
|
131
|
+
assert_equal @record, roundtrip_record
|
131
132
|
end
|
132
133
|
|
133
134
|
it "serializes binary UUEncoded" do
|
data/traject.gemspec
CHANGED
@@ -28,6 +28,7 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_dependency "yell" # logging
|
29
29
|
spec.add_dependency "dot-properties", ">= 0.1.1" # reading java style .properties
|
30
30
|
spec.add_dependency "httpclient", "~> 2.5"
|
31
|
+
spec.add_dependency 'marc-fastxmlwriter', '~>1.0' # fast marc->xml
|
31
32
|
|
32
33
|
# If we're building the package under JRuby, add in the
|
33
34
|
# jruby-only gems and specify the platform.
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0.rc.1
|
4
|
+
version: 2.0.0.rc.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Rochkind
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-02-
|
12
|
+
date: 2015-02-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: concurrent-ruby
|
@@ -115,6 +115,20 @@ dependencies:
|
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '2.5'
|
118
|
+
- !ruby/object:Gem::Dependency
|
119
|
+
name: marc-fastxmlwriter
|
120
|
+
requirement: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '1.0'
|
125
|
+
type: :runtime
|
126
|
+
prerelease: false
|
127
|
+
version_requirements: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '1.0'
|
118
132
|
- !ruby/object:Gem::Dependency
|
119
133
|
name: bundler
|
120
134
|
requirement: !ruby/object:Gem::Requirement
|