traject 3.3.0 → 3.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/CHANGES.md +23 -2
- data/README.md +23 -2
- data/doc/settings.md +4 -2
- data/doc/xml.md +12 -0
- data/examples/marc/tiny.xml +35 -0
- data/lib/traject/command_line.rb +34 -43
- data/lib/traject/debug_writer.rb +1 -1
- data/lib/traject/macros/marc21.rb +3 -3
- data/lib/traject/macros/marc21_semantics.rb +7 -3
- data/lib/traject/macros/nokogiri_macros.rb +9 -3
- data/lib/traject/macros/transformation.rb +30 -0
- data/lib/traject/marc_extractor.rb +3 -3
- data/lib/traject/nokogiri_reader.rb +2 -0
- data/lib/traject/solr_json_writer.rb +28 -10
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/marc_languages.yaml +77 -48
- data/test/command_line_test.rb +52 -0
- data/test/debug_writer_test.rb +13 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +4 -0
- data/test/indexer/macros/transformation_test.rb +110 -0
- data/test/indexer/nokogiri_indexer_test.rb +35 -0
- data/test/indexer/read_write_test.rb +14 -3
- data/test/solr_json_writer_test.rb +45 -10
- data/test/test_support/missing-second-date.marc +1 -0
- data/traject.gemspec +3 -3
- metadata +19 -21
- data/.travis.yml +0 -16
@@ -178,17 +178,52 @@ describe "Traject::SolrJsonWriter" do
|
|
178
178
|
assert(auth.empty?)
|
179
179
|
end
|
180
180
|
|
181
|
-
|
182
|
-
settings = {
|
183
|
-
"solr.url" => "http://example.com/solr/foo",
|
184
|
-
"solr_writer.basic_auth_user" => "foo",
|
185
|
-
"solr_writer.basic_auth_password" => "bar",
|
186
|
-
}
|
181
|
+
describe "HTTP basic auth" do
|
187
182
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
183
|
+
it "supports basic authentication settings" do
|
184
|
+
settings = {
|
185
|
+
"solr.url" => "http://example.com/solr/foo",
|
186
|
+
"solr_writer.basic_auth_user" => "foo",
|
187
|
+
"solr_writer.basic_auth_password" => "bar",
|
188
|
+
}
|
189
|
+
|
190
|
+
# testing with some internal implementation of HTTPClient sorry
|
191
|
+
|
192
|
+
writer = Traject::SolrJsonWriter.new(settings)
|
193
|
+
|
194
|
+
auth = writer.instance_variable_get("@http_client")
|
195
|
+
.www_auth.basic_auth.instance_variable_get("@auth")
|
196
|
+
assert(!auth.empty?)
|
197
|
+
assert_equal(auth.values.first, Base64.encode64("foo:bar").chomp)
|
198
|
+
end
|
199
|
+
|
200
|
+
it "supports basic auth from solr.url" do
|
201
|
+
settings = {
|
202
|
+
"solr.url" => "http://foo:bar@example.com/solr/foo",
|
203
|
+
}
|
204
|
+
|
205
|
+
# testing with some internal implementation of HTTPClient sorry
|
206
|
+
|
207
|
+
writer = Traject::SolrJsonWriter.new(settings)
|
208
|
+
auth = writer.instance_variable_get("@http_client")
|
209
|
+
.www_auth.basic_auth.instance_variable_get("@auth")
|
210
|
+
assert(!auth.empty?)
|
211
|
+
assert_equal(auth.values.first, Base64.encode64("foo:bar").chomp)
|
212
|
+
end
|
213
|
+
|
214
|
+
it "does not log basic auth from solr.url" do
|
215
|
+
string_io = StringIO.new
|
216
|
+
settings = {
|
217
|
+
"solr.url" => "http://secret_username:secret_password@example.com/solr/foo",
|
218
|
+
"logger" => Logger.new(string_io)
|
219
|
+
}
|
220
|
+
|
221
|
+
|
222
|
+
writer = Traject::SolrJsonWriter.new(settings)
|
223
|
+
|
224
|
+
refute_includes string_io.string, "secret_username:secret_password"
|
225
|
+
assert_includes string_io.string, "(with HTTP basic auth)"
|
226
|
+
end
|
192
227
|
end
|
193
228
|
|
194
229
|
describe "commit" do
|
@@ -0,0 +1 @@
|
|
1
|
+
01351nem a2200313 a 4500001001100000001001100011008004100022034001300063035002200076043003000098080001200128100006100140245011600201255002900317260005900346300003500405500004500440500013600485500003800621500005000659530004400709651006000753700003800813710010200851730000900953856005300962907001401015940000801029.b20028118.b6928510x170714q1678 fr |||| | |||| ||fre|c1 aab10000 a(OCoLC)1120596466 ae-sp---be-spcce2catmarc a(084.3)1 aBeaulieu, Sébastien de Pontault,csieur de,d1613-1674.10aPlan de la ville de Puiçerdah[Document cartogràfic] :bpris en 1678 /c[Beaulieu] ; DR f. [Des Roches fecit] aEscala [1:10 000 aprox.] a[A Paris :bpar le Chevalier de Beaulieu,cpost. 1678] a1 mapa :bgravat;c28 x 32 cm. aEscala gràfica: Eschelle de 150 toises. aPertany a l'obra "Les plans et profils des principales villes et lieux considerables de la Principauté de Catalogne", de Beaulieu. aPeu d'impremta de l'obra general. aMapa emmarcat en una orla amb motius florals. aTambé disponible la versió en línia. 4aPuigcerdà (Catalunya)xMapesxObres anteriors al 1800.1 aDes Roches, Jean Baptiste Hamont.2 aCol·lecció de mapes antics de Martí Gelabertó (Universitat Autònoma de Barcelona)5ES-BaUAB.0 aDDD.41zAccés lliureuhttps://ddd.uab.cat/record/180313 ab20028118 aUAB
|
data/traject.gemspec
CHANGED
@@ -24,12 +24,12 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_dependency "concurrent-ruby", ">= 0.8.0"
|
25
25
|
spec.add_dependency "marc", "~> 1.0"
|
26
26
|
|
27
|
-
spec.add_dependency "hashie", ">= 3.1", "<
|
28
|
-
spec.add_dependency "slop", "
|
27
|
+
spec.add_dependency "hashie", ">= 3.1", "< 6" # used for Indexer#settings
|
28
|
+
spec.add_dependency "slop", "~> 4.0" # command line parsing
|
29
29
|
spec.add_dependency "yell" # logging
|
30
30
|
spec.add_dependency "dot-properties", ">= 0.1.1" # reading java style .properties
|
31
31
|
spec.add_dependency "httpclient", "~> 2.5"
|
32
|
-
spec.add_dependency "http", ">= 3.0", "<
|
32
|
+
spec.add_dependency "http", ">= 3.0", "< 6" # used in oai_pmh_reader, may use more extensively in future instead of httpclient
|
33
33
|
spec.add_dependency 'marc-fastxmlwriter', '~>1.0' # fast marc->xml
|
34
34
|
spec.add_dependency "nokogiri", "~> 1.9" # NokogiriIndexer
|
35
35
|
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Rochkind
|
8
8
|
- Bill Dueber
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2022-02-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: concurrent-ruby
|
@@ -48,7 +48,7 @@ dependencies:
|
|
48
48
|
version: '3.1'
|
49
49
|
- - "<"
|
50
50
|
- !ruby/object:Gem::Version
|
51
|
-
version: '
|
51
|
+
version: '6'
|
52
52
|
type: :runtime
|
53
53
|
prerelease: false
|
54
54
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -58,25 +58,19 @@ dependencies:
|
|
58
58
|
version: '3.1'
|
59
59
|
- - "<"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '6'
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: slop
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: 3.4.5
|
69
|
-
- - "<"
|
66
|
+
- - "~>"
|
70
67
|
- !ruby/object:Gem::Version
|
71
68
|
version: '4.0'
|
72
69
|
type: :runtime
|
73
70
|
prerelease: false
|
74
71
|
version_requirements: !ruby/object:Gem::Requirement
|
75
72
|
requirements:
|
76
|
-
- - "
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
version: 3.4.5
|
79
|
-
- - "<"
|
73
|
+
- - "~>"
|
80
74
|
- !ruby/object:Gem::Version
|
81
75
|
version: '4.0'
|
82
76
|
- !ruby/object:Gem::Dependency
|
@@ -130,7 +124,7 @@ dependencies:
|
|
130
124
|
version: '3.0'
|
131
125
|
- - "<"
|
132
126
|
- !ruby/object:Gem::Version
|
133
|
-
version: '
|
127
|
+
version: '6'
|
134
128
|
type: :runtime
|
135
129
|
prerelease: false
|
136
130
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -140,7 +134,7 @@ dependencies:
|
|
140
134
|
version: '3.0'
|
141
135
|
- - "<"
|
142
136
|
- !ruby/object:Gem::Version
|
143
|
-
version: '
|
137
|
+
version: '6'
|
144
138
|
- !ruby/object:Gem::Dependency
|
145
139
|
name: marc-fastxmlwriter
|
146
140
|
requirement: !ruby/object:Gem::Requirement
|
@@ -231,7 +225,7 @@ dependencies:
|
|
231
225
|
- - "~>"
|
232
226
|
- !ruby/object:Gem::Version
|
233
227
|
version: '3.4'
|
234
|
-
description:
|
228
|
+
description:
|
235
229
|
email:
|
236
230
|
- none@nowhere.org
|
237
231
|
executables:
|
@@ -246,8 +240,8 @@ extra_rdoc_files:
|
|
246
240
|
- doc/settings.md
|
247
241
|
- doc/xml.md
|
248
242
|
files:
|
243
|
+
- ".github/workflows/ruby.yml"
|
249
244
|
- ".gitignore"
|
250
|
-
- ".travis.yml"
|
251
245
|
- ".yardopts"
|
252
246
|
- CHANGES.md
|
253
247
|
- Gemfile
|
@@ -263,6 +257,7 @@ files:
|
|
263
257
|
- doc/programmatic_use.md
|
264
258
|
- doc/settings.md
|
265
259
|
- doc/xml.md
|
260
|
+
- examples/marc/tiny.xml
|
266
261
|
- lib/tasks/load_maps.rake
|
267
262
|
- lib/traject.rb
|
268
263
|
- lib/traject/array_writer.rb
|
@@ -307,6 +302,7 @@ files:
|
|
307
302
|
- lib/translation_maps/marc_geographic.yaml
|
308
303
|
- lib/translation_maps/marc_instruments.yaml
|
309
304
|
- lib/translation_maps/marc_languages.yaml
|
305
|
+
- test/command_line_test.rb
|
310
306
|
- test/debug_writer_test.rb
|
311
307
|
- test/delimited_writer_test.rb
|
312
308
|
- test/experimental_nokogiri_streaming_reader_test.rb
|
@@ -354,6 +350,7 @@ files:
|
|
354
350
|
- test/test_support/manufacturing_consent.marc
|
355
351
|
- test/test_support/manuscript_online_thesis.marc
|
356
352
|
- test/test_support/microform_online_conference.marc
|
353
|
+
- test/test_support/missing-second-date.marc
|
357
354
|
- test/test_support/multi_era.marc
|
358
355
|
- test/test_support/multi_geo.marc
|
359
356
|
- test/test_support/musical_cage.marc
|
@@ -390,7 +387,7 @@ homepage: http://github.com/traject/traject
|
|
390
387
|
licenses:
|
391
388
|
- MIT
|
392
389
|
metadata: {}
|
393
|
-
post_install_message:
|
390
|
+
post_install_message:
|
394
391
|
rdoc_options: []
|
395
392
|
require_paths:
|
396
393
|
- lib
|
@@ -405,13 +402,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
405
402
|
- !ruby/object:Gem::Version
|
406
403
|
version: '0'
|
407
404
|
requirements: []
|
408
|
-
|
409
|
-
|
410
|
-
signing_key:
|
405
|
+
rubygems_version: 3.1.6
|
406
|
+
signing_key:
|
411
407
|
specification_version: 4
|
412
408
|
summary: An easy to use, high-performance, flexible and extensible metadata transformation
|
413
409
|
system, focused on library-archives-museums input, and indexing to Solr as output.
|
414
410
|
test_files:
|
411
|
+
- test/command_line_test.rb
|
415
412
|
- test/debug_writer_test.rb
|
416
413
|
- test/delimited_writer_test.rb
|
417
414
|
- test/experimental_nokogiri_streaming_reader_test.rb
|
@@ -459,6 +456,7 @@ test_files:
|
|
459
456
|
- test/test_support/manufacturing_consent.marc
|
460
457
|
- test/test_support/manuscript_online_thesis.marc
|
461
458
|
- test/test_support/microform_online_conference.marc
|
459
|
+
- test/test_support/missing-second-date.marc
|
462
460
|
- test/test_support/multi_era.marc
|
463
461
|
- test/test_support/multi_geo.marc
|
464
462
|
- test/test_support/musical_cage.marc
|
data/.travis.yml
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
language: ruby
|
2
|
-
cache: bundler
|
3
|
-
# we don't really need `sudo: true`, but for some reason travis docker-based systems are unreliable
|
4
|
-
# at downloading jruby, and
|
5
|
-
sudo: true
|
6
|
-
rvm:
|
7
|
-
- 2.4.4
|
8
|
-
- 2.5.1
|
9
|
-
- 2.6.1
|
10
|
-
# avoid having travis install jdk on MRI builds where we don't need it.
|
11
|
-
matrix:
|
12
|
-
include:
|
13
|
-
- jdk: openjdk8
|
14
|
-
rvm: jruby-9.1.17.0
|
15
|
-
- jdk: openjdk8
|
16
|
-
rvm: jruby-9.2.6.0
|