traject 3.6.0 → 3.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGES.md +5 -1
- data/README.md +5 -0
- data/lib/traject/macros/marc21_semantics.rb +7 -3
- data/lib/traject/macros/transformation.rb +30 -0
- data/lib/traject/version.rb +1 -1
- data/test/indexer/macros/macros_marc21_semantics_test.rb +4 -0
- data/test/indexer/macros/transformation_test.rb +110 -0
- data/test/test_support/missing-second-date.marc +1 -0
- data/traject.gemspec +1 -1
- metadata +7 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7ffc677e0ebb13e01b852a1d59ddfdd3cd9906142520e0c296f69ebb0eeb7429
|
4
|
+
data.tar.gz: 61b0e966f6ecd4d27e757e4cfc1057c72ac6deca5ad119c78ff883c246744814
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8240b450b27df011c2ff998c24c612f44bdd21a2fde5fbab996ffe509f3fc45cec7a8a8947e385a35d06f5bd8ed19732287e8f2d3dab682cc6295f7320f8dfab
|
7
|
+
data.tar.gz: f5dbcb44edb8d37a4e74cd1255aa1b05b638913337577c92d4fa276150c023b9e29823cc2b20ef050b33879ec63d76194d0202697d7c858a7aacd3d08241dcce
|
data/CHANGES.md
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
# Changes
|
2
2
|
|
3
|
-
##
|
3
|
+
## NEXT
|
4
4
|
|
5
5
|
*
|
6
6
|
|
7
7
|
*
|
8
8
|
|
9
|
+
## 3.7.0
|
10
|
+
|
11
|
+
* Add two new transformation macros, `Traject::Macros::Transformation.delete_if` and `Traject::Macros::Transformation.select`.
|
12
|
+
|
9
13
|
## 3.6.0
|
10
14
|
|
11
15
|
* Tiny backward compat changes for ruby 3.0 compat. https://github.com/traject/traject/pull/263
|
data/README.md
CHANGED
@@ -177,6 +177,11 @@ TranslationMap use above is just one example of a transformation macro, that tra
|
|
177
177
|
* `split(" ")`: take values and split them, possibly result in multiple values.
|
178
178
|
* `transform(proc)`: transform each existing macro using a proc, kind of like `map`.
|
179
179
|
eg `to_field "something", extract_xml("//author"), transform( ->(author) { "#{author.last}, #{author.first}" })`
|
180
|
+
* `delete_if(["a", "b"])`: remove a value from accumulated values if it is included in the passed-in argument.
|
181
|
+
* Can also take a string, proc or regex as an argument. See [tests](test/indexer/macros/transformation_test.rb) for full functionality.
|
182
|
+
* `select(proc)`: selects (keeps) a value from accumulated values if proc evaluates to true for that specific value.
|
183
|
+
* Can also take an array, set or regex as an argument. See [tests](test/indexer/macros/transformation_test.rb) for full functionality.
|
184
|
+
|
180
185
|
|
181
186
|
You can add on as many transformation macros as you want, they will be applied to output in order.
|
182
187
|
|
@@ -327,10 +327,14 @@ module Traject::Macros
|
|
327
327
|
if field008 && field008.length >= 11
|
328
328
|
date_type = field008.slice(6)
|
329
329
|
date1_str = field008.slice(7,4)
|
330
|
-
|
330
|
+
if field008.length > 15
|
331
|
+
date2_str = field008.slice(11, 4)
|
332
|
+
else
|
333
|
+
date2_str = date1_str
|
334
|
+
end
|
331
335
|
|
332
|
-
# for date_type q=questionable, we have a range.
|
333
|
-
if
|
336
|
+
# for date_type q=questionable, we expect to have a range.
|
337
|
+
if date_type == 'q' and date1_str != date2_str
|
334
338
|
# make unknown digits at the beginning or end of range,
|
335
339
|
date1 = date1_str.sub("u", "0").to_i
|
336
340
|
date2 = date2_str.sub("u", "9").to_i
|
@@ -157,6 +157,36 @@ module Traject
|
|
157
157
|
acc.collect! { |v| v.gsub(pattern, replace) }
|
158
158
|
end
|
159
159
|
end
|
160
|
+
|
161
|
+
# Run ruby `delete_if` on the accumulator, removing values that are included in arg (when arg responds to `include?`) or that arg matches via `===`.
|
162
|
+
# It will also accept an array, set, regex pattern, proc or lambda as an argument.
|
163
|
+
#
|
164
|
+
# @example
|
165
|
+
# to_field "creator_facet", extract_marc("100abcdq"), delete_if(/foo/)
|
166
|
+
def delete_if(arg)
|
167
|
+
p = if arg.respond_to? :include?
|
168
|
+
proc { |v| arg.include?(v) }
|
169
|
+
else
|
170
|
+
proc { |v| arg === v }
|
171
|
+
end
|
172
|
+
|
173
|
+
->(_, acc) { acc.delete_if(&p) }
|
174
|
+
end
|
175
|
+
|
176
|
+
# Run ruby `select!` on the accumulator, keeping only values that are included in arg (when arg responds to `include?`) or that arg matches via `===`.
|
177
|
+
# It accepts an array, set, regex pattern, proc or lambda as an argument.
|
178
|
+
#
|
179
|
+
# @example
|
180
|
+
# to_field "creator_facet", extract_marc("100abcdq"), select(->(v) { v != "foo" })
|
181
|
+
def select(arg)
|
182
|
+
p = if arg.respond_to? :include?
|
183
|
+
proc { |v| arg.include?(v) }
|
184
|
+
else
|
185
|
+
proc { |v| arg === v }
|
186
|
+
end
|
187
|
+
|
188
|
+
->(_, acc) { acc.select!(&p) }
|
189
|
+
end
|
160
190
|
end
|
161
191
|
end
|
162
192
|
end
|
data/lib/traject/version.rb
CHANGED
@@ -209,6 +209,10 @@ describe "Traject::Macros::Marc21Semantics" do
|
|
209
209
|
@record = MARC::Reader.new(support_file_path "date_type_r_missing_date2.marc").to_a.first
|
210
210
|
assert_equal 1957, Marc21Semantics.publication_date(@record)
|
211
211
|
end
|
212
|
+
it "provides a fallback for a missing second date" do
|
213
|
+
@record = MARC::Reader.new(support_file_path "missing-second-date.marc").to_a.first
|
214
|
+
assert_equal 1678, Marc21Semantics.publication_date(@record)
|
215
|
+
end
|
212
216
|
|
213
217
|
it "works correctly with date type 'q'" do
|
214
218
|
val = @record['008'].value
|
@@ -174,4 +174,114 @@ describe "Traject::Macros::Transformation" do
|
|
174
174
|
end
|
175
175
|
end
|
176
176
|
|
177
|
+
describe "delete_if" do
|
178
|
+
|
179
|
+
describe "argument is an Array" do
|
180
|
+
it "filters out selected values from accumulatd values" do
|
181
|
+
arg = [ "one", "three"]
|
182
|
+
|
183
|
+
@indexer.configure do
|
184
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
185
|
+
end
|
186
|
+
|
187
|
+
output = @indexer.map_record(@record)
|
188
|
+
assert_equal ["two"], output["test"]
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
describe "argument is a Set" do
|
193
|
+
it "filters out selected values from accumulatd values" do
|
194
|
+
arg = [ "one", "three"].to_set
|
195
|
+
|
196
|
+
@indexer.configure do
|
197
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
198
|
+
end
|
199
|
+
|
200
|
+
output = @indexer.map_record(@record)
|
201
|
+
assert_equal ["two"], output["test"]
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
describe "argument is a Regex" do
|
206
|
+
it "filters out selected values from accumulatd values" do
|
207
|
+
arg = /^t/
|
208
|
+
|
209
|
+
@indexer.configure do
|
210
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
211
|
+
end
|
212
|
+
|
213
|
+
output = @indexer.map_record(@record)
|
214
|
+
assert_equal ["one"], output["test"]
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
describe "argument is a Procedure or Lambda" do
|
219
|
+
it "filters out selected values from accumulatd values" do
|
220
|
+
arg = ->(v) { v == "one" }
|
221
|
+
|
222
|
+
@indexer.configure do
|
223
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
224
|
+
end
|
225
|
+
|
226
|
+
output = @indexer.map_record(@record)
|
227
|
+
assert_equal ["two", "three"], output["test"]
|
228
|
+
end
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
232
|
+
describe "select" do
|
233
|
+
|
234
|
+
describe "argument is an Array" do
|
235
|
+
it "selects a subset of values from accumulatd values" do
|
236
|
+
arg = [ "one", "three", "four"]
|
237
|
+
|
238
|
+
@indexer.configure do
|
239
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
240
|
+
end
|
241
|
+
|
242
|
+
output = @indexer.map_record(@record)
|
243
|
+
assert_equal ["one", "three"], output["test"]
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
describe "argument is a Set" do
|
248
|
+
it "selects a subset of values from accumulatd values" do
|
249
|
+
arg = [ "one", "three", "four"].to_set
|
250
|
+
|
251
|
+
@indexer.configure do
|
252
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
253
|
+
end
|
254
|
+
|
255
|
+
output = @indexer.map_record(@record)
|
256
|
+
assert_equal ["one", "three"], output["test"]
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
describe "argument is a Regex" do
|
261
|
+
it "selects a subset of values from accumulatd values" do
|
262
|
+
arg = /^t/
|
263
|
+
|
264
|
+
@indexer.configure do
|
265
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
266
|
+
end
|
267
|
+
|
268
|
+
output = @indexer.map_record(@record)
|
269
|
+
assert_equal ["two", "three"], output["test"]
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
describe "argument is a Procedure or Lambda" do
|
274
|
+
it "selects a subset of values from accumulatd values" do
|
275
|
+
arg = ->(v) { v != "one" }
|
276
|
+
|
277
|
+
@indexer.configure do
|
278
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
279
|
+
end
|
280
|
+
|
281
|
+
output = @indexer.map_record(@record)
|
282
|
+
assert_equal ["two", "three"], output["test"]
|
283
|
+
end
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
177
287
|
end
|
@@ -0,0 +1 @@
|
|
1
|
+
01351nem a2200313 a 4500001001100000001001100011008004100022034001300063035002200076043003000098080001200128100006100140245011600201255002900317260005900346300003500405500004500440500013600485500003800621500005000659530004400709651006000753700003800813710010200851730000900953856005300962907001401015940000801029.b20028118.b6928510x170714q1678 fr |||| | |||| ||fre|c1 aab10000 a(OCoLC)1120596466 ae-sp---be-spcce2catmarc a(084.3)1 aBeaulieu, Sébastien de Pontault,csieur de,d1613-1674.10aPlan de la ville de Puiçerdah[Document cartogràfic] :bpris en 1678 /c[Beaulieu] ; DR f. [Des Roches fecit] aEscala [1:10 000 aprox.] a[A Paris :bpar le Chevalier de Beaulieu,cpost. 1678] a1 mapa :bgravat;c28 x 32 cm. aEscala gràfica: Eschelle de 150 toises. aPertany a l'obra "Les plans et profils des principales villes et lieux considerables de la Principauté de Catalogne", de Beaulieu. aPeu d'impremta de l'obra general. aMapa emmarcat en una orla amb motius florals. aTambé disponible la versió en línia. 4aPuigcerdà (Catalunya)xMapesxObres anteriors al 1800.1 aDes Roches, Jean Baptiste Hamont.2 aCol·lecció de mapes antics de Martí Gelabertó (Universitat Autònoma de Barcelona)5ES-BaUAB.0 aDDD.41zAccés lliureuhttps://ddd.uab.cat/record/180313 ab20028118 aUAB
|
data/traject.gemspec
CHANGED
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_dependency "concurrent-ruby", ">= 0.8.0"
|
25
25
|
spec.add_dependency "marc", "~> 1.0"
|
26
26
|
|
27
|
-
spec.add_dependency "hashie", ">= 3.1", "<
|
27
|
+
spec.add_dependency "hashie", ">= 3.1", "< 6" # used for Indexer#settings
|
28
28
|
spec.add_dependency "slop", "~> 4.0" # command line parsing
|
29
29
|
spec.add_dependency "yell" # logging
|
30
30
|
spec.add_dependency "dot-properties", ">= 0.1.1" # reading java style .properties
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Rochkind
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2022-02-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: concurrent-ruby
|
@@ -48,7 +48,7 @@ dependencies:
|
|
48
48
|
version: '3.1'
|
49
49
|
- - "<"
|
50
50
|
- !ruby/object:Gem::Version
|
51
|
-
version: '
|
51
|
+
version: '6'
|
52
52
|
type: :runtime
|
53
53
|
prerelease: false
|
54
54
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -58,7 +58,7 @@ dependencies:
|
|
58
58
|
version: '3.1'
|
59
59
|
- - "<"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '6'
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: slop
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -350,6 +350,7 @@ files:
|
|
350
350
|
- test/test_support/manufacturing_consent.marc
|
351
351
|
- test/test_support/manuscript_online_thesis.marc
|
352
352
|
- test/test_support/microform_online_conference.marc
|
353
|
+
- test/test_support/missing-second-date.marc
|
353
354
|
- test/test_support/multi_era.marc
|
354
355
|
- test/test_support/multi_geo.marc
|
355
356
|
- test/test_support/musical_cage.marc
|
@@ -401,7 +402,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
401
402
|
- !ruby/object:Gem::Version
|
402
403
|
version: '0'
|
403
404
|
requirements: []
|
404
|
-
rubygems_version: 3.
|
405
|
+
rubygems_version: 3.1.6
|
405
406
|
signing_key:
|
406
407
|
specification_version: 4
|
407
408
|
summary: An easy to use, high-performance, flexible and extensible metadata transformation
|
@@ -455,6 +456,7 @@ test_files:
|
|
455
456
|
- test/test_support/manufacturing_consent.marc
|
456
457
|
- test/test_support/manuscript_online_thesis.marc
|
457
458
|
- test/test_support/microform_online_conference.marc
|
459
|
+
- test/test_support/missing-second-date.marc
|
458
460
|
- test/test_support/multi_era.marc
|
459
461
|
- test/test_support/multi_geo.marc
|
460
462
|
- test/test_support/musical_cage.marc
|