traject 3.6.0 → 3.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2e47e6648ed9fc963d18e10c9be48a30273147c4920cb4b7e448d078fd2398ac
4
- data.tar.gz: efa549ebcbd87e599b56b955b4bd26422dfe7de67697aed6b39cb421c3b80677
3
+ metadata.gz: 7ffc677e0ebb13e01b852a1d59ddfdd3cd9906142520e0c296f69ebb0eeb7429
4
+ data.tar.gz: 61b0e966f6ecd4d27e757e4cfc1057c72ac6deca5ad119c78ff883c246744814
5
5
  SHA512:
6
- metadata.gz: 6acdd2b8cfc888b221a1f19cd5197127006be81d0525169d531fc9bf43fe02cc9ec87401e6b2442c57ff0cd483d9884504ac75be92e3718cbbc49208dc97024f
7
- data.tar.gz: 30abefa7af9e1c170ae8570aa59b6c571a9acc1eb7b0abf6efd64d97550b678c21c72d76a8156ef3844ab01154111fd3747f96d6346ee8a8d76e747b2cf92e1f
6
+ metadata.gz: 8240b450b27df011c2ff998c24c612f44bdd21a2fde5fbab996ffe509f3fc45cec7a8a8947e385a35d06f5bd8ed19732287e8f2d3dab682cc6295f7320f8dfab
7
+ data.tar.gz: f5dbcb44edb8d37a4e74cd1255aa1b05b638913337577c92d4fa276150c023b9e29823cc2b20ef050b33879ec63d76194d0202697d7c858a7aacd3d08241dcce
data/CHANGES.md CHANGED
@@ -1,11 +1,15 @@
1
1
  # Changes
2
2
 
3
- ## Next
3
+ ## NEXT
4
4
 
5
5
  *
6
6
 
7
7
  *
8
8
 
9
+ ## 3.7.0
10
+
11
+ * Add two new transformation macros, `Traject::Macros::Transformation.delete_if` and `Traject::Macros::Transformation.select`.
12
+
9
13
  ## 3.6.0
10
14
 
11
15
  * Tiny backward compat changes for ruby 3.0 compat. https://github.com/traject/traject/pull/263
data/README.md CHANGED
@@ -177,6 +177,11 @@ TranslationMap use above is just one example of a transformation macro, that tra
177
177
  * `split(" ")`: take values and split them, possibly result in multiple values.
178
178
  * `transform(proc)`: transform each existing macro using a proc, kind of like `map`.
179
179
  eg `to_field "something", extract_xml("//author"), transform( ->(author) { "#{author.last}, #{author.first}" })
180
+ * `delete_if(["a", "b"])`: remove a value from accumulated values if it is included in the passed-in argument.
181
+ * Can also take a string, proc or regex as an argument. See [tests](test/indexer/macros/transformation_test.rb) for full functionality.
182
+ * `select(proc)`: selects (keeps) values from accumulated values if the proc evaluates to true for the specific value.
183
+ * Can also take an array, set or regex as an argument. See [tests](test/indexer/macros/transformation_test.rb) for full functionality.
184
+
180
185
 
181
186
  You can add on as many transformation macros as you want, they will be applied to output in order.
182
187
 
@@ -327,10 +327,14 @@ module Traject::Macros
327
327
  if field008 && field008.length >= 11
328
328
  date_type = field008.slice(6)
329
329
  date1_str = field008.slice(7,4)
330
- date2_str = field008.slice(11, 4) if field008.length > 15
330
+ if field008.length > 15
331
+ date2_str = field008.slice(11, 4)
332
+ else
333
+ date2_str = date1_str
334
+ end
331
335
 
332
- # for date_type q=questionable, we have a range.
333
- if (date_type == 'q')
336
+ # for date_type q=questionable, we expect to have a range.
337
+ if date_type == 'q' and date1_str != date2_str
334
338
  # make unknown digits at the beginning or end of range,
335
339
  date1 = date1_str.sub("u", "0").to_i
336
340
  date2 = date2_str.sub("u", "9").to_i
@@ -157,6 +157,36 @@ module Traject
157
157
  acc.collect! { |v| v.gsub(pattern, replace) }
158
158
  end
159
159
  end
160
+
161
+ # Run ruby `delete_if` on the accumulator for values that include or are equal to arg.
162
+ # It will also accept an array, set, regex pattern, proc or lambda as an argument.
163
+ #
164
+ # @example
165
+ # to_field "creator_facet", extract_marc("100abcdq"), delete_if(/foo/)
166
+ def delete_if(arg)
167
+ p = if arg.respond_to? :include?
168
+ proc { |v| arg.include?(v) }
169
+ else
170
+ proc { |v| arg === v }
171
+ end
172
+
173
+ ->(_, acc) { acc.delete_if(&p) }
174
+ end
175
+
176
+ # Run ruby `select!` on the accumulator for values that include or are equal to arg.
177
+ # It accepts an array, set, regex pattern, proc or lambda as an argument.
178
+ #
179
+ # @example
180
+ # to_field "creator_facet", extract_marc("100abcdq"), select(->(v) { v != "foo" })
181
+ def select(arg)
182
+ p = if arg.respond_to? :include?
183
+ proc { |v| arg.include?(v) }
184
+ else
185
+ proc { |v| arg === v }
186
+ end
187
+
188
+ ->(_, acc) { acc.select!(&p) }
189
+ end
160
190
  end
161
191
  end
162
192
  end
@@ -1,3 +1,3 @@
1
1
  module Traject
2
- VERSION = "3.6.0"
2
+ VERSION = "3.7.0"
3
3
  end
@@ -209,6 +209,10 @@ describe "Traject::Macros::Marc21Semantics" do
209
209
  @record = MARC::Reader.new(support_file_path "date_type_r_missing_date2.marc").to_a.first
210
210
  assert_equal 1957, Marc21Semantics.publication_date(@record)
211
211
  end
212
+ it "provides a fallback for a missing second date" do
213
+ @record = MARC::Reader.new(support_file_path "missing-second-date.marc").to_a.first
214
+ assert_equal 1678, Marc21Semantics.publication_date(@record)
215
+ end
212
216
 
213
217
  it "works correctly with date type 'q'" do
214
218
  val = @record['008'].value
@@ -174,4 +174,114 @@ describe "Traject::Macros::Transformation" do
174
174
  end
175
175
  end
176
176
 
177
+ describe "delete_if" do
178
+
179
+ describe "argument is an Array" do
180
+ it "filters out selected values from accumulatd values" do
181
+ arg = [ "one", "three"]
182
+
183
+ @indexer.configure do
184
+ to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
185
+ end
186
+
187
+ output = @indexer.map_record(@record)
188
+ assert_equal ["two"], output["test"]
189
+ end
190
+ end
191
+
192
+ describe "argument is a Set" do
193
+ it "filters out selected values from accumulatd values" do
194
+ arg = [ "one", "three"].to_set
195
+
196
+ @indexer.configure do
197
+ to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
198
+ end
199
+
200
+ output = @indexer.map_record(@record)
201
+ assert_equal ["two"], output["test"]
202
+ end
203
+ end
204
+
205
+ describe "argument is a Regex" do
206
+ it "filters out selected values from accumulatd values" do
207
+ arg = /^t/
208
+
209
+ @indexer.configure do
210
+ to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
211
+ end
212
+
213
+ output = @indexer.map_record(@record)
214
+ assert_equal ["one"], output["test"]
215
+ end
216
+ end
217
+
218
+ describe "argument is a Procedure or Lambda" do
219
+ it "filters out selected values from accumulatd values" do
220
+ arg = ->(v) { v == "one" }
221
+
222
+ @indexer.configure do
223
+ to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
224
+ end
225
+
226
+ output = @indexer.map_record(@record)
227
+ assert_equal ["two", "three"], output["test"]
228
+ end
229
+ end
230
+ end
231
+
232
+ describe "select" do
233
+
234
+ describe "argument is an Array" do
235
+ it "selects a subset of values from accumulatd values" do
236
+ arg = [ "one", "three", "four"]
237
+
238
+ @indexer.configure do
239
+ to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
240
+ end
241
+
242
+ output = @indexer.map_record(@record)
243
+ assert_equal ["one", "three"], output["test"]
244
+ end
245
+ end
246
+
247
+ describe "argument is a Set" do
248
+ it "selects a subset of values from accumulatd values" do
249
+ arg = [ "one", "three", "four"].to_set
250
+
251
+ @indexer.configure do
252
+ to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
253
+ end
254
+
255
+ output = @indexer.map_record(@record)
256
+ assert_equal ["one", "three"], output["test"]
257
+ end
258
+ end
259
+
260
+ describe "argument is a Regex" do
261
+ it "selects a subset of values from accumulatd values" do
262
+ arg = /^t/
263
+
264
+ @indexer.configure do
265
+ to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
266
+ end
267
+
268
+ output = @indexer.map_record(@record)
269
+ assert_equal ["two", "three"], output["test"]
270
+ end
271
+ end
272
+
273
+ describe "argument is a Procedure or Lambda" do
274
+ it "selects a subset of values from accumulatd values" do
275
+ arg = ->(v) { v != "one" }
276
+
277
+ @indexer.configure do
278
+ to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
279
+ end
280
+
281
+ output = @indexer.map_record(@record)
282
+ assert_equal ["two", "three"], output["test"]
283
+ end
284
+ end
285
+ end
286
+
177
287
  end
@@ -0,0 +1 @@
1
+ 01351nem a2200313 a 4500001001100000001001100011008004100022034001300063035002200076043003000098080001200128100006100140245011600201255002900317260005900346300003500405500004500440500013600485500003800621500005000659530004400709651006000753700003800813710010200851730000900953856005300962907001401015940000801029.b20028118.b6928510x170714q1678 fr |||| | |||| ||fre|c1 aab10000 a(OCoLC)1120596466 ae-sp---be-spcce2catmarc a(084.3)1 aBeaulieu, Sébastien de Pontault,csieur de,d1613-1674.10aPlan de la ville de Puiçerdah[Document cartogràfic] :bpris en 1678 /c[Beaulieu] ; DR f. [Des Roches fecit] aEscala [1:10 000 aprox.] a[A Paris :bpar le Chevalier de Beaulieu,cpost. 1678] a1 mapa :bgravat;c28 x 32 cm. aEscala gràfica: Eschelle de 150 toises. aPertany a l'obra "Les plans et profils des principales villes et lieux considerables de la Principauté de Catalogne", de Beaulieu. aPeu d'impremta de l'obra general. aMapa emmarcat en una orla amb motius florals. aTambé disponible la versió en línia. 4aPuigcerdà (Catalunya)xMapesxObres anteriors al 1800.1 aDes Roches, Jean Baptiste Hamont.2 aCol·lecció de mapes antics de Martí Gelabertó (Universitat Autònoma de Barcelona)5ES-BaUAB.0 aDDD.41zAccés lliureuhttps://ddd.uab.cat/record/180313 ab20028118 aUAB
data/traject.gemspec CHANGED
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency "concurrent-ruby", ">= 0.8.0"
25
25
  spec.add_dependency "marc", "~> 1.0"
26
26
 
27
- spec.add_dependency "hashie", ">= 3.1", "< 5" # used for Indexer#settings
27
+ spec.add_dependency "hashie", ">= 3.1", "< 6" # used for Indexer#settings
28
28
  spec.add_dependency "slop", "~> 4.0" # command line parsing
29
29
  spec.add_dependency "yell" # logging
30
30
  spec.add_dependency "dot-properties", ">= 0.1.1" # reading java style .properties
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.6.0
4
+ version: 3.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Rochkind
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-06-21 00:00:00.000000000 Z
12
+ date: 2022-02-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: concurrent-ruby
@@ -48,7 +48,7 @@ dependencies:
48
48
  version: '3.1'
49
49
  - - "<"
50
50
  - !ruby/object:Gem::Version
51
- version: '5'
51
+ version: '6'
52
52
  type: :runtime
53
53
  prerelease: false
54
54
  version_requirements: !ruby/object:Gem::Requirement
@@ -58,7 +58,7 @@ dependencies:
58
58
  version: '3.1'
59
59
  - - "<"
60
60
  - !ruby/object:Gem::Version
61
- version: '5'
61
+ version: '6'
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: slop
64
64
  requirement: !ruby/object:Gem::Requirement
@@ -350,6 +350,7 @@ files:
350
350
  - test/test_support/manufacturing_consent.marc
351
351
  - test/test_support/manuscript_online_thesis.marc
352
352
  - test/test_support/microform_online_conference.marc
353
+ - test/test_support/missing-second-date.marc
353
354
  - test/test_support/multi_era.marc
354
355
  - test/test_support/multi_geo.marc
355
356
  - test/test_support/musical_cage.marc
@@ -401,7 +402,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
401
402
  - !ruby/object:Gem::Version
402
403
  version: '0'
403
404
  requirements: []
404
- rubygems_version: 3.0.3
405
+ rubygems_version: 3.1.6
405
406
  signing_key:
406
407
  specification_version: 4
407
408
  summary: An easy to use, high-performance, flexible and extensible metadata transformation
@@ -455,6 +456,7 @@ test_files:
455
456
  - test/test_support/manufacturing_consent.marc
456
457
  - test/test_support/manuscript_online_thesis.marc
457
458
  - test/test_support/microform_online_conference.marc
459
+ - test/test_support/missing-second-date.marc
458
460
  - test/test_support/multi_era.marc
459
461
  - test/test_support/multi_geo.marc
460
462
  - test/test_support/musical_cage.marc