traject 3.2.0 → 3.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9a69bdb470c759d08d117f910ec1c6b16cbe93dbb3e955b653a07e3c03efec68
4
- data.tar.gz: 68016ddf17fe29348248e5f26ff5a813c1485c37a0ac014faa69803be182f7b4
3
+ metadata.gz: 1700077d5c2d3c667fc9520b659c3ca986b8ab34aee233f62bd7f73fdef91977
4
+ data.tar.gz: 736b217f209ed08faba9c1d20c006b29586aa3ebdf088a89e37f5f3b7400de06
5
5
  SHA512:
6
- metadata.gz: 8921a5d2349025291f28c7d39ebec44c0c0ebdc11a489e1d4c22913c04c840cf40163c576b14432794f1d663b9b81eb1d777b87d38af2b429f4b6ac98982ad48
7
- data.tar.gz: 8bdb95f98baa11ee60c74a7a4bbab06c44fb5da466138d098b05e5ab3fddc28a1fe8926044be5350e5e0b8a91ccd2d6e2e2ecf78424cfc0430464dd0ccf79c66
6
+ metadata.gz: 21877d6cd5b03f7ffbbac316a6d58a3bc65b534cb7457e57d39ba470ad49d99c8677e5e6ede25c650bba5ac3f0b22f9b348ebabb36ac4047433eb8a76379ef1d
7
+ data.tar.gz: 4ec1938d2d7b60a61ebde4e9c4e763e511c2896788b56b38ad6f22615dffb57449e29c9ef40e261952e125b90f5fe491fa447b077c7dcb1c55f57d6ef603fd5b
data/CHANGES.md CHANGED
@@ -8,6 +8,16 @@
8
8
 
9
9
  *
10
10
 
11
+ ## 3.3.0
12
+
13
+ * `Traject::Macros::Marc21Semantics.publication_date` now gets date from 264 before 260. https://github.com/traject/traject/pull/233
14
+
15
+ * Allow hashie 4.x in gemspec https://github.com/traject/traject/pull/234
16
+
17
+ * Allow `http` gem 4.x versions. https://github.com/traject/traject/pull/236
18
+
19
+ * Can now call class-level Indexer.configure multiple times https://github.com/sciencehistory/scihist_digicoll/pull/525
20
+
11
21
  ## 3.2.0
12
22
 
13
23
  * NokogiriReader has a "nokogiri.strict_mode" setting. Set to true or string 'true' to ask Nokogiri to parse in strict mode, so it will immediately raise on ill-formed XML, instead of Nokogiri's default to do what it can with it. https://github.com/traject/traject/pull/226
@@ -190,7 +190,7 @@ class Traject::Indexer
190
190
  instance_eval(&block)
191
191
  end
192
192
 
193
- ## Class level configure block accepted too, and applied at instantiation
193
+ ## Class level configure block(s) accepted too, and applied at instantiation
194
194
  # before instance-level configuration.
195
195
  #
196
196
  # EXPERIMENTAL, implementation may change in ways that affect some uses.
@@ -199,8 +199,14 @@ class Traject::Indexer
199
199
  # Note that settings set by 'provide' in subclass can not really be overridden
200
200
  # by 'provide' in a next level subclass. Use self.default_settings instead, with
201
201
  # call to super.
202
+ #
203
+ # You can call this .configure multiple times; blocks are added to a list, and
204
+ # will be used to initialize an instance in order.
205
+ #
206
+ # The main downside of this workaround implementation is performance: even though
207
+ # defined at load-time on class level, blocks are all executed on every instantiation.
202
208
  def self.configure(&block)
203
- @class_configure_block = block
209
+ (@class_configure_blocks ||= []) << block
204
210
  end
205
211
 
206
212
  def self.apply_class_configure_block(instance)
@@ -208,8 +214,10 @@ class Traject::Indexer
208
214
  if self.superclass.respond_to?(:apply_class_configure_block)
209
215
  self.superclass.apply_class_configure_block(instance)
210
216
  end
211
- if @class_configure_block
212
- instance.configure(&@class_configure_block)
217
+ if @class_configure_blocks && !@class_configure_blocks.empty?
218
+ @class_configure_blocks.each do |block|
219
+ instance.configure(&block)
220
+ end
213
221
  end
214
222
  end
215
223
 
@@ -26,19 +26,19 @@ module Traject::Macros
26
26
  accumulator.concat list.uniq if list
27
27
  end
28
28
  end
29
-
29
+
30
30
  # If a num begins with a known OCLC prefix, return it without the prefix.
31
31
  # otherwise nil.
32
32
  #
33
- # Allow (OCoLC) and/or ocn/ocm/on
34
-
33
+ # Allow (OCoLC) and/or ocn/ocm/on
34
+
35
35
  OCLCPAT = /
36
36
  \A\s*
37
37
  (?:(?:\(OCoLC\)) |
38
38
  (?:\(OCoLC\))?(?:(?:ocm)|(?:ocn)|(?:on))
39
39
  )(\d+)
40
40
  /x
41
-
41
+
42
42
  def self.oclcnum_extract(num)
43
43
  if m = OCLCPAT.match(num)
44
44
  return m[1]
@@ -364,13 +364,16 @@ module Traject::Macros
364
364
  end
365
365
  end
366
366
  end
367
- # Okay, nothing from 008, try 260
367
+ # Okay, nothing from 008, first try 264, then try 260
368
368
  if found_date.nil?
369
+ v264c = MarcExtractor.cached("264c", :separator => nil).extract(record).first
369
370
  v260c = MarcExtractor.cached("260c", :separator => nil).extract(record).first
370
371
  # just try to take the first four digits out of there, we're not going to try
371
372
  # anything crazy.
372
- if m = /(\d{4})/.match(v260c)
373
+ if m = /(\d{4})/.match(v264c)
373
374
  found_date = m[1].to_i
375
+ elsif m = /(\d{4})/.match(v260c)
376
+ found_date = m[1].to_i
374
377
  end
375
378
  end
376
379
 
@@ -519,11 +522,11 @@ module Traject::Macros
519
522
 
520
523
  # Extracts LCSH-carrying fields, and formatting them
521
524
  # as a pre-coordinated LCSH string, for instance suitable for including
522
- # in a facet.
525
+ # in a facet.
523
526
  #
524
527
  # You can supply your own list of fields as a spec, but for significant
525
528
  # customization you probably just want to write your own method in
526
- # terms of the Marc21Semantics.assemble_lcsh method.
529
+ # terms of the Marc21Semantics.assemble_lcsh method.
527
530
  def marc_lcsh_formatted(options = {})
528
531
  spec = options[:spec] || "600:610:611:630:648:650:651:654:662"
529
532
  subd_separator = options[:subdivison_separator] || " — "
@@ -540,17 +543,17 @@ module Traject::Macros
540
543
  end
541
544
 
542
545
  # Takes a MARC::Field and formats it into a pre-coordinated LCSH string
543
- # with subdivision seperators in the right place.
546
+ # with subdivision seperators in the right place.
544
547
  #
545
548
  # For 600 fields especially, need to not just join with subdivision separator
546
549
  # to take account of $a$d$t -- for other fields, might be able to just
547
- # join subfields, not sure.
550
+ # join subfields, not sure.
548
551
  #
549
552
  # WILL strip trailing period from generated string, contrary to some LCSH practice.
550
553
  # Our data is inconsistent on whether it has period or not, this was
551
- # the easiest way to standardize.
554
+ # the easiest way to standardize.
552
555
  #
553
- # Default subdivision seperator is em-dash with spaces, set to '--' if you want.
556
+ # Default subdivision seperator is em-dash with spaces, set to '--' if you want.
554
557
  #
555
558
  # Cite: "Dash (-) that precedes a subdivision in an extended 600 subject heading
556
559
  # is not carried in the MARC record. It may be system generated as a display constant
@@ -115,9 +115,15 @@ module Traject
115
115
  # @return [HTTP::Client] from http.rb gem
116
116
  def http_client
117
117
  @http_client ||= begin
118
- # timeout setting on http.rb seems to be a mess.
119
- # https://github.com/httprb/http/issues/488
120
- client = HTTP.timeout(:global, write: timeout / 3, connect: timeout / 3, read: timeout / 3)
118
+ client = nil
119
+
120
+ if HTTP::VERSION.split(".").first.to_i > 3
121
+ client = HTTP.timeout(timeout)
122
+ else
123
+ # timeout settings on http.rb 3.x are a bit of a mess.
124
+ # https://github.com/httprb/http/issues/488
125
+ client = HTTP.timeout(:global, write: timeout / 3, connect: timeout / 3, read: timeout / 3)
126
+ end
121
127
 
122
128
  if settings["oai_pmh.try_gzip"]
123
129
  client = client.use(:auto_inflate).headers("accept-encoding" => "gzip;q=1.0, identity;q=0.5")
@@ -1,3 +1,3 @@
1
1
  module Traject
2
- VERSION = "3.2.0"
2
+ VERSION = "3.3.0"
3
3
  end
@@ -24,40 +24,40 @@ describe "Delimited/CSV Writers" do
24
24
 
25
25
  it "creates a dw with defaults" do
26
26
  dw = Traject::DelimitedWriter.new(@settings)
27
- dw.delimiter.must_equal "\t"
28
- dw.internal_delimiter.must_equal '|'
29
- dw.edelim.must_equal ' '
30
- dw.eidelim.must_equal '\\|'
27
+ assert_equal dw.delimiter, "\t"
28
+ assert_equal dw.internal_delimiter, '|'
29
+ assert_equal dw.edelim, ' '
30
+ assert_equal dw.eidelim, '\\|'
31
31
  end
32
32
 
33
33
  it "respects different delimiter" do
34
34
  @settings['delimited_writer.delimiter'] = '^'
35
35
  dw = Traject::DelimitedWriter.new(@settings)
36
- dw.delimiter.must_equal '^'
37
- dw.edelim.must_equal '\\^'
38
- dw.internal_delimiter.must_equal '|'
36
+ assert_equal dw.delimiter, '^'
37
+ assert_equal dw.edelim, '\\^'
38
+ assert_equal dw.internal_delimiter, '|'
39
39
  end
40
40
 
41
41
  it "outputs a header if asked to" do
42
42
  Traject::DelimitedWriter.new(@settings)
43
- @out.string.chomp.must_equal %w[four one two].join("\t")
43
+ assert_equal @out.string.chomp, %w[four one two].join("\t")
44
44
  end
45
45
 
46
46
  it "doesn't output a header if asked not to" do
47
47
  @settings['delimited_writer.header'] = 'false'
48
48
  Traject::DelimitedWriter.new(@settings)
49
- @out.string.must_be_empty
49
+ assert_empty @out.string
50
50
  end
51
51
 
52
52
  it "deals with multiple values" do
53
53
  dw = Traject::DelimitedWriter.new(@settings)
54
54
  dw.put @context
55
- @out.string.split("\n").last.must_equal ['four', 'one', 'two1|two2'].join(dw.delimiter)
55
+ assert_equal @out.string.split("\n").last, ['four', 'one', 'two1|two2'].join(dw.delimiter)
56
56
  end
57
57
 
58
58
  it "bails if delimited_writer.fields isn't set" do
59
59
  @settings.delete 'delimited_writer.fields'
60
- proc { Traject::DelimitedWriter.new(@settings) }.must_raise(ArgumentError)
60
+ assert_raises(ArgumentError) { Traject::DelimitedWriter.new(@settings) }
61
61
  end
62
62
 
63
63
  end
@@ -65,18 +65,18 @@ describe "Delimited/CSV Writers" do
65
65
  describe "Traject::CSVWriter" do
66
66
  it "unsets the delimiter" do
67
67
  cw = Traject::CSVWriter.new(@settings)
68
- cw.delimiter.must_be_nil
68
+ assert_nil cw.delimiter
69
69
  end
70
70
 
71
71
  it "writes the header" do
72
72
  Traject::CSVWriter.new(@settings)
73
- @out.string.chomp.must_equal 'four,one,two'
73
+ assert_equal @out.string.chomp, 'four,one,two'
74
74
  end
75
75
 
76
76
  it "uses the internal delimiter" do
77
77
  cw = Traject::CSVWriter.new(@settings)
78
78
  cw.put @context
79
- @out.string.split("\n").last.must_equal ['four', 'one', 'two1|two2'].join(',')
79
+ assert_equal @out.string.split("\n").last, ['four', 'one', 'two1|two2'].join(',')
80
80
  end
81
81
 
82
82
  it "produces complex output" do
@@ -97,8 +97,6 @@ describe "Delimited/CSV Writers" do
97
97
  traject_csvwriter_output = @out.string.split("\n").last.chomp
98
98
 
99
99
  assert_equal(csv_output, traject_csvwriter_output)
100
-
101
100
  end
102
-
103
101
  end
104
102
  end
@@ -25,6 +25,7 @@ describe "Class-level configuration of Indexer sub-class" do
25
25
  end
26
26
  end
27
27
 
28
+
28
29
  before do
29
30
  @indexer = TestIndexerSubclass.new
30
31
  end
@@ -47,6 +48,28 @@ describe "Class-level configuration of Indexer sub-class" do
47
48
  assert_equal ['from-instance-config'], result["instance_field"]
48
49
  end
49
50
 
51
+ describe "multiple class-level configure" do
52
+ class MultipleConfigureIndexer < Traject::Indexer
53
+ configure do
54
+ to_field "field", literal("value")
55
+ end
56
+ configure do
57
+ to_field "field", literal("value from second configure")
58
+ to_field "second_call", literal("value from second configure")
59
+ end
60
+ end
61
+
62
+ before do
63
+ @indexer = MultipleConfigureIndexer.new
64
+ end
65
+
66
+ it "lets you call class-level configure multiple times and aggregates" do
67
+ result = @indexer.map_record(Object.new)
68
+ assert_equal ['value', 'value from second configure'], result['field']
69
+ assert_equal ['value from second configure'], result['second_call']
70
+ end
71
+ end
72
+
50
73
  describe "with multi-level subclass" do
51
74
  class TestIndexerSubclassSubclass < TestIndexerSubclass
52
75
  configure do
@@ -197,6 +197,10 @@ describe "Traject::Macros::Marc21Semantics" do
197
197
  # we take the first date. And need to deal with the u.
198
198
  assert_equal 1845, Marc21Semantics.publication_date(@record)
199
199
  end
200
+ it "resorts to 264c" do
201
+ @record = MARC::Reader.new(support_file_path "date_resort_to_264.marc").to_a.first
202
+ assert_equal 2015, Marc21Semantics.publication_date(@record)
203
+ end
200
204
  it "resorts to 260c" do
201
205
  @record = MARC::Reader.new(support_file_path "date_resort_to_260.marc").to_a.first
202
206
  assert_equal 1980, Marc21Semantics.publication_date(@record)
@@ -0,0 +1 @@
1
+ 01180aam a2200337 a 4500001001000000008004100010015001900051016001800070020002500088020002200113040005900135043001200194050002400206082001400230100003500244245006400279260003800343264003800381264001100419300003600430336002100466336002800487337002500515338002300540504006700563651004000630651005000670651004300720651005600763035002300819a11417842130723t20uu20uuenkb b 001 0 eng d aGBB3854302bnb7 a0164999372Uk a9781849043427 (pbk.) a1849043426 (pbk.) aUKMGBcUKMGBdOCLCOdYDXCPdOCLCOdZWZdOCLCOdCaONFJC aa-ii--- 4aDS485.K25bS64 201504a954.62231 aSnedden, Christopher,eauthor.10aUnderstanding Kashmir and Kashmiris /cChristopher Snedden. 1aLondon :bHurst & Company,c2014. 1aLondon :bHurst & Company,c2015. 4c©2015 axix, 372 pages :bmaps ;c22 cm atext2rdacontent astill image2rdacontent aunmediated2rdamedia avolume2rdacarrier aIncludes bibliographical references (pages 331-355) and index. 0aJammu and Kashmir (India)xHistory. 0aJammu and Kashmir (India)xForeign relations. 0aJammu and Kashmir (India)vBoundaries. 0aJammu and Kashmir (India)xPolitics and government. a(OCoLC-M)858826393
@@ -24,12 +24,12 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency "concurrent-ruby", ">= 0.8.0"
25
25
  spec.add_dependency "marc", "~> 1.0"
26
26
 
27
- spec.add_dependency "hashie", "~> 3.1" # used for Indexer#settings
27
+ spec.add_dependency "hashie", ">= 3.1", "< 5" # used for Indexer#settings
28
28
  spec.add_dependency "slop", ">= 3.4.5", "< 4.0" # command line parsing
29
29
  spec.add_dependency "yell" # logging
30
30
  spec.add_dependency "dot-properties", ">= 0.1.1" # reading java style .properties
31
31
  spec.add_dependency "httpclient", "~> 2.5"
32
- spec.add_dependency "http", "~> 3.0" # used in oai_pmh_reader, may use more extensively in future instead of httpclient
32
+ spec.add_dependency "http", ">= 3.0", "< 5" # used in oai_pmh_reader, may use more extensively in future instead of httpclient
33
33
  spec.add_dependency 'marc-fastxmlwriter', '~>1.0' # fast marc->xml
34
34
  spec.add_dependency "nokogiri", "~> 1.9" # NokogiriIndexer
35
35
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Rochkind
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-09-10 00:00:00.000000000 Z
12
+ date: 2019-12-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: concurrent-ruby
@@ -43,16 +43,22 @@ dependencies:
43
43
  name: hashie
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
- - - "~>"
46
+ - - ">="
47
47
  - !ruby/object:Gem::Version
48
48
  version: '3.1'
49
+ - - "<"
50
+ - !ruby/object:Gem::Version
51
+ version: '5'
49
52
  type: :runtime
50
53
  prerelease: false
51
54
  version_requirements: !ruby/object:Gem::Requirement
52
55
  requirements:
53
- - - "~>"
56
+ - - ">="
54
57
  - !ruby/object:Gem::Version
55
58
  version: '3.1'
59
+ - - "<"
60
+ - !ruby/object:Gem::Version
61
+ version: '5'
56
62
  - !ruby/object:Gem::Dependency
57
63
  name: slop
58
64
  requirement: !ruby/object:Gem::Requirement
@@ -119,16 +125,22 @@ dependencies:
119
125
  name: http
120
126
  requirement: !ruby/object:Gem::Requirement
121
127
  requirements:
122
- - - "~>"
128
+ - - ">="
123
129
  - !ruby/object:Gem::Version
124
130
  version: '3.0'
131
+ - - "<"
132
+ - !ruby/object:Gem::Version
133
+ version: '5'
125
134
  type: :runtime
126
135
  prerelease: false
127
136
  version_requirements: !ruby/object:Gem::Requirement
128
137
  requirements:
129
- - - "~>"
138
+ - - ">="
130
139
  - !ruby/object:Gem::Version
131
140
  version: '3.0'
141
+ - - "<"
142
+ - !ruby/object:Gem::Version
143
+ version: '5'
132
144
  - !ruby/object:Gem::Dependency
133
145
  name: marc-fastxmlwriter
134
146
  requirement: !ruby/object:Gem::Requirement
@@ -330,6 +342,7 @@ files:
330
342
  - test/test_support/bad_subfield_code.marc
331
343
  - test/test_support/bad_utf_byte.utf8.marc
332
344
  - test/test_support/date_resort_to_260.marc
345
+ - test/test_support/date_resort_to_264.marc
333
346
  - test/test_support/date_type_r_missing_date2.marc
334
347
  - test/test_support/date_with_u.marc
335
348
  - test/test_support/demo_config.rb
@@ -434,6 +447,7 @@ test_files:
434
447
  - test/test_support/bad_subfield_code.marc
435
448
  - test/test_support/bad_utf_byte.utf8.marc
436
449
  - test/test_support/date_resort_to_260.marc
450
+ - test/test_support/date_resort_to_264.marc
437
451
  - test/test_support/date_type_r_missing_date2.marc
438
452
  - test/test_support/date_with_u.marc
439
453
  - test/test_support/demo_config.rb