oai_schedules 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3e87f0fab9a90cc32bb6c3892a9ad72e6dae77c8b7efbc4ab3198c39b7d2a660
4
- data.tar.gz: b59ddf59dd77df9dc1365a279dbe7ac3a9fb7b8d5f1bb7a33a8951f3d4ce1f48
3
+ metadata.gz: 7d574b21314744d8ad56d1a1394d3fc778e860ba636d43569fedee7642ff3af2
4
+ data.tar.gz: b06822ea6110eedf0663f40f4c32120c321c0acd9a508deabe3204cb6f094962
5
5
  SHA512:
6
- metadata.gz: d384485639a44d848f35eb3bc0a25a44fe2357cd84aeb37530a683a153aaba6f4b9bb24b7a316caf424e8f0c57b77b775b45f168c06339b3a6b6e35b1b2ee8d9
7
- data.tar.gz: 84117dbd3b7f6757290fdee7a9f3388623330d8d293961946b6ee2058b2be8a7898b60822598bd8e8ac6bf2340d3cbabcba05124d1aa087e876b2a83192ea948
6
+ metadata.gz: 288bea5932749f50a8629e6fe7ebc041c68f19d39ca44a7513cbdfe1f24682f079f8d8434eabea6ce1cd1d1d3f40f71923ea4bcd701a5926589c544880de2b47
7
+ data.tar.gz: e2c309f3b419a605ad1eac62023bfdbb4102cd1ae7be8809659a42909851abcb90cfc22bccc790d21f5c43554f744f7bffaa40e006d7f67e91db77b20f50c0e6
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## [0.4.0] - 2025-03-28
2
+
3
+ - Added logger to digestion function
4
+ - Invalidated resumption token when either the schedule set or format changes
5
+ - Extended error message on OAI URL fetch failure
6
+
1
7
  ## [0.3.0] - 2025-03-26
2
8
 
3
9
  - Added schedule name and content to digestion function
data/README.md CHANGED
@@ -22,7 +22,7 @@ gem install oai_schedules
22
22
  ```ruby
23
23
  require 'oai_schedules/manager'
24
24
 
25
- f_show = lambda do |name, content, records, done|
25
+ f_show = lambda do |name, content, records, done, logger|
26
26
  # ... do your stuff with records ...
27
27
  if done
28
28
  puts "done full harvesting"
@@ -58,8 +58,8 @@ At every iteration, every 2 seconds, it will get the *partial* records by using
58
58
  The code will do this by querying `https://eudml.org/oai/OAIHandler?verb=ListRecords&...` and adding the necessary
59
59
  query parameters.
60
60
  The custom function provided as `f_digest` will then be called at each iteration.
61
- This will be provided schedule `name` and `content`, the partial list of `records` as a hash,
62
- and a `done` flag (full harvesting complete).
61
+ It receives the schedule `name` and `content`, the partial list of `records` as a hash,
62
+ a `done` flag (full harvesting complete), and the same logger used internally by the schedules manager.
63
63
  and it will write the new one to the state file, until no token is provided (end of the harvesting).
64
64
  As soon as the schedule is added, it is executed.
65
65
  It is possible to add all schedules in advance, then call `sleep` for infinite event loop.
@@ -4,7 +4,7 @@ require 'oai_schedules/manager'
4
4
 
5
5
  # usage with folder listener
6
6
 
7
- f_show = lambda do |name, content, records, done|
7
+ f_show = lambda do |name, content, records, done, logger|
8
8
  # ... do your stuff with records ...
9
9
  if done
10
10
  puts "done full harvesting"
@@ -4,7 +4,7 @@ require 'oai_schedules/manager'
4
4
 
5
5
  # usage with programmatic schedules addition / modify / remove
6
6
 
7
- f_show = lambda do |name, content, records, done|
7
+ f_show = lambda do |name, content, records, done, logger|
8
8
  # ... do your stuff with records ...
9
9
  if done
10
10
  puts "done full harvesting"
@@ -213,6 +213,7 @@ module OAISchedules
213
213
 
214
214
 
215
215
  def modify_schedule(name, content)
216
+ handle_schedule_state_at_schedule_change(name, content)
216
217
  @schedules[name][:content] = deep_copy(content)
217
218
  handle_schedule_task(name)
218
219
  end
@@ -222,6 +223,18 @@ module OAISchedules
222
223
 
223
224
  private
224
225
 
226
+
227
+ def handle_schedule_state_at_schedule_change(name, content)
228
+ # invalidate resumption token if either schedule format or set changes
229
+ state = @schedules[name][:state]
230
+ if (content["format"] != @schedules[name][:content]["format"]) \
231
+ || (content["set"] != @schedules[name][:content]["set"])
232
+ state["resumption_token"] = nil
233
+ state["count_partial_harversting"] = 0
234
+ end
235
+ end
236
+
237
+
225
238
  def deep_copy(o)
226
239
  Marshal.load(Marshal.dump(o))
227
240
  end
@@ -364,7 +377,7 @@ module OAISchedules
364
377
  state_machine.add_event(EventHarvesting::DONE_FULL_HARVEST)
365
378
  done = true
366
379
  end
367
- @f_digest&.call(name, content, data, done)
380
+ @f_digest&.call(name, content, data, done, @logger)
368
381
  break
369
382
  when StateHarvesting::COMPLETE
370
383
  @logger.warn("#{name}: full harvesting complete")
@@ -416,7 +429,7 @@ module OAISchedules
416
429
  @logger.info("#{name}: fetching from #{url_query}")
417
430
  data = DataCollector::Core.filter(DataCollector::Input.new.from_uri(url_query), "$..#{verb}")
418
431
  if data.empty?
419
- raise StandardError, "#{name}: GET #{url_query}: URL not available, or response has no element #{verb}"
432
+ raise StandardError, "#{name}: GET #{url_query}: URL not available, or response has no element #{verb}, or missing query parameters (e.g. set)"
420
433
  end
421
434
  data = data[0]
422
435
  rescue DataCollector::InputError => e
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OaiSchedules
4
- VERSION = "0.3.0"
4
+ VERSION = "0.4.0"
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oai_schedules
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Davide Monari
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-03-26 00:00:00.000000000 Z
10
+ date: 2025-03-28 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: concurrent-ruby