oai_schedules 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3f254ace930dec7e151a5af485130dd23623548d6f9d0860a764b90ce217c4bb
4
- data.tar.gz: bfe93c7f19a894976c53f3d2e3d720e048c77c116b92283034ba7b17d4efe386
3
+ metadata.gz: d2b2bae058157785c775ebd1b8c0ea76e8685adb8588280f6c6427644b9da8b5
4
+ data.tar.gz: 928102f14abd5bf59bb7b55bb7e100074c5b20bec3276e1dd129a04e74c6da60
5
5
  SHA512:
6
- metadata.gz: bbd9df7881c619f2660dd6b622adf2fd7ec6fa35cde400ff3490b1001b3635c7d89605882e53c9b242cc992de6d0153a7ec5d25d5b41e0672853b997924900a0
7
- data.tar.gz: 918414787e68f89b8dab0e692096e708864ca7b32a81d1341c0cc09e72fbb70d2cd886a137f024e5946b6176b7d4b1b20d606ec12d33c81894c65588ca9c712e
6
+ metadata.gz: 35bb16497f382eb1accaecd95e1e2b022f3addeeef7a4ae79ab5bc4b9f6b023f3fbee70535815ee0edcf5e48b62fc4bdb697cba378cdbac16f114420394cf85e
7
+ data.tar.gz: 3811fd28be558414ebfabcc83e1978080578ff4b33147f0da06cf569ab6b42af498489cf7bef3dd5f14b6e0dbb8598dc615d15220c7496c710f93fcea0149a3e
@@ -181,6 +181,9 @@ module OAISchedules
181
181
  state = {}
182
182
  if File.file?(path_file_state)
183
183
  state = read_state_file(path_file_state)
184
+ else
185
+ init_schedule_state(state)
186
+ write_state_file(path_file_state, state)
184
187
  end
185
188
  # create task
186
189
  task = Concurrent::TimerTask.new(run_now: false) {
@@ -224,14 +227,22 @@ module OAISchedules
224
227
  private
225
228
 
226
229
 
230
+ def init_schedule_state(state)
231
+ state["resumption_token"] = nil
232
+ state["count_success"] = 0
233
+ state["count_fails"] = 0
234
+ state["done"] = false
235
+ state["count_harvested_records"] = 0
236
+ state["latest_harvested_records_datestamp"] = ""
237
+ end
238
+
239
+
227
240
  def handle_schedule_state_at_schedule_change(name, content)
228
241
  # invalidate resumption token if either schedule format or set changes
229
242
  state = @schedules[name][:state]
230
243
  if (content["format"] != @schedules[name][:content]["format"]) \
231
244
  || (content["set"] != @schedules[name][:content]["set"])
232
- state["resumption_token"] = nil
233
- state["count_success"] = 0
234
- state["count_fails"] = 0
245
+ init_schedule_state(state)
235
246
  end
236
247
  end
237
248
 
@@ -346,6 +357,15 @@ module OAISchedules
346
357
  unless state.has_key?("count_fails")
347
358
  state["count_fails"] = 0
348
359
  end
360
+ unless state.has_key?("done")
361
+ state["done"] = false
362
+ end
363
+ unless state.has_key?("count_harvested_records")
364
+ state["count_harvested_records"] = 0
365
+ end
366
+ unless state.has_key?("latest_harvested_records_datestamp")
367
+ state["latest_harvested_records_datestamp"] = ""
368
+ end
349
369
  if !state["resumption_token"].nil?
350
370
  format = ""
351
371
  from = ""
@@ -361,35 +381,49 @@ module OAISchedules
361
381
  end
362
382
  data = nil
363
383
  error = nil
364
- begin
365
- data = oai_get_records(
366
- name,
367
- content["repository"]["uri"],
368
- format,
369
- from,
370
- to,
371
- set,
372
- resumption_token
373
- )
374
- state["resumption_token"] = data["resumptionToken"]
375
- state["count_success"] += 1
376
- rescue StandardError => e
377
- state["count_fails"] += 1
378
- error = e
384
+ done = false
385
+ if state["done"]
386
+ state_machine.add_event(EventHarvesting::DONE_FULL_HARVEST)
387
+ done = true
388
+ else
389
+ begin
390
+ data = oai_get_records(
391
+ name,
392
+ content["repository"]["uri"],
393
+ format,
394
+ from,
395
+ to,
396
+ set,
397
+ resumption_token
398
+ )
399
+ state["resumption_token"] = data["resumptionToken"]
400
+ state["count_success"] += 1
401
+ n_records = data["record"].size
402
+ state["count_harvested_records"] += n_records
403
+ if n_records > 0
404
+ timestamps = data["record"].map do |record|
405
+ record["header"]["datestamp"]
406
+ end.sort
407
+ state["latest_harvested_records_datestamp"] = timestamps[-1]
408
+ end
409
+ rescue StandardError => e
410
+ state["count_fails"] += 1
411
+ error = e
412
+ end
413
+ if error.nil?
414
+ if !data["resumptionToken"].nil?
415
+ state_machine.add_event(EventHarvesting::DONE_HARVEST)
416
+ done = false
417
+ else
418
+ state_machine.add_event(EventHarvesting::DONE_FULL_HARVEST)
419
+ done = true
420
+ end
421
+ end
422
+ state["done"] = done
379
423
  end
380
424
  path_file_state = get_path_state_file_from_schedule_name(name)
381
425
  @logger.info("#{name}: writing to state file #{path_file_state}")
382
426
  write_state_file(path_file_state, state)
383
- done = false
384
- if error.nil?
385
- if !data["resumptionToken"].nil?
386
- state_machine.add_event(EventHarvesting::DONE_HARVEST)
387
- done = false
388
- else
389
- state_machine.add_event(EventHarvesting::DONE_FULL_HARVEST)
390
- done = true
391
- end
392
- end
393
427
  @f_digest&.call(name, content, data, done, error, state, @logger)
394
428
  break
395
429
  when StateHarvesting::COMPLETE
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OaiSchedules
4
- VERSION = "0.5.0"
4
+ VERSION = "0.6.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oai_schedules
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Davide Monari