oai_schedules 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/examples/dir_schedules/.gitkeep +0 -0
- data/examples/dir_schedules_all/schedule_deventit.json +41 -0
- data/examples/dir_schedules_all/schedule_heron.json +38 -0
- data/lib/oai_schedules/manager.rb +93 -69
- data/lib/oai_schedules/version.rb +1 -1
- metadata +25 -8
- /data/examples/{dir_schedules → dir_schedules_all}/schedule_495607cf-f773-463f-8ee1-77d0f53e0c29.json +0 -0
- /data/examples/{dir_schedules → dir_schedules_all}/schedule_5e344861-806b-4361-98f7-a0be6a5984de.json +0 -0
- /data/examples/{dir_schedules → dir_schedules_all}/schedule_712f46ab-f87f-4db8-b69e-7101d9e2ae61.json +0 -0
- /data/examples/{dir_schedules → dir_schedules_all}/schedule_99922363-5b37-4438-a274-5a4a5167f811.json +0 -0
- /data/examples/{dir_schedules → dir_schedules_all}/schedule_dc34623d-2ae2-4e90-91d8-26f4ba29a056.json +0 -0
- /data/examples/{dir_schedules → dir_schedules_all}/schedule_sample.json +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dac0dec1b24093321fea8bb4e8fe05a315255d4a4418f0ff72803f6af902196f
|
4
|
+
data.tar.gz: 590c1f29d049ee82a4352ec3d5a79fbd070013be36f4f08a166cf34323c0afd1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e7fa2c85188069c4997fe4fb91e39f05b6f9ec250184b60868729701a1fefd16fa3fa830cf993913454e5acb8ea4649d2b16a4bad6fcee499053dbe929b5b105
|
7
|
+
data.tar.gz: 50b0ed8e1955e52fe4ea25bd66a38abd397be64d2986ee608c042502091b44e58835110fc18d5406c1c28c431c6e741455091a824ce592c7e8bce4fead953c34
|
data/CHANGELOG.md
CHANGED
File without changes
|
@@ -0,0 +1,41 @@
|
|
1
|
+
{
|
2
|
+
"interval": "PT5S",
|
3
|
+
"repository": {
|
4
|
+
"uri": "http://lag.hosting.deventit.net/atlantispubliek/oai.axd",
|
5
|
+
"repository_name": "Liberas",
|
6
|
+
"protocol_version": "2.0",
|
7
|
+
"admin_email": [
|
8
|
+
"support@deventit.nl"
|
9
|
+
],
|
10
|
+
"earliest_datestamp": "1900-01-01T01:01:01+00:00",
|
11
|
+
"deleted_records": "persistent",
|
12
|
+
"granularity": "YYYY-MM-DDThh:mm:ssZ",
|
13
|
+
"metadata_format": [
|
14
|
+
"APEX",
|
15
|
+
"EAC",
|
16
|
+
"EAD"
|
17
|
+
],
|
18
|
+
"set": [
|
19
|
+
"APEX",
|
20
|
+
"APEXAO",
|
21
|
+
"EAC",
|
22
|
+
"EAD"
|
23
|
+
],
|
24
|
+
"id": "93b6d240ac9b782664f18823b761a128fc19116506dd228763b52ba8fb64e1b9"
|
25
|
+
},
|
26
|
+
"active": true,
|
27
|
+
"transformer": {
|
28
|
+
"transformer_name": [
|
29
|
+
"dummy_transformer"
|
30
|
+
],
|
31
|
+
"type": {
|
32
|
+
"id": "bb8bfd5d-d914-41ae-b9f1-b5065299d9b9",
|
33
|
+
"value": "dummy_type"
|
34
|
+
},
|
35
|
+
"uri": "http://dummy-uri.org/",
|
36
|
+
"id": "dd377ec0-c890-4635-b6d8-553ddf609c01"
|
37
|
+
},
|
38
|
+
"format": "EAD",
|
39
|
+
"set": "EAD",
|
40
|
+
"id": "deventit"
|
41
|
+
}
|
@@ -0,0 +1,38 @@
|
|
1
|
+
{
|
2
|
+
"interval": "PT5S",
|
3
|
+
"repository": {
|
4
|
+
"uri": "https://heron.libis.be/ca_veb_q/admin/service.php/OAI/cw_organisaties/request",
|
5
|
+
"repository_name": "VEB",
|
6
|
+
"protocol_version": "2.0",
|
7
|
+
"admin_email": [
|
8
|
+
"collectiveaccess@vlaamse-erfgoedbibliotheken.be"
|
9
|
+
],
|
10
|
+
"earliest_datestamp": "2020-08-17T12:55:02+00:00",
|
11
|
+
"deleted_records": "transient",
|
12
|
+
"granularity": "YYYY-MM-DDThh:mm:ssZ",
|
13
|
+
"metadata_format": [
|
14
|
+
"oai_dc",
|
15
|
+
"oai_veb"
|
16
|
+
],
|
17
|
+
"set": [
|
18
|
+
"1"
|
19
|
+
],
|
20
|
+
"id": "4886e3793003d2b1dffbbdb41ca13024c7c809892314ab301ded0d7ecfd7b469"
|
21
|
+
},
|
22
|
+
"active": true,
|
23
|
+
"transformer": {
|
24
|
+
"transformer_name": [
|
25
|
+
"dummy_transformer"
|
26
|
+
],
|
27
|
+
"type": {
|
28
|
+
"id": "bb8bfd5d-d914-41ae-b9f1-b5065299d9b9",
|
29
|
+
"value": "dummy_type"
|
30
|
+
},
|
31
|
+
"uri": "http://dummy-uri.org/",
|
32
|
+
"id": "dd377ec0-c890-4635-b6d8-553ddf609c01"
|
33
|
+
},
|
34
|
+
"format": "oai_veb",
|
35
|
+
"set": "1",
|
36
|
+
"from": "1900-06-10T12:39:00+00:00",
|
37
|
+
"id": "heron"
|
38
|
+
}
|
@@ -8,6 +8,7 @@ require 'logger'
|
|
8
8
|
require 'data_collector'
|
9
9
|
require 'iso8601'
|
10
10
|
require 'date'
|
11
|
+
require 'active_support/core_ext/time'
|
11
12
|
|
12
13
|
|
13
14
|
|
@@ -49,6 +50,7 @@ module OAISchedules
|
|
49
50
|
REQUEST_HARVEST = 2
|
50
51
|
DONE_HARVEST = 3
|
51
52
|
DONE_FULL_HARVEST = 4
|
53
|
+
RESTART = 5
|
52
54
|
end
|
53
55
|
|
54
56
|
class StateMachineHarvesting
|
@@ -103,6 +105,15 @@ module OAISchedules
|
|
103
105
|
@state
|
104
106
|
end
|
105
107
|
|
108
|
+
when StateHarvesting::COMPLETE
|
109
|
+
|
110
|
+
case event
|
111
|
+
when EventHarvesting::RESTART
|
112
|
+
@state = StateHarvesting::NOT_IDENTIFIED
|
113
|
+
else
|
114
|
+
@state
|
115
|
+
end
|
116
|
+
|
106
117
|
else
|
107
118
|
@state
|
108
119
|
end
|
@@ -228,6 +239,7 @@ module OAISchedules
|
|
228
239
|
|
229
240
|
|
230
241
|
def init_schedule_state(state)
|
242
|
+
state["use_resumption_token"] = nil
|
231
243
|
state["resumption_token"] = nil
|
232
244
|
state["expiration_date_resumption_token"] = nil
|
233
245
|
state["datetime_now"] = get_datetime_now
|
@@ -239,6 +251,7 @@ module OAISchedules
|
|
239
251
|
state["latest_harvested_records_datestamp"] = nil
|
240
252
|
state["harvesting"] = false
|
241
253
|
state["error"] = nil
|
254
|
+
state["status"] = nil
|
242
255
|
end
|
243
256
|
|
244
257
|
|
@@ -357,8 +370,9 @@ module OAISchedules
|
|
357
370
|
when StateHarvesting::IDLE
|
358
371
|
state_machine.add_event(EventHarvesting::REQUEST_HARVEST)
|
359
372
|
when StateHarvesting::HARVESTING
|
360
|
-
use_resumption_token = false
|
361
|
-
|
373
|
+
# use_resumption_token = false
|
374
|
+
use_resumption_token = true
|
375
|
+
state["use_resumption_token"] = use_resumption_token
|
362
376
|
format = content["format"] || ""
|
363
377
|
fmt_dt = content.dig("repository", "granularity")
|
364
378
|
from = convert_datetime(content["from"], fmt_dt) || ""
|
@@ -367,91 +381,101 @@ module OAISchedules
|
|
367
381
|
resumption_token = ""
|
368
382
|
if use_resumption_token
|
369
383
|
if !state["resumption_token"].nil?
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
384
|
+
# if state["expiration_date_resumption_token"].nil? || (!state["expiration_date_resumption_token"].nil? && (DateTime.parse(state["expiration_date_resumption_token"]) - DateTime.now) > 0)
|
385
|
+
format = ""
|
386
|
+
from = ""
|
387
|
+
to = ""
|
388
|
+
set = ""
|
389
|
+
resumption_token = state["resumption_token"]
|
390
|
+
# end
|
375
391
|
end
|
376
392
|
else
|
377
393
|
from = add_eps_to_datetime(state["latest_harvested_records_datestamp"], fmt_dt) || from
|
378
394
|
end
|
379
395
|
data = nil
|
380
396
|
error = nil
|
381
|
-
|
397
|
+
err_info = nil
|
398
|
+
harvesting = true
|
382
399
|
done = false
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
data_token = data["resumptionToken"]
|
404
|
-
state["resumption_token"] = data_token["$text"]
|
405
|
-
state["expiration_date_resumption_token"] = data_token["_expirationDate"]
|
406
|
-
end
|
407
|
-
end
|
408
|
-
n_records = data["record"].size
|
409
|
-
state["count_harvested_records"] += n_records
|
410
|
-
if n_records > 0
|
411
|
-
timestamps = data["record"].map do |record|
|
412
|
-
record["header"]["datestamp"]
|
413
|
-
end.sort
|
414
|
-
timestamp_latest = timestamps[-1].strftime('%FT%TZ')
|
415
|
-
state["latest_harvested_records_datestamp"] = convert_datetime(timestamp_latest, fmt_dt)
|
400
|
+
status = nil
|
401
|
+
begin
|
402
|
+
data = oai_get_records(
|
403
|
+
name,
|
404
|
+
content["repository"]["uri"],
|
405
|
+
format,
|
406
|
+
from,
|
407
|
+
to,
|
408
|
+
set,
|
409
|
+
resumption_token
|
410
|
+
)
|
411
|
+
state["resumption_token"] = nil
|
412
|
+
state["expiration_date_resumption_token"] = nil
|
413
|
+
if use_resumption_token
|
414
|
+
if data["resumptionToken"].is_a?(String)
|
415
|
+
state["resumption_token"] = data["resumptionToken"]
|
416
|
+
elsif data["resumptionToken"].is_a?(Hash)
|
417
|
+
data_token = data["resumptionToken"]
|
418
|
+
state["resumption_token"] = data_token["$text"]
|
419
|
+
state["expiration_date_resumption_token"] = data_token["_expirationDate"]
|
416
420
|
end
|
417
|
-
state["count_success"] += 1
|
418
|
-
rescue StandardError => e
|
419
|
-
state["count_fails"] += 1
|
420
|
-
error = e
|
421
421
|
end
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
422
|
+
n_records = data["record"].size
|
423
|
+
state["count_harvested_records"] += n_records
|
424
|
+
if n_records > 0
|
425
|
+
timestamps = data["record"].map do |record|
|
426
|
+
record["header"]["datestamp"]
|
427
|
+
end.sort
|
428
|
+
timestamp_latest = timestamps[-1].strftime('%FT%TZ')
|
429
|
+
state["latest_harvested_records_datestamp"] = convert_datetime(timestamp_latest, fmt_dt)
|
430
|
+
end
|
431
|
+
state["count_success"] += 1
|
432
|
+
rescue StandardError => e
|
433
|
+
state["count_fails"] += 1
|
434
|
+
error = e
|
435
|
+
end
|
436
|
+
if error.nil?
|
437
|
+
err_info = nil
|
438
|
+
if use_resumption_token
|
439
|
+
if state["resumption_token"].nil?
|
440
|
+
state_machine.add_event(EventHarvesting::DONE_FULL_HARVEST)
|
441
|
+
harvesting = false
|
442
|
+
status = "next harvesting: #{state["datetime_next_harvesting"]}"
|
431
443
|
else
|
432
|
-
|
444
|
+
state_machine.add_event(EventHarvesting::DONE_HARVEST)
|
445
|
+
harvesting = true
|
446
|
+
status = "harvesting"
|
433
447
|
end
|
434
448
|
else
|
435
|
-
state["error"] = {
|
436
|
-
"message" => error.message,
|
437
|
-
"backtrace" => error.backtrace
|
438
|
-
}
|
439
449
|
state_machine.add_event(EventHarvesting::DONE_HARVEST)
|
440
|
-
|
450
|
+
harvesting = true
|
451
|
+
status = "harvesting"
|
441
452
|
end
|
442
|
-
|
443
|
-
|
444
|
-
|
453
|
+
else
|
454
|
+
err_info = {
|
455
|
+
"message" => error.message,
|
456
|
+
"backtrace" => error.backtrace
|
457
|
+
}
|
458
|
+
state_machine.add_event(EventHarvesting::DONE_FULL_HARVEST)
|
459
|
+
harvesting = false
|
460
|
+
status = "error (see logs), next harvesting: #{state["datetime_next_harvesting"]}"
|
445
461
|
end
|
462
|
+
state["done"] = done
|
463
|
+
state["harvesting"] = harvesting
|
464
|
+
state["datetime_now"] = get_datetime_now
|
465
|
+
state["status"] = status
|
466
|
+
state["error"] = err_info
|
446
467
|
path_file_state = get_path_state_file_from_schedule_name(name)
|
447
468
|
@logger.info("#{name}: writing to state file #{path_file_state}")
|
448
469
|
write_state_file(path_file_state, state)
|
449
470
|
@f_digest&.call(name, content, data, done, error, state, @logger)
|
450
|
-
break if to_pause
|
451
471
|
when StateHarvesting::COMPLETE
|
452
472
|
@logger.warn("#{name}: full harvesting complete")
|
453
|
-
|
454
|
-
|
473
|
+
state_machine.add_event(EventHarvesting::RESTART)
|
474
|
+
auto_deactivate_schedule = false
|
475
|
+
if auto_deactivate_schedule
|
476
|
+
content["active"] = false
|
477
|
+
handle_schedule_task(name)
|
478
|
+
end
|
455
479
|
break
|
456
480
|
else
|
457
481
|
@logger.warn("#{name}: state #{state_machine.state} not known")
|
@@ -580,7 +604,7 @@ module OAISchedules
|
|
580
604
|
end
|
581
605
|
|
582
606
|
def get_datetime_now
|
583
|
-
|
607
|
+
Time.current.in_time_zone('Zulu').strftime('%FT%TZ')
|
584
608
|
end
|
585
609
|
|
586
610
|
def interval_iso8601_to_seconds(str_interval)
|
@@ -589,8 +613,8 @@ module OAISchedules
|
|
589
613
|
|
590
614
|
def get_datetime_next_schedule_tick_from_now(str_interval)
|
591
615
|
interval_s = interval_iso8601_to_seconds(str_interval)
|
592
|
-
dt =
|
593
|
-
dt +=
|
616
|
+
dt = Time.current.in_time_zone('Zulu')
|
617
|
+
dt += interval_s
|
594
618
|
dt.strftime('%FT%TZ')
|
595
619
|
end
|
596
620
|
|
metadata
CHANGED
@@ -1,14 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oai_schedules
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Davide Monari
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-07-
|
10
|
+
date: 2025-07-15 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: activesupport
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: 8.0.2
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - "~>"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: 8.0.2
|
12
26
|
- !ruby/object:Gem::Dependency
|
13
27
|
name: concurrent-ruby
|
14
28
|
requirement: !ruby/object:Gem::Requirement
|
@@ -78,12 +92,15 @@ files:
|
|
78
92
|
- LICENSE.txt
|
79
93
|
- README.md
|
80
94
|
- Rakefile
|
81
|
-
- examples/dir_schedules/schedule_495607cf-f773-463f-8ee1-77d0f53e0c29.json
|
82
|
-
- examples/dir_schedules/schedule_5e344861-806b-4361-98f7-a0be6a5984de.json
|
83
|
-
- examples/dir_schedules/schedule_712f46ab-f87f-4db8-b69e-7101d9e2ae61.json
|
84
|
-
- examples/dir_schedules/schedule_99922363-5b37-4438-a274-5a4a5167f811.json
|
85
|
-
- examples/dir_schedules/schedule_dc34623d-2ae2-4e90-91d8-26f4ba29a056.json
|
86
|
-
- examples/dir_schedules/schedule_sample.json
|
95
|
+
- examples/dir_schedules/.gitkeep
|
96
|
+
- examples/dir_schedules_all/schedule_495607cf-f773-463f-8ee1-77d0f53e0c29.json
|
97
|
+
- examples/dir_schedules_all/schedule_5e344861-806b-4361-98f7-a0be6a5984de.json
|
98
|
+
- examples/dir_schedules_all/schedule_712f46ab-f87f-4db8-b69e-7101d9e2ae61.json
|
99
|
+
- examples/dir_schedules_all/schedule_99922363-5b37-4438-a274-5a4a5167f811.json
|
100
|
+
- examples/dir_schedules_all/schedule_dc34623d-2ae2-4e90-91d8-26f4ba29a056.json
|
101
|
+
- examples/dir_schedules_all/schedule_deventit.json
|
102
|
+
- examples/dir_schedules_all/schedule_heron.json
|
103
|
+
- examples/dir_schedules_all/schedule_sample.json
|
87
104
|
- examples/dir_state/.gitkeep
|
88
105
|
- examples/example_01.rb
|
89
106
|
- examples/example_02.rb
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|