oai_schedules 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f254ace930dec7e151a5af485130dd23623548d6f9d0860a764b90ce217c4bb
|
4
|
+
data.tar.gz: bfe93c7f19a894976c53f3d2e3d720e048c77c116b92283034ba7b17d4efe386
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bbd9df7881c619f2660dd6b622adf2fd7ec6fa35cde400ff3490b1001b3635c7d89605882e53c9b242cc992de6d0153a7ec5d25d5b41e0672853b997924900a0
|
7
|
+
data.tar.gz: 918414787e68f89b8dab0e692096e708864ca7b32a81d1341c0cc09e72fbb70d2cd886a137f024e5946b6176b7d4b1b20d606ec12d33c81894c65588ca9c712e
|
data/README.md
CHANGED
@@ -22,8 +22,13 @@ gem install oai_schedules
|
|
22
22
|
```ruby
|
23
23
|
require 'oai_schedules/manager'
|
24
24
|
|
25
|
-
f_show = lambda do |name, content, records, done, logger|
|
26
|
-
|
25
|
+
f_show = lambda do |name, content, records, done, error, state, logger|
|
26
|
+
if error.nil?
|
27
|
+
# ... do your stuff with records ...
|
28
|
+
else
|
29
|
+
puts error.message
|
30
|
+
end
|
31
|
+
puts state
|
27
32
|
if done
|
28
33
|
puts "done full harvesting"
|
29
34
|
end
|
@@ -59,7 +64,8 @@ The code will do this by querying `https://eudml.org/oai/OAIHandler?verb=ListRec
|
|
59
64
|
query parameters.
|
60
65
|
The custom function provided as `f_digest` will then be called at each iteration.
|
61
66
|
It will be given the schedule `name` and `content`, the partial list of `records` as a hash,
|
62
|
-
a `done` flag (full harvesting complete),
|
67
|
+
a `done` flag (full harvesting complete), an `error` exception if one occurred (`nil` otherwise), the harvesting `state`,
|
68
|
+
and the same logger used internally by the schedules manager.
|
63
69
|
and it will write the new one to the state file, until no token is provided (end of the harvesting).
|
64
70
|
As soon as the schedule is added, it is executed.
|
65
71
|
It is possible to add all schedules in advance, then call `sleep` for infinite event loop.
|
@@ -0,0 +1,63 @@
|
|
1
|
+
{
|
2
|
+
"interval": "P1W",
|
3
|
+
"repository": {
|
4
|
+
"uri": "https://eudml.org/oai/OAIHandler",
|
5
|
+
"repository_name": "REPOX Repository",
|
6
|
+
"protocol_version": "2.0",
|
7
|
+
"admin_email": [
|
8
|
+
"mailto:gilberto.pedrosa@ist.utl.pt"
|
9
|
+
],
|
10
|
+
"earliest_datestamp": "1970-01-01T00:00:00+00:00",
|
11
|
+
"deleted_records": "persistent",
|
12
|
+
"granularity": "YYYY-MM-DD",
|
13
|
+
"metadata_format": [
|
14
|
+
"oai_dc",
|
15
|
+
"ese",
|
16
|
+
"eudml-article2",
|
17
|
+
"eudml-book2"
|
18
|
+
],
|
19
|
+
"set": [
|
20
|
+
"BDIM",
|
21
|
+
"BulDML",
|
22
|
+
"CEDRAM",
|
23
|
+
"DMLE",
|
24
|
+
"DML_CZ_Monograph",
|
25
|
+
"DML_CZ_Proceeding",
|
26
|
+
"DML_CZ_Serial",
|
27
|
+
"EDPS",
|
28
|
+
"ELibM",
|
29
|
+
"GALLICA",
|
30
|
+
"GDZ_Band",
|
31
|
+
"GDZ_Mathematica",
|
32
|
+
"GDZ_Monographs",
|
33
|
+
"GDZ_RusDML",
|
34
|
+
"HDML_Books",
|
35
|
+
"HDML_Conferences",
|
36
|
+
"HDML_Journals",
|
37
|
+
"MISANU",
|
38
|
+
"NUMDAM",
|
39
|
+
"NUMDAM_book",
|
40
|
+
"PLDML",
|
41
|
+
"PLDML_book",
|
42
|
+
"PMath"
|
43
|
+
],
|
44
|
+
"id": "02df523af427deb93b7cb4600ca347f9297d0e31d51c2783c634459dac457bd0"
|
45
|
+
},
|
46
|
+
"active": true,
|
47
|
+
"transformer": {
|
48
|
+
"transformer_name": [
|
49
|
+
"dummy_transformer"
|
50
|
+
],
|
51
|
+
"type": {
|
52
|
+
"id": "b1671aad-e825-4b5a-b50b-d8591b425e2a",
|
53
|
+
"value": "dummy_type"
|
54
|
+
},
|
55
|
+
"uri": "http://dummy-uri.org/",
|
56
|
+
"id": "7889ce03-28d9-479d-bb9a-b239f179453a"
|
57
|
+
},
|
58
|
+
"format": "oai_dc",
|
59
|
+
"set": "CEDRAM",
|
60
|
+
"from": "1970-01-01T00:00:00+00:00",
|
61
|
+
"until": "1999-01-12T09:43:02+00:00",
|
62
|
+
"id": "dc34623d-2ae2-4e90-91d8-26f4ba29a056"
|
63
|
+
}
|
data/examples/example_01.rb
CHANGED
@@ -4,8 +4,13 @@ require 'oai_schedules/manager'
|
|
4
4
|
|
5
5
|
# usage with folder listener
|
6
6
|
|
7
|
-
f_show = lambda do |name, content, records, done, logger|
|
8
|
-
|
7
|
+
f_show = lambda do |name, content, records, done, error, state, logger|
|
8
|
+
if error.nil?
|
9
|
+
# ... do your stuff with records ...
|
10
|
+
else
|
11
|
+
puts error.message
|
12
|
+
end
|
13
|
+
puts state
|
9
14
|
if done
|
10
15
|
puts "done full harvesting"
|
11
16
|
end
|
data/examples/example_02.rb
CHANGED
@@ -4,8 +4,13 @@ require 'oai_schedules/manager'
|
|
4
4
|
|
5
5
|
# usage with programmatic schedules addition / modify / remove
|
6
6
|
|
7
|
-
f_show = lambda do |name, content, records, done, logger|
|
8
|
-
|
7
|
+
f_show = lambda do |name, content, records, done, error, state, logger|
|
8
|
+
if error.nil?
|
9
|
+
# ... do your stuff with records ...
|
10
|
+
else
|
11
|
+
puts error.message
|
12
|
+
end
|
13
|
+
puts state
|
9
14
|
if done
|
10
15
|
puts "done full harvesting"
|
11
16
|
end
|
@@ -230,7 +230,8 @@ module OAISchedules
|
|
230
230
|
if (content["format"] != @schedules[name][:content]["format"]) \
|
231
231
|
|| (content["set"] != @schedules[name][:content]["set"])
|
232
232
|
state["resumption_token"] = nil
|
233
|
-
state["
|
233
|
+
state["count_success"] = 0
|
234
|
+
state["count_fails"] = 0
|
234
235
|
end
|
235
236
|
end
|
236
237
|
|
@@ -339,8 +340,11 @@ module OAISchedules
|
|
339
340
|
when StateHarvesting::IDLE
|
340
341
|
state_machine.add_event(EventHarvesting::REQUEST_HARVEST)
|
341
342
|
when StateHarvesting::HARVESTING
|
342
|
-
unless state.has_key?("
|
343
|
-
state["
|
343
|
+
unless state.has_key?("count_success")
|
344
|
+
state["count_success"] = 0
|
345
|
+
end
|
346
|
+
unless state.has_key?("count_fails")
|
347
|
+
state["count_fails"] = 0
|
344
348
|
end
|
345
349
|
if !state["resumption_token"].nil?
|
346
350
|
format = ""
|
@@ -354,30 +358,39 @@ module OAISchedules
|
|
354
358
|
to = content["until"] || ""
|
355
359
|
set = content["set"] || ""
|
356
360
|
resumption_token = ""
|
357
|
-
state["count_partial_harversting"] = 0
|
358
361
|
end
|
359
|
-
data =
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
362
|
+
data = nil
|
363
|
+
error = nil
|
364
|
+
begin
|
365
|
+
data = oai_get_records(
|
366
|
+
name,
|
367
|
+
content["repository"]["uri"],
|
368
|
+
format,
|
369
|
+
from,
|
370
|
+
to,
|
371
|
+
set,
|
372
|
+
resumption_token
|
373
|
+
)
|
374
|
+
state["resumption_token"] = data["resumptionToken"]
|
375
|
+
state["count_success"] += 1
|
376
|
+
rescue StandardError => e
|
377
|
+
state["count_fails"] += 1
|
378
|
+
error = e
|
379
|
+
end
|
370
380
|
path_file_state = get_path_state_file_from_schedule_name(name)
|
371
381
|
@logger.info("#{name}: writing to state file #{path_file_state}")
|
372
382
|
write_state_file(path_file_state, state)
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
383
|
+
done = false
|
384
|
+
if error.nil?
|
385
|
+
if !data["resumptionToken"].nil?
|
386
|
+
state_machine.add_event(EventHarvesting::DONE_HARVEST)
|
387
|
+
done = false
|
388
|
+
else
|
389
|
+
state_machine.add_event(EventHarvesting::DONE_FULL_HARVEST)
|
390
|
+
done = true
|
391
|
+
end
|
379
392
|
end
|
380
|
-
@f_digest&.call(name, content, data, done, @logger)
|
393
|
+
@f_digest&.call(name, content, data, done, error, state, @logger)
|
381
394
|
break
|
382
395
|
when StateHarvesting::COMPLETE
|
383
396
|
@logger.warn("#{name}: full harvesting complete")
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oai_schedules
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Davide Monari
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-04-29 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: concurrent-ruby
|
@@ -78,6 +78,7 @@ files:
|
|
78
78
|
- LICENSE.txt
|
79
79
|
- README.md
|
80
80
|
- Rakefile
|
81
|
+
- examples/dir_schedules/schedule_dc34623d-2ae2-4e90-91d8-26f4ba29a056.json
|
81
82
|
- examples/dir_schedules/schedule_sample.json
|
82
83
|
- examples/dir_state/.gitkeep
|
83
84
|
- examples/example_01.rb
|