oai_schedules 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1fe0345feb8fe60475f4668b9dfabadfe525b9251d60778cabcbb27f7c20d0b1
4
+ data.tar.gz: 4407a7a41d10b7a8dc30e8261a99eb74ea9cfdd5bc35e8592cbf27f76c114def
5
+ SHA512:
6
+ metadata.gz: 29278455d3eceff2ce5b53bbb65f4074dd87b16a4389258fb77e4567b0d20343214d864ab166fcc8a7c2978ff2f8a20659946ee5fa990a15b1e1422f3d7bfcb1
7
+ data.tar.gz: 6d39fee175e2e2a8b872529ca79fac3b0c1b22f53f8502011e514bf1d769c99bacd37c066fb5682505a88c7d72284275876e52c66a5b2afa163674c4c0a0a42e
data/.standard.yml ADDED
@@ -0,0 +1,3 @@
1
+ # For available configuration options, see:
2
+ # https://github.com/standardrb/standard
3
+ ruby_version: 3.1
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2025-03-20
4
+
5
+ - Initial release
@@ -0,0 +1,132 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, caste, color, religion, or sexual
10
+ identity and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ * Demonstrating empathy and kindness toward other people
21
+ * Being respectful of differing opinions, viewpoints, and experiences
22
+ * Giving and gracefully accepting constructive feedback
23
+ * Accepting responsibility and apologizing to those affected by our mistakes,
24
+ and learning from the experience
25
+ * Focusing on what is best not just for us as individuals, but for the overall
26
+ community
27
+
28
+ Examples of unacceptable behavior include:
29
+
30
+ * The use of sexualized language or imagery, and sexual attention or advances of
31
+ any kind
32
+ * Trolling, insulting or derogatory comments, and personal or political attacks
33
+ * Public or private harassment
34
+ * Publishing others' private information, such as a physical or email address,
35
+ without their explicit permission
36
+ * Other conduct which could reasonably be considered inappropriate in a
37
+ professional setting
38
+
39
+ ## Enforcement Responsibilities
40
+
41
+ Community leaders are responsible for clarifying and enforcing our standards of
42
+ acceptable behavior and will take appropriate and fair corrective action in
43
+ response to any behavior that they deem inappropriate, threatening, offensive,
44
+ or harmful.
45
+
46
+ Community leaders have the right and responsibility to remove, edit, or reject
47
+ comments, commits, code, wiki edits, issues, and other contributions that are
48
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
49
+ decisions when appropriate.
50
+
51
+ ## Scope
52
+
53
+ This Code of Conduct applies within all community spaces, and also applies when
54
+ an individual is officially representing the community in public spaces.
55
+ Examples of representing our community include using an official email address,
56
+ posting via an official social media account, or acting as an appointed
57
+ representative at an online or offline event.
58
+
59
+ ## Enforcement
60
+
61
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
62
+ reported to the community leaders responsible for enforcement at
63
+ [INSERT CONTACT METHOD].
64
+ All complaints will be reviewed and investigated promptly and fairly.
65
+
66
+ All community leaders are obligated to respect the privacy and security of the
67
+ reporter of any incident.
68
+
69
+ ## Enforcement Guidelines
70
+
71
+ Community leaders will follow these Community Impact Guidelines in determining
72
+ the consequences for any action they deem in violation of this Code of Conduct:
73
+
74
+ ### 1. Correction
75
+
76
+ **Community Impact**: Use of inappropriate language or other behavior deemed
77
+ unprofessional or unwelcome in the community.
78
+
79
+ **Consequence**: A private, written warning from community leaders, providing
80
+ clarity around the nature of the violation and an explanation of why the
81
+ behavior was inappropriate. A public apology may be requested.
82
+
83
+ ### 2. Warning
84
+
85
+ **Community Impact**: A violation through a single incident or series of
86
+ actions.
87
+
88
+ **Consequence**: A warning with consequences for continued behavior. No
89
+ interaction with the people involved, including unsolicited interaction with
90
+ those enforcing the Code of Conduct, for a specified period of time. This
91
+ includes avoiding interactions in community spaces as well as external channels
92
+ like social media. Violating these terms may lead to a temporary or permanent
93
+ ban.
94
+
95
+ ### 3. Temporary Ban
96
+
97
+ **Community Impact**: A serious violation of community standards, including
98
+ sustained inappropriate behavior.
99
+
100
+ **Consequence**: A temporary ban from any sort of interaction or public
101
+ communication with the community for a specified period of time. No public or
102
+ private interaction with the people involved, including unsolicited interaction
103
+ with those enforcing the Code of Conduct, is allowed during this period.
104
+ Violating these terms may lead to a permanent ban.
105
+
106
+ ### 4. Permanent Ban
107
+
108
+ **Community Impact**: Demonstrating a pattern of violation of community
109
+ standards, including sustained inappropriate behavior, harassment of an
110
+ individual, or aggression toward or disparagement of classes of individuals.
111
+
112
+ **Consequence**: A permanent ban from any sort of public interaction within the
113
+ community.
114
+
115
+ ## Attribution
116
+
117
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118
+ version 2.1, available at
119
+ [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
120
+
121
+ Community Impact Guidelines were inspired by
122
+ [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
123
+
124
+ For answers to common questions about this code of conduct, see the FAQ at
125
+ [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
126
+ [https://www.contributor-covenant.org/translations][translations].
127
+
128
+ [homepage]: https://www.contributor-covenant.org
129
+ [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
130
+ [Mozilla CoC]: https://github.com/mozilla/diversity
131
+ [FAQ]: https://www.contributor-covenant.org/faq
132
+ [translations]: https://www.contributor-covenant.org/translations
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 Davide Monari
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # OaiSchedules
2
+
3
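+ OaiSchedules runs concurrent OAI-PMH harvesting schedules: each schedule periodically sends `Identify` and `ListRecords` requests to a repository and stores its harvesting state (including the resumption token) in a JSON state file.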
4
+
5
+ The scheduler is implemented by `OAISchedules::Manager` in `lib/oai_schedules/manager.rb`; runnable samples live in `examples/`. To experiment with the code, run `bin/console` for an interactive prompt.
6
+
7
+ ## Installation
8
+
9
+ The gem is published on [RubyGems.org](https://rubygems.org) under the name `oai_schedules`.
10
+
11
+ Install the gem and add to the application's Gemfile by executing:
12
+
13
+ ```bash
14
+ bundle add oai_schedules
15
+ ```
16
+
17
+ If bundler is not being used to manage dependencies, install the gem by executing:
18
+
19
+ ```bash
20
+ gem install oai_schedules
21
+ ```
22
+
23
+ ## Usage
24
+
25
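+ Require `oai_schedules/manager` and create an `OAISchedules::Manager`. See `examples/example_01.rb` (schedules loaded from a watched directory of `schedule_<name>.json` files) and `examples/example_02.rb` (schedules added programmatically with `add_schedule`).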
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/libis/oai-schedules. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/libis/oai-schedules/blob/main/CODE_OF_CONDUCT.md).
36
+
37
+ ## License
38
+
39
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
40
+
41
+ ## Code of Conduct
42
+
43
+ Everyone interacting in the OaiSchedules project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/libis/oai-schedules/blob/main/CODE_OF_CONDUCT.md).
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "minitest/test_task"
5
+
6
+ Minitest::TestTask.create
7
+
8
+ require "standard/rake"
9
+
10
+ task default: %i[test standard]
@@ -0,0 +1,10 @@
1
+ {
2
+ "interval": "PT2S",
3
+ "active": true,
4
+ "repository": {
5
+ "uri": "https://eu.alma.exlibrisgroup.com/view/oai/32KUL_KUL/request"
6
+ },
7
+ "format": "marc21",
8
+ "set": "KUL_Rapid_Journals_Print_LendableInternational",
9
+ "from": "2025-03-23T00:00:00Z"
10
+ }
File without changes
@@ -0,0 +1,11 @@
1
+
2
+ $LOAD_PATH << File.expand_path('../lib', __dir__) # resolve lib/ relative to this script, not the current working directory
3
+ require 'oai_schedules/manager'
4
+
5
+ # usage with folder listener: every schedule_<name>.json file found in dir_schedules becomes a schedule
6
+
7
+ manager = OAISchedules::Manager.new(path_dir_schedules: File.join(__dir__, "dir_schedules"), path_dir_state: File.join(__dir__, "dir_state"))
8
+ # # alternative:
9
+ # manager = OAISchedules::Manager.new(path_dir_state: File.join(__dir__, "dir_state"))
10
+ # manager.set_listener_dir_schedules(File.join(__dir__, "dir_schedules"))
11
+ manager.run_listener_dir_schedules(block: true) # block: true makes the call sleep after starting the listener
@@ -0,0 +1,17 @@
1
+
2
+ $LOAD_PATH << '../lib'
3
+ require 'oai_schedules/manager'
4
+
5
+ # usage with programmatic schedules addition / modify / remove
6
+
7
+ manager = OAISchedules::Manager.new(path_dir_state: "./dir_state")
8
+ content_schedule = {
9
+ "interval" => "PT2S",
10
+ "active" => true,
11
+ "repository" => {
12
+ "uri" => "https://eudml.org/oai/OAIHandler"
13
+ },
14
+ "format" => "oai_dc"
15
+ }
16
+ manager.add_schedule("my_sample_schedule", content_schedule)
17
+ sleep
@@ -0,0 +1,448 @@
1
+
2
+
3
+ require 'listen'
4
+ require 'pathname'
5
+ require 'json'
6
+ require 'concurrent-ruby'
7
+ require 'logger'
8
+ require 'data_collector'
9
+ require 'iso8601'
10
+
11
+
12
+
13
+
14
+ module OAISchedules
15
+
16
+ class Manager
17
+
18
+ class TaskObserver # observer attached to each schedule's timer task; only failures are logged
19
+
20
+ def initialize(logger, name) # logger: receives the error messages; name: schedule name used as log prefix
21
+ @logger = logger
22
+ @name = name
23
+ end
24
+ def update(time, result, ex) # called with the outcome of a task run; `time` is not used here
25
+ if result # truthy result: treated as success, nothing is logged (logging is commented out)
26
+ # @logger.info("#{@name}: execution successfully returned: #{result}")
27
+ elsif ex.is_a?(Concurrent::TimeoutError) # timeouts are silently ignored (logging is commented out)
28
+ # @logger.warn("#{@name}: execution timed out")
29
+ else # any other outcome without a truthy result is reported as an error
30
+ @logger.error("#{@name}: execution failed with error: #{ex}")
31
+ end
32
+ end
33
+
34
+ end
35
+
36
+ private_constant :TaskObserver
37
+
38
+ module StateHarvesting
39
+ NOT_IDENTIFIED = 0
40
+ IDENTIFYING = 1
41
+ IDLE = 2
42
+ HARVESTING = 3
43
+ COMPLETE = 4
44
+ end
45
+
46
+ module EventHarvesting
47
+ REQUEST_IDENTIFY = 0
48
+ DONE_IDENTIFY = 1
49
+ REQUEST_HARVEST = 2
50
+ DONE_HARVEST = 3
51
+ DONE_FULL_HARVEST = 4
52
+ end
53
+
54
+ class StateMachineHarvesting
55
+
56
+ attr_reader :state
57
+
58
+ def initialize()
59
+ @state = StateHarvesting::NOT_IDENTIFIED
60
+ end
61
+
62
+ def reset()
63
+ @state = StateHarvesting::NOT_IDENTIFIED
64
+ end
65
+
66
+ def add_event(event)
67
+ case @state
68
+ when StateHarvesting::NOT_IDENTIFIED
69
+
70
+ case event
71
+ when EventHarvesting::REQUEST_IDENTIFY
72
+ @state = StateHarvesting::IDENTIFYING
73
+ else
74
+ @state
75
+ end
76
+
77
+ when StateHarvesting::IDENTIFYING
78
+
79
+ case event
80
+ when EventHarvesting::DONE_IDENTIFY
81
+ @state = StateHarvesting::IDLE
82
+ else
83
+ @state
84
+ end
85
+
86
+ when StateHarvesting::IDLE
87
+
88
+ case event
89
+ when EventHarvesting::REQUEST_HARVEST
90
+ @state = StateHarvesting::HARVESTING
91
+ else
92
+ @state
93
+ end
94
+
95
+ when StateHarvesting::HARVESTING
96
+
97
+ case event
98
+ when EventHarvesting::DONE_HARVEST
99
+ @state = StateHarvesting::IDLE
100
+ when EventHarvesting::DONE_FULL_HARVEST
101
+ @state = StateHarvesting::COMPLETE
102
+ else
103
+ @state
104
+ end
105
+
106
+ else
107
+ @state
108
+ end
109
+ end
110
+
111
+
112
+ end
113
+
114
+ private_constant :StateMachineHarvesting
115
+
116
+
117
+ class IntervalTooSmall < StandardError
118
+ end
119
+
120
+ attr_accessor :path_dir_state
121
+
122
+ def initialize(path_dir_schedules: nil, path_dir_state: nil)
123
+ @logger = Logger.new(STDOUT)
124
+ @path_dir_state = path_dir_state
125
+ @schedules = {}
126
+ @path_dir_schedules = path_dir_schedules
127
+ @listener_dir_schedules = nil
128
+ unless path_dir_schedules.nil?
129
+ set_listener_dir_schedules(path_dir_schedules)
130
+ end
131
+ end
132
+
133
+
134
+ def set_listener_dir_schedules(path_dir_schedules)
135
+ @path_dir_schedules = path_dir_schedules
136
+ paths_files_in_dir_schedules = get_all_files_in_dir(@path_dir_schedules)
137
+ @logger.info("schedules found in #{@path_dir_schedules}:")
138
+ @logger.info(paths_files_in_dir_schedules)
139
+ paths_files_in_dir_schedules.each do |fp|
140
+ handle_file_added(fp)
141
+ end
142
+ @listener_dir_schedules = Listen.to(@path_dir_schedules, only: /\.json$/) do |modified, added, removed|
143
+ modified.each do |fp|
144
+ handle_file_modified(fp)
145
+ end
146
+ added.each do |fp|
147
+ handle_file_added(fp)
148
+ end
149
+ removed.each do |fp|
150
+ handle_file_removed(fp)
151
+ end
152
+ end
153
+ end
154
+
155
+
156
+ def run_listener_dir_schedules(block: true)
157
+ if !@listener_dir_schedules.nil?
158
+ @logger.info("running listener in #{@path_dir_schedules}...")
159
+ @listener_dir_schedules.start
160
+ if block then sleep end
161
+ else
162
+ @logger.warn("no listening provided")
163
+ end
164
+ end
165
+
166
+
167
+ def stop_listener_dir_schedules()
168
+ @logger.info("stopping listener in #{@path_dir_schedules}...")
169
+ @listener_dir_schedules.stop
170
+ end
171
+
172
+
173
+ def listener_dir_schedules_running?
174
+ @listener_dir_schedules.processing?
175
+ end
176
+
177
+ def add_schedule(name, content)
178
+ # read state file if existing
179
+ path_file_state = get_path_state_file_from_schedule_name(name)
180
+ state = {}
181
+ if File.file?(path_file_state)
182
+ state = read_state_file(path_file_state)
183
+ end
184
+ # create task
185
+ task = Concurrent::TimerTask.new(run_now: false) {
186
+ logic(
187
+ name,
188
+ @schedules[name][:content],
189
+ @schedules[name][:state_machine],
190
+ @schedules[name][:state]
191
+ )
192
+ }
193
+ task.add_observer(TaskObserver.new(@logger, name))
194
+ # add item to schedules
195
+ @schedules[name] = {
196
+ content: content,
197
+ task: task,
198
+ state_machine: StateMachineHarvesting.new,
199
+ state: state
200
+ }
201
+ # configure task parameters
202
+ handle_schedule_task(name)
203
+ end
204
+
205
+
206
+
207
+ def remove_schedule(name)
208
+ @schedules[name][:task].shutdown
209
+ @schedules[name].delete(name)
210
+ end
211
+
212
+
213
+
214
+ def modify_schedule(name, content)
215
+ @schedules[name][:content] = content
216
+ handle_schedule_task(name)
217
+ end
218
+
219
+
220
+
221
+
222
+ private
223
+
224
+
225
+ def handle_schedule_task(name) # applies the content of schedule `name` to its timer task: interval first, then start or stop
226
+ task = @schedules[name][:task]
227
+ interval_s_safe = 60 # fallback interval (s) used when the configured interval is rejected
228
+ begin
229
+ duration = ISO8601::Duration.new(@schedules[name][:content]["interval"]) # "interval" is parsed as an ISO 8601 duration, e.g. "PT2S"
230
+ interval_s = duration.to_seconds
231
+ @logger.info("#{name}: task interval (s): #{interval_s}")
232
+ th_interval_s = 0.1 # protects from negative, 0 or small time intervals
233
+ if interval_s < th_interval_s
234
+ raise IntervalTooSmall, "Interval smaller than #{th_interval_s} seconds"
235
+ end
236
+ rescue ISO8601::Errors::UnknownPattern, IntervalTooSmall => e # rejected interval: log, force the schedule inactive, use the fallback
237
+ @logger.error("#{name}: #{e.message}")
238
+ @logger.error("#{name}: task will be forced as inactive, interval of #{interval_s_safe} seconds")
239
+ @schedules[name][:content]["active"] = false
240
+ interval_s = interval_s_safe
241
+ ensure # NOTE(review): also runs when an error not rescued above propagates; interval_s may still be nil then
242
+ task.execution_interval = interval_s
243
+ end
244
+ @logger.info("#{name}: task active: #{@schedules[name][:content]["active"]}")
245
+ if @schedules[name][:content]["active"] # the "active" flag of the content decides whether the task runs
246
+ task.execute
247
+ else
248
+ task.shutdown
249
+ end
250
+ end
251
+
252
+
253
+
254
+
255
+ def get_name_schedule_from_name_file(name_file)
256
+ prefix = "schedule_"
257
+ name_schedule = nil
258
+ if name_file.start_with?(prefix) && name_file.end_with?(".json")
259
+ name_file_noext = name_file.chomp(".json")
260
+ tmp = name_file_noext.split(prefix)
261
+ if tmp.size > 1
262
+ name_schedule = tmp[1]
263
+ @logger.info("name schedule: #{name_schedule}")
264
+ return name_schedule
265
+ end
266
+ end
267
+ @logger.info("#{name_file} not a schedule file")
268
+ name_schedule
269
+ end
270
+
271
+ def read_schedule_file(fp)
272
+ @logger.info("reading schedule file: #{fp}")
273
+ content_schedule = JSON.parse(File.read(fp))
274
+ content_schedule
275
+ end
276
+
277
+
278
+ def read_state_file(fp)
279
+ @logger.info("reading state file: #{fp}")
280
+ content_state = JSON.parse(File.read(fp))
281
+ content_state
282
+ end
283
+
284
+ def write_state_file(fp, content)
285
+ @logger.info("writing state file: #{fp}")
286
+ File.open(fp, 'w') do |f|
287
+ f.write(JSON.pretty_generate(content))
288
+ end
289
+ end
290
+
291
+
292
+ def get_path_state_file_from_schedule_name(name)
293
+ File.join(@path_dir_state, "state_#{name}.json")
294
+ end
295
+
296
+
297
+ def get_name_file_from_path_file(fp)
298
+ Pathname.new(fp).basename.to_s
299
+ end
300
+
301
+
302
+ def get_all_files_in_dir(path_dir)
303
+ Dir.entries(path_dir).map { |f| File.join(path_dir, f) } .select { |f| File.file?(f) }
304
+ end
305
+
306
+
307
+ def logic(name, content, state_machine, state) # one run of schedule `name`: steps the state machine until a `break`, then returns {}
308
+ loop do
309
+ @logger.info("#{name}: handling state: #{state_machine.state}")
310
+ case state_machine.state
311
+ when StateHarvesting::NOT_IDENTIFIED # no request here: move to IDENTIFYING and loop again
312
+ state_machine.add_event(EventHarvesting::REQUEST_IDENTIFY)
313
+ when StateHarvesting::IDENTIFYING
314
+ data = oai_identify(
315
+ name,
316
+ content["repository"]["uri"]
317
+ )
318
+ state["identify"] = data # keep the Identify response in the schedule state
319
+ state_machine.add_event(EventHarvesting::DONE_IDENTIFY)
320
+ break # this run ends here; the state machine is now IDLE
321
+ when StateHarvesting::IDLE # no request here: move to HARVESTING and loop again
322
+ state_machine.add_event(EventHarvesting::REQUEST_HARVEST)
323
+ when StateHarvesting::HARVESTING
324
+ unless state.has_key?("count_partial_harversting")
325
+ state["count_partial_harversting"] = 0
326
+ end
327
+ if !state["resumption_token"].nil? # a stored token exists: continue with the token only, all other arguments empty
328
+ format = ""
329
+ from = ""
330
+ to = ""
331
+ set = ""
332
+ resumption_token = state["resumption_token"]
333
+ else # no stored token: start from the arguments in the schedule content and reset the counter
334
+ format = content["format"] || ""
335
+ from = content["from"] || ""
336
+ to = content["until"] || ""
337
+ set = content["set"] || ""
338
+ resumption_token = ""
339
+ state["count_partial_harversting"] = 0
340
+ end
341
+ data = oai_get_records(
342
+ name,
343
+ content["repository"]["uri"],
344
+ format,
345
+ from,
346
+ to,
347
+ set,
348
+ resumption_token
349
+ )
350
+ state["resumption_token"] = data["resumptionToken"] # presumably nil when the response has no token; verify against DataCollector output
351
+ state["count_partial_harversting"] += 1
352
+ path_file_state = get_path_state_file_from_schedule_name(name)
353
+ @logger.info("#{name}: writing to state file #{path_file_state}")
354
+ write_state_file(path_file_state, state) # the state is persisted after every ListRecords request
355
+ if !data["resumptionToken"].nil? # token present: back to IDLE; otherwise the harvest is COMPLETE
356
+ state_machine.add_event(EventHarvesting::DONE_HARVEST)
357
+ else
358
+ state_machine.add_event(EventHarvesting::DONE_FULL_HARVEST)
359
+ end
360
+ break
361
+ when StateHarvesting::COMPLETE
362
+ @logger.warn("#{name}: full harvesting complete")
363
+ content["active"] = false # handle_schedule_task shuts the task down when "active" is false
364
+ handle_schedule_task(name)
365
+ break
366
+ else # NOTE(review): no break in this branch, so an unknown state would keep looping
367
+ @logger.warn("#{name}: state #{state_machine.state} not known")
368
+ end
369
+ end
370
+ {} # value returned to the timer task; TaskObserver#update treats a truthy result as success
371
+ end
372
+
373
+
374
+ def oai_identify(name, url_base) # sends the Identify request to url_base; returns the first Identify element, raises RuntimeError on failure
375
+ verb = "Identify"
376
+ url_query = "#{url_base}?verb=#{verb}"
377
+ @logger.info("#{name}: fetching from #{url_query}")
378
+ data = DataCollector::Core.filter(DataCollector::Input.new.from_uri(url_query), "$..#{verb}")
379
+ if data.empty?
380
+ raise StandardError, "GET #{url_query}: URL not available, or response has no element #{verb}" # the rescue below prepends the name once
381
+ end
382
+ data[0]
383
+ rescue DataCollector::InputError
384
+ raise RuntimeError, "#{name}: #{url_query} not found, or server error"
385
+ rescue StandardError => e # also catches the StandardError raised above and re-raises it with the schedule name
386
+ raise RuntimeError, "#{name}: #{e.message}"
387
+ end
388
+
389
+
390
+ def oai_get_records(name, url_base, metadata_prefix, from, to, set, resumption_token) # sends ListRecords; empty-string arguments are left out of the query
391
+ verb = "ListRecords"
392
+ url_query = "#{url_base}?verb=#{verb}"
393
+ unless metadata_prefix.empty?
394
+ url_query += "&metadataPrefix=#{metadata_prefix}"
395
+ end
396
+ unless from.empty?
397
+ url_query += "&from=#{from}"
398
+ end
399
+ unless to.empty?
400
+ url_query += "&until=#{to}"
401
+ end
402
+ unless set.empty?
403
+ url_query += "&set=#{set}"
404
+ end
405
+ unless resumption_token.empty?
406
+ url_query += "&resumptionToken=#{resumption_token}"
407
+ end
408
+ @logger.info("#{name}: fetching from #{url_query}")
409
+ data = DataCollector::Core.filter(DataCollector::Input.new.from_uri(url_query), "$..#{verb}")
410
+ if data.empty?
411
+ raise StandardError, "GET #{url_query}: URL not available, or response has no element #{verb}" # the rescue below prepends the name once
412
+ end
413
+ data[0] # first ListRecords element; returned to the caller
414
+ rescue DataCollector::InputError
415
+ raise RuntimeError, "#{name}: #{url_query} not found, or server error"
416
+ rescue StandardError => e # also catches the StandardError raised above and re-raises it with the schedule name
417
+ raise RuntimeError, "#{name}: #{e.message}"
418
+ end
419
+
420
+
421
+ def handle_file_added(fp)
422
+ @logger.info("added file: #{fp}")
423
+ name_file = get_name_file_from_path_file(fp)
424
+ name_schedule = get_name_schedule_from_name_file(name_file)
425
+ if name_schedule.nil? then return end
426
+ content_schedule = read_schedule_file(fp)
427
+ add_schedule(name_schedule, content_schedule)
428
+ end
429
+
430
+ def handle_file_removed(fp)
431
+ @logger.info("removed file: #{fp}")
432
+ name_file = get_name_file_from_path_file(fp)
433
+ name_schedule = get_name_schedule_from_name_file(name_file)
434
+ remove_schedule(name_schedule)
435
+ end
436
+
437
+ def handle_file_modified(fp)
438
+ @logger.info("modified file: #{fp}")
439
+ name_file = get_name_file_from_path_file(fp)
440
+ name_schedule = get_name_schedule_from_name_file(name_file)
441
+ if name_schedule.nil? then return end
442
+ content_schedule = read_schedule_file(fp)
443
+ modify_schedule(name_schedule, content_schedule)
444
+ end
445
+
446
+ end
447
+
448
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OaiSchedules
4
+ VERSION = "0.1.0"
5
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "oai_schedules/version"
4
+
5
+ module OaiSchedules
6
+ class Error < StandardError; end
7
+ # Your code goes here...
8
+ end
@@ -0,0 +1,4 @@
1
+ module OaiSchedules
2
+ VERSION: String
3
+ # See the writing guide of rbs: https://github.com/ruby/rbs#guides
4
+ end
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: oai_schedules
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Davide Monari
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 2025-03-25 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: concurrent-ruby
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 1.3.5
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: 1.3.5
26
+ - !ruby/object:Gem::Dependency
27
+ name: data_collector
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: 0.61.0
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 0.61.0
40
+ - !ruby/object:Gem::Dependency
41
+ name: iso8601
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: 0.13.0
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: 0.13.0
54
+ - !ruby/object:Gem::Dependency
55
+ name: listen
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: 3.9.0
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: 3.9.0
68
+ description: gem to run concurrent OAI-PMH harvesting schedules
69
+ email:
70
+ - davide.monari@kuleuven.be
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - ".standard.yml"
76
+ - CHANGELOG.md
77
+ - CODE_OF_CONDUCT.md
78
+ - LICENSE.txt
79
+ - README.md
80
+ - Rakefile
81
+ - examples/dir_schedules/schedule_sample.json
82
+ - examples/dir_state/.gitkeep
83
+ - examples/example_01.rb
84
+ - examples/example_02.rb
85
+ - lib/oai_schedules.rb
86
+ - lib/oai_schedules/manager.rb
87
+ - lib/oai_schedules/version.rb
88
+ - sig/oai_schedules.rbs
89
+ homepage: https://github.com/libis/oai-schedules
90
+ licenses:
91
+ - MIT
92
+ metadata:
93
+ allowed_push_host: https://rubygems.org
94
+ homepage_uri: https://github.com/libis/oai-schedules
95
+ source_code_uri: https://github.com/libis/oai-schedules
96
+ changelog_uri: https://github.com/libis/oai-schedules/blob/main/CHANGELOG.md
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: 3.1.0
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubygems_version: 3.6.2
112
+ specification_version: 4
113
+ summary: gem to run concurrent OAI-PMH harvesting schedules
114
+ test_files: []