relaton-w3c 2.1.2 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <grammar xmlns="http://relaxng.org/ns/structure/1.0">
3
- <include href="basicdoc.rng"/>
4
- <include href="relaton-w3c.rng"/>
5
- <start>
6
- <choice>
7
- <ref name="bibitem"/>
8
- <ref name="bibdata"/>
9
- </choice>
10
- </start>
11
- </grammar>
@@ -1,11 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <grammar xmlns="http://relaxng.org/ns/structure/1.0">
3
- <include href="biblio-standoc.rng">
4
- <define name="DocumentType">
5
- <choice>
6
- <value>groupNote</value>
7
- <value>technicalReport</value>
8
- </choice>
9
- </define>
10
- </include>
11
- </grammar>
@@ -1,62 +0,0 @@
1
- require "concurrent/map"
2
-
3
- module Relaton
4
- module W3c
5
- module RateLimitHandler
6
- MAX_RETRIES = 5
7
- RETRYABLE_ERRORS = [
8
- NameError, Lutaml::Hal::ConnectionError, Lutaml::Hal::TimeoutError,
9
- Lutaml::Hal::ServerError, Faraday::ConnectionFailed, Net::OpenTimeout,
10
- ].freeze
11
-
12
- # Concurrent::Map so multiple fetcher threads can hit the cache without
13
- # a global lock. Duplicate concurrent fetches of the same URL are
14
- # possible but harmless; the second write just replaces the first.
15
- def self.fetched_objects
16
- @fetched_objects ||= Concurrent::Map.new
17
- end
18
-
19
- def realize(obj)
20
- href = resolve_href(obj)
21
- return RateLimitHandler.fetched_objects[href] if RateLimitHandler.fetched_objects.key?(href)
22
-
23
- attempt = 1
24
- begin
25
- RateLimitHandler.fetched_objects[href] = obj.realize
26
- rescue *RETRYABLE_ERRORS => e
27
- if attempt < MAX_RETRIES
28
- sleep_time = attempt * attempt
29
- attempt += 1
30
- Util.warn "Rate limit exceeded for #{href}, retrying in #{sleep_time} seconds..."
31
- sleep sleep_time
32
- retry
33
- elsif e.is_a?(Lutaml::Hal::ServerError)
34
- # Persistent 5xx — cache nil so a permanently broken upstream
35
- # resource is skipped on the next lookup instead of re-tried.
36
- Util.warn "Server error for #{href}, skipping: #{e.message}"
37
- RateLimitHandler.fetched_objects[href] = nil
38
- else
39
- # Do not cache on retries exhausted — transient failures should not
40
- # permanently poison the cache; subsequent calls will retry fresh.
41
- Util.warn "Failed to realize object: #{href}, error: #{e.message}"
42
- end
43
- rescue Lutaml::Hal::NotFoundError
44
- Util.warn "Object not found: #{href}"
45
- RateLimitHandler.fetched_objects[href] = nil
46
- rescue Lutaml::Hal::Error => e
47
- # Non-retryable client-side errors (403/401/400 and any other
48
- # Lutaml::Hal::Error not matched above) — skip the resource and
49
- # continue rather than aborting the whole crawl.
50
- Util.warn "Client error for #{href}, skipping: #{e.message}"
51
- RateLimitHandler.fetched_objects[href] = nil
52
- end
53
- end
54
-
55
- private
56
-
57
- def resolve_href(obj)
58
- obj.href || obj.links.self.href
59
- end
60
- end
61
- end
62
- end