relaton-w3c 2.1.1 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +15 -6
- data/Gemfile +1 -0
- data/lib/relaton/w3c/data_fetcher.rb +73 -19
- data/lib/relaton/w3c/data_parser.rb +9 -6
- data/lib/relaton/w3c/safe_realize.rb +55 -0
- data/lib/relaton/w3c/version.rb +1 -1
- data/relaton-w3c.gemspec +1 -9
- metadata +5 -122
- data/grammars/basicdoc.rng +0 -2140
- data/grammars/biblio-standoc.rng +0 -268
- data/grammars/biblio.rng +0 -2125
- data/grammars/relaton-w3c-compile.rng +0 -11
- data/grammars/relaton-w3c.rng +0 -11
- data/lib/relaton/w3c/rate_limit_handler.rb +0 -51
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
|
|
3
|
-
<include href="basicdoc.rng"/>
|
|
4
|
-
<include href="relaton-w3c.rng"/>
|
|
5
|
-
<start>
|
|
6
|
-
<choice>
|
|
7
|
-
<ref name="bibitem"/>
|
|
8
|
-
<ref name="bibdata"/>
|
|
9
|
-
</choice>
|
|
10
|
-
</start>
|
|
11
|
-
</grammar>
|
data/grammars/relaton-w3c.rng
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
|
|
3
|
-
<include href="biblio-standoc.rng">
|
|
4
|
-
<define name="DocumentType">
|
|
5
|
-
<choice>
|
|
6
|
-
<value>groupNote</value>
|
|
7
|
-
<value>technicalReport</value>
|
|
8
|
-
</choice>
|
|
9
|
-
</define>
|
|
10
|
-
</include>
|
|
11
|
-
</grammar>
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
module Relaton
|
|
2
|
-
module W3c
|
|
3
|
-
module RateLimitHandler
|
|
4
|
-
MAX_RETRIES = 5
|
|
5
|
-
RETRYABLE_ERRORS = [
|
|
6
|
-
NameError, Lutaml::Hal::ConnectionError, Lutaml::Hal::TimeoutError,
|
|
7
|
-
Lutaml::Hal::ServerError, Faraday::ConnectionFailed, Net::OpenTimeout,
|
|
8
|
-
].freeze
|
|
9
|
-
|
|
10
|
-
def self.fetched_objects
|
|
11
|
-
@fetched_objects ||= {}
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def realize(obj)
|
|
15
|
-
href = resolve_href(obj)
|
|
16
|
-
return RateLimitHandler.fetched_objects[href] if RateLimitHandler.fetched_objects.key?(href)
|
|
17
|
-
|
|
18
|
-
attempt = 1
|
|
19
|
-
begin
|
|
20
|
-
RateLimitHandler.fetched_objects[href] = obj.realize
|
|
21
|
-
rescue *RETRYABLE_ERRORS => e
|
|
22
|
-
if attempt < MAX_RETRIES
|
|
23
|
-
sleep_time = attempt * attempt
|
|
24
|
-
attempt += 1
|
|
25
|
-
Util.warn "Rate limit exceeded for #{href}, retrying in #{sleep_time} seconds..."
|
|
26
|
-
sleep sleep_time
|
|
27
|
-
retry
|
|
28
|
-
elsif e.is_a?(Lutaml::Hal::ServerError)
|
|
29
|
-
# Persistent 5xx — cache nil so a permanently broken upstream
|
|
30
|
-
# resource is skipped on the next lookup instead of re-tried.
|
|
31
|
-
Util.warn "Server error for #{href}, skipping: #{e.message}"
|
|
32
|
-
RateLimitHandler.fetched_objects[href] = nil
|
|
33
|
-
else
|
|
34
|
-
# Do not cache on retries exhausted — transient failures should not
|
|
35
|
-
# permanently poison the cache; subsequent calls will retry fresh.
|
|
36
|
-
Util.warn "Failed to realize object: #{href}, error: #{e.message}"
|
|
37
|
-
end
|
|
38
|
-
rescue Lutaml::Hal::NotFoundError
|
|
39
|
-
Util.warn "Object not found: #{href}"
|
|
40
|
-
RateLimitHandler.fetched_objects[href] = nil
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
private
|
|
45
|
-
|
|
46
|
-
def resolve_href(obj)
|
|
47
|
-
obj.href || obj.links.self.href
|
|
48
|
-
end
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
end
|