relaton-cie 1.19.1 → 1.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/relaton_cie/data_fetcher.rb +21 -9
- data/lib/relaton_cie/scrapper.rb +15 -7
- data/lib/relaton_cie/version.rb +1 -1
- data/relaton_cie.gemspec +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: dd5511102fe58e22ea8bc53bab129f8ae6fc00b9fb5fd7647d4ff20e12f1a912
|
|
4
|
+
data.tar.gz: f7e2cb38965575cfe36976076cf08014d6d695a36452daef02634cef6dfaace1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f3f17b076c36358e79ad828500f727ff102938461826fd54b844a09db211befe4c40a6b9321269b6e9f0a553abd0fadaeb77342f8f5e418e811d7eff31917a9c
|
|
7
|
+
data.tar.gz: dede39458440d919b9babdfb95909503e2ddfcbeb5902c37e78617002793b869f41ab41b21971dac07c95f445a33a0b0866cabcae229a3f86d943336ddcbf7fe
|
|
@@ -17,7 +17,18 @@ module RelatonCie
|
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
def agent
|
|
20
|
-
@agent
|
|
20
|
+
return @agent if @agent
|
|
21
|
+
|
|
22
|
+
@agent = Mechanize.new
|
|
23
|
+
@agent.request_headers = {
|
|
24
|
+
"Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
|
25
|
+
"Accept-Language" => "en-US,en;q=0.5",
|
|
26
|
+
"Connection" => "keep-alive",
|
|
27
|
+
"sec-ch-ua" => '"Chromium";v="91", "Google Chrome";v="91", ";Not A Brand";v="99"',
|
|
28
|
+
"Sec-Fetch-Dest" => "document"
|
|
29
|
+
}
|
|
30
|
+
@agent.user_agent_alias = "Linux Firefox"
|
|
31
|
+
@agent
|
|
21
32
|
end
|
|
22
33
|
|
|
23
34
|
def index
|
|
@@ -139,17 +150,18 @@ module RelatonCie
|
|
|
139
150
|
(?:,?\s(?<sname2>[\w-]{2,})(?=,\s+\w\.))?
|
|
140
151
|
(?:,?\s(?<fname>[\w-]{2,})(?!,\s+\w\.))?
|
|
141
152
|
(?:(?:\s?,\s?|\s)(?<init>(?:\w(?:\s?\.|\s|,|$)[\s-]?)+))?
|
|
142
|
-
(?:(
|
|
153
|
+
(?:(?:[,;]\s*|\s+|\.|(?<=\s))(?:and\s)?)?/x =~ authors
|
|
143
154
|
raise StandardError, "Author name not found in \"#{authors}\"" unless $LAST_MATCH_INFO
|
|
144
155
|
|
|
145
156
|
authors.sub! $LAST_MATCH_INFO.to_s, ""
|
|
146
157
|
sname = [sname1, sname2].compact.join " "
|
|
147
158
|
surname = RelatonBib::LocalizedString.new sname, "en", "Latn"
|
|
148
|
-
|
|
149
|
-
|
|
159
|
+
forename = []
|
|
160
|
+
forename << RelatonBib::Forename.new(content: fname, language: "en", script: "Latn") if fname
|
|
161
|
+
(init&.strip || "").split(/(?:,|\.)(?:-|\s)?/).each do |int|
|
|
162
|
+
forename << RelatonBib::Forename.new(content: "", initial: int.strip, language: "en", script: "Latn")
|
|
150
163
|
end
|
|
151
|
-
|
|
152
|
-
fullname = RelatonBib::FullName.new surname: surname, forename: forename, initial: initial
|
|
164
|
+
fullname = RelatonBib::FullName.new surname: surname, forename: forename
|
|
153
165
|
person = RelatonBib::Person.new name: fullname
|
|
154
166
|
contribs << { entity: person, role: [{ type: "author" }] }
|
|
155
167
|
end
|
|
@@ -189,7 +201,7 @@ module RelatonCie
|
|
|
189
201
|
|
|
190
202
|
# @param hit [Nokogiri::HTML::Element]
|
|
191
203
|
def parse_page(hit) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
192
|
-
url =
|
|
204
|
+
url = hit.at('h3/a')[:href]
|
|
193
205
|
doc = time_req { agent.get url }
|
|
194
206
|
item = BibliographicItem.new(
|
|
195
207
|
type: "standard", link: fetch_link(url), docnumber: fetch_docnumber(hit),
|
|
@@ -220,8 +232,8 @@ module RelatonCie
|
|
|
220
232
|
def time_req
|
|
221
233
|
t1 = Time.now
|
|
222
234
|
result = yield
|
|
223
|
-
t =
|
|
224
|
-
sleep t
|
|
235
|
+
t = [4 - (Time.now - t1), 2].max
|
|
236
|
+
sleep t
|
|
225
237
|
result
|
|
226
238
|
end
|
|
227
239
|
|
data/lib/relaton_cie/scrapper.rb
CHANGED
|
@@ -11,22 +11,30 @@ module RelatonCie
|
|
|
11
11
|
row = index.search(code).min_by { |r| r[:id] }
|
|
12
12
|
return unless row
|
|
13
13
|
|
|
14
|
-
parse_page "#{ENDPOINT}#{row[:file]}"
|
|
15
|
-
rescue OpenURI::HTTPError => e
|
|
16
|
-
|
|
14
|
+
parse_page "#{ENDPOINT}#{row[:file]}", code
|
|
15
|
+
# rescue OpenURI::HTTPError => e
|
|
16
|
+
# return if e.io.status.first == "404"
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
# raise RelatonBib::RequestError, "No document found for #{code} reference. #{e.message}"
|
|
19
19
|
end
|
|
20
20
|
|
|
21
21
|
private
|
|
22
22
|
|
|
23
23
|
# @param url [String]
|
|
24
24
|
# @retrurn [RelatoCie::BibliographicItem]
|
|
25
|
-
def parse_page(url)
|
|
26
|
-
|
|
27
|
-
bib_hash = RelatonBib::HashConverter.hash_to_bib YAML.safe_load(
|
|
25
|
+
def parse_page(url, code)
|
|
26
|
+
resp = Mechanize.new.get url
|
|
27
|
+
bib_hash = RelatonBib::HashConverter.hash_to_bib YAML.safe_load(resp.body)
|
|
28
28
|
bib_hash[:fetched] = Date.today.to_s
|
|
29
29
|
RelatonCie::BibliographicItem.new(**bib_hash)
|
|
30
|
+
rescue Mechanize::ResponseCodeError => e
|
|
31
|
+
return if e.response_code == "404"
|
|
32
|
+
|
|
33
|
+
raise RelatonBib::RequestError, "No document found for #{code} reference. #{e.message}"
|
|
34
|
+
rescue Mechanize::RedirectLimitReachedError, Timeout::Error,
|
|
35
|
+
Mechanize::UnauthorizedError, Mechanize::UnsupportedSchemeError,
|
|
36
|
+
Mechanize::ResponseReadError, Mechanize::ChunkedTerminationError => e
|
|
37
|
+
raise RelatonBib::RequestError, "No document found for #{code} reference. #{e.message}"
|
|
30
38
|
end
|
|
31
39
|
end
|
|
32
40
|
end
|
data/lib/relaton_cie/version.rb
CHANGED
data/relaton_cie.gemspec
CHANGED
|
@@ -31,6 +31,6 @@ Gem::Specification.new do |spec|
|
|
|
31
31
|
|
|
32
32
|
spec.add_dependency "mechanize", "~> 2.10"
|
|
33
33
|
spec.add_dependency "parslet", "~> 2.0.0"
|
|
34
|
-
spec.add_dependency "relaton-bib", "~> 1.
|
|
34
|
+
spec.add_dependency "relaton-bib", "~> 1.20.0"
|
|
35
35
|
spec.add_dependency "relaton-index", "~> 0.2.0"
|
|
36
36
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-cie
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.20.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-01-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: mechanize
|
|
@@ -44,14 +44,14 @@ dependencies:
|
|
|
44
44
|
requirements:
|
|
45
45
|
- - "~>"
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: 1.
|
|
47
|
+
version: 1.20.0
|
|
48
48
|
type: :runtime
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: 1.
|
|
54
|
+
version: 1.20.0
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: relaton-index
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -109,7 +109,7 @@ licenses:
|
|
|
109
109
|
- BSD-2-Clause
|
|
110
110
|
metadata:
|
|
111
111
|
homepage_uri: https://github.com/metanorma/relaton-cie
|
|
112
|
-
post_install_message:
|
|
112
|
+
post_install_message:
|
|
113
113
|
rdoc_options: []
|
|
114
114
|
require_paths:
|
|
115
115
|
- lib
|
|
@@ -124,8 +124,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
124
124
|
- !ruby/object:Gem::Version
|
|
125
125
|
version: '0'
|
|
126
126
|
requirements: []
|
|
127
|
-
rubygems_version: 3.
|
|
128
|
-
signing_key:
|
|
127
|
+
rubygems_version: 3.5.22
|
|
128
|
+
signing_key:
|
|
129
129
|
specification_version: 4
|
|
130
130
|
summary: 'RelatonEcma: retrieve CIE Standards for bibliographic use using the BibliographicItem
|
|
131
131
|
model.'
|