relaton-nist 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/relaton_nist/scrapper.rb +16 -9
- data/lib/relaton_nist/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84efea59e2f35758f44b21e42574ebc3bfbda66f95d7d1c905ae645d42d55711
|
4
|
+
data.tar.gz: c58d8aa8f2bc6da70c04793d196f90b4b5a0ab7868b97f7ef17fc3df3ff5f17b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22d62f0a3a04d03efa7ca6bf2f8d2dda988da7346fac312ac58d43998a712f9bd35bf2481f2a6aebfd4507dcc4b6e8d9f75bc10de0fd51e5e2e9f22edeeecad2
|
7
|
+
data.tar.gz: 8193871dba1c5c64ba39839ed38eb34632c653ef55daf4e6850a5d4fe73446af64d27fd7eb2170ed73d0b79efb96ddbb8fee37871b986422b4ea0c1586157b7d
|
data/.gitignore
CHANGED
data/lib/relaton_nist/scrapper.rb
CHANGED
@@ -87,7 +87,12 @@ module RelatonNist
|
|
87
87
|
# @return [Array<Nokogiri::HTML::Document, String>]
|
88
88
|
def get_page(url)
|
89
89
|
uri = URI url
|
90
|
-
resp = Net::HTTP.get_response(uri)
|
90
|
+
resp = Net::HTTP.get_response(uri)
|
91
|
+
%r{(?<=newLocation = 'https://' \+ window.location.hostname \+ ')(?<path>[^']+)} =~ resp.body
|
92
|
+
if path
|
93
|
+
uri = URI HitCollection::DOMAIN + path
|
94
|
+
resp = Net::HTTP.get_response(uri)
|
95
|
+
end
|
91
96
|
Nokogiri::HTML(resp.body)
|
92
97
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
93
98
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
|
@@ -104,7 +109,7 @@ module RelatonNist
|
|
104
109
|
else
|
105
110
|
doc.at(
|
106
111
|
"//div[contains(@class, 'publications-detail')]/h3",
|
107
|
-
)&.text&.strip
|
112
|
+
)&.text&.strip.sub(/(?<=\w)\([^\)]+\)$/) { |m| " " + m.upcase}.squeeze " "
|
108
113
|
end
|
109
114
|
item_ref ||= "?"
|
110
115
|
[RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
|
@@ -138,7 +143,7 @@ module RelatonNist
|
|
138
143
|
when "withdrawn"
|
139
144
|
stage = "final"
|
140
145
|
subst = "withdrawn"
|
141
|
-
when
|
146
|
+
when /^draft/
|
142
147
|
stage = "draft-public"
|
143
148
|
subst = "active"
|
144
149
|
else
|
@@ -352,9 +357,9 @@ module RelatonNist
|
|
352
357
|
links << { type: "uri", content: doc["uri"] } if doc["uri"]
|
353
358
|
doi = "https://doi.org/" + doc["doi"] if doc["doi"]
|
354
359
|
else
|
355
|
-
pub = doc.at "//p/strong[
|
356
|
-
pdf = pub
|
357
|
-
doi = pub
|
360
|
+
pub = doc.at "//p/strong[contains(., 'Publication:')]"
|
361
|
+
pdf = pub&.at "./following-sibling::a[.=' Local Download']"
|
362
|
+
doi = pub&.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
|
358
363
|
links << { type: "pdf", content: pdf[:href] } if pdf
|
359
364
|
end
|
360
365
|
links << { type: "doi", content: doi } if doi
|
@@ -417,10 +422,12 @@ module RelatonNist
|
|
417
422
|
else idx + 1
|
418
423
|
end
|
419
424
|
|
420
|
-
content = s.text.match(/^[^\(]+/).to_s.strip.
|
425
|
+
content = s.text.match(/^[^\(]+/).to_s.strip.squeeze " "
|
421
426
|
|
422
|
-
ref = case
|
423
|
-
when
|
427
|
+
ref = case s.text
|
428
|
+
when /^Draft/ then content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
|
429
|
+
when /\(Draft\)/ then content + " (#{iter}PD)"
|
430
|
+
else content
|
424
431
|
end
|
425
432
|
|
426
433
|
fref = RelatonBib::FormattedRef.new(
|
data/lib/relaton_nist/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01-
|
11
|
+
date: 2020-01-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|