relaton-nist 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/relaton_nist/scrapper.rb +16 -9
- data/lib/relaton_nist/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84efea59e2f35758f44b21e42574ebc3bfbda66f95d7d1c905ae645d42d55711
|
4
|
+
data.tar.gz: c58d8aa8f2bc6da70c04793d196f90b4b5a0ab7868b97f7ef17fc3df3ff5f17b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22d62f0a3a04d03efa7ca6bf2f8d2dda988da7346fac312ac58d43998a712f9bd35bf2481f2a6aebfd4507dcc4b6e8d9f75bc10de0fd51e5e2e9f22edeeecad2
|
7
|
+
data.tar.gz: 8193871dba1c5c64ba39839ed38eb34632c653ef55daf4e6850a5d4fe73446af64d27fd7eb2170ed73d0b79efb96ddbb8fee37871b986422b4ea0c1586157b7d
|
data/.gitignore
CHANGED
data/lib/relaton_nist/scrapper.rb
CHANGED
@@ -87,7 +87,12 @@ module RelatonNist
|
|
87
87
|
# @return [Array<Nokogiri::HTML::Document, String>]
|
88
88
|
def get_page(url)
|
89
89
|
uri = URI url
|
90
|
-
resp = Net::HTTP.get_response(uri)
|
90
|
+
resp = Net::HTTP.get_response(uri)
|
91
|
+
%r{(?<=newLocation = 'https://' \+ window.location.hostname \+ ')(?<path>[^']+)} =~ resp.body
|
92
|
+
if path
|
93
|
+
uri = URI HitCollection::DOMAIN + path
|
94
|
+
resp = Net::HTTP.get_response(uri)
|
95
|
+
end
|
91
96
|
Nokogiri::HTML(resp.body)
|
92
97
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
93
98
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
|
@@ -104,7 +109,7 @@ module RelatonNist
|
|
104
109
|
else
|
105
110
|
doc.at(
|
106
111
|
"//div[contains(@class, 'publications-detail')]/h3",
|
107
|
-
)&.text&.strip
|
112
|
+
)&.text&.strip.sub(/(?<=\w)\([^\)]+\)$/) { |m| " " + m.upcase}.squeeze " "
|
108
113
|
end
|
109
114
|
item_ref ||= "?"
|
110
115
|
[RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
|
@@ -138,7 +143,7 @@ module RelatonNist
|
|
138
143
|
when "withdrawn"
|
139
144
|
stage = "final"
|
140
145
|
subst = "withdrawn"
|
141
|
-
when
|
146
|
+
when /^draft/
|
142
147
|
stage = "draft-public"
|
143
148
|
subst = "active"
|
144
149
|
else
|
@@ -352,9 +357,9 @@ module RelatonNist
|
|
352
357
|
links << { type: "uri", content: doc["uri"] } if doc["uri"]
|
353
358
|
doi = "https://doi.org/" + doc["doi"] if doc["doi"]
|
354
359
|
else
|
355
|
-
pub = doc.at "//p/strong[
|
356
|
-
pdf = pub
|
357
|
-
doi = pub
|
360
|
+
pub = doc.at "//p/strong[contains(., 'Publication:')]"
|
361
|
+
pdf = pub&.at "./following-sibling::a[.=' Local Download']"
|
362
|
+
doi = pub&.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
|
358
363
|
links << { type: "pdf", content: pdf[:href] } if pdf
|
359
364
|
end
|
360
365
|
links << { type: "doi", content: doi } if doi
|
@@ -417,10 +422,12 @@ module RelatonNist
|
|
417
422
|
else idx + 1
|
418
423
|
end
|
419
424
|
|
420
|
-
content = s.text.match(/^[^\(]+/).to_s.strip.
|
425
|
+
content = s.text.match(/^[^\(]+/).to_s.strip.squeeze " "
|
421
426
|
|
422
|
-
ref = case
|
423
|
-
when
|
427
|
+
ref = case s.text
|
428
|
+
when /^Draft/ then content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
|
429
|
+
when /\(Draft\)/ then content + " (#{iter}PD)"
|
430
|
+
else content
|
424
431
|
end
|
425
432
|
|
426
433
|
fref = RelatonBib::FormattedRef.new(
|
data/lib/relaton_nist/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01-
|
11
|
+
date: 2020-01-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|