relaton-nist 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile.lock +5 -3
- data/lib/relaton_nist.rb +16 -1
- data/lib/relaton_nist/data/pubs-export.zip +0 -0
- data/lib/relaton_nist/hit_collection.rb +97 -30
- data/lib/relaton_nist/nist_bibliography.rb +12 -17
- data/lib/relaton_nist/scrapper.rb +246 -107
- data/lib/relaton_nist/version.rb +1 -1
- data/relaton_nist.gemspec +1 -0
- metadata +18 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 42ae6073ff5e1cbaba11be06d27c9e0da310bcf4
|
4
|
+
data.tar.gz: fd4e493fc0f7f3edefe2af58f26a76954c2b1e2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51ca4ccf407bb4355f669fc82ea2950a720f75af506174a58b807c86e109b21808fc4f380363258941c37328e544f9658c75d9ee24966124f3532f0abf8df421
|
7
|
+
data.tar.gz: e23643ecd8e7a685f2542660bf064ef30ff027460b9898aa6a1435f21f7a2ec4b311bdaf7306ed078e920b5b728d7b46402d368d9d6535bc968474793e30ad1a
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
relaton-nist (0.2.1)
|
4
|
+
relaton-nist (0.2.2)
|
5
5
|
relaton-bib (~> 0.2.0)
|
6
|
+
rubyzip
|
6
7
|
|
7
8
|
GEM
|
8
9
|
remote: https://rubygems.org/
|
@@ -32,9 +33,9 @@ GEM
|
|
32
33
|
pry-byebug (3.7.0)
|
33
34
|
byebug (~> 11.0)
|
34
35
|
pry (~> 0.10)
|
35
|
-
public_suffix (3.1.
|
36
|
+
public_suffix (3.1.1)
|
36
37
|
rake (10.5.0)
|
37
|
-
relaton-bib (0.2.
|
38
|
+
relaton-bib (0.2.3)
|
38
39
|
addressable
|
39
40
|
nokogiri (~> 1.8.4)
|
40
41
|
rspec (3.8.0)
|
@@ -52,6 +53,7 @@ GEM
|
|
52
53
|
rspec-support (3.8.2)
|
53
54
|
ruby-debug-ide (0.7.0)
|
54
55
|
rake (>= 0.8.1)
|
56
|
+
rubyzip (1.2.3)
|
55
57
|
safe_yaml (1.0.5)
|
56
58
|
simplecov (0.16.1)
|
57
59
|
docile (~> 1.1)
|
data/lib/relaton_nist.rb
CHANGED
@@ -8,5 +8,20 @@ end
|
|
8
8
|
|
9
9
|
module RelatonNist
|
10
10
|
class Error < StandardError; end
|
11
|
-
|
11
|
+
|
12
|
+
class << self
|
13
|
+
# @param sdate [String]
|
14
|
+
# @return [Date, NilClass]
|
15
|
+
def parse_date(sdate)
|
16
|
+
if /(?<date>\w+\s\d{4})/ =~ sdate # February 2012
|
17
|
+
Date.strptime(date, "%B %Y")
|
18
|
+
elsif /(?<date>\w+\s\d{1,2},\s\d{4})/ =~ sdate # February 11, 2012
|
19
|
+
Date.strptime(date, "%B %d, %Y")
|
20
|
+
elsif /(?<date>\d{4}-\d{2}-\d{2})/ =~ sdate # 2012-02-11
|
21
|
+
Date.parse(date)
|
22
|
+
elsif /(?<date>\d{4}-\d{2})/ =~ sdate # 2012-02
|
23
|
+
Date.strptime date, "%Y-%m"
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
12
27
|
end
|
Binary file
|
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "zip"
|
4
|
+
require "fileutils"
|
3
5
|
require "relaton_nist/hit"
|
4
6
|
require "addressable/uri"
|
5
7
|
require "open-uri"
|
@@ -7,8 +9,8 @@ require "open-uri"
|
|
7
9
|
module RelatonNist
|
8
10
|
# Page of hit collection.
|
9
11
|
class HitCollection < Array
|
10
|
-
|
11
12
|
DOMAIN = "https://csrc.nist.gov"
|
13
|
+
DATAFILE = File.expand_path "data/pubs-export.zip", __dir__
|
12
14
|
|
13
15
|
# @return [TrueClass, FalseClass]
|
14
16
|
attr_reader :fetched
|
@@ -28,13 +30,58 @@ module RelatonNist
|
|
28
30
|
def initialize(ref_nbr, year = nil, opts = {})
|
29
31
|
@text = ref_nbr
|
30
32
|
@year = year
|
33
|
+
|
34
|
+
/(?<docid>(SP|FIPS)\s[0-9-]+)/ =~ text
|
35
|
+
hits = docid ? from_json(docid, **opts) : from_csrc(**opts)
|
36
|
+
|
37
|
+
hits.sort! do |a, b|
|
38
|
+
if a.sort_value != b.sort_value
|
39
|
+
b.sort_value - a.sort_value
|
40
|
+
else
|
41
|
+
(b.hit[:release_date] - a.hit[:release_date]).to_i
|
42
|
+
end
|
43
|
+
end
|
44
|
+
concat hits
|
45
|
+
@fetched = false
|
46
|
+
end
|
47
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
48
|
+
|
49
|
+
# @return [RelatonNist::HitCollection]
|
50
|
+
def fetch
|
51
|
+
workers = RelatonBib::WorkersPool.new 4
|
52
|
+
workers.worker(&:fetch)
|
53
|
+
each do |hit|
|
54
|
+
workers << hit
|
55
|
+
end
|
56
|
+
workers.end
|
57
|
+
workers.result
|
58
|
+
@fetched = true
|
59
|
+
self
|
60
|
+
end
|
61
|
+
|
62
|
+
def to_s
|
63
|
+
inspect
|
64
|
+
end
|
65
|
+
|
66
|
+
# @return [String]
|
67
|
+
def inspect
|
68
|
+
"<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
|
73
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
74
|
+
|
75
|
+
# @param stage [String]
|
76
|
+
# @return [Array<RelatonNist::Hit>]
|
77
|
+
def from_csrc(**opts)
|
31
78
|
from, to = nil
|
32
79
|
if year
|
33
|
-
d
|
80
|
+
d = Date.strptime year, "%Y"
|
34
81
|
from = d.strftime "%m/%d/%Y"
|
35
82
|
to = d.next_year.prev_day.strftime "%m/%d/%Y"
|
36
83
|
end
|
37
|
-
url = "#{DOMAIN}/publications/search?keywords-lg=#{
|
84
|
+
url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"
|
38
85
|
url += "&dateFrom-lg=#{from}" if from
|
39
86
|
url += "&dateTo-lg=#{to}" if to
|
40
87
|
url += if /PD/ =~ opts[:stage]
|
@@ -44,7 +91,7 @@ module RelatonNist
|
|
44
91
|
end
|
45
92
|
|
46
93
|
doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
|
47
|
-
|
94
|
+
doc.css("table.publications-table > tbody > tr").map do |h|
|
48
95
|
link = h.at("td/div/strong/a")
|
49
96
|
serie = h.at("td[1]").text.strip
|
50
97
|
code = h.at("td[2]").text.strip
|
@@ -59,39 +106,59 @@ module RelatonNist
|
|
59
106
|
}, self
|
60
107
|
)
|
61
108
|
end
|
62
|
-
|
63
|
-
|
64
|
-
|
109
|
+
end
|
110
|
+
|
111
|
+
# Fetches data from json
|
112
|
+
# @param docid [String]
|
113
|
+
def from_json(docid, **opts)
|
114
|
+
data.select do |doc|
|
115
|
+
if year
|
116
|
+
d = Date.strptime year, "%Y"
|
117
|
+
idate = RelatonNist.parse_date doc["issued-date"]
|
118
|
+
next unless idate.between? d, d.next_year.prev_day
|
119
|
+
end
|
120
|
+
if /PD/ =~ opts[:stage]
|
121
|
+
next unless %w[draft-public draft-prelim].include? doc["status"]
|
65
122
|
else
|
66
|
-
|
123
|
+
next unless doc["status"] == "final"
|
67
124
|
end
|
125
|
+
doc["docidentifier"] =~ Regexp.new(docid)
|
126
|
+
end.map do |h|
|
127
|
+
/(?<serie>(?<=-)\w+$)/ =~ h["series"]
|
128
|
+
title = [h["title-main"], h["title-sub"]].compact.join " - "
|
129
|
+
release_date = RelatonNist.parse_date h["published-date"]
|
130
|
+
Hit.new(
|
131
|
+
{
|
132
|
+
code: h["docidentifier"], serie: serie.upcase, title: title,
|
133
|
+
url: h["uri"], status: h["status"], release_date: release_date,
|
134
|
+
json: h
|
135
|
+
}, self
|
136
|
+
)
|
68
137
|
end
|
69
|
-
concat hits
|
70
|
-
# concat(hits.map { |h| Hit.new(h, self) })
|
71
|
-
@fetched = false
|
72
|
-
# @hit_pages = hit_pages
|
73
138
|
end
|
74
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
75
139
|
|
76
|
-
#
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
140
|
+
# Fetches json data
|
141
|
+
# @return [Hash]
|
142
|
+
def data
|
143
|
+
ctime = File.ctime DATAFILE if File.exist? DATAFILE
|
144
|
+
if !ctime || ctime.to_date < Date.today
|
145
|
+
resp = OpenURI.open_uri("https://csrc.nist.gov/CSRC/media/feeds/metanorma/pubs-export.meta")
|
146
|
+
if !ctime || ctime < resp.last_modified
|
147
|
+
@data = nil
|
148
|
+
zip = OpenURI.open_uri "https://csrc.nist.gov/CSRC/media/feeds/metanorma/pubs-export.zip"
|
149
|
+
FileUtils.mv zip.path, DATAFILE
|
150
|
+
end
|
82
151
|
end
|
83
|
-
|
84
|
-
workers.result
|
85
|
-
@fetched = true
|
86
|
-
self
|
87
|
-
end
|
152
|
+
return if @data
|
88
153
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
154
|
+
Zip::File.open(DATAFILE) do |zf|
|
155
|
+
zf.each do |f|
|
156
|
+
@data = JSON.parse f.get_input_stream.read
|
157
|
+
break
|
158
|
+
end
|
159
|
+
end
|
160
|
+
@data
|
95
161
|
end
|
162
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
96
163
|
end
|
97
164
|
end
|
@@ -15,8 +15,7 @@ module RelatonNist
|
|
15
15
|
def search(text, year = nil, opts = {})
|
16
16
|
HitCollection.new text, year, opts
|
17
17
|
rescue OpenURI::HTTPError, SocketError
|
18
|
-
|
19
|
-
[]
|
18
|
+
raise RelatonBib::RequestError, "Could not access https://www.nist.gov"
|
20
19
|
end
|
21
20
|
|
22
21
|
# @param code [String] the NIST standard Code to look up (e.g. "8200")
|
@@ -83,26 +82,22 @@ module RelatonNist
|
|
83
82
|
# @return [Hash]
|
84
83
|
def nistbib_results_filter(result, year, opts)
|
85
84
|
missed_years = []
|
85
|
+
iter = opts[:stage]&.slice(-3, 1)
|
86
|
+
iteration = case iter
|
87
|
+
when "I" then "1"
|
88
|
+
when "F" then "final"
|
89
|
+
else iter
|
90
|
+
end
|
86
91
|
result.each_slice(3) do |s| # ISO website only allows 3 connections
|
87
92
|
fetch_pages(s, 3).each_with_index do |r, _i|
|
88
93
|
if opts[:issued_date]
|
89
|
-
r.dates.select { |d| d.type == "issued"
|
90
|
-
|
91
|
-
end
|
94
|
+
ids = r.dates.select { |d| d.type == "issued" && d.on == opts[:issued_date] }
|
95
|
+
next if ids.empty?
|
92
96
|
elsif opts[:updated_date]
|
93
|
-
r.dates.select { |d| d.type == "published"
|
94
|
-
|
95
|
-
end
|
96
|
-
end
|
97
|
-
if opts[:stage]
|
98
|
-
iter = opts[:stage][-3]
|
99
|
-
iteration = case iter
|
100
|
-
when "I" then 1
|
101
|
-
when "F" then "final"
|
102
|
-
else iter.to_i
|
103
|
-
end
|
104
|
-
next if iter && r.status.iteration != iteration
|
97
|
+
pds = r.dates.select { |d| d.type == "published" && d.on == opts[:updated_date] }
|
98
|
+
next if pds.empty?
|
105
99
|
end
|
100
|
+
next if iter && r.status.iteration != iteration
|
106
101
|
return { ret: r } if !year
|
107
102
|
|
108
103
|
r.dates.select { |d| d.type == "published" }.each do |d|
|
@@ -11,23 +11,55 @@ module RelatonNist
|
|
11
11
|
# @param hit_data [Hash]
|
12
12
|
# @return [Hash]
|
13
13
|
def parse_page(hit_data)
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
item_data = if hit_data[:json]
|
15
|
+
from_json hit_data
|
16
|
+
else
|
17
|
+
from_csrs hit_data
|
18
|
+
end
|
17
19
|
doctype = "standard"
|
18
20
|
titles = fetch_titles(hit_data)
|
19
|
-
unless /^(SP|NISTIR|FIPS)
|
20
|
-
doctype = id_cleanup(docid[0].id)
|
21
|
-
docid[0] = RelatonBib::DocumentIdentifier.new(
|
21
|
+
unless /^(SP|NISTIR|FIPS) / =~ item_data[:docid][0].id
|
22
|
+
doctype = id_cleanup(item_data[:docid][0].id)
|
23
|
+
item_data[:docid][0] = RelatonBib::DocumentIdentifier.new(
|
24
|
+
id: titles[0][:content], type: "NIST",
|
25
|
+
)
|
22
26
|
end
|
27
|
+
item_data[:fetched] = Date.today.to_s
|
28
|
+
item_data[:type] = "standard"
|
29
|
+
item_data[:titles] = titles
|
30
|
+
item_data[:doctype] = doctype
|
31
|
+
|
32
|
+
NistBibliographicItem.new(**item_data)
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
def from_json(hit_data)
|
38
|
+
json = hit_data[:json]
|
39
|
+
{
|
40
|
+
link: fetch_link(json),
|
41
|
+
docid: fetch_docid(json["docidentifier"]),
|
42
|
+
dates: fetch_dates(json, hit_data[:release_date]),
|
43
|
+
contributors: fetch_contributors(json),
|
44
|
+
edition: fetch_edition(json),
|
45
|
+
language: [json["language"]],
|
46
|
+
script: [json["script"]],
|
47
|
+
# abstract: fetch_abstract(doc),
|
48
|
+
docstatus: fetch_status(json, hit_data[:status]),
|
49
|
+
copyright: fetch_copyright(json["published-date"]),
|
50
|
+
relations: fetch_relations_json(json),
|
51
|
+
# series: fetch_series(json),
|
52
|
+
keyword: fetch_keywords(json),
|
53
|
+
commentperiod: fetch_commentperiod_json(json),
|
54
|
+
}
|
55
|
+
end
|
23
56
|
|
24
|
-
|
25
|
-
|
26
|
-
|
57
|
+
def from_csrs(hit_data)
|
58
|
+
doc = get_page hit_data[:url]
|
59
|
+
{
|
27
60
|
# id: fetch_id(doc),
|
28
|
-
titles: titles,
|
29
61
|
link: fetch_link(doc),
|
30
|
-
docid:
|
62
|
+
docid: fetch_docid(doc),
|
31
63
|
dates: fetch_dates(doc, hit_data[:release_date]),
|
32
64
|
contributors: fetch_contributors(doc),
|
33
65
|
edition: fetch_edition(hit_data[:code]),
|
@@ -40,8 +72,7 @@ module RelatonNist
|
|
40
72
|
series: fetch_series(doc),
|
41
73
|
keyword: fetch_keywords(doc),
|
42
74
|
commentperiod: fetch_commentperiod(doc),
|
43
|
-
|
44
|
-
)
|
75
|
+
}
|
45
76
|
end
|
46
77
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
47
78
|
|
@@ -52,8 +83,6 @@ module RelatonNist
|
|
52
83
|
id.sub(/ \(WITHDRAWN\)/, "").sub(/ \(([^) ]+ )?DRAFT\)/i, "")
|
53
84
|
end
|
54
85
|
|
55
|
-
private
|
56
|
-
|
57
86
|
# Get page.
|
58
87
|
# @param path [String] page's path
|
59
88
|
# @return [Nokogiri::HTML::Document]
|
@@ -61,16 +90,23 @@ module RelatonNist
|
|
61
90
|
uri = URI url
|
62
91
|
resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
|
63
92
|
Nokogiri::HTML(resp.body)
|
93
|
+
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
94
|
+
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError
|
95
|
+
raise RelatonBib::RequestError, "Could not access #{url}"
|
64
96
|
end
|
65
97
|
|
66
98
|
# Fetch docid.
|
67
|
-
# @param doc [Nokogiri::HTML::Document]
|
99
|
+
# @param doc [Nokogiri::HTML::Document, String]
|
68
100
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
69
101
|
def fetch_docid(doc)
|
70
|
-
item_ref = doc.
|
71
|
-
|
72
|
-
|
73
|
-
|
102
|
+
item_ref = if doc.is_a? String
|
103
|
+
doc
|
104
|
+
else
|
105
|
+
doc.at(
|
106
|
+
"//div[contains(@class, 'publications-detail')]/h3",
|
107
|
+
)&.text&.strip
|
108
|
+
end
|
109
|
+
item_ref ||= "?"
|
74
110
|
[RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
|
75
111
|
end
|
76
112
|
|
@@ -83,56 +119,48 @@ module RelatonNist
|
|
83
119
|
# end
|
84
120
|
|
85
121
|
# Fetch status.
|
86
|
-
# @param doc [Nokogiri::HTML::Document]
|
122
|
+
# @param doc [Nokogiri::HTML::Document, Hash]
|
87
123
|
# @param status [String]
|
88
|
-
# @return [
|
124
|
+
# @return [RelatonNist::DocumentStatus]
|
89
125
|
def fetch_status(doc, status)
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
when "retired draft"
|
95
|
-
stage = "draft-public"
|
96
|
-
subst = "retired"
|
97
|
-
when "withdrawn"
|
98
|
-
stage = "final"
|
99
|
-
subst = "withdrawn"
|
100
|
-
when "draft"
|
101
|
-
stage = "draft-public"
|
102
|
-
subst = "active"
|
126
|
+
if doc.is_a? Hash
|
127
|
+
stage = doc["status"]
|
128
|
+
subst = doc["substage"]
|
129
|
+
iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
|
103
130
|
else
|
104
|
-
|
105
|
-
|
106
|
-
|
131
|
+
case status
|
132
|
+
when "draft (withdrawn)"
|
133
|
+
stage = "draft-public"
|
134
|
+
subst = "withdrawn"
|
135
|
+
when "retired draft"
|
136
|
+
stage = "draft-public"
|
137
|
+
subst = "retired"
|
138
|
+
when "withdrawn"
|
139
|
+
stage = "final"
|
140
|
+
subst = "withdrawn"
|
141
|
+
when "draft"
|
142
|
+
stage = "draft-public"
|
143
|
+
subst = "active"
|
144
|
+
else
|
145
|
+
stage = status
|
146
|
+
subst = "active"
|
147
|
+
end
|
148
|
+
|
149
|
+
iter = nil
|
150
|
+
if stage.include? "draft"
|
151
|
+
iter = 1
|
152
|
+
history = doc.xpath("//span[@id='pub-history-container']/a"\
|
153
|
+
"|//span[@id='pub-history-container']/span")
|
154
|
+
history.each_with_index do |h, idx|
|
155
|
+
next if h.name == "a"
|
107
156
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
history = doc.xpath("//span[@id='pub-history-container']/a"\
|
112
|
-
"|//span[@id='pub-history-container']/span")
|
113
|
-
history.each_with_index do |h, idx|
|
114
|
-
next if h.name == "a"
|
115
|
-
|
116
|
-
iter = idx + 1 if idx.positive?
|
117
|
-
# iter = if lsif idx < (history.size - 1) && !history.last.text.include?("Draft")
|
118
|
-
# "final"
|
119
|
-
# elsif idx.positive? then idx + 1
|
120
|
-
# end
|
121
|
-
break
|
157
|
+
iter = idx + 1 if idx.positive?
|
158
|
+
break
|
159
|
+
end
|
122
160
|
end
|
123
161
|
end
|
124
162
|
|
125
|
-
|
126
|
-
# substage = "withdrawn"
|
127
|
-
# else
|
128
|
-
# substage = "active"
|
129
|
-
# item_ref = doc.at(
|
130
|
-
# "//div[contains(@class, 'publications-detail')]/h3",
|
131
|
-
# ).text.strip
|
132
|
-
# wip = item_ref.match(/(?<=\()\w+/).to_s
|
133
|
-
# stage = "draft-public" if wip == "DRAFT"
|
134
|
-
# end
|
135
|
-
RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter
|
163
|
+
RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter.to_s
|
136
164
|
end
|
137
165
|
|
138
166
|
# Fetch titles.
|
@@ -144,46 +172,87 @@ module RelatonNist
|
|
144
172
|
|
145
173
|
# Fetch dates
|
146
174
|
# @param doc [Nokogiri::HTML::Document]
|
175
|
+
# @param release_date [Date]
|
147
176
|
# @return [Array<Hash>]
|
148
177
|
def fetch_dates(doc, release_date)
|
149
178
|
dates = [{ type: "published", on: release_date.to_s }]
|
150
179
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
180
|
+
if doc.is_a? Hash
|
181
|
+
issued = RelatonNist.parse_date doc["issued-date"]
|
182
|
+
updated = RelatonNist.parse_date doc["updated-date"]
|
183
|
+
dates << { type: "updated", on: updated.to_s } if updated
|
184
|
+
obsoleted = RelatonNist.parse_date doc["obsoleted-date"]
|
185
|
+
dates << { type: "obsoleted", on: obsoleted.to_s } if obsoleted
|
186
|
+
else
|
187
|
+
d = doc.at("//span[@id='pub-release-date']").text.strip
|
188
|
+
issued = RelatonNist.parse_date d
|
189
|
+
end
|
190
|
+
dates << { type: "issued", on: issued.to_s }
|
159
191
|
dates
|
160
192
|
end
|
161
193
|
|
194
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
195
|
+
# @param doc [Nokogiri::HTML::Document, Hash]
|
196
|
+
# @return [Array<RelatonBib::ContributionInfo>]
|
162
197
|
def fetch_contributors(doc)
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
198
|
+
contribs = []
|
199
|
+
if doc.is_a? Hash
|
200
|
+
contribs += contributors_json(
|
201
|
+
doc["authors"], "author", doc["language"], doc["script"]
|
202
|
+
)
|
203
|
+
contribs + contributors_json(
|
204
|
+
doc["editors"], "editor", doc["language"], doc["script"]
|
205
|
+
)
|
206
|
+
else
|
207
|
+
name = "National Institute of Standards and Technology"
|
208
|
+
org = RelatonBib::Organization.new(
|
209
|
+
name: name, url: "www.nist.gov", abbreviation: "NIST",
|
210
|
+
)
|
211
|
+
contribs << RelatonBib::ContributionInfo.new(entity: org, role: ["publisher"])
|
212
|
+
authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
|
213
|
+
contribs += contributors(authors, "author")
|
214
|
+
editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
|
215
|
+
contribs + contributors(editors, "editor")
|
216
|
+
end
|
217
|
+
end
|
173
218
|
|
174
|
-
|
175
|
-
|
219
|
+
# @param doc [Array<Hash>]
|
220
|
+
# @param role [String]
|
221
|
+
# @return [Array<RelatonBib::ContributionInfo>]
|
222
|
+
def contributors_json(doc, role, lang = "en", script = "Latn")
|
223
|
+
doc.map do |contr|
|
224
|
+
if contr["affiliation"]
|
225
|
+
if contr["affiliation"]["acronym"]
|
226
|
+
abbrev = RelatonBib::LocalizedString.new(contr["affiliation"]["acronym"])
|
227
|
+
end
|
228
|
+
org = RelatonBib::Organization.new(
|
229
|
+
name: contr["affiliation"]["name"], abbreviation: abbrev,
|
230
|
+
)
|
231
|
+
end
|
232
|
+
if contr["surname"]
|
233
|
+
affiliation = RelatonBib::Affilation.new org
|
234
|
+
entity = RelatonBib::Person.new(
|
235
|
+
name: full_name(contr, lang, script), affiliation: [affiliation],
|
236
|
+
)
|
237
|
+
else
|
238
|
+
entity = org
|
239
|
+
end
|
240
|
+
RelatonBib::ContributionInfo.new entity: entity, role: [role]
|
241
|
+
end
|
176
242
|
end
|
177
243
|
|
178
244
|
# rubocop:disable Metrics/CyclomaticComplexity
|
179
|
-
|
245
|
+
# @param doc [Nokogiri::HTML::Element, Array<Hash>]
|
246
|
+
# @param role [String]
|
247
|
+
# @return [Array<RelatonBib::ContributionInfo>]
|
248
|
+
def contributors(doc, role, lang = "en", script = "Latn")
|
180
249
|
return [] if doc.nil?
|
181
250
|
|
182
251
|
doc.text.split(", ").map do |contr|
|
183
252
|
/(?<an>.+?)(\s+\((?<abbrev>.+?)\))?$/ =~ contr
|
184
253
|
if abbrev && an.downcase !~ /(task|force|group)/ && an.split.size.between?(2, 3)
|
185
254
|
fullname = RelatonBib::FullName.new(
|
186
|
-
completename: RelatonBib::LocalizedString.new(an,
|
255
|
+
completename: RelatonBib::LocalizedString.new(an, lang, script),
|
187
256
|
)
|
188
257
|
case abbrev
|
189
258
|
when "NIST"
|
@@ -199,7 +268,7 @@ module RelatonNist
|
|
199
268
|
org = RelatonBib::Organization.new name: org_name, url: url, abbreviation: abbrev
|
200
269
|
affiliation = RelatonBib::Affilation.new org
|
201
270
|
entity = RelatonBib::Person.new(
|
202
|
-
name: fullname, affiliation: [affiliation],
|
271
|
+
name: fullname, affiliation: [affiliation],
|
203
272
|
)
|
204
273
|
else
|
205
274
|
entity = RelatonBib::Organization.new name: an, abbreviation: abbrev
|
@@ -207,17 +276,49 @@ module RelatonNist
|
|
207
276
|
RelatonBib::ContributionInfo.new entity: entity, role: [role]
|
208
277
|
end
|
209
278
|
end
|
210
|
-
# rubocop:enable Metrics/CyclomaticComplexity
|
279
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/MethodLength
|
280
|
+
|
281
|
+
# @param name [Hash]
|
282
|
+
# @param lang [String]
|
283
|
+
# @param script [String]
|
284
|
+
# @return [RelatonBib::FullName]
|
285
|
+
def full_name(name, lang, script)
|
286
|
+
RelatonBib::FullName.new(
|
287
|
+
surname: RelatonBib::LocalizedString.new(name["surname"], lang, script),
|
288
|
+
forenames: name_parts(name["givenName"], lang, script),
|
289
|
+
additions: name_parts(name["suffix"], lang, script),
|
290
|
+
prefix: name_parts(name["title"], lang, script),
|
291
|
+
completename: RelatonBib::LocalizedString.new(name["fullName"], lang, script),
|
292
|
+
)
|
293
|
+
end
|
211
294
|
|
212
|
-
|
213
|
-
|
295
|
+
# @param part [String, NilClass]
|
296
|
+
# @param lang [String]
|
297
|
+
# @param script [String]
|
298
|
+
# @return [Array<RelatonBib::LocalizedString>]
|
299
|
+
def name_parts(part, lang, script)
|
300
|
+
return [] unless part
|
301
|
+
|
302
|
+
[RelatonBib::LocalizedString.new(name[part], lang, script)]
|
303
|
+
end
|
304
|
+
|
305
|
+
# @param doc [String, Hash]
|
306
|
+
# @return [String, NilClass]
|
307
|
+
def fetch_edition(doc)
|
308
|
+
if doc.is_a? Hash
|
309
|
+
return unless doc["edition"]
|
310
|
+
|
311
|
+
rev = doc["edition"]
|
312
|
+
else
|
313
|
+
return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ doc
|
314
|
+
end
|
214
315
|
|
215
316
|
"Revision #{rev}"
|
216
317
|
end
|
217
318
|
|
218
319
|
# Fetch abstracts.
|
219
320
|
# @param doc [Nokogiri::HTML::Document]
|
220
|
-
# @return [Array<
|
321
|
+
# @return [Array<Hash>]
|
221
322
|
def fetch_abstract(doc)
|
222
323
|
abstract_content = doc.xpath('//div[contains(@class, "pub-abstract-callout")]/div[1]/p').text
|
223
324
|
[{
|
@@ -229,58 +330,82 @@ module RelatonNist
|
|
229
330
|
end
|
230
331
|
|
231
332
|
# Fetch copyright.
|
232
|
-
# @param
|
333
|
+
# @param doc [Nokogiri::HTML::Document, String]
|
233
334
|
# @return [Hash]
|
234
335
|
def fetch_copyright(doc)
|
235
336
|
name = "National Institute of Standards and Technology"
|
236
337
|
url = "www.nist.gov"
|
237
|
-
d = doc.
|
338
|
+
d = if doc.is_a? String then doc
|
339
|
+
else
|
340
|
+
doc.at("//span[@id='pub-release-date']").text.strip
|
341
|
+
end
|
238
342
|
from = d.match(/\d{4}/).to_s
|
239
343
|
{ owner: { name: name, abbreviation: "NIST", url: url }, from: from }
|
240
344
|
end
|
241
345
|
|
242
346
|
# Fetch links.
|
243
|
-
# @param doc [Nokogiri::HTML::Document]
|
347
|
+
# @param doc [Nokogiri::HTML::Document, Hash]
|
244
348
|
# @return [Array<Hash>]
|
245
349
|
def fetch_link(doc)
|
246
|
-
pub = doc.at "//p/strong[.='Publication:']"
|
247
350
|
links = []
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
351
|
+
if doc.is_a? Hash
|
352
|
+
links << { type: "uri", content: doc["uri"] } if doc["uri"]
|
353
|
+
doi = "https://doi.org/" + doc["doi"] if doc["doi"]
|
354
|
+
else
|
355
|
+
pub = doc.at "//p/strong[.='Publication:']"
|
356
|
+
pdf = pub.at "./following-sibling::a[.=' Local Download']"
|
357
|
+
doi = pub.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
|
358
|
+
links << { type: "pdf", content: pdf[:href] } if pdf
|
359
|
+
end
|
360
|
+
links << { type: "doi", content: doi } if doi
|
252
361
|
links
|
253
362
|
end
|
254
363
|
|
255
364
|
# Fetch relations.
|
256
365
|
# @param doc [Nokogiri::HTML::Document]
|
257
|
-
# @return [Array<
|
366
|
+
# @return [Array<RelatonBib::DocumentRelation>]
|
258
367
|
def fetch_relations(doc)
|
259
368
|
relations = doc.xpath('//span[@id="pub-supersedes-container"]/a').map do |r|
|
260
|
-
doc_relation "supersedes", r
|
369
|
+
doc_relation "supersedes", r.text, DOMAIN + r[:href]
|
261
370
|
end
|
262
371
|
|
263
372
|
relations += doc.xpath('//span[@id="pub-part-container"]/a').map do |r|
|
264
|
-
doc_relation "partOf", r
|
373
|
+
doc_relation "partOf", r.text, DOMAIN + r[:href]
|
265
374
|
end
|
266
375
|
|
267
376
|
relations + doc.xpath('//span[@id="pub-related-container"]/a').map do |r|
|
268
|
-
doc_relation "updates", r
|
377
|
+
doc_relation "updates", r.text, DOMAIN + r[:href]
|
269
378
|
end
|
270
379
|
end
|
271
380
|
|
272
|
-
def
|
381
|
+
def fetch_relations_json(doc)
|
382
|
+
relations = doc["supersedes"].map do |r|
|
383
|
+
doc_relation "supersedes", r["docidentifier"], r["uri"]
|
384
|
+
end
|
385
|
+
|
386
|
+
relations + doc["superseded-by"].map do |r|
|
387
|
+
doc_relation "updates", r["docidentifier"], r["uri"]
|
388
|
+
end
|
389
|
+
end
|
390
|
+
|
391
|
+
# @param type [String]
|
392
|
+
# @param ref [String]
|
393
|
+
# @param uri [String]
|
394
|
+
# @return [RelatonBib::DocumentRelation]
|
395
|
+
def doc_relation(type, ref, uri, lang = "en", script = "Latn")
|
273
396
|
RelatonBib::DocumentRelation.new(
|
274
397
|
type: type,
|
275
398
|
bibitem: RelatonBib::BibliographicItem.new(
|
276
399
|
formattedref: RelatonBib::FormattedRef.new(
|
277
|
-
content: ref
|
400
|
+
content: ref, language: lang, script: script, format: "text/plain",
|
278
401
|
),
|
279
|
-
link: [RelatonBib::TypedUri.new(type: "src", content:
|
402
|
+
link: [RelatonBib::TypedUri.new(type: "src", content: uri)],
|
280
403
|
),
|
281
404
|
)
|
282
405
|
end
|
283
406
|
|
407
|
+
# @param doc [Nokogiri::HTML::Document]
|
408
|
+
# @return [Array<RelatonBib::Series>]
|
284
409
|
def fetch_series(doc)
|
285
410
|
series = doc.xpath "//span[@id='pub-history-container']/a"\
|
286
411
|
"|//span[@id='pub-history-container']/span"
|
@@ -305,11 +430,19 @@ module RelatonNist
|
|
305
430
|
end.select { |s| s }
|
306
431
|
end
|
307
432
|
|
433
|
+
# @param doc [Nokogiri::HTML::Document, Hash]
|
434
|
+
# @return [Array<RelatonNist::Keyword>]
|
308
435
|
def fetch_keywords(doc)
|
309
|
-
kws = doc.
|
310
|
-
|
436
|
+
kws = if doc.is_a? Hash
|
437
|
+
doc["keywords"]
|
438
|
+
else
|
439
|
+
doc.xpath "//span[@id='pub-keywords-container']/span"
|
440
|
+
end
|
441
|
+
kws.map { |kw| Keyword.new kw.is_a?(String) ? kw : kw.text }
|
311
442
|
end
|
312
443
|
|
444
|
+
# @param doc [Nokogiri::HTML::Document]
|
445
|
+
# @return [RelatonNist::CommentPeriod, NilClass]
|
313
446
|
def fetch_commentperiod(doc)
|
314
447
|
cp = doc.at "//span[@id='pub-comments-due']"
|
315
448
|
return unless cp
|
@@ -324,6 +457,12 @@ module RelatonNist
|
|
324
457
|
extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
|
325
458
|
CommentPeriod.new from, to, extended
|
326
459
|
end
|
460
|
+
|
461
|
+
# @param json [Hash]
|
462
|
+
# @return [RelatonNist::CommentPeriod, NilClass]
|
463
|
+
def fetch_commentperiod_json(json)
|
464
|
+
CommentPeriod.new json["comment-from"], json["comment-to"] if json["comment-from"]
|
465
|
+
end
|
327
466
|
end
|
328
467
|
end
|
329
468
|
end
|
data/lib/relaton_nist/version.rb
CHANGED
data/relaton_nist.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-
|
11
|
+
date: 2019-06-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -178,6 +178,20 @@ dependencies:
|
|
178
178
|
- - "~>"
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: 0.2.0
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: rubyzip
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
181
195
|
description: 'RelatonNist: retrive NIST standards.'
|
182
196
|
email:
|
183
197
|
- open.source@ribose.com
|
@@ -200,6 +214,7 @@ files:
|
|
200
214
|
- lib/relaton/processor.rb
|
201
215
|
- lib/relaton_nist.rb
|
202
216
|
- lib/relaton_nist/comment_period.rb
|
217
|
+
- lib/relaton_nist/data/pubs-export.zip
|
203
218
|
- lib/relaton_nist/document_status.rb
|
204
219
|
- lib/relaton_nist/hit.rb
|
205
220
|
- lib/relaton_nist/hit_collection.rb
|
@@ -230,7 +245,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
230
245
|
version: '0'
|
231
246
|
requirements: []
|
232
247
|
rubyforge_project:
|
233
|
-
rubygems_version: 2.
|
248
|
+
rubygems_version: 2.6.12
|
234
249
|
signing_key:
|
235
250
|
specification_version: 4
|
236
251
|
summary: 'RelatonNist: retrive NIST standards.'
|