relaton-nist 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.rubocop.yml +10 -0
- data/.travis.yml +17 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +85 -0
- data/LICENSE.txt +21 -0
- data/README.adoc +128 -0
- data/Rakefile +6 -0
- data/appveyor.yml +30 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/relaton/processor.rb +23 -0
- data/lib/relaton_nist/comment_period.rb +30 -0
- data/lib/relaton_nist/document_status.rb +24 -0
- data/lib/relaton_nist/hit.rb +54 -0
- data/lib/relaton_nist/hit_collection.rb +90 -0
- data/lib/relaton_nist/keyword.rb +16 -0
- data/lib/relaton_nist/nist_bibliographic_item.rb +78 -0
- data/lib/relaton_nist/nist_bibliography.rb +150 -0
- data/lib/relaton_nist/scrapper.rb +329 -0
- data/lib/relaton_nist/version.rb +3 -0
- data/lib/relaton_nist/xml_parser.rb +47 -0
- data/lib/relaton_nist.rb +12 -0
- data/relaton_nist.gemspec +39 -0
- metadata +237 -0
@@ -0,0 +1,150 @@
|
|
1
|
+
require "relaton_bib"
|
2
|
+
require "relaton_nist/nist_bibliographic_item"
|
3
|
+
require "relaton_nist/scrapper"
|
4
|
+
require "relaton_nist/hit_collection"
|
5
|
+
require "relaton_nist/xml_parser"
|
6
|
+
require "relaton_nist/keyword"
|
7
|
+
require "relaton_nist/comment_period"
|
8
|
+
require "relaton_nist/document_status"
|
9
|
+
|
10
|
+
module RelatonNist
|
11
|
+
class NistBibliography
|
12
|
+
class << self
|
13
|
+
# Search NIST publications.
#
# @param text [String] reference to look for
# @param year [String, nil] handed to the hit collection unchanged
# @param opts [Hash] handed to the hit collection unchanged
# @return [RelatonNist::HitCollection, Array] the hit collection, or an empty
#   array (after a warning) when the lookup raises an HTTP or socket error
def search(text, year = nil, opts = {})
  begin
    HitCollection.new(text, year, opts)
  rescue OpenURI::HTTPError, SocketError
    warn "Could not access https://www.nist.gov"
    []
  end
end
|
21
|
+
|
22
|
+
# Look up a NIST reference and fetch the matching document.
#
# Qualifiers embedded in +code+ are split off before searching:
# * a date in parentheses: "(June 2018)" becomes opts[:issued_date],
#   "(June 15, 2018)" becomes opts[:updated_date];
# * a stage in parentheses, e.g. "(IPD)", becomes opts[:stage];
# * a ":year" suffix is used as the year when +year+ is not given.
#
# @param code [String] the NIST standard code to look up (e.g. "8200")
# @param year [String] the year the standard was published (optional)
#
# @param opts [Hash] options; the caller's hash is left untouched
# @option opts [TrueClass, FalseClass] :all_parts restricted to all parts
#   if all-parts reference is required
# @option opts [TrueClass, FalseClass] :bibdata
#
# @return [Object, nil] the result of nistbib_get1: the matching item, or
#   nil when nothing matched
def get(code, year = nil, opts = {})
  # Work on a copy so the qualifiers parsed below do not leak into the
  # hash owned by the caller.
  opts = opts.dup

  /^(?<code2>[^\(]+)(\((?<date2>\w+\s(\d{2},\s)?\d{4})\))?\s?\(?((?<=\()(?<stage>[^\)]+))?/ =~ code
  if code2
    code = code2.strip
    if date2
      if /\w+\s\d{4}/ =~ date2
        opts[:issued_date] = Time.strptime date2, "%B %Y"
      elsif /\w+\s\d{2},\s\d{4}/ =~ date2
        opts[:updated_date] = Time.strptime date2, "%B %d, %Y"
      end
    end
    opts[:stage] = stage if stage
  end

  if year.nil?
    # "CODE:YEAR" form
    /^(?<code1>[^:]+):(?<year1>[^:]+)$/ =~ code
    unless code1.nil?
      code = code1
      year = year1
    end
  end

  code += "-1" if opts[:all_parts]
  nistbib_get1(code, year, opts)
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
# Search for +code+, filter the hits and return the matching one.
#
# @param code [String]
# @param year [String, nil]
# @param opts [Hash]
# @return [Object, nil] the matching result; nil when the search yields
#   nothing or (after warnings from fetch_ref_err) when no hit matched
def nistbib_get1(code, year, opts)
  candidates = nistbib_search_filter(code, year, opts)
  return nil unless candidates

  outcome = nistbib_results_filter(candidates, year, opts)
  outcome[:ret] || fetch_ref_err(code, year, outcome[:years])
end
|
70
|
+
|
71
|
+
# Walk through the search hits, fetching their pages three at a time, and
# return the first fetched item that passes every requested filter:
# * opts[:issued_date]: the item has an "issued" date equal to it;
# * otherwise opts[:updated_date]: the item has a "published" date equal
#   to it;
# * opts[:stage]: the item's status iteration matches the stage prefix
#   ("I" => 1, "F" => "final", otherwise the number);
# * year: one of the item's "published" dates falls in that year.
# If nothing matches, the years that caused a mismatch are returned for
# error reporting.
#
# @param result [Enumerable] hits to fetch
# @param year [String, nil]
# @param opts [Hash] options
# @option opts [Time] :issued_date
# @option opts [Time] :updated_date
# @option opts [String] :stage
#
# @return [Hash] { ret: item } on success, { years: [Integer, ...] } otherwise
def nistbib_results_filter(result, year, opts)
  missed_years = []

  # The iteration marker is the third character from the end of the stage
  # ("IPD" -> "I", "2PD" -> "2"); shorter stages carry none and disable
  # the iteration filter.
  iter = opts[:stage] && opts[:stage][-3]
  iteration = case iter
              when "I" then 1
              when "F" then "final"
              else iter.to_i
              end

  result.each_slice(3) do |s| # at most 3 pages are fetched concurrently
    fetch_pages(s, 3).each do |r|
      # NOTE(review): assumes d.on compares equal to the Time values built
      # in `get` - confirm against the date class used by the fetched item.
      if opts[:issued_date]
        next unless r.dates.any? { |d| d.type == "issued" && d.on == opts[:issued_date] }
      elsif opts[:updated_date]
        next unless r.dates.any? { |d| d.type == "published" && d.on == opts[:updated_date] }
      end
      next if iter && r.status.iteration != iteration
      return { ret: r } unless year

      r.dates.select { |d| d.type == "published" }.each do |d|
        return { ret: r } if year.to_i == d.on.year

        missed_years << d.on.year
      end
    end
  end
  { years: missed_years }
end
|
117
|
+
|
118
|
+
# Fetch the given hits through a pool of +n+ workers and return the fetched
# results in the order of the hits.
#
# @param s [Array] hits; each must respond to #fetch
# @param n [Integer] pool size handed to RelatonBib::WorkersPool
# @return [Array] fetched results, ordered like +s+
def fetch_pages(s, n)
  pool = RelatonBib::WorkersPool.new(n)
  pool.worker { |job| { i: job[:i], hit: job[:hit].fetch } }
  s.each_with_index { |hit, position| pool << { i: position, hit: hit } }
  pool.end
  # Workers may finish out of order; restore the submission order.
  pool.result.sort_by { |done| done[:i] }.map { |done| done[:hit] }
end
|
125
|
+
|
126
|
+
# Run a search for +code+ and keep only the hits that belong to it.
#
# A hit is kept when its code contains the numeric part of +code+ and,
# if +code+ names a series (FIPS, SP or NISTIR followed by whitespace),
# when the hit belongs to that same series.
#
# @param code [String]
# @param year [String, nil]
# @param opts [Hash]
# @return [Array] the matching hits
def nistbib_search_filter(code, year, opts)
  docid = code.match(%r{[0-9-]{3,}}).to_s
  # "FIPS" was misspelt "FISP" here, so FIPS references were never
  # restricted to their own series.
  serie = code.match(%r{(FIPS|SP|NISTIR)(?=\s)})
  warn "fetching #{code}..."
  result = search(code, year, opts)
  result.select do |i|
    i.hit[:code]&.include?(docid) && (!serie || i.hit[:serie] == serie.to_s)
  end
end
|
135
|
+
|
136
|
+
# Print warnings explaining that no document was found for the reference.
#
# @param code [String]
# @param year [String, nil]
# @param missed_years [Array] years of the hits rejected because of the year
# @return [nil]
def fetch_ref_err(code, year, missed_years)
  reference = year ? "#{code}:#{year}" : code
  warn "WARNING: no match found online for #{reference}. " \
       "The code must be exactly like it is on the standards website."

  unless missed_years.empty?
    warn "(There was no match for #{year}, though there were matches " \
         "found for #{missed_years.join(', ')}.)"
  end

  # A digit-hyphen-digit sequence in the code gets the "document part" hint.
  if /\d-\d/.match?(code)
    warn "The provided document part may not exist, or the document " \
         "may no longer be published in parts."
  end

  nil
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
@@ -0,0 +1,329 @@
|
|
1
|
+
require "relaton_bib"
|
2
|
+
|
3
|
+
module RelatonNist
|
4
|
+
class Scrapper
|
5
|
+
class << self
|
6
|
+
DOMAIN = "https://csrc.nist.gov".freeze
|
7
|
+
|
8
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
9
|
+
|
10
|
+
# Build a bibliographic item for one search hit.
#
# Downloads the page at hit_data[:url] and combines what is scraped from it
# with the fields already present in +hit_data+ (:title, :release_date,
# :code, :status).
#
# @param hit_data [Hash]
# @return [NistBibliographicItem]
def parse_page(hit_data)
  page = get_page hit_data[:url]

  ids = fetch_docid(page)
  kind = "standard"
  title_list = fetch_titles(hit_data)
  # When the page heading is not an SP/NISTIR/FIPS identifier, the cleaned
  # heading becomes the document type and the title serves as identifier.
  unless ids[0].id =~ /^(SP|NISTIR|FIPS) /
    kind = id_cleanup(ids[0].id)
    ids[0] = RelatonBib::DocumentIdentifier.new(id: title_list[0][:content], type: "NIST")
  end

  NistBibliographicItem.new(
    fetched: Date.today.to_s,
    type: "standard",
    titles: title_list,
    link: fetch_link(page),
    docid: ids,
    dates: fetch_dates(page, hit_data[:release_date]),
    contributors: fetch_contributors(page),
    edition: fetch_edition(hit_data[:code]),
    language: ["en"],
    script: ["Latn"],
    abstract: fetch_abstract(page),
    docstatus: fetch_status(page, hit_data[:status]),
    copyright: fetch_copyright(page),
    relations: fetch_relations(page),
    series: fetch_series(page),
    keyword: fetch_keywords(page),
    commentperiod: fetch_commentperiod(page),
    doctype: kind,
  )
end
|
46
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
47
|
+
|
48
|
+
# Strip the status suffix from a document id.
#
# Removes " (WITHDRAWN)" (exact case) and a " (DRAFT)" suffix, the latter
# case-insensitively and optionally preceded by one word, e.g. " (2nd Draft)".
#
# @param id [String]
# @return [String]
def id_cleanup(id)
  without_withdrawn = id.sub(/ \(WITHDRAWN\)/, "")
  without_withdrawn.sub(/ \(([^) ]+ )?DRAFT\)/i, "")
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
# Download a page and parse it as HTML.
#
# @param url [String] address of the page
# @return [Nokogiri::HTML::Document]
def get_page(url)
  response = Net::HTTP.get_response(URI(url))
  Nokogiri::HTML(response.body)
end
|
65
|
+
|
66
|
+
# Fetch docid.
#
# Reads the identifier from the publication heading. When the heading is
# missing, a placeholder identifier "?" is returned instead of raising.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Array<RelatonBib::DocumentIdentifier>]
def fetch_docid(doc)
  heading = doc.at("//div[contains(@class, 'publications-detail')]/h3")
  # The guard has to come before .text.strip; previously a missing heading
  # raised NoMethodError and the fallback below was unreachable.
  item_ref = heading ? heading.text.strip : "?"

  [RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
end
|
76
|
+
|
77
|
+
# Fetch id.
|
78
|
+
# @param doc [Nokogiri::HTML::Document]
|
79
|
+
# @return [String]
|
80
|
+
# def fetch_id(doc)
|
81
|
+
# doc.at("//div[contains(@class, 'publications-detail')]/h3").text.
|
82
|
+
# strip.gsub(/\s/, "")
|
83
|
+
# end
|
84
|
+
|
85
|
+
# Fetch status.
#
# Maps the status text of the hit to a stage and substage. For draft stages
# the iteration is derived from the publication history: it is the
# one-based position of the first history entry that is not a link, and 1
# when that entry comes first or no such entry exists.
#
# @param doc [Nokogiri::HTML::Document]
# @param status [String]
# @return [RelatonNist::DocumentStatus]
def fetch_status(doc, status)
  stage, subst =
    case status
    when "draft (withdrawn)" then %w[draft-public withdrawn]
    when "retired draft" then %w[draft-public retired]
    when "withdrawn" then %w[final withdrawn]
    when "draft" then %w[draft-public active]
    else [status, "active"]
    end

  iter = nil
  if stage.include? "draft"
    iter = 1
    history = doc.xpath("//span[@id='pub-history-container']/a"\
                        "|//span[@id='pub-history-container']/span")
    position = history.find_index { |entry| entry.name != "a" }
    iter = position + 1 if position && position.positive?
  end

  RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter
end
|
137
|
+
|
138
|
+
# Fetch titles.
#
# Wraps the title of the hit in a single English, plain-text title entry.
#
# @param hit_data [Hash]
# @return [Array<Hash>]
def fetch_titles(hit_data)
  title = { content: hit_data[:title] }
  [title.merge(language: "en", script: "Latn", format: "text/plain")]
end
|
144
|
+
|
145
|
+
# Fetch dates.
#
# The "published" date is taken from the search hit. The "issued" date is
# read from the release-date element of the page, which may look like
# "June 2018" or "June 15, 2018"; it is left out when the element is
# missing or holds neither form.
#
# @param doc [Nokogiri::HTML::Document]
# @param release_date [#to_s] release date of the hit
# @return [Array<Hash>]
def fetch_dates(doc, release_date)
  dates = [{ type: "published", on: release_date.to_s }]

  node = doc.at("//span[@id='pub-release-date']")
  d = node ? node.text.strip : ""
  issued = if /(?<date>\w+\s\d{4})/ =~ d
             Date.strptime(date, "%B %Y")
           elsif /(?<date>\w+\s\d{1,2},\s\d{4})/ =~ d
             Date.strptime(date, "%B %d, %Y")
           end
  # Do not emit an "issued" entry with an empty date.
  dates << { type: "issued", on: issued.to_s } if issued

  dates
end
|
161
|
+
|
162
|
+
# Fetch contributors: NIST as publisher, followed by the authors and the
# editors listed on the page.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Array<RelatonBib::ContributionInfo>]
def fetch_contributors(doc)
  publisher = RelatonBib::Organization.new(
    name: "National Institute of Standards and Technology",
    url: "www.nist.gov",
    abbreviation: "NIST",
  )
  publisher_info = RelatonBib::ContributionInfo.new(entity: publisher, role: ["publisher"])

  author_list = contributors(doc.at('//h4[.="Author(s)"]/following-sibling::p'), "author")
  editor_list = contributors(doc.at('//h4[.="Editor(s)"]/following-sibling::p'), "editor")

  [publisher_info] + author_list + editor_list
end
|
177
|
+
|
178
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
179
|
+
# Turn a comma-separated list of names into contribution entries.
#
# Each entry looks like "Name" or "Name (ABBREV)". An entry is treated as a
# person affiliated with the abbreviated organization when it has an
# abbreviation, consists of two or three words and does not contain "task",
# "force" or "group"; otherwise the entry itself is an organization.
#
# @param doc [Nokogiri::XML::Node, nil] element holding the list
# @param role [String] role given to every contributor
# @return [Array<RelatonBib::ContributionInfo>]
def contributors(doc, role)
  return [] if doc.nil?

  doc.text.split(", ").map do |contr|
    /(?<an>.+?)(\s+\((?<abbrev>.+?)\))?$/ =~ contr
    entity =
      if abbrev && an.downcase !~ /(task|force|group)/ && an.split.size.between?(2, 3)
        fullname = RelatonBib::FullName.new(
          completename: RelatonBib::LocalizedString.new(an, "en", "Latn"),
        )
        org_name, url =
          case abbrev
          when "NIST" then ["National Institute of Standards and Technology", "www.nist.gov"]
          when "MITRE" then [abbrev, "www.mitre.org"]
          else [abbrev, nil]
          end
        org = RelatonBib::Organization.new name: org_name, url: url, abbreviation: abbrev
        affiliation = RelatonBib::Affilation.new org
        RelatonBib::Person.new(name: fullname, affiliation: [affiliation], contacts: [])
      else
        RelatonBib::Organization.new name: an, abbreviation: abbrev
      end
    RelatonBib::ContributionInfo.new entity: entity, role: [role]
  end
end
|
210
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
211
|
+
|
212
|
+
# Derive the edition from the revision number in the document code.
#
# @param code [String, nil] e.g. "SP 800-53 Rev. 4"
# @return [String, nil] "Revision N", or nil when the code has no "Rev. N"
def fetch_edition(code)
  found = /(?<=Rev\.\s)(?<rev>\d+)/.match(code)
  "Revision #{found[:rev]}" if found
end
|
217
|
+
|
218
|
+
# Fetch abstracts.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Array<Hash>] a single English, plain-text abstract entry
def fetch_abstract(doc)
  paragraphs = doc.xpath('//div[contains(@class, "pub-abstract-callout")]/div[1]/p')
  abstract = { content: paragraphs.text }
  [abstract.merge(language: "en", script: "Latn", format: "text/plain")]
end
|
230
|
+
|
231
|
+
# Fetch copyright.
#
# The owner is always NIST; the starting year is the first four-digit
# number found in the release date shown on the page.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Hash]
def fetch_copyright(doc)
  released = doc.at("//span[@id='pub-release-date']").text.strip
  owner = {
    name: "National Institute of Standards and Technology",
    abbreviation: "NIST",
    url: "www.nist.gov",
  }
  { owner: owner, from: released[/\d{4}/].to_s }
end
|
241
|
+
|
242
|
+
# Fetch links.
#
# Collects the PDF ("Local Download") and DOI links that follow the
# "Publication:" label. Returns an empty list when the page has no such
# label.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Array<Hash>]
def fetch_link(doc)
  pub = doc.at "//p/strong[.='Publication:']"
  # Without the label there is nothing to anchor the lookups on.
  return [] unless pub

  links = []
  pdf = pub.at "./following-sibling::a[.=' Local Download']"
  links << { type: "pdf", content: pdf[:href] } if pdf
  doi = pub.at("./following-sibling::a[contains(.,'(DOI)')]")
  links << { type: "doi", content: doi[:href] } if doi
  links
end
|
254
|
+
|
255
|
+
# Fetch relations.
#
# Links in the "supersedes", "part" and "related" containers become
# relations of type "supersedes", "partOf" and "updates", in that order.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Array] one doc_relation result per link
def fetch_relations(doc)
  containers = [
    ['//span[@id="pub-supersedes-container"]/a', "supersedes"],
    ['//span[@id="pub-part-container"]/a', "partOf"],
    ['//span[@id="pub-related-container"]/a', "updates"],
  ]
  containers.flat_map do |xpath, type|
    doc.xpath(xpath).map { |anchor| doc_relation type, anchor }
  end
end
|
271
|
+
|
272
|
+
# Build a relation of the given type to the document a link points at.
#
# The link text becomes the formatted reference and its href, prefixed with
# DOMAIN, the source link of the related item.
#
# @param type [String] relation type
# @param ref [Nokogiri::XML::Node] link element
# @return [RelatonBib::DocumentRelation]
def doc_relation(type, ref)
  formatted = RelatonBib::FormattedRef.new(
    content: ref.text, language: "en", script: "Latn", format: "text/plain",
  )
  source = RelatonBib::TypedUri.new(type: "src", content: DOMAIN + ref[:href])
  related = RelatonBib::BibliographicItem.new(formattedref: formatted, link: [source])
  RelatonBib::DocumentRelation.new(type: type, bibitem: related)
end
|
283
|
+
|
284
|
+
# Build series entries from the links of the publication history.
#
# Plain (span) history entries are skipped. For a link whose text starts
# with "Draft", the reference is the text after "Draft " followed by the
# iteration marker "(IPD)" for the first history entry or "(<n>PD)" for the
# n-th one. For any other link the reference content is nil.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Array<RelatonBib::Series>]
def fetch_series(doc)
  history = doc.xpath "//span[@id='pub-history-container']/a"\
                      "|//span[@id='pub-history-container']/span"
  entries = history.each_with_index.map do |node, position|
    next if node.name == "span"

    iter = position.zero? ? "I" : position + 1

    # NOTE(review): as written this replaces a space with a space; the
    # first literal was presumably a non-breaking space - confirm.
    content = node.text.match(/^[^\(]+/).to_s.strip.gsub " ", " "

    ref = nil
    if content.match(/\w+/).to_s == "Draft"
      ref = content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
    end

    fref = RelatonBib::FormattedRef.new(
      content: ref, language: "en", script: "Latn", format: "text/plain",
    )
    RelatonBib::Series.new(formattedref: fref)
  end
  entries.compact
end
|
307
|
+
|
308
|
+
# Fetch keywords: one Keyword per entry of the keywords container.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Array<Keyword>]
def fetch_keywords(doc)
  doc.xpath("//span[@id='pub-keywords-container']/span").map do |entry|
    Keyword.new(entry.text)
  end
end
|
312
|
+
|
313
|
+
# Fetch the comment period of a draft.
#
# The period ends on the "comments due" date, starts on the release date of
# the page and may carry an extended closing date announced on the page.
# The release date is accepted as "June 2018" as well as "June 15, 2018".
#
# @param doc [Nokogiri::HTML::Document]
# @return [CommentPeriod, nil] nil when the page shows no "comments due" date
def fetch_commentperiod(doc)
  cp = doc.at "//span[@id='pub-comments-due']"
  return unless cp

  to = Date.strptime cp.text.strip, "%B %d, %Y"

  d = doc.at("//span[@id='pub-release-date']").text.strip
  # Pick the format from the shape of the text; only a text that starts
  # with a full date switches away from the month-and-year format.
  from_format = /\A\w+\s\d{1,2},\s\d{4}/.match?(d) ? "%B %d, %Y" : "%B %Y"
  from = Date.strptime(d, from_format).to_s

  ex = doc.at "//strong[contains(.,'The comment closing date has been extended to')]"
  # Accept one- and two-digit days in the announced date.
  ext = ex&.text&.match(/\w+\s\d{1,2},\s\d{4}/).to_s
  extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
  CommentPeriod.new from, to, extended
end
|
327
|
+
end
|
328
|
+
end
|
329
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module RelatonNist
|
2
|
+
class XMLParser < RelatonBib::XMLParser
|
3
|
+
class << self
|
4
|
+
# Build a bibliographic item from its XML serialisation.
#
# @param xml [String] XML with a bibitem or bibdata root element
# @return [NistBibliographicItem]
def from_xml(xml)
  root = Nokogiri::XML(xml).at("/bibitem|/bibdata")
  NistBibliographicItem.new(item_data(root))
end
|
9
|
+
|
10
|
+
private
|
11
|
+
|
12
|
+
# Extend the item data collected by the parent parser with the keywords and
# comment period found under the ext element, when there is one.
#
# @param nistitem [Nokogiri::XML::Element]
# @return [Hash]
def item_data(nistitem)
  data = super
  ext = nistitem.at "./ext"
  if ext
    data[:keyword] = fetch_keyword(ext)
    data[:commentperiod] = fetch_commentperiod(ext)
  end
  data
end
|
21
|
+
|
22
|
+
# Read the document status from the status element.
#
# @param item [Nokogiri::XML::Element]
# @return [DocumentStatus, nil] nil when the item has no status element
def fetch_status(item)
  status = item.at "./status"
  return unless status

  # Text of a child element, or nil when the child is absent.
  text_of = ->(tag) { status.at(tag)&.text }
  DocumentStatus.new(
    stage: text_of.call("stage"),
    substage: text_of.call("substage"),
    iteration: text_of.call("iteration"),
  )
end
|
32
|
+
|
33
|
+
# Read the comment period from the commentperiod element.
#
# The from child is required once the element is present; to and extended
# are optional.
#
# @param item [Nokogiri::XML::Element]
# @return [CommentPeriod, nil] nil when there is no commentperiod element
def fetch_commentperiod(item)
  cp = item.at "./commentperiod"
  return unless cp

  from = cp.at("from").text
  to = cp.at("to")&.text
  extended = cp.at("extended")&.text
  CommentPeriod.new(from, to, extended)
end
|
39
|
+
|
40
|
+
# Read the keywords: one Keyword per keyword element, built from the XML of
# the element's first child node.
#
# @param item [Nokogiri::XML::Element]
# @return [Array<Keyword>]
def fetch_keyword(item)
  item.xpath("./keyword").map { |kw| Keyword.new(kw.children.first.to_xml) }
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/relaton_nist.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require "relaton_nist/version"
|
2
|
+
require "relaton_nist/nist_bibliography"
|
3
|
+
|
4
|
+
# Register the NIST processor with the Relaton registry, but only when the
# Relaton constant is already defined at load time.
if defined?(Relaton)
  require_relative "relaton/processor"
  Relaton::Registry.instance.register(Relaton::RelatonNist::Processor)
end
|
8
|
+
|
9
|
+
# Namespace of the relaton-nist gem.
module RelatonNist
  # Error class of this gem, a direct subclass of StandardError.
  class Error < StandardError
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
lib = File.expand_path("../lib", __FILE__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
require "relaton_nist/version"
|
4
|
+
|
5
|
+
# Gem specification of relaton-nist.
Gem::Specification.new do |spec|
  spec.name          = "relaton-nist"
  spec.version       = RelatonNist::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  # "retrive" was a misspelling in both user-visible strings.
  spec.summary       = "RelatonNist: retrieve NIST standards."
  spec.description   = "RelatonNist: retrieve NIST standards."
  spec.homepage      = "https://github.com/metanorma/relaton-nist"
  spec.license       = "MIT"

  # Specify which files should be added to the gem when it is released.
  # `git ls-files -z` lists the files tracked by git; test, spec and
  # features directories are left out of the package.
  spec.files         = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "byebug"
  spec.add_development_dependency "debase"
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "pry-byebug"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "ruby-debug-ide"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "vcr"
  spec.add_development_dependency "webmock"

  spec.add_dependency "relaton-bib", "~> 0.1.6"
end
|