relaton-nist 1.7.4 → 1.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/README.adoc +19 -0
- data/bin/rspec +29 -0
- data/grammars/basicdoc.rng +165 -20
- data/grammars/biblio.rng +5 -6
- data/grammars/isodoc.rng +532 -16
- data/grammars/nist.rng +9 -0
- data/grammars/reqt.rng +31 -2
- data/lib/relaton_nist/data_fetcher.rb +221 -0
- data/lib/relaton_nist/hit.rb +3 -1
- data/lib/relaton_nist/hit_collection.rb +71 -58
- data/lib/relaton_nist/nist_bibliographic_item.rb +1 -1
- data/lib/relaton_nist/nist_bibliography.rb +16 -6
- data/lib/relaton_nist/processor.rb +5 -0
- data/lib/relaton_nist/scrapper.rb +198 -265
- data/lib/relaton_nist/version.rb +1 -1
- data/lib/relaton_nist/xml_parser.rb +1 -1
- data/lib/relaton_nist.rb +1 -0
- data/relaton_nist.gemspec +3 -5
- metadata +9 -7
@@ -14,9 +14,9 @@ module RelatonNist
|
|
14
14
|
# @param text [String]
|
15
15
|
# @return [RelatonNist::HitCollection]
|
16
16
|
def search(text, year = nil, opts = {})
|
17
|
-
HitCollection.
|
18
|
-
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError
|
19
|
-
raise RelatonBib::RequestError,
|
17
|
+
HitCollection.search text, year, opts
|
18
|
+
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError => e
|
19
|
+
raise RelatonBib::RequestError, e.message
|
20
20
|
end
|
21
21
|
|
22
22
|
# @param code [String] the NIST standard Code to look up (e.g. "8200")
|
@@ -174,7 +174,17 @@ module RelatonNist
|
|
174
174
|
}
|
175
175
|
ref = matches[:code] ? "#{matches[:serie]} #{matches[:code]}" : code
|
176
176
|
result = search(ref, year, opts)
|
177
|
-
result.select { |i| search_filter i, matches, code }
|
177
|
+
selected_result = result.select { |i| search_filter i, matches, code }
|
178
|
+
return selected_result if selected_result.any? || !matches[:code]
|
179
|
+
|
180
|
+
search full_ref(matches)
|
181
|
+
end
|
182
|
+
|
183
|
+
def full_ref(matches)
|
184
|
+
ref = "#{matches[:serie]} #{matches[:code]}"
|
185
|
+
ref += long_to_short(matches[:prt1], matches[:prt2]).to_s
|
186
|
+
ref += long_to_short(matches[:vol1], matches[:vol2]).to_s
|
187
|
+
ref
|
178
188
|
end
|
179
189
|
|
180
190
|
def match(regex, code)
|
@@ -192,11 +202,11 @@ module RelatonNist
|
|
192
202
|
(?<code>[0-9-]{3,}[A-Z]?)
|
193
203
|
(?<prt1>pt\d+)?
|
194
204
|
(?<vol1>v\d+)?
|
195
|
-
(?<ver1>ver[\d
|
205
|
+
(?<ver1>ver[\d.]+)?
|
196
206
|
(?<rev1>r\d+)?
|
197
207
|
(\s(?<prt2>Part\s\d+))?
|
198
208
|
(\s(?<vol2>Vol\.\s\d+))?
|
199
|
-
(\s(?<ver2>(Ver\.|Version)\s[\d
|
209
|
+
(\s(?<ver2>(Ver\.|Version)\s[\d.]+))?
|
200
210
|
(\s(?<rev2>Rev\.\s\d+))?
|
201
211
|
(\s(?<add>Add)endum)?
|
202
212
|
}x =~ item.hit[:code]
|
@@ -7,6 +7,7 @@ module RelatonNist
|
|
7
7
|
@prefix = "NIST"
|
8
8
|
@defaultprefix = %r{^(NIST|NISTGCR|ITL Bulletin|JPCRD|NISTIR|CSRC|FIPS)(/[^\s])?\s}
|
9
9
|
@idtype = "NIST"
|
10
|
+
@datasets = %w[nist-tech-pubs]
|
10
11
|
end
|
11
12
|
|
12
13
|
# @param code [String]
|
@@ -17,6 +18,10 @@ module RelatonNist
|
|
17
18
|
::RelatonNist::NistBibliography.get(code, date, opts)
|
18
19
|
end
|
19
20
|
|
21
|
+
def fetch_data(_source, opts)
|
22
|
+
DataFetcher.fetch(**opts)
|
23
|
+
end
|
24
|
+
|
20
25
|
# @param xml [String]
|
21
26
|
# @return [RelatonNist::GbBibliographicItem]
|
22
27
|
def from_xml(xml)
|
@@ -11,23 +11,17 @@ module RelatonNist
|
|
11
11
|
# @param hit_data [Hash]
|
12
12
|
# @return [Hash]
|
13
13
|
def parse_page(hit_data)
|
14
|
-
item_data =
|
15
|
-
from_json hit_data
|
16
|
-
else
|
17
|
-
from_csrs hit_data
|
18
|
-
end
|
19
|
-
# doctype = "standard"
|
14
|
+
item_data = from_json hit_data
|
20
15
|
titles = fetch_titles(hit_data)
|
21
16
|
unless /^(SP|NISTIR|FIPS) /.match? item_data[:docid][0].id
|
22
|
-
# doctype = id_cleanup(item_data[:docid][0].id)
|
23
17
|
item_data[:docid][0] = RelatonBib::DocumentIdentifier.new(
|
24
|
-
id: titles[0][:content].upcase, type: "NIST"
|
18
|
+
id: titles[0][:content].upcase, type: "NIST",
|
25
19
|
)
|
26
20
|
end
|
27
21
|
item_data[:fetched] = Date.today.to_s
|
28
22
|
item_data[:type] = "standard"
|
29
23
|
item_data[:title] = titles
|
30
|
-
item_data[:doctype] = "standard"
|
24
|
+
item_data[:doctype] = "standard"
|
31
25
|
|
32
26
|
NistBibliographicItem.new(**item_data)
|
33
27
|
end
|
@@ -44,7 +38,7 @@ module RelatonNist
|
|
44
38
|
edition: fetch_edition(json),
|
45
39
|
language: [json["language"]],
|
46
40
|
script: [json["script"]],
|
47
|
-
docstatus: fetch_status(json, hit_data[:status]),
|
41
|
+
docstatus: fetch_status(json), # hit_data[:status]),
|
48
42
|
copyright: fetch_copyright(json["published-date"]),
|
49
43
|
relation: fetch_relations_json(json),
|
50
44
|
place: ["Gaithersburg, MD"],
|
@@ -53,119 +47,58 @@ module RelatonNist
|
|
53
47
|
}
|
54
48
|
end
|
55
49
|
|
56
|
-
def from_csrs(hit_data)
|
57
|
-
doc = get_page hit_data[:url]
|
58
|
-
{
|
59
|
-
# id: fetch_id(doc),
|
60
|
-
link: fetch_link(doc),
|
61
|
-
docid: fetch_docid(doc),
|
62
|
-
date: fetch_dates(doc, hit_data[:release_date]),
|
63
|
-
contributor: fetch_contributors(doc),
|
64
|
-
edition: fetch_edition(hit_data[:code]),
|
65
|
-
language: ["en"],
|
66
|
-
script: ["Latn"],
|
67
|
-
abstract: fetch_abstract(doc),
|
68
|
-
docstatus: fetch_status(doc, hit_data[:status]),
|
69
|
-
copyright: fetch_copyright(doc),
|
70
|
-
relation: fetch_relations(doc),
|
71
|
-
series: fetch_series(doc),
|
72
|
-
keyword: fetch_keywords(doc),
|
73
|
-
commentperiod: fetch_commentperiod(doc),
|
74
|
-
}
|
75
|
-
end
|
76
50
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
77
51
|
|
78
|
-
# Strip status from doc id
|
79
|
-
# @param id String
|
80
|
-
# @return String
|
81
|
-
# def id_cleanup(id)
|
82
|
-
# id.sub(/ \(WITHDRAWN\)/, "").sub(/ \(([^) ]+ )?DRAFT\)/i, "")
|
83
|
-
# end
|
84
|
-
|
85
|
-
# Get page.
|
86
|
-
# @param path [String] page's path
|
87
|
-
# @return [Array<Nokogiri::HTML::Document, String>]
|
88
|
-
def get_page(url)
|
89
|
-
uri = URI url
|
90
|
-
resp = Net::HTTP.get_response(uri)
|
91
|
-
%r{(?<=newLocation = 'https://' \+ window.location.hostname \+ ')(?<path>[^']+)} =~ resp.body
|
92
|
-
if path
|
93
|
-
uri = URI HitCollection::DOMAIN + path
|
94
|
-
resp = Net::HTTP.get_response(uri)
|
95
|
-
end
|
96
|
-
Nokogiri::HTML(resp.body)
|
97
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
98
|
-
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
99
|
-
Net::ProtocolError, OpenSSL::SSL::SSLError
|
100
|
-
raise RelatonBib::RequestError, "Could not access #{url}"
|
101
|
-
end
|
102
|
-
|
103
52
|
# Fetch docid.
|
104
|
-
# @param
|
53
|
+
# @param docid [String]
|
105
54
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
106
|
-
def fetch_docid(
|
107
|
-
item_ref =
|
108
|
-
|
109
|
-
|
110
|
-
"//div[contains(@class, 'publications-detail')]/h3"
|
111
|
-
)&.text&.strip&.sub(/(?<=\w)\([^\)]+\)$/) do |m|
|
112
|
-
" " + m.upcase
|
113
|
-
end&.squeeze(" ")&.gsub(/ |\n|\r/, "")
|
114
|
-
end
|
115
|
-
item_ref ||= "?"
|
116
|
-
item_ref.sub! /\sAddendum$/, "-Add"
|
55
|
+
def fetch_docid(docid)
|
56
|
+
item_ref = docid
|
57
|
+
# item_ref ||= "?"
|
58
|
+
item_ref.sub!(/\sAddendum$/, "-Add")
|
117
59
|
[RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
|
118
60
|
end
|
119
61
|
|
120
|
-
# Fetch id.
|
121
|
-
# @param doc [Nokogiri::HTML::Document]
|
122
|
-
# @return [String]
|
123
|
-
# def fetch_id(doc)
|
124
|
-
# doc.at("//div[contains(@class, 'publications-detail')]/h3").text.
|
125
|
-
# strip.gsub(/\s/, "")
|
126
|
-
# end
|
127
|
-
|
128
62
|
# Fetch status.
|
129
|
-
# @param doc [
|
130
|
-
# @param status [String]
|
63
|
+
# @param doc [Hash]
|
131
64
|
# @return [RelatonNist::DocumentStatus]
|
132
|
-
def fetch_status(doc, status)
|
133
|
-
if doc.is_a? Hash
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
else
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
end
|
65
|
+
def fetch_status(doc) # , status)
|
66
|
+
# if doc.is_a? Hash
|
67
|
+
stage = doc["status"]
|
68
|
+
subst = doc["substage"]
|
69
|
+
iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
|
70
|
+
# else
|
71
|
+
# case status
|
72
|
+
# when "draft (obsolete)"
|
73
|
+
# stage = "draft-public"
|
74
|
+
# subst = "withdrawn"
|
75
|
+
# when "retired draft"
|
76
|
+
# stage = "draft-public"
|
77
|
+
# subst = "retired"
|
78
|
+
# when "withdrawn"
|
79
|
+
# stage = "final"
|
80
|
+
# subst = "withdrawn"
|
81
|
+
# when /^draft/
|
82
|
+
# stage = "draft-public"
|
83
|
+
# subst = "active"
|
84
|
+
# else
|
85
|
+
# stage = status
|
86
|
+
# subst = "active"
|
87
|
+
# end
|
88
|
+
|
89
|
+
# iter = nil
|
90
|
+
# if stage.include? "draft"
|
91
|
+
# iter = 1
|
92
|
+
# history = doc.xpath("//span[@id='pub-history-container']/a"\
|
93
|
+
# "|//span[@id='pub-history-container']/span")
|
94
|
+
# history.each_with_index do |h, idx|
|
95
|
+
# next if h.name == "a"
|
96
|
+
|
97
|
+
# iter = idx + 1 if idx.positive?
|
98
|
+
# break
|
99
|
+
# end
|
100
|
+
# end
|
101
|
+
# end
|
169
102
|
|
170
103
|
RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter.to_s
|
171
104
|
end
|
@@ -179,49 +112,49 @@ module RelatonNist
|
|
179
112
|
end
|
180
113
|
|
181
114
|
# Fetch dates
|
182
|
-
# @param doc [
|
115
|
+
# @param doc [Hash]
|
183
116
|
# @param release_date [Date]
|
184
117
|
# @return [Array<Hash>]
|
185
118
|
def fetch_dates(doc, release_date) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
186
119
|
dates = [{ type: "published", on: release_date.to_s }]
|
187
120
|
|
188
|
-
if doc.is_a? Hash
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
else
|
195
|
-
|
196
|
-
|
197
|
-
end
|
121
|
+
# if doc.is_a? Hash
|
122
|
+
issued = RelatonBib.parse_date doc["issued-date"]
|
123
|
+
updated = RelatonBib.parse_date doc["updated-date"]
|
124
|
+
dates << { type: "updated", on: updated.to_s } if updated
|
125
|
+
obsoleted = RelatonBib.parse_date doc["obsoleted-date"]
|
126
|
+
dates << { type: "obsoleted", on: obsoleted.to_s } if obsoleted
|
127
|
+
# else
|
128
|
+
# d = doc.at("//span[@id='pub-release-date']")&.text&.strip
|
129
|
+
# issued = RelatonBib.parse_date d
|
130
|
+
# end
|
198
131
|
dates << { type: "issued", on: issued.to_s }
|
199
132
|
dates
|
200
133
|
end
|
201
134
|
|
202
135
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
203
|
-
# @param doc [
|
136
|
+
# @param doc [Hash]
|
204
137
|
# @return [Array<RelatonBib::ContributionInfo>]
|
205
138
|
def fetch_contributors(doc)
|
206
139
|
contribs = []
|
207
|
-
if doc.is_a? Hash
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
else
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
end
|
140
|
+
# if doc.is_a? Hash
|
141
|
+
contribs += contributors_json(
|
142
|
+
doc["authors"], "author", doc["language"], doc["script"]
|
143
|
+
)
|
144
|
+
contribs + contributors_json(
|
145
|
+
doc["editors"], "editor", doc["language"], doc["script"]
|
146
|
+
)
|
147
|
+
# else
|
148
|
+
# name = "National Institute of Standards and Technology"
|
149
|
+
# org = RelatonBib::Organization.new(
|
150
|
+
# name: name, url: "www.nist.gov", abbreviation: "NIST",
|
151
|
+
# )
|
152
|
+
# contribs << RelatonBib::ContributionInfo.new(entity: org, role: [type: "publisher"])
|
153
|
+
# authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
|
154
|
+
# contribs += contributors(authors, "author")
|
155
|
+
# editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
|
156
|
+
# contribs + contributors(editors, "editor")
|
157
|
+
# end
|
225
158
|
end
|
226
159
|
|
227
160
|
# @param doc [Array<Hash>]
|
@@ -256,37 +189,37 @@ module RelatonNist
|
|
256
189
|
# @param doc [Nokogiri::HTML::Element, Array<Hash>]
|
257
190
|
# @param role [String]
|
258
191
|
# @return [Array<RelatonBib::ContributionInfo>]
|
259
|
-
def contributors(doc, role, lang = "en", script = "Latn")
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
end
|
192
|
+
# def contributors(doc, role, lang = "en", script = "Latn")
|
193
|
+
# return [] if doc.nil?
|
194
|
+
|
195
|
+
# doc.text.split(", ").map do |contr|
|
196
|
+
# /(?<an>.+?)(\s+\((?<abbrev>.+?)\))?$/ =~ contr.strip
|
197
|
+
# if abbrev && an.downcase !~ /(task|force|group)/ && an.split.size.between?(2, 3)
|
198
|
+
# fullname = RelatonBib::FullName.new(
|
199
|
+
# completename: RelatonBib::LocalizedString.new(an, lang, script)
|
200
|
+
# )
|
201
|
+
# case abbrev
|
202
|
+
# when "NIST"
|
203
|
+
# org_name = "National Institute of Standards and Technology"
|
204
|
+
# url = "www.nist.gov"
|
205
|
+
# when "MITRE"
|
206
|
+
# org_name = abbrev
|
207
|
+
# url = "www.mitre.org"
|
208
|
+
# else
|
209
|
+
# org_name = abbrev
|
210
|
+
# url = nil
|
211
|
+
# end
|
212
|
+
# org = RelatonBib::Organization.new name: org_name, url: url, abbreviation: abbrev
|
213
|
+
# affiliation = RelatonBib::Affiliation.new organization: org
|
214
|
+
# entity = RelatonBib::Person.new(
|
215
|
+
# name: fullname, affiliation: [affiliation],
|
216
|
+
# )
|
217
|
+
# else
|
218
|
+
# entity = RelatonBib::Organization.new name: an, abbreviation: abbrev
|
219
|
+
# end
|
220
|
+
# RelatonBib::ContributionInfo.new entity: entity, role: [type: role]
|
221
|
+
# end
|
222
|
+
# end
|
290
223
|
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/MethodLength
|
291
224
|
|
292
225
|
# @param name [Hash]
|
@@ -313,16 +246,16 @@ module RelatonNist
|
|
313
246
|
[RelatonBib::LocalizedString.new(part, lang, script)]
|
314
247
|
end
|
315
248
|
|
316
|
-
# @param doc [
|
249
|
+
# @param doc [Hash]
|
317
250
|
# @return [String, NilClass]
|
318
251
|
def fetch_edition(doc)
|
319
|
-
if doc.is_a? Hash
|
320
|
-
|
252
|
+
# if doc.is_a? Hash
|
253
|
+
return unless doc["edition"]
|
321
254
|
|
322
|
-
|
323
|
-
else
|
324
|
-
|
325
|
-
end
|
255
|
+
rev = doc["edition"]
|
256
|
+
# else
|
257
|
+
# return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ doc
|
258
|
+
# end
|
326
259
|
|
327
260
|
"Revision #{rev}"
|
328
261
|
end
|
@@ -330,17 +263,17 @@ module RelatonNist
|
|
330
263
|
# Fetch abstracts.
|
331
264
|
# @param doc [Nokogiri::HTML::Document]
|
332
265
|
# @return [Array<Hash>]
|
333
|
-
def fetch_abstract(doc)
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
end
|
266
|
+
# def fetch_abstract(doc)
|
267
|
+
# abstract_content = doc.xpath(
|
268
|
+
# '//div[contains(@class, "pub-abstract-callout")]/div[1]/p',
|
269
|
+
# ).text
|
270
|
+
# [{
|
271
|
+
# content: abstract_content,
|
272
|
+
# language: "en",
|
273
|
+
# script: "Latn",
|
274
|
+
# format: "text/plain",
|
275
|
+
# }]
|
276
|
+
# end
|
344
277
|
|
345
278
|
# Fetch copyright.
|
346
279
|
# @param doc [Nokogiri::HTML::Document, String]
|
@@ -348,30 +281,30 @@ module RelatonNist
|
|
348
281
|
def fetch_copyright(doc)
|
349
282
|
name = "National Institute of Standards and Technology"
|
350
283
|
url = "www.nist.gov"
|
351
|
-
d = if doc.is_a? String then doc
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
from =
|
284
|
+
# d = if doc.is_a? String then doc
|
285
|
+
# else
|
286
|
+
# doc.at("//span[@id='pub-release-date']")&.text&.strip
|
287
|
+
# end
|
288
|
+
from = doc&.match(/\d{4}/)&.to_s
|
356
289
|
[{ owner: [{ name: name, abbreviation: "NIST", url: url }], from: from }]
|
357
290
|
end
|
358
291
|
|
359
292
|
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
360
293
|
|
361
294
|
# Fetch links.
|
362
|
-
# @param doc [
|
295
|
+
# @param doc [Hash]
|
363
296
|
# @return [Array<Hash>]
|
364
297
|
def fetch_link(doc)
|
365
298
|
links = []
|
366
|
-
if doc.is_a? Hash
|
367
|
-
|
368
|
-
|
369
|
-
else
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
end
|
299
|
+
# if doc.is_a? Hash
|
300
|
+
links << { type: "uri", content: doc["uri"] } if doc["uri"]
|
301
|
+
doi = "https://doi.org/" + doc["doi"] if doc["doi"]
|
302
|
+
# else
|
303
|
+
# pub = doc.at "//p/strong[contains(., 'Publication:')]"
|
304
|
+
# pdf = pub&.at "./following-sibling::a[.=' Local Download']"
|
305
|
+
# doi = pub&.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
|
306
|
+
# links << { type: "pdf", content: pdf[:href] } if pdf
|
307
|
+
# end
|
375
308
|
links << { type: "doi", content: doi } if doi
|
376
309
|
links
|
377
310
|
end
|
@@ -380,19 +313,19 @@ module RelatonNist
|
|
380
313
|
# Fetch relations.
|
381
314
|
# @param doc [Nokogiri::HTML::Document]
|
382
315
|
# @return [Array<RelatonNist::DocumentRelation>]
|
383
|
-
def fetch_relations(doc)
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
end
|
316
|
+
# def fetch_relations(doc)
|
317
|
+
# relations = doc.xpath('//span[@id="pub-supersedes-container"]/a').map do |r|
|
318
|
+
# doc_relation "supersedes", r.text, DOMAIN + r[:href]
|
319
|
+
# end
|
320
|
+
|
321
|
+
# relations += doc.xpath('//span[@id="pub-part-container"]/a').map do |r|
|
322
|
+
# doc_relation "partOf", r.text, DOMAIN + r[:href]
|
323
|
+
# end
|
324
|
+
|
325
|
+
# relations + doc.xpath('//span[@id="pub-related-container"]/a').map do |r|
|
326
|
+
# doc_relation "updates", r.text, DOMAIN + r[:href]
|
327
|
+
# end
|
328
|
+
# end
|
396
329
|
# rubocop:enable Metrics/AbcSize
|
397
330
|
|
398
331
|
def fetch_relations_json(doc)
|
@@ -425,62 +358,62 @@ module RelatonNist
|
|
425
358
|
|
426
359
|
# @param doc [Nokogiri::HTML::Document]
|
427
360
|
# @return [Array<RelatonBib::Series>]
|
428
|
-
def fetch_series(doc)
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
end
|
361
|
+
# def fetch_series(doc)
|
362
|
+
# series = doc.xpath "//span[@id='pub-history-container']/a"\
|
363
|
+
# "|//span[@id='pub-history-container']/span"
|
364
|
+
# series.map.with_index do |s, idx|
|
365
|
+
# next if s.name == "span"
|
366
|
+
|
367
|
+
# iter = if idx.zero? then "I"
|
368
|
+
# else idx + 1
|
369
|
+
# end
|
370
|
+
|
371
|
+
# content = s.text.match(/^[^\(]+/).to_s.strip.squeeze " "
|
372
|
+
|
373
|
+
# ref = case s.text
|
374
|
+
# when /^Draft/
|
375
|
+
# content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
|
376
|
+
# when /\(Draft\)/ then content + " (#{iter}PD)"
|
377
|
+
# else content
|
378
|
+
# end
|
379
|
+
|
380
|
+
# fref = RelatonBib::FormattedRef.new(
|
381
|
+
# content: ref, language: "en", script: "Latn", format: "text/plain",
|
382
|
+
# )
|
383
|
+
# RelatonBib::Series.new(formattedref: fref)
|
384
|
+
# end.select { |s| s }
|
385
|
+
# end
|
453
386
|
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
454
387
|
|
455
|
-
# @param doc [
|
388
|
+
# @param doc [Hash]
|
456
389
|
# @return [Array<RelatonNist::Keyword>]
|
457
390
|
def fetch_keywords(doc)
|
458
|
-
kws = if doc.is_a? Hash
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
391
|
+
# kws = if doc.is_a? Hash
|
392
|
+
# doc["keywords"]
|
393
|
+
# else
|
394
|
+
# doc.xpath "//span[@id='pub-keywords-container']/span"
|
395
|
+
# end
|
396
|
+
doc["keywords"].map { |kw| kw.is_a?(String) ? kw : kw.text }
|
464
397
|
end
|
465
398
|
|
466
399
|
# rubocop:disable Metrics/AbcSize
|
467
400
|
# @param doc [Nokogiri::HTML::Document]
|
468
401
|
# @return [RelatonNist::CommentPeriod, NilClass]
|
469
|
-
def fetch_commentperiod(doc)
|
470
|
-
|
471
|
-
|
402
|
+
# def fetch_commentperiod(doc)
|
403
|
+
# cp = doc.at "//span[@id='pub-comments-due']"
|
404
|
+
# return unless cp
|
472
405
|
|
473
|
-
|
406
|
+
# to = Date.strptime cp.text.strip, "%B %d, %Y"
|
474
407
|
|
475
|
-
|
476
|
-
|
408
|
+
# d = doc.at("//span[@id='pub-release-date']").text.strip
|
409
|
+
# from = Date.strptime(d, "%B %Y").to_s
|
477
410
|
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
end
|
411
|
+
# ex = doc.at "//strong[contains(.,'The comment closing date has been "\
|
412
|
+
# "extended to')]"
|
413
|
+
# ext = ex&.text&.match(/\w+\s\d{2},\s\d{4}/).to_s
|
414
|
+
# extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
|
415
|
+
# CommentPeriod.new from: from, to: to, extended: extended
|
416
|
+
# end
|
484
417
|
# rubocop:enable Metrics/AbcSize
|
485
418
|
|
486
419
|
# @param json [Hash]
|