relaton-nist 1.16.1 → 1.16.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +20 -9
- data/lib/relaton_nist/config.rb +10 -0
- data/lib/relaton_nist/data_fetcher.rb +25 -307
- data/lib/relaton_nist/hit_collection.rb +4 -2
- data/lib/relaton_nist/nist_bibliography.rb +9 -10
- data/lib/relaton_nist/tech_pubs_parser.rb +293 -0
- data/lib/relaton_nist/util.rb +9 -0
- data/lib/relaton_nist/version.rb +1 -1
- data/lib/relaton_nist.rb +8 -4
- metadata +5 -3
- data/resp.html +0 -665
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b00f5d1fb998afa9409df9108f6336cf067fc41faef13e8aca3a95cc8b289596
|
4
|
+
data.tar.gz: 17f09df20a20798c1a56b5b7b887d7be9a8509e3a948b731603cbd95e684346e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1dfc55cfa95ad85bf38a3a582d5d92fea954f7fd7e66537d59d78f672efd798f10bb1bed8ee804c4c9b062017fa7202edd38295b9456c990368883bb77861436
|
7
|
+
data.tar.gz: 0e3b65872354a4c48dfd1f752ec160d3d2bdd47bafa9e678a3a5e200189589ce1e09b25e17df7c05bf6f643290f10ad3925bd9efb415525cf1c91894d3c6f92a
|
data/README.adoc
CHANGED
@@ -71,13 +71,24 @@ Or install it yourself as:
|
|
71
71
|
|
72
72
|
== Usage
|
73
73
|
|
74
|
-
===
|
74
|
+
=== Configuration
|
75
|
+
|
76
|
+
Configuration is optional. The available option is `logger`, which is a `Logger` instance. By default, the logger is `Logger.new($stderr)` with the `Logger::WARN` level. To change the logger level, use a `RelatonNist.configure` block.
|
75
77
|
|
76
78
|
[source,ruby]
|
77
79
|
----
|
78
80
|
require 'relaton_nist'
|
79
81
|
=> true
|
80
82
|
|
83
|
+
RelatonNist.configure do |config|
|
84
|
+
config.logger.level = Logger::DEBUG
|
85
|
+
end
|
86
|
+
----
|
87
|
+
|
88
|
+
=== Search for a standard using keywords
|
89
|
+
|
90
|
+
[source,ruby]
|
91
|
+
----
|
81
92
|
hit_collection = RelatonNist::NistBibliography.search("8200")
|
82
93
|
=> <RelatonNist::HitCollection:0x007fc069d19da0 @ref=8200 @fetched=false>
|
83
94
|
|
@@ -114,8 +125,8 @@ item.to_xml bibdata: true
|
|
114
125
|
[source,ruby]
|
115
126
|
----
|
116
127
|
RelatonNist::NistBibliography.get("NIST IR 8200", "2018", {})
|
117
|
-
[relaton-nist] (
|
118
|
-
[relaton-nist] (
|
128
|
+
[relaton-nist] (NIST IR 8200) fetching...
|
129
|
+
[relaton-nist] (NIST IR 8200) found `NIST IR 8200`
|
119
130
|
=> #<RelatonNist::NistBibliographicItem:0x00007fab74a572c0
|
120
131
|
...
|
121
132
|
----
|
@@ -143,8 +154,8 @@ NIST FIPS {docnumber}
|
|
143
154
|
[source,ruby]
|
144
155
|
----
|
145
156
|
RelatonNist::NistBibliography.get("SP 800-205 (February 2019) (PD)")
|
146
|
-
[relaton-nist] (
|
147
|
-
[relaton-nist] (
|
157
|
+
[relaton-nist] (SP 800-205) fetching...
|
158
|
+
[relaton-nist] (SP 800-205) found `SP 800-205 (Draft)`
|
148
159
|
=> #<RelatonNist::NistBibliographicItem:0x007fc059934768
|
149
160
|
...
|
150
161
|
----
|
@@ -161,8 +172,8 @@ References can contain optional parameters `{ptN}{vN}{verN}{rN}{/Add}`:
|
|
161
172
|
[source,ruby]
|
162
173
|
----
|
163
174
|
item = RelatonNist::NistBibliography.get 'NIST SP 800-67r1'
|
164
|
-
[relaton-nist] (
|
165
|
-
[relaton-nist] (
|
175
|
+
[relaton-nist] (NIST SP 800-67r1) fetching...
|
176
|
+
[relaton-nist] (NIST SP 800-67r1) found `SP 800-67 Rev. 1`
|
166
177
|
=> #<RelatonNist::NistBibliographicItem:0x00007fab748ae978
|
167
178
|
...
|
168
179
|
|
@@ -170,8 +181,8 @@ item.docidentifier.first.id
|
|
170
181
|
=> "SP 800-67 Rev. 1"
|
171
182
|
|
172
183
|
item = RelatonNist::NistBibliography.get 'SP 800-38A/Add'
|
173
|
-
[relaton-nist] (
|
174
|
-
[relaton-nist] (
|
184
|
+
[relaton-nist] (SP 800-38A/Add) fetching...
|
185
|
+
[relaton-nist] (SP 800-38A/Add) found `SP 800-38A-Add`
|
175
186
|
=> #<RelatonNist::NistBibliographicItem:0x007fd88c21d880
|
176
187
|
...
|
177
188
|
|
@@ -4,20 +4,7 @@ require "yaml"
|
|
4
4
|
|
5
5
|
module RelatonNist
|
6
6
|
class DataFetcher
|
7
|
-
RELATION_TYPES = {
|
8
|
-
"replaces" => "obsoletes",
|
9
|
-
"isVersionOf" => "editionOf",
|
10
|
-
"hasTranslation" => "hasTranslation",
|
11
|
-
"isTranslationOf" => "translatedFrom",
|
12
|
-
"hasPreprint" => "hasReprint",
|
13
|
-
"isPreprintOf" => "hasDraft",
|
14
|
-
"isSupplementTo" => "complements",
|
15
|
-
"isPartOf" => "partOf",
|
16
|
-
"hasPart" => "hasPart",
|
17
|
-
}.freeze
|
18
|
-
|
19
7
|
URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"
|
20
|
-
NS = "http://www.crossref.org/relations.xsd"
|
21
8
|
|
22
9
|
def initialize(output, format)
|
23
10
|
@output = output
|
@@ -30,267 +17,6 @@ module RelatonNist
|
|
30
17
|
@index ||= Relaton::Index.find_or_create :nist, file: "index-v1.yaml"
|
31
18
|
end
|
32
19
|
|
33
|
-
def parse_docid(doc) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
34
|
-
[
|
35
|
-
{ type: "NIST", id: pub_id(doc), primary: true },
|
36
|
-
{ type: "DOI", id: fetch_doi(doc) },
|
37
|
-
# { type: "NIST", id: anchor(doc), scope: "anchor" },
|
38
|
-
]
|
39
|
-
end
|
40
|
-
|
41
|
-
#
|
42
|
-
# Parse document's ID from XML
|
43
|
-
#
|
44
|
-
# @param [Nokogiri::XML::Element] doc XML element
|
45
|
-
#
|
46
|
-
# @return [String] document's ID
|
47
|
-
#
|
48
|
-
def pub_id(doc)
|
49
|
-
# anchor(doc).gsub(".", " ")
|
50
|
-
fetch_doi(doc).split("/")[1..].join("/").gsub(".", " ").sub(/^nist\sir/, "NIST IR")
|
51
|
-
end
|
52
|
-
|
53
|
-
def fetch_doi(doc) # rubocop:disable Metrics/CyclomaticComplexity
|
54
|
-
id = doc.at("doi_data/doi").text
|
55
|
-
case id
|
56
|
-
when "10.6028/NBS.CIRC.e2e" then "10.6028/NBS.CIRC.2e2"
|
57
|
-
when "10.6028/NBS.CIRC.sup" then "10.6028/NBS.CIRC.24e7sup"
|
58
|
-
when "10.6028/NBS.CIRC.supJun1925-Jun1926" then "10.6028/NBS.CIRC.24e7sup2"
|
59
|
-
when "10.6028/NBS.CIRC.supJun1925-Jun1927" then "10.6028/NBS.CIRC.24e7sup3"
|
60
|
-
when "10.6028/NBS.CIRC.24supJuly1922" then "10.6028/NBS.CIRC.24e6sup"
|
61
|
-
when "10.6028/NBS.CIRC.24supJan1924" then "10.6028/NBS.CIRC.24e6sup2"
|
62
|
-
else id
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
# def anchor(doc)
|
67
|
-
# fetch_doi(doc).split("/")[1..-1].join "/"
|
68
|
-
# end
|
69
|
-
|
70
|
-
# @param doc [Nokogiri::XML::Element]
|
71
|
-
# @return [Array<RelatonBib::DocumentIdentifier>]
|
72
|
-
def fetch_docid(doc)
|
73
|
-
parse_docid(doc).map do |id|
|
74
|
-
RelatonBib::DocumentIdentifier.new(**id)
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
# @param doc [Nokogiri::XML::Element]
|
79
|
-
# @return [RelatonBib::TypedTitleStringCollection, Array]
|
80
|
-
def fetch_title(doc)
|
81
|
-
t = doc.xpath("titles/title|titles/subtitle")
|
82
|
-
return [] unless t.any?
|
83
|
-
|
84
|
-
# RelatonBib::TypedTitleString.from_string t.map(&:text).join, "en", "Latn"
|
85
|
-
[{ content: t.map(&:text).join, language: "en", script: "Latn",
|
86
|
-
format: "text/plain" }]
|
87
|
-
end
|
88
|
-
|
89
|
-
# @param doc [Nokogiri::XML::Element]
|
90
|
-
# @return [Array<RelatonBib::BibliographicDate>]
|
91
|
-
def fetch_date(doc)
|
92
|
-
doc.xpath("publication_date|approval_date").map do |dt|
|
93
|
-
on = dt.at("year").text
|
94
|
-
if (m = dt.at "month")
|
95
|
-
on += "-#{m.text}"
|
96
|
-
d = dt.at "day"
|
97
|
-
on += "-#{d.text}" if d
|
98
|
-
end
|
99
|
-
type = dt.name == "publication_date" ? "published" : "confirmed"
|
100
|
-
RelatonBib::BibliographicDate.new(type: type, on: on)
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
# @param doc [Nokogiri::XML::Element]
|
105
|
-
# @return [String]
|
106
|
-
def fetch_edition(doc)
|
107
|
-
doc.at("edition_number")&.text
|
108
|
-
end
|
109
|
-
|
110
|
-
# @param doc [Nokogiri::XML::Element]
|
111
|
-
# @return [Array<Hash>]
|
112
|
-
def fetch_relation(doc) # rubocop:disable Metrics/AbcSize
|
113
|
-
doc.xpath("./ns:program/ns:related_item", ns: NS).map do |rel|
|
114
|
-
rdoi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: NS)
|
115
|
-
id = rdoi.text.split("/")[1..].join("/").gsub(".", " ")
|
116
|
-
fref = RelatonBib::FormattedRef.new content: id
|
117
|
-
docid = RelatonBib::DocumentIdentifier.new(type: "NIST", id: id, primary: true)
|
118
|
-
bibitem = RelatonBib::BibliographicItem.new formattedref: fref, docid: [docid]
|
119
|
-
type = RELATION_TYPES[rdoi["relationship-type"]]
|
120
|
-
warn "Relation type #{rdoi['relationship-type']} not found" unless type
|
121
|
-
{ type: type, bibitem: bibitem }
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def fetch_status(doc)
|
126
|
-
s = doc.at("./ns:program/ns:related_item/ns:*[@relationship-type='isPreprintOf']", ns: NS)
|
127
|
-
return unless s
|
128
|
-
|
129
|
-
RelatonBib::DocumentStatus.new stage: "preprint"
|
130
|
-
end
|
131
|
-
|
132
|
-
# @param doc [Nokogiri::XML::Element]
|
133
|
-
# @return [Array<RelatonBib::TypedUri>]
|
134
|
-
def fetch_link(doc)
|
135
|
-
pdf = doc.at("doi_data/resource").text
|
136
|
-
doi = "https://doi.org/#{fetch_doi(doc)}"
|
137
|
-
[{ type: "doi", content: doi }, { type: "pdf", content: pdf }].map do |l|
|
138
|
-
RelatonBib::TypedUri.new(**l)
|
139
|
-
end
|
140
|
-
end
|
141
|
-
|
142
|
-
# @param doc [Nokogiri::XML::Element]
|
143
|
-
# @return [Array<RelatonBib::FormattedString>]
|
144
|
-
def fetch_abstract(doc)
|
145
|
-
doc.xpath(
|
146
|
-
"jats:abstract/jats:p", "jats" => "http://www.ncbi.nlm.nih.gov/JATS1"
|
147
|
-
).each_with_object([]) do |a, m|
|
148
|
-
next if a.text.empty?
|
149
|
-
|
150
|
-
m << RelatonBib::FormattedString.new(content: a.text, language: doc["language"], script: "Latn")
|
151
|
-
end
|
152
|
-
end
|
153
|
-
|
154
|
-
# @param doc [Nokogiri::XML::Element]
|
155
|
-
# @return [Array<Hash>]
|
156
|
-
def fetch_contributor(doc)
|
157
|
-
contribs = doc.xpath("contributors/person_name").map do |p|
|
158
|
-
person = RelatonBib::Person.new(name: fullname(p, doc),
|
159
|
-
affiliation: affiliation(doc))
|
160
|
-
{ entity: person, role: [{ type: p["contributor_role"] }] }
|
161
|
-
end
|
162
|
-
contribs + doc.xpath("publisher").map do |p|
|
163
|
-
{ entity: create_org(p), role: [{ type: "publisher" }] }
|
164
|
-
end
|
165
|
-
end
|
166
|
-
|
167
|
-
#
|
168
|
-
# Create full name object from person name element.
|
169
|
-
#
|
170
|
-
# @param [Nokogiri::XML::Element] person name element
|
171
|
-
# @param [Nokogiri::XML::Element] doc document element
|
172
|
-
#
|
173
|
-
# @return [RelatonBib::FullName] full name object
|
174
|
-
#
|
175
|
-
def fullname(person, doc)
|
176
|
-
forename, initials = forename_initial(person, doc)
|
177
|
-
surname = localized_string person.at("surname").text, doc
|
178
|
-
ident = person.xpath("ORCID").map do |id|
|
179
|
-
RelatonBib::PersonIdentifier.new "orcid", id.text
|
180
|
-
end
|
181
|
-
RelatonBib::FullName.new(surname: surname, forename: forename,
|
182
|
-
initials: initials, identifier: ident)
|
183
|
-
end
|
184
|
-
|
185
|
-
#
|
186
|
-
# Create forename and initials objects from person name element.
|
187
|
-
#
|
188
|
-
# @param [Nokogiri::XML::Element] person person name element
|
189
|
-
# @param [Nokogiri::XML::Element] doc document element
|
190
|
-
#
|
191
|
-
# @return [Array<Array<RelatonBib::LocalizedString>>] forename and initials
|
192
|
-
#
|
193
|
-
def forename_initial(person, doc) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
194
|
-
fnames = []
|
195
|
-
fname = person.at("given_name")&.text
|
196
|
-
if fname
|
197
|
-
if /^(?<inits>(?:\w[.\s]+|[A-Z]{1,2}$)+)$/ =~ fname
|
198
|
-
ints = inits.split(/[.\s]*/)
|
199
|
-
fnames << forename(doc, fname, ints.shift)
|
200
|
-
ints.each { |i| fnames << forename(doc, nil, i) }
|
201
|
-
else
|
202
|
-
fn = forename(doc, fname)
|
203
|
-
fnames << fn if fn
|
204
|
-
end
|
205
|
-
end
|
206
|
-
initials = localized_string inits, doc if not(inits.nil? || inits.empty?)
|
207
|
-
[fnames, initials]
|
208
|
-
end
|
209
|
-
|
210
|
-
#
|
211
|
-
# Create forename object
|
212
|
-
#
|
213
|
-
# @param [Nokogiri::XML::Element] doc document element
|
214
|
-
# @param [String, nil] cnt forename content
|
215
|
-
# @param [String, nil] init initial content
|
216
|
-
#
|
217
|
-
# @return [RelatonBib::Forename] forename object
|
218
|
-
#
|
219
|
-
def forename(doc, cnt, init = nil)
|
220
|
-
return if (cnt.nil? || cnt.empty?) && (init.nil? || init.empty?)
|
221
|
-
|
222
|
-
RelatonBib::Forename.new(
|
223
|
-
content: cnt, language: doc["language"], script: "Latn", initial: init,
|
224
|
-
)
|
225
|
-
end
|
226
|
-
|
227
|
-
#
|
228
|
-
# Create localized string
|
229
|
-
#
|
230
|
-
# @param [String] content content of string
|
231
|
-
# @param [Nokogiri::XML::Element] doc XML element
|
232
|
-
#
|
233
|
-
# @return [RelatonBib::LocalizedString] localized string
|
234
|
-
#
|
235
|
-
def localized_string(content, doc)
|
236
|
-
RelatonBib::LocalizedString.new content, doc["language"], "Latn"
|
237
|
-
end
|
238
|
-
|
239
|
-
#
|
240
|
-
# Create publisher organization
|
241
|
-
#
|
242
|
-
# @param [Nokogiri::XML::Element] pub publisher element
|
243
|
-
#
|
244
|
-
# @return [RelatonBib::Organization] publisher organization
|
245
|
-
#
|
246
|
-
def create_org(pub)
|
247
|
-
name = pub.at("publisher_name").text
|
248
|
-
abbr = pub.at("../institution[institution_name[.='#{name}']]/institution_acronym")&.text
|
249
|
-
place = pub.at("./publisher_place") ||
|
250
|
-
pub.at("../institution[institution_name[.='#{name}']]/institution_place")
|
251
|
-
cont = []
|
252
|
-
if place
|
253
|
-
city, state = place.text.split(", ")
|
254
|
-
cont << RelatonBib::Address.new(street: [], city: city, state: state, country: "US")
|
255
|
-
end
|
256
|
-
RelatonBib::Organization.new name: name, abbreviation: abbr, contact: cont
|
257
|
-
end
|
258
|
-
|
259
|
-
#
|
260
|
-
# Create affiliation organization
|
261
|
-
#
|
262
|
-
# @param [Nokogiri::XML::Element] doc affiliation element
|
263
|
-
#
|
264
|
-
# @return [Array<RelatonBib::Affiliation>] affiliation
|
265
|
-
#
|
266
|
-
def affiliation(doc)
|
267
|
-
doc.xpath("./institution/institution_department").map do |id|
|
268
|
-
org = RelatonBib::Organization.new name: id.text
|
269
|
-
RelatonBib::Affiliation.new organization: org
|
270
|
-
end
|
271
|
-
end
|
272
|
-
|
273
|
-
# @param doc [Nokogiri::XML::Element]
|
274
|
-
# @return [Array<String>]
|
275
|
-
def fetch_place(doc)
|
276
|
-
doc.xpath("institution/institution_place").map(&:text)
|
277
|
-
end
|
278
|
-
|
279
|
-
#
|
280
|
-
# Fetches series
|
281
|
-
#
|
282
|
-
# @param [Nokogiri::XML::Element] doc document element
|
283
|
-
#
|
284
|
-
# @return [Array<RelatonBib::Series>] series
|
285
|
-
#
|
286
|
-
def fetch_series(doc)
|
287
|
-
prf, srs, num = pub_id(doc).split
|
288
|
-
sname = series[srs] || srs
|
289
|
-
title = RelatonBib::TypedTitleString.new(content: "#{prf} #{sname}")
|
290
|
-
abbr = RelatonBib::LocalizedString.new srs
|
291
|
-
[RelatonBib::Series.new(title: title, abbreviation: abbr, number: num)]
|
292
|
-
end
|
293
|
-
|
294
20
|
def series
|
295
21
|
@series ||= YAML.load_file File.expand_path("series.yaml", __dir__)
|
296
22
|
end
|
@@ -300,7 +26,7 @@ module RelatonNist
|
|
300
26
|
#
|
301
27
|
# @param bib [RelatonNist::NistBibliographicItem]
|
302
28
|
#
|
303
|
-
def write_file(bib) # rubocop:disable Metrics/AbcSize
|
29
|
+
def write_file(bib) # rubocop:disable Metrics/AbcSize
|
304
30
|
id = bib.docidentifier[0].id.gsub(%r{[/\s:.]}, "_").upcase.sub(/^NIST_IR/, "NISTIR")
|
305
31
|
file = File.join(@output, "#{id}.#{@ext}")
|
306
32
|
if @files.include? file
|
@@ -308,38 +34,16 @@ module RelatonNist
|
|
308
34
|
# warn "Link: #{bib.link.detect { |l| l.type == 'src' }.content}"
|
309
35
|
else @files << file
|
310
36
|
end
|
311
|
-
output = case @format
|
312
|
-
when "yaml" then bib.to_hash.to_yaml
|
313
|
-
when "xml" then bib.to_xml bibdata: true
|
314
|
-
else bib.send "to_#{@format}"
|
315
|
-
end
|
316
37
|
index.add_or_update bib.docidentifier[0].id, file
|
317
|
-
File.write file, output, encoding: "UTF-8"
|
38
|
+
File.write file, output(bib), encoding: "UTF-8"
|
318
39
|
end
|
319
40
|
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
#
|
327
|
-
def parse_doc(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
328
|
-
# mtd = doc.at('doi_record/report-paper/report-paper_metadata')
|
329
|
-
item = RelatonNist::NistBibliographicItem.new(
|
330
|
-
type: "standard", docid: fetch_docid(doc),
|
331
|
-
title: fetch_title(doc), link: fetch_link(doc), abstract: fetch_abstract(doc),
|
332
|
-
date: fetch_date(doc), edition: fetch_edition(doc),
|
333
|
-
contributor: fetch_contributor(doc), relation: fetch_relation(doc),
|
334
|
-
docstatus: fetch_status(doc), place: fetch_place(doc), series: fetch_series(doc),
|
335
|
-
language: [doc["language"]], script: ["Latn"], doctype: "standard"
|
336
|
-
)
|
337
|
-
write_file item
|
338
|
-
rescue StandardError => e
|
339
|
-
warn "Document: #{doc.at('doi').text}"
|
340
|
-
warn e.message
|
341
|
-
warn e.backtrace[0..5].join("\n")
|
342
|
-
# raise e
|
41
|
+
def output(bib)
|
42
|
+
case @format
|
43
|
+
when "yaml" then bib.to_hash.to_yaml
|
44
|
+
when "xml" then bib.to_xml bibdata: true
|
45
|
+
else bib.send "to_#{@format}"
|
46
|
+
end
|
343
47
|
end
|
344
48
|
|
345
49
|
#
|
@@ -349,13 +53,13 @@ module RelatonNist
|
|
349
53
|
t1 = Time.now
|
350
54
|
puts "Started at: #{t1}"
|
351
55
|
|
352
|
-
docs = Nokogiri::XML OpenURI.open_uri URL
|
353
56
|
FileUtils.mkdir_p @output
|
354
57
|
FileUtils.rm Dir[File.join(@output, "*.#{@ext}")]
|
355
|
-
docs.xpath("/body/query/doi_record/report-paper/report-paper_metadata")
|
356
|
-
.each { |doc| parse_doc doc }
|
357
58
|
|
59
|
+
fetch_tech_pubs
|
60
|
+
add_static_files
|
358
61
|
index.save
|
62
|
+
|
359
63
|
t2 = Time.now
|
360
64
|
puts "Stopped at: #{t2}"
|
361
65
|
puts "Done in: #{(t2 - t1).round} sec."
|
@@ -364,6 +68,20 @@ module RelatonNist
|
|
364
68
|
warn e.backtrace[0..5].join("\n")
|
365
69
|
end
|
366
70
|
|
71
|
+
def fetch_tech_pubs
|
72
|
+
docs = Nokogiri::XML OpenURI.open_uri URL
|
73
|
+
docs.xpath(
|
74
|
+
"/body/query/doi_record/report-paper/report-paper_metadata",
|
75
|
+
).each { |doc| write_file TechPubsParser.parse(doc, series) }
|
76
|
+
end
|
77
|
+
|
78
|
+
def add_static_files
|
79
|
+
Dir["./static/*.yaml"].each do |file|
|
80
|
+
hash = YAML.load_file file
|
81
|
+
write_file RelatonNist::NistBibliographicItem.from_hash(hash)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
367
85
|
#
|
368
86
|
# Fetch all the documents from the dataset
|
369
87
|
#
|
@@ -72,7 +72,7 @@ module RelatonNist
|
|
72
72
|
{
|
73
73
|
# prefix: match(/^(?:NIST|NBS)\s?/, code),
|
74
74
|
series: match(/(?<val>(?:SP|FIPS|CSWP|IR|ITL\sBulletin|White\sPaper))\s/, code),
|
75
|
-
code: match(/(?<val>[0-9-]+(?:(?!(?:ver|r|v|pt)\d|-add\d?)[A-Za-z-])
|
75
|
+
code: match(/(?<val>[0-9-]+(?:(?!(?:ver|r|v|pt)\d|-add\d?)[A-Za-z-])*|Research\sLibrary)/, code),
|
76
76
|
prt: match(/(?:pt|\sPart\s)(?<val>\d+)/, code),
|
77
77
|
vol: match(/(?:v|\sVol\.\s)(?<val>\d+)/, code),
|
78
78
|
ver: match(/(?:ver|\sVer\.\s|Version\s)(?<val>[\d.]+)/, code),
|
@@ -112,7 +112,7 @@ module RelatonNist
|
|
112
112
|
@refparts ||= {
|
113
113
|
perfix: match(/^(NIST|NBS)/, text),
|
114
114
|
series: match(/(SP|FIPS|CSWP|IR|ITL\sBulletin|White\sPaper)(?=\.|\s)/, text),
|
115
|
-
code: match(/(?<=\.|\s)[0-9-]+(?:(?!(ver|r|v|pt)\d|-add\d?)[A-Za-z-])
|
115
|
+
code: match(/(?<=\.|\s)[0-9-]+(?:(?!(ver|r|v|pt)\d|-add\d?)[A-Za-z-])*|Research\sLibrary/, text),
|
116
116
|
prt: match(/(?:(?<dl>\.)?pt(?(<dl>)-)|\sPart\s)(?<val>[A-Z\d]+)/, text),
|
117
117
|
vol: match(/(?:(?<dl>\.)?v(?(<dl>)-)|\sVol\.\s)(?<val>\d+)/, text),
|
118
118
|
ver: match(/(?:(?<dl>\.)?\s?ver|\sVer\.\s)(?<val>\d(?(<dl>)[-\d]|[.\d])*)/, text)&.gsub(/-/, "."),
|
@@ -241,6 +241,8 @@ module RelatonNist
|
|
241
241
|
# @return [Array<Hash>] selected data
|
242
242
|
#
|
243
243
|
def select_data # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength,Metrics/PerceivedComplexity
|
244
|
+
return [] unless refparts[:code]
|
245
|
+
|
244
246
|
ref = "#{refparts[:series]} #{refparts[:code]}"
|
245
247
|
d = Date.strptime year, "%Y" if year
|
246
248
|
statuses = %w[draft-public draft-prelim]
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require "relaton_bib"
|
2
1
|
require "relaton_nist/nist_bibliographic_item"
|
3
2
|
require "relaton_nist/document_relation"
|
4
3
|
require "relaton_nist/scrapper"
|
@@ -40,7 +39,7 @@ module RelatonNist
|
|
40
39
|
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
41
40
|
return fetch_ref_err(code, year, []) if code.match?(/\sEP$/)
|
42
41
|
|
43
|
-
/^(?<code2>[^(]+)(?:\((?<date2>\w+\s(?:\d{2},\s)?\d{4})\))?\s?\(?(?:(?<=\()(?<stage>
|
42
|
+
/^(?<code2>[^(]+)(?:\((?<date2>\w+\s(?:\d{2},\s)?\d{4})\))?\s?\(?(?:(?<=\()(?<stage>(?:I|F|\d)PD))?/ =~ code
|
44
43
|
stage ||= /(?<=\.)PD-\w+(?=\.)/.match(code)&.to_s
|
45
44
|
if code2
|
46
45
|
code = code2.strip
|
@@ -85,7 +84,7 @@ module RelatonNist
|
|
85
84
|
result = nistbib_search_filter(code, year, opts) || (return nil)
|
86
85
|
ret = nistbib_results_filter(result, year, opts)
|
87
86
|
if ret[:ret]
|
88
|
-
warn "
|
87
|
+
Util.warn "(#{code}) found `#{ret[:ret].docidentifier.first.id}`"
|
89
88
|
ret[:ret]
|
90
89
|
else
|
91
90
|
fetch_ref_err(code, year, ret[:years])
|
@@ -161,7 +160,7 @@ module RelatonNist
|
|
161
160
|
# @return [RelatonNist::HitCollection] hits collection
|
162
161
|
#
|
163
162
|
def nistbib_search_filter(code, year, opts)
|
164
|
-
warn "
|
163
|
+
Util.warn "(#{code}) fetching..."
|
165
164
|
result = search(code, year, opts)
|
166
165
|
result.search_filter
|
167
166
|
end
|
@@ -177,15 +176,15 @@ module RelatonNist
|
|
177
176
|
#
|
178
177
|
def fetch_ref_err(code, year, missed_years) # rubocop:disable Metrics/MethodLength
|
179
178
|
id = year ? "#{code}:#{year}" : code
|
180
|
-
warn "
|
181
|
-
|
179
|
+
Util.warn "WARNING: no match found online for `#{id}`. " \
|
180
|
+
"The code must be exactly like it is on the standards website."
|
182
181
|
unless missed_years.empty?
|
183
|
-
warn "
|
184
|
-
|
182
|
+
Util.warn "(There was no match for #{year}, though there " \
|
183
|
+
"were matches found for `#{missed_years.join('`, `')}`.)"
|
185
184
|
end
|
186
185
|
if /\d-\d/.match? code
|
187
|
-
warn "
|
188
|
-
|
186
|
+
Util.warn "The provided document part may not exist, " \
|
187
|
+
"or the document may no longer be published in parts."
|
189
188
|
end
|
190
189
|
nil
|
191
190
|
end
|