relaton-nist 1.12.3 → 1.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +34 -3
- data/grammars/biblio.rng +8 -5
- data/grammars/isodoc.rng +27 -1
- data/lib/relaton_nist/data_fetcher.rb +70 -19
- data/lib/relaton_nist/hit.rb +10 -3
- data/lib/relaton_nist/hit_collection.rb +164 -56
- data/lib/relaton_nist/nist_bibliography.rb +74 -120
- data/lib/relaton_nist/processor.rb +1 -1
- data/lib/relaton_nist/version.rb +1 -1
- data/relaton_nist.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c997f338c657a003a0bff7e75b650f59b86d4a72026a0b12734357bc48c10cdc
|
4
|
+
data.tar.gz: 5d76af2d37037bb027f9875180bbcc967ca133260eb4a5494112fe81cccf5e04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5971a572c9a7a5aefb4087f96787255687195c3e0cc75a49f740b57af1bb05fa48f56c2bd721cbb4b5aa7e5178cb93048c97a8782d152b4b040dd3158129ba14
|
7
|
+
data.tar.gz: f266510c36ce07666a74b9881e439df01067f6baee05e03c267ce3754bef5dcf24b7391082a4fdc1d8122121f17b1fc30e3ba8377dadd86692ff80cd8efa962d
|
data/README.adoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
=
|
1
|
+
= Relaton for NIST: bibliographic retrieval of NIST publications
|
2
2
|
|
3
3
|
image:https://img.shields.io/gem/v/relaton-nist.svg["Gem Version", link="https://rubygems.org/gems/relaton-nist"]
|
4
4
|
image:https://github.com/relaton/relaton-nist/workflows/macos/badge.svg["Build Status (macOS)", link="https://github.com/relaton/relaton-nist/actions?workflow=macos"]
|
@@ -8,10 +8,41 @@ image:https://codeclimate.com/github/relaton/relaton-nist/badges/gpa.svg["Code C
|
|
8
8
|
image:https://img.shields.io/github/issues-pr-raw/relaton/relaton-nist.svg["Pull Requests", link="https://github.com/relaton/relaton-nist/pulls"]
|
9
9
|
image:https://img.shields.io/github/commits-since/relaton/relaton-nist/latest.svg["Commits since latest",link="https://github.com/relaton/relaton-nist/releases"]
|
10
10
|
|
11
|
+
== Purpose
|
11
12
|
|
12
|
-
|
13
|
+
`relaton-nist` provides bibliographic information of NIST publications using the
|
14
|
+
https://github.com/metanorma/metanorma-model-nist#nist-bibliographic-item-model[NistBibliographicItem model].
|
15
|
+
|
16
|
+
Relaton for NIST has been developed in cooperation with the NIST Cybersecurity
|
17
|
+
Resource Center (CSRC) and the Computer Security Division (ITL/CSD).
|
18
|
+
|
19
|
+
== Data sources
|
20
|
+
|
21
|
+
Relaton for NIST retrieves bibliographic information from two sources:
|
22
|
+
|
23
|
+
* bibliographic feed from the NIST Cybersecurity Resource Center (CSRC) of
|
24
|
+
all CSRC publications (in Relaton JSON)
|
25
|
+
* bibliographic dataset from the NIST Library through the Information Services
|
26
|
+
Office (ISO) that contains information about all NIST Technical Publications
|
27
|
+
(https://github.com/usnistgov/NIST-Tech-Pubs[GitHub])
|
28
|
+
|
29
|
+
Bibliographic information offered through CSRC is provided with enhanced
|
30
|
+
metadata that is not available at the NIST Library dataset, including:
|
31
|
+
|
32
|
+
* public drafts (the NIST Library dataset only contains final publications)
|
33
|
+
* revision information: revision number, iteration
|
34
|
+
* document stage information: retired, withdrawn, etc.
|
35
|
+
* bibliographic dates, including issued date, updated date, published date,
|
36
|
+
obsolete date, commenting period
|
37
|
+
* document relationships: supersession and replacements
|
38
|
+
* contacts: enhanced name parts and affiliation information
|
39
|
+
|
40
|
+
Relaton for NIST therefore uses the following order of priority for the data
|
41
|
+
sources:
|
42
|
+
|
43
|
+
. bibliographic feed from NIST CSRC
|
44
|
+
. NIST Library dataset
|
13
45
|
|
14
|
-
You can use it to retrieve metadata of NIST Standards from https://csrc.nist.gov, and access such metadata through the `IsoBibliographicItem` object.
|
15
46
|
|
16
47
|
== Installation
|
17
48
|
|
data/grammars/biblio.rng
CHANGED
@@ -225,9 +225,9 @@
|
|
225
225
|
<zeroOrMore>
|
226
226
|
<ref name="forename"/>
|
227
227
|
</zeroOrMore>
|
228
|
-
<
|
229
|
-
<ref name="
|
230
|
-
</
|
228
|
+
<optional>
|
229
|
+
<ref name="formatted-initials"/>
|
230
|
+
</optional>
|
231
231
|
<ref name="surname"/>
|
232
232
|
<zeroOrMore>
|
233
233
|
<ref name="addition"/>
|
@@ -247,8 +247,8 @@
|
|
247
247
|
<ref name="LocalizedString"/>
|
248
248
|
</element>
|
249
249
|
</define>
|
250
|
-
<define name="
|
251
|
-
<element name="
|
250
|
+
<define name="formatted-initials">
|
251
|
+
<element name="formatted-initials">
|
252
252
|
<ref name="LocalizedString"/>
|
253
253
|
</element>
|
254
254
|
</define>
|
@@ -264,6 +264,9 @@
|
|
264
264
|
</define>
|
265
265
|
<define name="forename">
|
266
266
|
<element name="forename">
|
267
|
+
<optional>
|
268
|
+
<attribute name="initial"/>
|
269
|
+
</optional>
|
267
270
|
<ref name="LocalizedString"/>
|
268
271
|
</element>
|
269
272
|
</define>
|
data/grammars/isodoc.rng
CHANGED
@@ -69,6 +69,17 @@
|
|
69
69
|
</zeroOrMore>
|
70
70
|
</element>
|
71
71
|
</define>
|
72
|
+
<define name="AdmonitionType">
|
73
|
+
<choice>
|
74
|
+
<value>warning</value>
|
75
|
+
<value>note</value>
|
76
|
+
<value>tip</value>
|
77
|
+
<value>important</value>
|
78
|
+
<value>caution</value>
|
79
|
+
<value>statement</value>
|
80
|
+
<value>editorial</value>
|
81
|
+
</choice>
|
82
|
+
</define>
|
72
83
|
<define name="index">
|
73
84
|
<element name="index">
|
74
85
|
<optional>
|
@@ -1274,7 +1285,12 @@
|
|
1274
1285
|
</define>
|
1275
1286
|
<define name="span">
|
1276
1287
|
<element name="span">
|
1277
|
-
<
|
1288
|
+
<optional>
|
1289
|
+
<attribute name="class"/>
|
1290
|
+
</optional>
|
1291
|
+
<optional>
|
1292
|
+
<attribute name="style"/>
|
1293
|
+
</optional>
|
1278
1294
|
<oneOrMore>
|
1279
1295
|
<ref name="TextElement"/>
|
1280
1296
|
</oneOrMore>
|
@@ -2493,6 +2509,16 @@
|
|
2493
2509
|
<text/>
|
2494
2510
|
</element>
|
2495
2511
|
</optional>
|
2512
|
+
<optional>
|
2513
|
+
<element name="amendment">
|
2514
|
+
<text/>
|
2515
|
+
</element>
|
2516
|
+
</optional>
|
2517
|
+
<optional>
|
2518
|
+
<element name="corrigendum">
|
2519
|
+
<text/>
|
2520
|
+
</element>
|
2521
|
+
</optional>
|
2496
2522
|
<optional>
|
2497
2523
|
<element name="language">
|
2498
2524
|
<text/>
|
@@ -137,26 +137,10 @@ module RelatonNist
|
|
137
137
|
|
138
138
|
# @param doc [Nokogiri::XML::Element]
|
139
139
|
# @return [Array<Hash>]
|
140
|
-
def fetch_contributor(doc)
|
140
|
+
def fetch_contributor(doc)
|
141
141
|
contribs = doc.xpath("contributors/person_name").map do |p|
|
142
|
-
|
143
|
-
|
144
|
-
p.at("given_name")&.text&.split&.each do |fn|
|
145
|
-
if /^(?<init>\w)\.?$/ =~ fn
|
146
|
-
initial << RelatonBib::LocalizedString.new(init, doc["language"], "Latn")
|
147
|
-
else
|
148
|
-
forename << RelatonBib::LocalizedString.new(fn, doc["language"], "Latn")
|
149
|
-
end
|
150
|
-
end
|
151
|
-
sname = p.at("surname").text
|
152
|
-
surname = RelatonBib::LocalizedString.new sname, doc["language"], "Latn"
|
153
|
-
ident = p.xpath("ORCID").map do |id|
|
154
|
-
RelatonBib::PersonIdentifier.new "orcid", id.text
|
155
|
-
end
|
156
|
-
fullname = RelatonBib::FullName.new(
|
157
|
-
surname: surname, forename: forename, initial: initial, identifier: ident,
|
158
|
-
)
|
159
|
-
person = RelatonBib::Person.new name: fullname, affiliation: affiliation(doc)
|
142
|
+
person = RelatonBib::Person.new(name: fullname(p, doc),
|
143
|
+
affiliation: affiliation(doc))
|
160
144
|
{ entity: person, role: [{ type: p["contributor_role"] }] }
|
161
145
|
end
|
162
146
|
contribs + doc.xpath("publisher").map do |p|
|
@@ -164,6 +148,73 @@ module RelatonNist
|
|
164
148
|
end
|
165
149
|
end
|
166
150
|
|
151
|
+
#
|
152
|
+
# Create full name object from person name element.
|
153
|
+
#
|
154
|
+
# @param [Nokogiri::XML::Element] person name element
|
155
|
+
# @param [Nokogiri::XML::Element] doc document element
|
156
|
+
#
|
157
|
+
# @return [RelatonBib::FullName] full name object
|
158
|
+
#
|
159
|
+
def fullname(person, doc)
|
160
|
+
forename, initials = forename_initial(person, doc)
|
161
|
+
surname = localized_string person.at("surname").text, doc
|
162
|
+
ident = person.xpath("ORCID").map do |id|
|
163
|
+
RelatonBib::PersonIdentifier.new "orcid", id.text
|
164
|
+
end
|
165
|
+
RelatonBib::FullName.new(surname: surname, forename: forename,
|
166
|
+
initials: initials, identifier: ident)
|
167
|
+
end
|
168
|
+
|
169
|
+
#
|
170
|
+
# Create forename and initials objects from person name element.
|
171
|
+
#
|
172
|
+
# @param [Nokogiri::XML::Element] person person name element
|
173
|
+
# @param [Nokogiri::XML::Element] doc document element
|
174
|
+
#
|
175
|
+
# @return [Array<Array<RelatonBib::LocalizedString>>] forename and initials
|
176
|
+
#
|
177
|
+
def forename_initial(person, doc) # rubocop:disable Metrics/MethodLength
|
178
|
+
fnames = []
|
179
|
+
fname = person.at("given_name")&.text
|
180
|
+
if fname
|
181
|
+
if /^(?<inits>(?:\w[.\s]+|[A-Z]{1,2}$)+)$/ =~ fname
|
182
|
+
ints = inits.split(/[.\s]*/)
|
183
|
+
fnames << forename(doc, fname, ints.shift)
|
184
|
+
ints.each { |i| fnames << forename(doc, nil, i) }
|
185
|
+
else fnames << forename(doc, fname)
|
186
|
+
end
|
187
|
+
end
|
188
|
+
[fnames, localized_string(inits, doc)]
|
189
|
+
end
|
190
|
+
|
191
|
+
#
|
192
|
+
# Create forename object
|
193
|
+
#
|
194
|
+
# @param [Nokogiri::XML::Element] doc document element
|
195
|
+
# @param [String, nil] cnt forename content
|
196
|
+
# @param [String, nil] init initial content
|
197
|
+
#
|
198
|
+
# @return [RelatonBib::Forename] forename object
|
199
|
+
#
|
200
|
+
def forename(doc, cnt, init = nil)
|
201
|
+
RelatonBib::Forename.new(
|
202
|
+
content: cnt, language: doc["language"], script: "Latn", initial: init,
|
203
|
+
)
|
204
|
+
end
|
205
|
+
|
206
|
+
#
|
207
|
+
# Create localized string
|
208
|
+
#
|
209
|
+
# @param [String] content content of string
|
210
|
+
# @param [Nokogiri::XML::Element] doc XML element
|
211
|
+
#
|
212
|
+
# @return [RelatonBib::LocalizedString] localized string
|
213
|
+
#
|
214
|
+
def localized_string(content, doc)
|
215
|
+
RelatonBib::LocalizedString.new content, doc["language"], "Latn"
|
216
|
+
end
|
217
|
+
|
167
218
|
#
|
168
219
|
# Create publisher organization
|
169
220
|
#
|
data/lib/relaton_nist/hit.rb
CHANGED
@@ -5,16 +5,23 @@ module RelatonNist
|
|
5
5
|
class Hit < RelatonBib::Hit
|
6
6
|
attr_writer :fetch
|
7
7
|
|
8
|
+
#
|
8
9
|
# Parse page.
|
9
|
-
#
|
10
|
+
#
|
11
|
+
# @return [RelatonNist::NistBibliographicItem] bibliographic item
|
12
|
+
#
|
10
13
|
def fetch
|
11
14
|
@fetch ||= Scrapper.parse_page @hit
|
12
15
|
end
|
13
16
|
|
14
|
-
#
|
17
|
+
#
|
18
|
+
# Calculate sorting weight of hit by series, code, title, addendum, and status
|
19
|
+
#
|
20
|
+
# @return [Integer] sorting weight
|
21
|
+
#
|
15
22
|
def sort_value # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
16
23
|
@sort_value ||= begin
|
17
|
-
sort_phrase = [hit[:
|
24
|
+
sort_phrase = [hit[:series], hit[:code], hit[:title]].join " "
|
18
25
|
corr = hit_collection&.text&.split&.map do |w|
|
19
26
|
if w =~ /\w+/ &&
|
20
27
|
sort_phrase =~ Regexp.new(Regexp.escape(w), Regexp::IGNORECASE)
|
@@ -7,7 +7,7 @@ require "addressable/uri"
|
|
7
7
|
require "open-uri"
|
8
8
|
|
9
9
|
module RelatonNist
|
10
|
-
#
|
10
|
+
# Hit collection.
|
11
11
|
class HitCollection < RelatonBib::HitCollection
|
12
12
|
DOMAIN = "https://csrc.nist.gov"
|
13
13
|
PUBS_EXPORT = URI.join(DOMAIN, "/CSRC/media/feeds/metanorma/pubs-export")
|
@@ -15,18 +15,135 @@ module RelatonNist
|
|
15
15
|
DATAFILE = File.expand_path "pubs-export.zip", DATAFILEDIR
|
16
16
|
GHNISTDATA = "https://raw.githubusercontent.com/relaton/relaton-data-nist/main/data/"
|
17
17
|
|
18
|
+
#
|
19
|
+
# Create hits collection instance and search hits
|
20
|
+
#
|
21
|
+
# @param [Hash] opts options
|
22
|
+
# @option opts [String] :stage stage of document
|
23
|
+
#
|
24
|
+
# @return [RelatonNist::HitCollection] hits collection
|
25
|
+
#
|
18
26
|
def self.search(text, year = nil, opts = {})
|
19
27
|
new(text, year).search(opts)
|
20
28
|
end
|
21
29
|
|
30
|
+
#
|
31
|
+
# Search hits in JSON file or GitHub repo
|
32
|
+
#
|
33
|
+
# @param [Hash] opts options
|
34
|
+
# @option opts [String] :stage stage of document
|
35
|
+
#
|
36
|
+
# @return [RelatonNist::HitCollection] hits collection
|
37
|
+
#
|
22
38
|
def search(opts)
|
23
39
|
@array = from_json(**opts)
|
24
40
|
@array = from_ga unless @array.any?
|
25
41
|
sort_hits!
|
26
42
|
end
|
27
43
|
|
44
|
+
#
|
45
|
+
# Filter hits by reference's parts
|
46
|
+
#
|
47
|
+
# @return [Array<RelatonNist::Hit>] hits
|
48
|
+
#
|
49
|
+
def search_filter # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
50
|
+
@array.select do |item|
|
51
|
+
%r{
|
52
|
+
^(?:(?:NIST|NBS)\s?)?
|
53
|
+
(?:(?<series>(?:SP|FIPS|IR|ITL\sBulletin|White\sPaper))\s)?
|
54
|
+
(?<code>[0-9-]{3,}[A-Z]?)
|
55
|
+
(?<prt1>pt\d+)?
|
56
|
+
(?<vol1>v\d+)?
|
57
|
+
(?<ver1>ver[\d.]+)?
|
58
|
+
(?<rev1>r\d+)?
|
59
|
+
(?:\s(?<prt2>Part\s\d+))?
|
60
|
+
(?:\s(?<vol2>Vol\.\s\d+))?
|
61
|
+
(?:\s(?<ver2>(?:Ver\.|Version)\s[\d.]+))?
|
62
|
+
(?:\s(?<rev2>Rev\.\s\d+))?
|
63
|
+
(?:\s(?<add>Add)endum)?
|
64
|
+
}x =~ item.hit[:code]
|
65
|
+
(refparts[:code] && [series, item.hit[:series]].include?(refparts[:series]) && refparts[:code] == code &&
|
66
|
+
long_to_short(refparts[:prt1], refparts[:prt2]) == long_to_short(prt1, prt2) &&
|
67
|
+
long_to_short(refparts[:vol1], refparts[:vol2]) == long_to_short(vol1, vol2) &&
|
68
|
+
long_to_short(refparts[:ver1], refparts[:ver2]) == long_to_short(ver1, ver2) &&
|
69
|
+
long_to_short(refparts[:rev1], refparts[:rev2]) == long_to_short(rev1, rev2) &&
|
70
|
+
long_to_short(refparts[:add1], refparts[:add2]) == add) || item.hit[:title]&.include?(text.sub(/^NIST\s/, ""))
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
28
74
|
private
|
29
75
|
|
76
|
+
#
|
77
|
+
# Parse reference parts
|
78
|
+
#
|
79
|
+
# @return [Hash] reference parts
|
80
|
+
#
|
81
|
+
def refparts # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
82
|
+
@refparts ||= {
|
83
|
+
perfix: match(/^(NIST|NBS)\s?/, text),
|
84
|
+
series: match(/(SP|FIPS|IR|ITL\sBulletin|White\sPaper)(?=\.|\s)/, text),
|
85
|
+
code: match(/(?<=\.|\s)[0-9-]{3,}[A-Z]?/, text),
|
86
|
+
prt1: match(/(?<=(\.))?pt(?(1)-)[A-Z\d]+/, text),
|
87
|
+
vol1: match(/(?<=(\.))?v(?(1)-)\d+/, text),
|
88
|
+
ver1: match(/(?<=(\.))?ver(?(1)[-\d]|[.\d])+/, text)&.gsub(/-/, "."),
|
89
|
+
rev1: match(/(?<=[^a-z])(?<=(\.))?r(?(1)-)\d+/, text),
|
90
|
+
add1: match(/(?<=(\.))?add(?(1)-)\d+/, text),
|
91
|
+
prt2: match(/(?<=\s)Part\s[A-Z\d]+/, text),
|
92
|
+
vol2: match(/(?<=\s)Vol\.\s\d+/, text),
|
93
|
+
ver2: match(/(?<=\s)Ver\.\s\d+/, text),
|
94
|
+
rev2: match(/(?<=\s)Rev\.\s\d+/, text),
|
95
|
+
add2: match(/(?<=\/)Add/, text),
|
96
|
+
}
|
97
|
+
end
|
98
|
+
|
99
|
+
#
|
100
|
+
# Match regex to reference
|
101
|
+
#
|
102
|
+
# @param [Regexp] regex regex
|
103
|
+
# @param [String] code reference
|
104
|
+
#
|
105
|
+
# @return [String, nil] matched string
|
106
|
+
#
|
107
|
+
def match(regex, code)
|
108
|
+
regex.match(code)&.to_s
|
109
|
+
end
|
110
|
+
|
111
|
+
#
|
112
|
+
# Generate reference from parts
|
113
|
+
#
|
114
|
+
# @return [String] reference
|
115
|
+
#
|
116
|
+
def full_ref # rubocop:disable Metrics/AbcSize
|
117
|
+
@full_ref ||= begin
|
118
|
+
ref = "#{refparts[:perfix]}#{refparts[:series]} #{refparts[:code]}"
|
119
|
+
ref += long_to_short(refparts[:prt1], refparts[:prt2]).to_s
|
120
|
+
ref += long_to_short(refparts[:vol1], refparts[:vol2]).to_s
|
121
|
+
ref
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
#
|
126
|
+
# Return short version of ID part with removed "-" or convert long version to short.
|
127
|
+
# Converts "pt-1" to "pt1" and "Part 1" to "pt1", "v-1" to "v1" and "Vol. 1" to "v1",
|
128
|
+
# "ver-1" to "ver1" and "Ver. 1" to "ver1", "r-1" to "r1" and "Rev. 1" to "r1".
|
129
|
+
#
|
130
|
+
# @param short [String]
|
131
|
+
# @param long [String]
|
132
|
+
#
|
133
|
+
# @return [String, nil]
|
134
|
+
#
|
135
|
+
def long_to_short(short, long)
|
136
|
+
return short.sub(/-/, "") if short
|
137
|
+
return unless long
|
138
|
+
|
139
|
+
long.sub(/Part\s/, "pt").sub(/Vol\.\s/, "v").sub(/Rev\.\s/, "r").sub(/(Ver\.|Version)\s/, "ver")
|
140
|
+
end
|
141
|
+
|
142
|
+
#
|
143
|
+
# Sort hits by sort_value and release date
|
144
|
+
#
|
145
|
+
# @return [self] sorted hits collection
|
146
|
+
#
|
30
147
|
def sort_hits!
|
31
148
|
@array.sort! do |a, b|
|
32
149
|
if a.sort_value == b.sort_value
|
@@ -38,8 +155,16 @@ module RelatonNist
|
|
38
155
|
self
|
39
156
|
end
|
40
157
|
|
41
|
-
|
42
|
-
|
158
|
+
#
|
159
|
+
# Get hit from GitHub repo
|
160
|
+
#
|
161
|
+
# @return [Array<RelatonNist::Hit>] hits
|
162
|
+
#
|
163
|
+
# @raise [OpenURI::HTTPError] if GitHub repo is not available
|
164
|
+
#
|
165
|
+
def from_ga # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
166
|
+
ref = full_ref
|
167
|
+
fn = ref.gsub(%r{[/\s:.]}, "_").upcase
|
43
168
|
yaml = OpenURI.open_uri "#{GHNISTDATA}#{fn}.yaml"
|
44
169
|
hash = YAML.safe_load yaml
|
45
170
|
bib = RelatonNist::NistBibliographicItem.from_hash hash
|
@@ -52,63 +177,35 @@ module RelatonNist
|
|
52
177
|
raise e
|
53
178
|
end
|
54
179
|
|
55
|
-
#
|
56
|
-
|
57
|
-
# @param stage [String]
|
58
|
-
# @return [Array<RelatonNist::Hit>]
|
59
|
-
# def from_csrc(**opts)
|
60
|
-
# from, to = nil
|
61
|
-
# if year
|
62
|
-
# d = Date.strptime year, "%Y"
|
63
|
-
# from = d.strftime "%m/%d/%Y"
|
64
|
-
# to = d.next_year.prev_day.strftime "%m/%d/%Y"
|
65
|
-
# end
|
66
|
-
# url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"\
|
67
|
-
# "&sortBy-lg=relevence"
|
68
|
-
# url += "&dateFrom-lg=#{from}" if from
|
69
|
-
# url += "&dateTo-lg=#{to}" if to
|
70
|
-
# url += if /PD/.match? opts[:stage]
|
71
|
-
# "&status-lg=Draft,Retired Draft,Withdrawn"
|
72
|
-
# else
|
73
|
-
# "&status-lg=Final,Withdrawn"
|
74
|
-
# end
|
75
|
-
|
76
|
-
# doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
|
77
|
-
# doc.css("table.publications-table > tbody > tr").map do |h|
|
78
|
-
# link = h.at("td/div/strong/a")
|
79
|
-
# serie = h.at("td[1]").text.strip
|
80
|
-
# code = h.at("td[2]").text.strip
|
81
|
-
# title = link.text
|
82
|
-
# doc_url = DOMAIN + link[:href]
|
83
|
-
# status = h.at("td[4]").text.strip.downcase
|
84
|
-
# release_date = Date.strptime h.at("td[5]").text.strip, "%m/%d/%Y"
|
85
|
-
# Hit.new(
|
86
|
-
# {
|
87
|
-
# code: code, serie: serie, title: title, url: doc_url,
|
88
|
-
# status: status, release_date: release_date
|
89
|
-
# }, self
|
90
|
-
# )
|
91
|
-
# end
|
92
|
-
# end
|
93
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
94
|
-
|
180
|
+
#
|
95
181
|
# Fetches data from JSON
|
96
|
-
#
|
97
|
-
# @
|
182
|
+
#
|
183
|
+
# @param opts [Hash] options
|
184
|
+
# @option opts [String] :stage stage of document
|
185
|
+
#
|
186
|
+
# @return [Array<RelatonNist::Hit>] hits
|
187
|
+
#
|
98
188
|
def from_json(**opts)
|
99
189
|
select_data(**opts).map do |h|
|
100
|
-
/(?<
|
190
|
+
/(?<series>(?<=-)\w+$)/ =~ h["series"]
|
101
191
|
title = [h["title-main"], h["title-sub"]].compact.join " - "
|
102
192
|
release_date = RelatonBib.parse_date h["published-date"], false
|
103
|
-
Hit.new({ code: h["docidentifier"],
|
193
|
+
Hit.new({ code: h["docidentifier"], series: series.upcase, title: title,
|
104
194
|
url: h["uri"], status: h["status"],
|
105
195
|
release_date: release_date, json: h }, self)
|
106
196
|
end
|
107
197
|
end
|
108
198
|
|
109
|
-
#
|
110
|
-
#
|
199
|
+
#
|
200
|
+
# Select data from json
|
201
|
+
#
|
202
|
+
# @param opts [Hash] options
|
203
|
+
# @option opts [String] :stage stage of document
|
204
|
+
#
|
205
|
+
# @return [Array<Hash>] selected data
|
206
|
+
#
|
111
207
|
def select_data(**opts) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength,Metrics/PerceivedComplexity
|
208
|
+
ref = "#{refparts[:series]} #{refparts[:code]}"
|
112
209
|
d = Date.strptime year, "%Y" if year
|
113
210
|
statuses = %w[draft-public draft-prelim]
|
114
211
|
data.select do |doc|
|
@@ -119,13 +216,18 @@ module RelatonNist
|
|
119
216
|
else
|
120
217
|
next unless doc["status"] == "final"
|
121
218
|
end
|
122
|
-
doc["docidentifier"].include?
|
219
|
+
doc["docidentifier"].include?(ref) || doc["docidentifier"].include?(full_ref)
|
123
220
|
end
|
124
221
|
end
|
125
222
|
|
126
|
-
#
|
223
|
+
#
|
224
|
+
# Check if issued date matches the year
|
225
|
+
#
|
226
|
+
# @param doc [Hash] document's metadata
|
127
227
|
# @param date [Date] first day of year
|
128
|
-
#
|
228
|
+
#
|
229
|
+
# @return [Boolean]
|
230
|
+
#
|
129
231
|
def match_year?(doc, date)
|
130
232
|
return true unless year
|
131
233
|
|
@@ -133,8 +235,11 @@ module RelatonNist
|
|
133
235
|
idate.between? date, date.next_year.prev_day
|
134
236
|
end
|
135
237
|
|
238
|
+
#
|
136
239
|
# Fetches JSON data from server
|
137
|
-
#
|
240
|
+
#
|
241
|
+
# @return [Array<Hash>] json data
|
242
|
+
#
|
138
243
|
def data
|
139
244
|
ctime = File.ctime DATAFILE if File.exist? DATAFILE
|
140
245
|
if !ctime || ctime.to_date < Date.today || File.size(DATAFILE).zero?
|
@@ -143,11 +248,12 @@ module RelatonNist
|
|
143
248
|
unzip
|
144
249
|
end
|
145
250
|
|
251
|
+
#
|
146
252
|
# Fetch data from server and save it to file
|
147
253
|
#
|
148
|
-
# @prarm ctime [Time,
|
254
|
+
# @param ctime [Time, nil] file creation time
|
255
|
+
#
|
149
256
|
def fetch_data(ctime)
|
150
|
-
# resp = OpenURI.open_uri("#{PUBS_EXPORT}.meta")
|
151
257
|
if !ctime || ctime < OpenURI.open_uri("#{PUBS_EXPORT}.meta").last_modified
|
152
258
|
@data = nil
|
153
259
|
uri_open = URI.method(:open) || Kernel.method(:open)
|
@@ -156,9 +262,11 @@ module RelatonNist
|
|
156
262
|
end
|
157
263
|
end
|
158
264
|
|
265
|
+
#
|
159
266
|
# unpack zip file
|
160
267
|
#
|
161
|
-
# @return [Hash]
|
268
|
+
# @return [Array<Hash>] json data
|
269
|
+
#
|
162
270
|
def unzip
|
163
271
|
return @data if @data
|
164
272
|
|
@@ -11,8 +11,13 @@ require "relaton_nist/hash_converter"
|
|
11
11
|
module RelatonNist
|
12
12
|
class NistBibliography
|
13
13
|
class << self
|
14
|
-
#
|
15
|
-
#
|
14
|
+
#
|
15
|
+
# Search NIST documents by reference
|
16
|
+
#
|
17
|
+
# @param text [String] reference
|
18
|
+
#
|
19
|
+
# @return [RelatonNist::HitCollection] search result
|
20
|
+
#
|
16
21
|
def search(text, year = nil, opts = {})
|
17
22
|
ref = text.sub(/^NIST\sIR/, "NISTIR")
|
18
23
|
HitCollection.search ref, year, opts
|
@@ -20,15 +25,18 @@ module RelatonNist
|
|
20
25
|
raise RelatonBib::RequestError, e.message
|
21
26
|
end
|
22
27
|
|
28
|
+
#
|
29
|
+
# Get NIST document by reference
|
30
|
+
#
|
23
31
|
# @param code [String] the NIST standard Code to look up (e.g. "8200")
|
24
32
|
# @param year [String] the year the standard was published (optional)
|
25
33
|
#
|
26
34
|
# @param opts [Hash] options
|
27
|
-
# @option opts [
|
35
|
+
# @option opts [Boolean] :all_parts restricted to all parts
|
28
36
|
# if all-parts reference is required
|
29
|
-
# @option opts [TrueClass, FalseClass] :bibdata
|
30
37
|
#
|
31
|
-
# @return [
|
38
|
+
# @return [RelatonNist::NistBibliographicItem, nil] bibliographic item
|
39
|
+
#
|
32
40
|
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
33
41
|
return fetch_ref_err(code, year, []) if code.match?(/\sEP$/)
|
34
42
|
|
@@ -56,12 +64,24 @@ module RelatonNist
|
|
56
64
|
end
|
57
65
|
|
58
66
|
code += "-1" if opts[:all_parts]
|
59
|
-
|
67
|
+
nistbib_get(code, year, opts)
|
60
68
|
end
|
61
69
|
|
62
70
|
private
|
63
71
|
|
64
|
-
|
72
|
+
#
|
73
|
+
# Get NIST document by reference
|
74
|
+
#
|
75
|
+
# @param [String] code reference
|
76
|
+
# @param [String] year year
|
77
|
+
# @param [Hash] opts options
|
78
|
+
# @option opts [Date] :issued_date issued date
|
79
|
+
# @option opts [Date] :updated_date updated date
|
80
|
+
# @option opts [String] :stage stage
|
81
|
+
#
|
82
|
+
# @return [RelatonNist::NistBibliographicItem, nil] bibliographic item
|
83
|
+
#
|
84
|
+
def nistbib_get(code, year, opts)
|
65
85
|
result = nistbib_search_filter(code, year, opts) || (return nil)
|
66
86
|
ret = nistbib_results_filter(result, year, opts)
|
67
87
|
if ret[:ret]
|
@@ -72,6 +92,7 @@ module RelatonNist
|
|
72
92
|
end
|
73
93
|
end
|
74
94
|
|
95
|
+
#
|
75
96
|
# Sort through the results from RelatonNist, fetching them three at a time,
|
76
97
|
# and return the first result that matches the code,
|
77
98
|
# matches the year (if provided), and which has a title (amendments do not).
|
@@ -80,12 +101,13 @@ module RelatonNist
|
|
80
101
|
# If no match, returns any years which caused mismatch, for error reporting
|
81
102
|
#
|
82
103
|
# @param opts [Hash] options
|
83
|
-
# @option opts [
|
84
|
-
# @option opts [
|
85
|
-
# @option opts [String] :stage
|
104
|
+
# @option opts [Date] :issued_date issued date
|
105
|
+
# @option opts [Date] :updated_date updated date
|
106
|
+
# @option opts [String] :stage stage
|
107
|
+
#
|
108
|
+
# @return [Hash] result
|
86
109
|
#
|
87
|
-
#
|
88
|
-
def nistbib_results_filter(result, year, opts)
|
110
|
+
def nistbib_results_filter(result, year, opts) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
89
111
|
missed_years = []
|
90
112
|
iter = /\w+(?=PD)|(?<=PD-)\w+/.match(opts[:stage])&.to_s
|
91
113
|
iteration = case iter
|
@@ -119,9 +141,14 @@ module RelatonNist
|
|
119
141
|
{ years: missed_years }
|
120
142
|
end
|
121
143
|
|
122
|
-
#
|
123
|
-
#
|
124
|
-
#
|
144
|
+
#
|
145
|
+
# Fetch pages for all the hits in parallel
|
146
|
+
#
|
147
|
+
# @param hits [RelatonNist::HitCollection] hits
|
148
|
+
# @param threads [Integer] number of threads
|
149
|
+
#
|
150
|
+
# @return [Array<RelatonNist::NistBibliographicItem>] bibliographic items
|
151
|
+
#
|
125
152
|
def fetch_pages(hits, threads)
|
126
153
|
workers = RelatonBib::WorkersPool.new threads
|
127
154
|
workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
@@ -130,115 +157,42 @@ module RelatonNist
|
|
130
157
|
workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
131
158
|
end
|
132
159
|
|
133
|
-
#
|
134
|
-
#
|
135
|
-
#
|
136
|
-
# @
|
137
|
-
|
160
|
+
#
|
161
|
+
# Get search results and filter them by code and year
|
162
|
+
#
|
163
|
+
# @param code [String] reference
|
164
|
+
# @param year [String, nil] year
|
165
|
+
# @param opts [Hash] options
|
166
|
+
# @option opts [String] :stage stage
|
167
|
+
#
|
168
|
+
# @return [RelatonNist::HitCollection] hits collection
|
169
|
+
#
|
170
|
+
def nistbib_search_filter(code, year, opts)
|
138
171
|
warn "[relaton-nist] (\"#{code}\") fetching..."
|
139
|
-
|
140
|
-
|
141
|
-
# (?<serie>(SP|FIPS|NISTIR|ITL\sBulletin|White\sPaper))\s
|
142
|
-
# (?<code>[0-9-]{3,}[A-Z]?)
|
143
|
-
# (?<prt1>pt\d+)?
|
144
|
-
# (?<vol1>v\d+)?
|
145
|
-
# (?<ver1>ver[\d\.]+)?
|
146
|
-
# (?<rev1>r\d+)?
|
147
|
-
# (\s(?<prt2>Part\s\d+))?
|
148
|
-
# (\s(?<vol2>Vol\.\s\d+))?
|
149
|
-
# (\s(?<ver2>(Ver\.|Version)\s[\d\.]+))?
|
150
|
-
# (\s(?<rev2>Rev\.\s\d+))?
|
151
|
-
# (\/(?<upd>Add))?
|
152
|
-
# }x.match(code)
|
153
|
-
# match ||= %r{
|
154
|
-
# ^NIST\.
|
155
|
-
# (?<serie>(SP|FIPS|IR|ITL\sBulletin|White\sPaper))\.
|
156
|
-
# ((PD-\d+|PUB)\.)?
|
157
|
-
# (?<code>[0-9-]{3,}[A-Z]?)
|
158
|
-
# (\.(?<prt1>pt-\d+))?
|
159
|
-
# (\.(?<vol1>v-\d+))?
|
160
|
-
# (\.(?<ver1>ver-[\d\.]+))?
|
161
|
-
# (\.(?<rev1>r-\d+))?
|
162
|
-
# }x.match(code)
|
163
|
-
matches = {
|
164
|
-
serie: match(/(SP|FIPS|(NIST)?\s?IR|ITL\sBulletin|White\sPaper)(?=\.|\s)/, code),
|
165
|
-
code: match(/(?<=\.|\s)[0-9-]{3,}[A-Z]?/, code),
|
166
|
-
prt1: match(/(?<=(\.))?pt(?(1)-)[A-Z\d]+/, code),
|
167
|
-
vol1: match(/(?<=(\.))?v(?(1)-)\d+/, code),
|
168
|
-
ver1: match(/(?<=(\.))?ver(?(1)[-\d]|[\.\d])+/, code)&.gsub(/-/, "."),
|
169
|
-
rev1: match(/(?<=[^a-z])(?<=(\.))?r(?(1)-)\d+/, code),
|
170
|
-
add1: match(/(?<=(\.))?add(?(1)-)\d+/, code),
|
171
|
-
prt2: match(/(?<=\s)Part\s[A-Z\d]+/, code),
|
172
|
-
vol2: match(/(?<=\s)Vol\.\s\d+/, code),
|
173
|
-
ver2: match(/(?<=\s)Ver\.\s\d+/, code),
|
174
|
-
rev2: match(/(?<=\s)Rev\.\s\d+/, code),
|
175
|
-
add2: match(/(?<=\/)Add/, code),
|
176
|
-
}
|
177
|
-
ref = matches[:code] ? "#{matches[:serie]} #{matches[:code]}" : code
|
178
|
-
result = search(ref, year, opts)
|
179
|
-
selected_result = result.select { |i| search_filter i, matches, code }
|
180
|
-
return selected_result if selected_result.any? || !matches[:code]
|
181
|
-
|
182
|
-
search full_ref(matches)
|
183
|
-
end
|
184
|
-
|
185
|
-
def full_ref(matches)
|
186
|
-
ref = "#{matches[:serie]} #{matches[:code]}"
|
187
|
-
ref += long_to_short(matches[:prt1], matches[:prt2]).to_s
|
188
|
-
ref += long_to_short(matches[:vol1], matches[:vol2]).to_s
|
189
|
-
ref
|
190
|
-
end
|
191
|
-
|
192
|
-
def match(regex, code)
|
193
|
-
regex.match(code)&.to_s
|
194
|
-
end
|
195
|
-
|
196
|
-
# @param item [RelatonNist::Hit]
|
197
|
-
# @param matches [Hash]
|
198
|
-
# @param text [String]
|
199
|
-
# @return [Boolean]
|
200
|
-
def search_filter(item, matches, text) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
201
|
-
%r{
|
202
|
-
^(?:(?:NIST)\s)?
|
203
|
-
(?:(?<serie>(?:SP|FIPS|NISTIR|ITL\sBulletin|White\sPaper))\s)?
|
204
|
-
(?<code>[0-9-]{3,}[A-Z]?)
|
205
|
-
(?<prt1>pt\d+)?
|
206
|
-
(?<vol1>v\d+)?
|
207
|
-
(?<ver1>ver[\d.]+)?
|
208
|
-
(?<rev1>r\d+)?
|
209
|
-
(?:\s(?<prt2>Part\s\d+))?
|
210
|
-
(?:\s(?<vol2>Vol\.\s\d+))?
|
211
|
-
(?:\s(?<ver2>(?:Ver\.|Version)\s[\d.]+))?
|
212
|
-
(?:\s(?<rev2>Rev\.\s\d+))?
|
213
|
-
(?:\s(?<add>Add)endum)?
|
214
|
-
}x =~ item.hit[:code]
|
215
|
-
(matches[:code] && [serie, item.hit[:serie]].include?(matches[:serie]) && matches[:code] == code &&
|
216
|
-
long_to_short(matches[:prt1], matches[:prt2]) == long_to_short(prt1, prt2) &&
|
217
|
-
long_to_short(matches[:vol1], matches[:vol2]) == long_to_short(vol1, vol2) &&
|
218
|
-
long_to_short(matches[:ver1], matches[:ver2]) == long_to_short(ver1, ver2) &&
|
219
|
-
long_to_short(matches[:rev1], matches[:rev2]) == long_to_short(rev1, rev2) &&
|
220
|
-
long_to_short(matches[:add1], matches[:add2]) == add) || item.hit[:title]&.include?(text.sub(/^NIST\s/, ""))
|
172
|
+
result = search(code, year, opts)
|
173
|
+
result.search_filter
|
221
174
|
end
|
222
175
|
|
223
|
-
#
|
224
|
-
#
|
225
|
-
#
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
def fetch_ref_err(code, year, missed_years)
|
176
|
+
#
|
177
|
+
# Outputs warning message if no match found
|
178
|
+
#
|
179
|
+
# @param [String] code reference
|
180
|
+
# @param [String, nil] year year
|
181
|
+
# @param [Array<String>] missed_years missed years
|
182
|
+
#
|
183
|
+
# @return [nil] nil
|
184
|
+
#
|
185
|
+
def fetch_ref_err(code, year, missed_years) # rubocop:disable Metrics/MethodLength
|
234
186
|
id = year ? "#{code}:#{year}" : code
|
235
|
-
warn "[relaton-nist] WARNING: no match found online for #{id}. "\
|
236
|
-
|
237
|
-
|
238
|
-
"
|
239
|
-
|
240
|
-
|
241
|
-
|
187
|
+
warn "[relaton-nist] WARNING: no match found online for #{id}. " \
|
188
|
+
"The code must be exactly like it is on the standards website."
|
189
|
+
unless missed_years.empty?
|
190
|
+
warn "[relaton-nist] (There was no match for #{year}, though there " \
|
191
|
+
"were matches found for #{missed_years.join(', ')}.)"
|
192
|
+
end
|
193
|
+
if /\d-\d/.match? code
|
194
|
+
warn "[relaton-nist] The provided document part may not exist, " \
|
195
|
+
"or the document may no longer be published in parts."
|
242
196
|
end
|
243
197
|
nil
|
244
198
|
end
|
@@ -5,7 +5,7 @@ module RelatonNist
|
|
5
5
|
def initialize # rubocop:disable Lint/MissingSuper
|
6
6
|
@short = :relaton_nist
|
7
7
|
@prefix = "NIST"
|
8
|
-
@defaultprefix = %r{^(NIST|NISTGCR|ITL Bulletin|JPCRD|NISTIR|CSRC|FIPS)(/[^\s])?\s}
|
8
|
+
@defaultprefix = %r{^(NIST|NBS|NISTGCR|ITL Bulletin|JPCRD|NISTIR|CSRC|FIPS)(/[^\s])?\s}
|
9
9
|
@idtype = "NIST"
|
10
10
|
@datasets = %w[nist-tech-pubs]
|
11
11
|
end
|
data/lib/relaton_nist/version.rb
CHANGED
data/relaton_nist.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -128,14 +128,14 @@ dependencies:
|
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 1.
|
131
|
+
version: 1.13.0
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 1.
|
138
|
+
version: 1.13.0
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: rubyzip
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|