relaton-nist 1.14.6 → 1.14.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +2 -1
- data/README.adoc +8 -0
- data/lib/relaton_nist/data_fetcher.rb +12 -3
- data/lib/relaton_nist/hit.rb +19 -19
- data/lib/relaton_nist/hit_collection.rb +36 -31
- data/lib/relaton_nist/nist_bibliography.rb +6 -13
- data/lib/relaton_nist/scrapper.rb +11 -0
- data/lib/relaton_nist/series.yaml +1 -1
- data/lib/relaton_nist/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bb068b75779f92292d1fef25e9ae392a76df53d5d52bc2a644fb9d16ec0c5998
|
4
|
+
data.tar.gz: f64ba4dd75be3fd93a5a134c4babe39f08d4b6f774e2d488ba8df525f469fa1c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2f68dc9000e02d2b23a611d02d84d76222d24811313b88fa5ccef8fbda2564058a93dd6ef1b0f71a7b36e0f278cb1107046c47b5098d3d0c43a423f235c3c70
|
7
|
+
data.tar.gz: c38c9c1d2faf307843417b2be67d52a1064160b8ad3b2412c120b26cbe730ecef57ea1019d75eddf47d3e9d5155109571cf1ba69c2a3bce17d02960576ce9e3d
|
@@ -7,7 +7,8 @@ on:
|
|
7
7
|
inputs:
|
8
8
|
next_version:
|
9
9
|
description: |
|
10
|
-
Next release version. Possible values: x.y.z, major, minor, patch or pre|rc|etc
|
10
|
+
Next release version. Possible values: x.y.z, major, minor, patch (or pre|rc|etc).
|
11
|
+
Also, you can pass 'skip' to skip 'git tag' and do 'gem push' for the current version
|
11
12
|
required: true
|
12
13
|
default: 'skip'
|
13
14
|
repository_dispatch:
|
data/README.adoc
CHANGED
@@ -43,6 +43,14 @@ sources:
|
|
43
43
|
. bibliographic feed from NIST CSRC
|
44
44
|
. NIST Library dataset
|
45
45
|
|
46
|
+
The NIST CSRC provides:
|
47
|
+
|
48
|
+
* NIST SP 800-*
|
49
|
+
* NIST SP 500-*
|
50
|
+
* NIST SP 1800-*
|
51
|
+
* NIST FIPS {31, 39, 41, 46, 46-$$*$$, 48, 65, 73, 74, 81, 83, 87, 102, 112, 113, 139, 140-$$*$$, 141, 171, 180, 180-$$*$$, 181, 186, 186-$$*$$, 188, 190, 191, 196, 197, 198, 198-1, 199, 200, 201, 201-*, 202}
|
52
|
+
|
53
|
+
The NIST Library dataset provides documents listed in the https://github.com/relaton/relaton-data-nist/blob/main/index-v1.yaml[index].
|
46
54
|
|
47
55
|
== Installation
|
48
56
|
|
@@ -36,9 +36,16 @@ module RelatonNist
|
|
36
36
|
]
|
37
37
|
end
|
38
38
|
|
39
|
+
#
|
40
|
+
# Parse document's ID from XML
|
41
|
+
#
|
42
|
+
# @param [Nokogiri::XML::Element] doc XML element
|
43
|
+
#
|
44
|
+
# @return [String] document's ID
|
45
|
+
#
|
39
46
|
def pub_id(doc)
|
40
47
|
# anchor(doc).gsub(".", " ")
|
41
|
-
fetch_doi(doc).split("/")[1..].join("/").gsub(".", " ")
|
48
|
+
fetch_doi(doc).split("/")[1..].join("/").gsub(".", " ").sub(/^nist\sir/, "NIST IR")
|
42
49
|
end
|
43
50
|
|
44
51
|
def fetch_doi(doc) # rubocop:disable Metrics/CyclomaticComplexity
|
@@ -269,8 +276,6 @@ module RelatonNist
|
|
269
276
|
# @return [Array<RelatonBib::Series>] series
|
270
277
|
#
|
271
278
|
def fetch_series(doc)
|
272
|
-
series_path = File.expand_path("series.yaml", __dir__)
|
273
|
-
series = YAML.load_file series_path
|
274
279
|
prf, srs, num = pub_id(doc).split
|
275
280
|
sname = series[srs] || srs
|
276
281
|
title = RelatonBib::TypedTitleString.new(content: "#{prf} #{sname}")
|
@@ -278,6 +283,10 @@ module RelatonNist
|
|
278
283
|
[RelatonBib::Series.new(title: title, abbreviation: abbr, number: num)]
|
279
284
|
end
|
280
285
|
|
286
|
+
def series
|
287
|
+
@series ||= YAML.load_file File.expand_path("series.yaml", __dir__)
|
288
|
+
end
|
289
|
+
|
281
290
|
#
|
282
291
|
# Save document
|
283
292
|
#
|
data/lib/relaton_nist/hit.rb
CHANGED
@@ -19,24 +19,24 @@ module RelatonNist
|
|
19
19
|
#
|
20
20
|
# @return [Integer] sorting weight
|
21
21
|
#
|
22
|
-
def sort_value # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
end
|
22
|
+
# def sort_value # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
23
|
+
# @sort_value ||= begin
|
24
|
+
# sort_phrase = [hit[:series], hit[:code]].join " "
|
25
|
+
# corr = hit_collection&.text&.split&.map do |w|
|
26
|
+
# if w =~ /\w+/ &&
|
27
|
+
# sort_phrase =~ Regexp.new(Regexp.escape(w), Regexp::IGNORECASE)
|
28
|
+
# 1
|
29
|
+
# else 0
|
30
|
+
# end
|
31
|
+
# end&.sum.to_i
|
32
|
+
# corr + case hit[:status]
|
33
|
+
# when "final" then 4
|
34
|
+
# when "withdrawn" then 3
|
35
|
+
# when "draft" then 2
|
36
|
+
# when "draft (obsolete)" then 1
|
37
|
+
# else 0
|
38
|
+
# end
|
39
|
+
# end
|
40
|
+
# end
|
41
41
|
end
|
42
42
|
end
|
@@ -56,13 +56,13 @@ module RelatonNist
|
|
56
56
|
def search_filter # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/MethodLength
|
57
57
|
@array.select do |item|
|
58
58
|
parts = doi_parts(item.hit[:json]) || code_parts(item.hit[:code])
|
59
|
-
|
59
|
+
refparts[:code] && [parts[:series], item.hit[:series]].include?(refparts[:series]) &&
|
60
60
|
refparts[:code].casecmp(parts[:code].upcase).zero? &&
|
61
|
-
|
62
|
-
(refparts[:vol] == parts[:vol]) &&
|
63
|
-
(refparts[:ver] == parts[:ver]) &&
|
64
|
-
(refparts[:rev] == parts[:rev]) &&
|
65
|
-
refparts[:draft] == parts[:draft] && refparts[:add] == parts[:add]
|
61
|
+
refparts[:prt] == parts[:prt] &&
|
62
|
+
(refparts[:vol].nil? || refparts[:vol] == parts[:vol]) &&
|
63
|
+
(refparts[:ver].nil? || refparts[:ver] == parts[:ver]) &&
|
64
|
+
(refparts[:rev].nil? || refparts[:rev] == parts[:rev]) &&
|
65
|
+
refparts[:draft] == parts[:draft] && refparts[:add] == parts[:add]
|
66
66
|
end
|
67
67
|
end
|
68
68
|
|
@@ -71,8 +71,8 @@ module RelatonNist
|
|
71
71
|
def code_parts(code) # rubocop:disable Metrics/MethodLength
|
72
72
|
{
|
73
73
|
# prefix: match(/^(?:NIST|NBS)\s?/, code),
|
74
|
-
series: match(/(?<val>(?:SP|FIPS|IR|ITL\sBulletin|White\sPaper))\s/, code),
|
75
|
-
code: match(/(?<val>[0-9-]
|
74
|
+
series: match(/(?<val>(?:SP|FIPS|CSWP|IR|ITL\sBulletin|White\sPaper))\s/, code),
|
75
|
+
code: match(/(?<val>[0-9-]+(?:(?!(?:ver|r|v|pt)\d|-add\d?)[A-Za-z-])*)/, code),
|
76
76
|
prt: match(/(?:pt|\sPart\s)(?<val>\d+)/, code),
|
77
77
|
vol: match(/(?:v|\sVol\.\s)(?<val>\d+)/, code),
|
78
78
|
ver: match(/(?:ver|\sVer\.\s|Version\s)(?<val>[\d.]+)/, code),
|
@@ -81,7 +81,7 @@ module RelatonNist
|
|
81
81
|
# (?:\s(?<vol2>Vol\.\s\d+))?
|
82
82
|
# (?:\s(?<ver2>(?:Ver\.|Version)\s[\d.]+))?
|
83
83
|
# (?:\s(?<rev2>Rev\.\s\d+))?
|
84
|
-
add: match(
|
84
|
+
add: match(/(?:-add|\sAdd)(?:endum)?(?<val>\d*)/, code),
|
85
85
|
draft: !match(/\((?:Retired\s)?Draft\)/, code).nil?,
|
86
86
|
}
|
87
87
|
end
|
@@ -92,8 +92,8 @@ module RelatonNist
|
|
92
92
|
id = json["doi"].split("/").last
|
93
93
|
{
|
94
94
|
# prefix: match(/^(?:NIST|NBS)\./, id),
|
95
|
-
series: match(/(?:SP|FIPS|IR|ITL\sBulletin|White\sPaper)(?=\.)/, id),
|
96
|
-
code: match(/(?<=\.)\d
|
95
|
+
series: match(/(?:SP|FIPS|CSWP|IR|ITL\sBulletin|White\sPaper)(?=\.)/, id),
|
96
|
+
code: match(/(?<=\.)\d+(?:-\d+)*(?:[[:alpha:]](?!\d|raft|er|t?\d))?/, id),
|
97
97
|
prt: match(/pt?(?<val>\d+)/, id),
|
98
98
|
vol: match(/v(?<val>\d+)(?!\.\d)/, id),
|
99
99
|
ver: match(/v(?:er)?(?<val>[\d.]+)/, id),
|
@@ -110,9 +110,9 @@ module RelatonNist
|
|
110
110
|
#
|
111
111
|
def refparts # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
112
112
|
@refparts ||= {
|
113
|
-
perfix: match(/^(NIST|NBS)
|
114
|
-
series: match(/(SP|FIPS|IR|ITL\sBulletin|White\sPaper)(?=\.|\s)/, text),
|
115
|
-
code: match(/(?<=\.|\s)[0-9-]
|
113
|
+
perfix: match(/^(NIST|NBS)/, text),
|
114
|
+
series: match(/(SP|FIPS|CSWP|IR|ITL\sBulletin|White\sPaper)(?=\.|\s)/, text),
|
115
|
+
code: match(/(?<=\.|\s)[0-9-]+(?:(?!(ver|r|v|pt)\d|-add\d?)[A-Za-z-])*/, text),
|
116
116
|
prt: match(/(?:(?<dl>\.)?pt(?(<dl>)-)|\sPart\s)(?<val>[A-Z\d]+)/, text),
|
117
117
|
vol: match(/(?:(?<dl>\.)?v(?(<dl>)-)|\sVol\.\s)(?<val>\d+)/, text),
|
118
118
|
ver: match(/(?:(?<dl>\.)?\s?ver|\sVer\.\s)(?<val>\d(?(<dl>)[-\d]|[.\d])*)/, text)&.gsub(/-/, "."),
|
@@ -149,10 +149,11 @@ module RelatonNist
|
|
149
149
|
#
|
150
150
|
def full_ref # rubocop:disable Metrics/AbcSize
|
151
151
|
@full_ref ||= begin
|
152
|
-
ref =
|
152
|
+
ref = [refparts[:perfix], refparts[:series], refparts[:code]].compact.join " "
|
153
153
|
ref += "pt#{refparts[:prt]}" if refparts[:prt] # long_to_short(refparts, "prt").to_s
|
154
154
|
ref += "ver#{refparts[:ver]}" if refparts[:ver] # long_to_short(refparts, "vol").to_s
|
155
155
|
ref += "v#{refparts[:vol]}" if refparts[:vol]
|
156
|
+
ref += "r#{refparts[:rev]}" if refparts[:rev]
|
156
157
|
ref
|
157
158
|
end
|
158
159
|
end
|
@@ -164,11 +165,10 @@ module RelatonNist
|
|
164
165
|
#
|
165
166
|
def sort_hits!
|
166
167
|
@array.sort! do |a, b|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
end
|
168
|
+
code = a.hit[:code] <=> b.hit[:code]
|
169
|
+
next code unless code.zero?
|
170
|
+
|
171
|
+
b.hit[:release_date] <=> a.hit[:release_date]
|
172
172
|
end
|
173
173
|
self
|
174
174
|
end
|
@@ -184,16 +184,20 @@ module RelatonNist
|
|
184
184
|
ref = full_ref
|
185
185
|
# fn = ref.gsub(%r{[/\s:.]}, "_").upcase
|
186
186
|
index = Relaton::Index.find_or_create :nist, url: "#{GHNISTDATA}index-v1.zip", file: INDEX_FILE
|
187
|
-
|
188
|
-
return [] unless row
|
187
|
+
rows = index.search(ref).sort_by { |r| r[:id] }
|
188
|
+
# return [] unless row
|
189
189
|
|
190
|
-
yaml = OpenURI.open_uri "#{GHNISTDATA}#{row[:file]}"
|
191
|
-
hash = YAML.safe_load yaml
|
192
|
-
hash["fetched"] = Date.today.to_s
|
193
|
-
bib = RelatonNist::NistBibliographicItem.from_hash hash
|
194
|
-
|
195
|
-
|
196
|
-
|
190
|
+
# yaml = OpenURI.open_uri "#{GHNISTDATA}#{row[:file]}"
|
191
|
+
# hash = YAML.safe_load yaml
|
192
|
+
# hash["fetched"] = Date.today.to_s
|
193
|
+
# bib = RelatonNist::NistBibliographicItem.from_hash hash
|
194
|
+
# id = bib.docidentifier.find(&:primary).id
|
195
|
+
|
196
|
+
rows.map do |row|
|
197
|
+
Hit.new({ code: row[:id], path: row[:file] }, self)
|
198
|
+
end
|
199
|
+
# hit.fetch = bib
|
200
|
+
# [hit]
|
197
201
|
rescue OpenURI::HTTPError => e
|
198
202
|
return [] if e.io.status[0] == "404"
|
199
203
|
|
@@ -263,8 +267,9 @@ module RelatonNist
|
|
263
267
|
def match_year?(doc, date)
|
264
268
|
return true unless year
|
265
269
|
|
266
|
-
|
267
|
-
|
270
|
+
d = doc["issued-date"] || doc["published-date"]
|
271
|
+
pidate = RelatonBib.parse_date d, false
|
272
|
+
pidate.between? date, date.next_year.prev_day
|
268
273
|
end
|
269
274
|
end
|
270
275
|
end
|
@@ -12,7 +12,7 @@ module RelatonNist
|
|
12
12
|
class NistBibliography
|
13
13
|
class << self
|
14
14
|
#
|
15
|
-
# Search NIST
|
15
|
+
# Search NIST documents by reference
|
16
16
|
#
|
17
17
|
# @param text [String] reference
|
18
18
|
#
|
@@ -47,9 +47,9 @@ module RelatonNist
|
|
47
47
|
if date2
|
48
48
|
case date2
|
49
49
|
when /\w+\s\d{4}/
|
50
|
-
opts[:
|
50
|
+
opts[:date] = Date.strptime date2, "%B %Y"
|
51
51
|
when /\w+\s\d{2},\s\d{4}/
|
52
|
-
opts[:
|
52
|
+
opts[:date] = Date.strptime date2, "%B %d, %Y"
|
53
53
|
end
|
54
54
|
end
|
55
55
|
opts[:stage] = stage if stage
|
@@ -117,16 +117,9 @@ module RelatonNist
|
|
117
117
|
end
|
118
118
|
result.each_slice(3) do |s| # ISO website only allows 3 connections
|
119
119
|
fetch_pages(s, 3).each_with_index do |r, _i|
|
120
|
-
if opts[:
|
121
|
-
|
122
|
-
|
123
|
-
end
|
124
|
-
next if ids.empty?
|
125
|
-
elsif opts[:updated_date]
|
126
|
-
pds = r.date.select do |d|
|
127
|
-
d.type == "published" && d.on(:date) == opts[:updated_date]
|
128
|
-
end
|
129
|
-
next if pds.empty?
|
120
|
+
if opts[:date]
|
121
|
+
dates = r.date.select { |d| d.on(:date) == opts[:date] }
|
122
|
+
next if dates.empty?
|
130
123
|
end
|
131
124
|
next if iter && r.status.iteration != iteration
|
132
125
|
return { ret: r } if !year
|
@@ -11,6 +11,17 @@ module RelatonNist
|
|
11
11
|
# @param hit_data [Hash]
|
12
12
|
# @return [Hash]
|
13
13
|
def parse_page(hit_data)
|
14
|
+
hit_data[:url] ? parse_json(hit_data) : fetch_gh(hit_data)
|
15
|
+
end
|
16
|
+
|
17
|
+
def fetch_gh(hit_data)
|
18
|
+
yaml = OpenURI.open_uri "#{HitCollection::GHNISTDATA}#{hit_data[:path]}"
|
19
|
+
hash = YAML.safe_load yaml
|
20
|
+
hash["fetched"] = Date.today.to_s
|
21
|
+
NistBibliographicItem.from_hash hash
|
22
|
+
end
|
23
|
+
|
24
|
+
def parse_json(hit_data)
|
14
25
|
item_data = from_json hit_data
|
15
26
|
titles = fetch_titles(hit_data)
|
16
27
|
unless /^(SP|NISTIR|FIPS) /.match? item_data[:docid][0].id
|
@@ -22,7 +22,7 @@ MONO: Monographs
|
|
22
22
|
MP: Miscellaneous Publications
|
23
23
|
NCSTAR: National Construction Safety Team Act Reports
|
24
24
|
NSRDS: National Standard Reference Data Series
|
25
|
-
IR:
|
25
|
+
IR: IR (Interagency/Internal Reports)
|
26
26
|
OWMWP: Office of Weights and Measures White Papers
|
27
27
|
PC: Photographic Circulars
|
28
28
|
RPT: NBS Reports
|
data/lib/relaton_nist/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.14.
|
4
|
+
version: 1.14.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: relaton-bib
|