relaton-nist 1.13.1 → 1.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +0 -1
- data/.github/workflows/release.yml +22 -0
- data/README.adoc +30 -37
- data/grammars/basicdoc.rng +3 -27
- data/grammars/biblio-standoc.rng +164 -0
- data/grammars/biblio.rng +82 -19
- data/grammars/relaton-nist-compile.rng +11 -0
- data/grammars/relaton-nist.rng +70 -0
- data/lib/relaton_nist/data_fetcher.rb +1 -1
- data/lib/relaton_nist/hit_collection.rb +111 -58
- data/lib/relaton_nist/nist_bibliographic_item.rb +13 -2
- data/lib/relaton_nist/scrapper.rb +10 -6
- data/lib/relaton_nist/version.rb +1 -1
- data/relaton_nist.gemspec +1 -1
- data/resp.html +616 -616
- metadata +12 -11
- data/grammars/isodoc.rng +0 -2807
- data/grammars/nist.rng +0 -219
- data/grammars/reqt.rng +0 -223
@@ -308,7 +308,7 @@ module RelatonNist
|
|
308
308
|
def parse_doc(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
309
309
|
# mtd = doc.at('doi_record/report-paper/report-paper_metadata')
|
310
310
|
item = RelatonNist::NistBibliographicItem.new(
|
311
|
-
|
311
|
+
type: "standard", docid: fetch_docid(doc),
|
312
312
|
title: fetch_title(doc), link: fetch_link(doc), abstract: fetch_abstract(doc),
|
313
313
|
date: fetch_date(doc), edition: fetch_edition(doc),
|
314
314
|
contributor: fetch_contributor(doc), relation: fetch_relation(doc),
|
@@ -15,28 +15,38 @@ module RelatonNist
|
|
15
15
|
DATAFILE = File.expand_path "pubs-export.zip", DATAFILEDIR
|
16
16
|
GHNISTDATA = "https://raw.githubusercontent.com/relaton/relaton-data-nist/main/data/"
|
17
17
|
|
18
|
+
#
|
19
|
+
# @param [String] text reference
|
20
|
+
# @param [String, nil] year reference
|
21
|
+
# @param [Hash] opts options
|
22
|
+
# @option opts [String] :stage stage of document
|
23
|
+
#
|
24
|
+
def initialize(text, year = nil, opts = {})
|
25
|
+
super text, year
|
26
|
+
@opts = opts
|
27
|
+
end
|
28
|
+
|
18
29
|
#
|
19
30
|
# Create hits collection instance and search hits
|
20
31
|
#
|
32
|
+
# @param [String] text reference
|
33
|
+
# @param [String, nil] year reference
|
21
34
|
# @param [Hash] opts options
|
22
35
|
# @option opts [String] :stage stage of document
|
23
36
|
#
|
24
37
|
# @return [RelatonNist::HitCollection] hits collection
|
25
38
|
#
|
26
39
|
def self.search(text, year = nil, opts = {})
|
27
|
-
new(text, year).search
|
40
|
+
new(text, year, opts).search
|
28
41
|
end
|
29
42
|
|
30
43
|
#
|
31
44
|
# Search hits in JSON file or GitHub repo
|
32
45
|
#
|
33
|
-
# @param [Hash] opts options
|
34
|
-
# @option opts [String] :stage stage of document
|
35
|
-
#
|
36
46
|
# @return [RelatonNist::HitCollection] hits collection
|
37
47
|
#
|
38
|
-
def search
|
39
|
-
@array = from_json
|
48
|
+
def search
|
49
|
+
@array = from_json
|
40
50
|
@array = from_ga unless @array.any?
|
41
51
|
sort_hits!
|
42
52
|
end
|
@@ -46,33 +56,57 @@ module RelatonNist
|
|
46
56
|
#
|
47
57
|
# @return [Array<RelatonNist::Hit>] hits
|
48
58
|
#
|
49
|
-
def search_filter # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/
|
59
|
+
def search_filter # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/MethodLength
|
50
60
|
@array.select do |item|
|
51
|
-
|
52
|
-
|
53
|
-
(
|
54
|
-
(
|
55
|
-
(
|
56
|
-
(
|
57
|
-
(
|
58
|
-
|
59
|
-
(
|
60
|
-
(?:\s(?<vol2>Vol\.\s\d+))?
|
61
|
-
(?:\s(?<ver2>(?:Ver\.|Version)\s[\d.]+))?
|
62
|
-
(?:\s(?<rev2>Rev\.\s\d+))?
|
63
|
-
(?:\s(?<add>Add)endum)?
|
64
|
-
}x =~ item.hit[:code]
|
65
|
-
(refparts[:code] && [series, item.hit[:series]].include?(refparts[:series]) && refparts[:code] == code &&
|
66
|
-
long_to_short(refparts[:prt1], refparts[:prt2]) == long_to_short(prt1, prt2) &&
|
67
|
-
long_to_short(refparts[:vol1], refparts[:vol2]) == long_to_short(vol1, vol2) &&
|
68
|
-
long_to_short(refparts[:ver1], refparts[:ver2]) == long_to_short(ver1, ver2) &&
|
69
|
-
long_to_short(refparts[:rev1], refparts[:rev2]) == long_to_short(rev1, rev2) &&
|
70
|
-
long_to_short(refparts[:add1], refparts[:add2]) == add) || item.hit[:title]&.include?(text.sub(/^NIST\s/, ""))
|
61
|
+
parts = doi_parts(item.hit[:json]) || code_parts(item.hit[:code])
|
62
|
+
(refparts[:code] && [parts[:series], item.hit[:series]].include?(refparts[:series]) &&
|
63
|
+
refparts[:code].casecmp(parts[:code].upcase).zero? &&
|
64
|
+
(!refparts[:prt] || refparts[:prt] == parts[:prt]) &&
|
65
|
+
(!refparts[:vol] || refparts[:vol] == parts[:vol]) &&
|
66
|
+
(!refparts[:ver] || refparts[:ver] == parts[:ver]) &&
|
67
|
+
(!refparts[:rev] || refparts[:rev] == parts[:rev]) &&
|
68
|
+
refparts[:draft] == parts[:draft] && refparts[:add] == parts[:add]) ||
|
69
|
+
item.hit[:title]&.include?(text.sub(/^NIST\s/, ""))
|
71
70
|
end
|
72
71
|
end
|
73
72
|
|
74
73
|
private
|
75
74
|
|
75
|
+
def code_parts(code) # rubocop:disable Metrics/MethodLength
|
76
|
+
{
|
77
|
+
prefix: match(/^(?:NIST|NBS)\s?/, code),
|
78
|
+
series: match(/(?<val>(?:SP|FIPS|IR|ITL\sBulletin|White\sPaper))\s/, code),
|
79
|
+
code: match(/(?<val>[0-9-]{3,}[A-Z]?)/, code),
|
80
|
+
prt: match(/(?:pt|\sPart\s)(?<val>\d+)/, code),
|
81
|
+
vol: match(/(?:v|\sVol\.\s)(?<val>\d+)/, code),
|
82
|
+
ver: match(/(?:ver|\sVer\.\s|Version\s)(?<val>[\d.]+)/, code),
|
83
|
+
rev: match(/(?:r|Rev\.\s)(?<val>\d+)/, code),
|
84
|
+
# (?:\s(?<prt2>Part\s\d+))?
|
85
|
+
# (?:\s(?<vol2>Vol\.\s\d+))?
|
86
|
+
# (?:\s(?<ver2>(?:Ver\.|Version)\s[\d.]+))?
|
87
|
+
# (?:\s(?<rev2>Rev\.\s\d+))?
|
88
|
+
add: match(/\sAdd(?:endum)?(?<val>\d*)/, code),
|
89
|
+
draft: !match(/\((?:Retired\s)?Draft\)/, code).nil?,
|
90
|
+
}
|
91
|
+
end
|
92
|
+
|
93
|
+
def doi_parts(json) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
94
|
+
return unless json && json["doi"]
|
95
|
+
|
96
|
+
id = json["doi"].split("/").last
|
97
|
+
{
|
98
|
+
prefix: match(/^(?:NIST|NBS)\./, id),
|
99
|
+
series: match(/(?:SP|FIPS|IR|ITL\sBulletin|White\sPaper)(?=\.)/, id),
|
100
|
+
code: match(/(?<=\.)\d{3,}(?:-\d+)*(?:[[:alpha:]](?!\d|raft|er|t?\d))?/, id),
|
101
|
+
prt: match(/pt?(?<val>\d+)/, id),
|
102
|
+
vol: match(/v(?<val>\d+)(?!\.\d)/, id),
|
103
|
+
ver: match(/v(?:er)?(?<val>[\d.]+)/, id),
|
104
|
+
rev: match(/r(?<val>\d+)/, id),
|
105
|
+
add: match(/-Add(?<val>\d*)/, id),
|
106
|
+
draft: !match(/-draft/, id).nil?,
|
107
|
+
}
|
108
|
+
end
|
109
|
+
|
76
110
|
#
|
77
111
|
# Parse reference parts
|
78
112
|
#
|
@@ -83,16 +117,17 @@ module RelatonNist
|
|
83
117
|
perfix: match(/^(NIST|NBS)\s?/, text),
|
84
118
|
series: match(/(SP|FIPS|IR|ITL\sBulletin|White\sPaper)(?=\.|\s)/, text),
|
85
119
|
code: match(/(?<=\.|\s)[0-9-]{3,}[A-Z]?/, text),
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
120
|
+
prt: match(/(?:(?<dl>\.)?pt(?(<dl>)-)|\sPart\s)(?<val>[A-Z\d]+)/, text),
|
121
|
+
vol: match(/(?:(?<dl>\.)?v(?(<dl>)-)|\sVol\.\s)(?<val>\d+)/, text),
|
122
|
+
ver: match(/(?:(?<dl>\.)?\s?ver|\sVer\.\s)(?<val>\d(?(<dl>)[-\d]|[.\d])*)/, text)&.gsub(/-/, "."),
|
123
|
+
rev: match(/(?:(?:(?<dl>\.)|[^a-z])r|\sRev\.\s)(?(<dl>)-)(?<val>\d+)/, text),
|
124
|
+
add: match(/(?:(?<dl>\.)?add|\/Add)(?(<dl>)-)(?<val>\d*)/, text),
|
125
|
+
draft: !(match(/\((?:Draft|PD)\)/, text).nil? && @opts[:stage].nil?),
|
126
|
+
# prt2: match(/(?<=\s)Part\s[A-Z\d]+/, text),
|
127
|
+
# vol2: match(/(?<=\s)Vol\.\s\d+/, text),
|
128
|
+
# ver2: match(/(?<=\s)Ver\.\s\d+/, text),
|
129
|
+
# rev2: match(/(?<=\s)Rev\.\s\d+/, text),
|
130
|
+
# add2: match(/(?<=\/)Add/, text),
|
96
131
|
}
|
97
132
|
end
|
98
133
|
|
@@ -105,7 +140,10 @@ module RelatonNist
|
|
105
140
|
# @return [String, nil] matched string
|
106
141
|
#
|
107
142
|
def match(regex, code)
|
108
|
-
regex.match(code)
|
143
|
+
m = regex.match(code)
|
144
|
+
return unless m
|
145
|
+
|
146
|
+
m.named_captures["val"] || m.to_s
|
109
147
|
end
|
110
148
|
|
111
149
|
#
|
@@ -116,8 +154,9 @@ module RelatonNist
|
|
116
154
|
def full_ref # rubocop:disable Metrics/AbcSize
|
117
155
|
@full_ref ||= begin
|
118
156
|
ref = "#{refparts[:perfix]}#{refparts[:series]} #{refparts[:code]}"
|
119
|
-
ref +=
|
120
|
-
ref +=
|
157
|
+
ref += "pt#{refparts[:prt]}" if refparts[:prt] # long_to_short(refparts, "prt").to_s
|
158
|
+
ref += "ver#{refparts[:ver]}" if refparts[:ver] # long_to_short(refparts, "vol").to_s
|
159
|
+
ref += "v#{refparts[:vol]}" if refparts[:vol]
|
121
160
|
ref
|
122
161
|
end
|
123
162
|
end
|
@@ -127,17 +166,21 @@ module RelatonNist
|
|
127
166
|
# Converts "pt-1" to "pt1" and "Part 1" to "pt1", "v-1" to "v1" and "Vol. 1" to "v1",
|
128
167
|
# "ver-1" to "ver1" and "Ver. 1" to "ver1", "r-1" to "r1" and "Rev. 1" to "r1".
|
129
168
|
#
|
130
|
-
# @param
|
131
|
-
# @param
|
169
|
+
# @param parts [MatchData] parts of ID
|
170
|
+
# @param name [String] name of ID part
|
132
171
|
#
|
133
172
|
# @return [String, nil]
|
134
173
|
#
|
135
|
-
def long_to_short(
|
136
|
-
|
137
|
-
|
174
|
+
# def long_to_short(parts, name)
|
175
|
+
# short = parts["#{name}1".to_sym]
|
176
|
+
# return short.sub(/-/, "") if short
|
138
177
|
|
139
|
-
|
140
|
-
|
178
|
+
# long_name = "#{name}2"
|
179
|
+
# long = parts[long_name.to_sym]
|
180
|
+
# return unless long
|
181
|
+
|
182
|
+
# long.sub(/Part\s/, "pt").sub(/Vol\.\s/, "v").sub(/Rev\.\s/, "r").sub(/(Ver\.|Version)\s/, "ver")
|
183
|
+
# end
|
141
184
|
|
142
185
|
#
|
143
186
|
# Sort hits by sort_value and release date
|
@@ -167,6 +210,7 @@ module RelatonNist
|
|
167
210
|
fn = ref.gsub(%r{[/\s:.]}, "_").upcase
|
168
211
|
yaml = OpenURI.open_uri "#{GHNISTDATA}#{fn}.yaml"
|
169
212
|
hash = YAML.safe_load yaml
|
213
|
+
hash["fetched"] = Date.today.to_s
|
170
214
|
bib = RelatonNist::NistBibliographicItem.from_hash hash
|
171
215
|
hit = Hit.new({ code: text }, self)
|
172
216
|
hit.fetch = bib
|
@@ -180,38 +224,47 @@ module RelatonNist
|
|
180
224
|
#
|
181
225
|
# Fetches data from json
|
182
226
|
#
|
183
|
-
# @param opts [Hash] options
|
184
|
-
# @option opts [String] :stage stage of document
|
185
|
-
#
|
186
227
|
# @return [Array<RelatonNist::Hit>] hits
|
187
228
|
#
|
188
|
-
def from_json
|
189
|
-
select_data
|
229
|
+
def from_json
|
230
|
+
select_data.map do |h|
|
190
231
|
/(?<series>(?<=-)\w+$)/ =~ h["series"]
|
191
232
|
title = [h["title-main"], h["title-sub"]].compact.join " - "
|
192
233
|
release_date = RelatonBib.parse_date h["published-date"], false
|
193
|
-
Hit.new({ code: h
|
234
|
+
Hit.new({ code: docidentifier(h), series: series.upcase, title: title,
|
194
235
|
url: h["uri"], status: h["status"],
|
195
236
|
release_date: release_date, json: h }, self)
|
196
237
|
end
|
197
238
|
end
|
198
239
|
|
240
|
+
def docidentifier(json) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
|
241
|
+
parts = doi_parts json
|
242
|
+
return json["docidentifier"] unless parts
|
243
|
+
|
244
|
+
id = parts[:code]
|
245
|
+
id = "#{parts[:series]} #{id}" if parts[:series]
|
246
|
+
id += " Part #{parts[:prt]}" if parts[:prt]
|
247
|
+
id += " Vol. #{parts[:vol]}" if parts[:vol]
|
248
|
+
id += " Ver. #{parts[:ver]}" if parts[:ver]
|
249
|
+
id += " Rev. #{parts[:rev]}" if parts[:rev]
|
250
|
+
id += "-Add" if parts[:add]
|
251
|
+
id += " (Draft)" if parts[:draft] || @opts[:stage]
|
252
|
+
id
|
253
|
+
end
|
254
|
+
|
199
255
|
#
|
200
256
|
# Select data from json
|
201
257
|
#
|
202
|
-
# @param opts [Hash] options
|
203
|
-
# @option opts [String] :stage stage of document
|
204
|
-
#
|
205
258
|
# @return [Array<Hash>] selected data
|
206
259
|
#
|
207
|
-
def select_data
|
260
|
+
def select_data # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength,Metrics/PerceivedComplexity
|
208
261
|
ref = "#{refparts[:series]} #{refparts[:code]}"
|
209
262
|
d = Date.strptime year, "%Y" if year
|
210
263
|
statuses = %w[draft-public draft-prelim]
|
211
264
|
data.select do |doc|
|
212
265
|
next unless match_year?(doc, d)
|
213
266
|
|
214
|
-
if /PD/.match? opts[:stage]
|
267
|
+
if /PD/.match? @opts[:stage]
|
215
268
|
next unless statuses.include? doc["status"]
|
216
269
|
else
|
217
270
|
next unless doc["status"] == "final"
|
@@ -63,6 +63,15 @@ module RelatonNist
|
|
63
63
|
super
|
64
64
|
end
|
65
65
|
|
66
|
+
#
|
67
|
+
# Fetch flavor schema version
|
68
|
+
#
|
69
|
+
# @return [String] schema version
|
70
|
+
#
|
71
|
+
def ext_schema
|
72
|
+
@ext_schema ||= schema_versions["relaton-model-nist"]
|
73
|
+
end
|
74
|
+
|
66
75
|
# @param hash [Hash]
|
67
76
|
# @return [RelatonNist::NistBibliographicItem]
|
68
77
|
def self.from_hash(hash)
|
@@ -78,16 +87,18 @@ module RelatonNist
|
|
78
87
|
def to_xml(**opts)
|
79
88
|
super date_format: :short, **opts do |b|
|
80
89
|
if opts[:bibdata]
|
81
|
-
b.ext do
|
90
|
+
ext = b.ext do
|
82
91
|
b.doctype doctype if doctype
|
83
92
|
commentperiod&.to_xml b
|
84
93
|
end
|
94
|
+
ext["schema-version"] = ext_schema unless opts[:embedded]
|
85
95
|
end
|
86
96
|
end
|
87
97
|
end
|
88
98
|
|
99
|
+
# @param embedded [Boolean] embedded in another document
|
89
100
|
# @return [Hash]
|
90
|
-
def to_hash
|
101
|
+
def to_hash(embedded: false)
|
91
102
|
hash = super
|
92
103
|
# hash["keyword"] = single_element_array(keyword) if keyword&.any?
|
93
104
|
hash["commentperiod"] = commentperiod.to_hash if commentperiod
|
@@ -32,7 +32,7 @@ module RelatonNist
|
|
32
32
|
json = hit_data[:json]
|
33
33
|
{
|
34
34
|
link: fetch_link(json),
|
35
|
-
docid: fetch_docid(
|
35
|
+
docid: fetch_docid(hit_data),
|
36
36
|
date: fetch_dates(json, hit_data[:release_date]),
|
37
37
|
contributor: fetch_contributors(json),
|
38
38
|
edition: fetch_edition(json),
|
@@ -50,13 +50,17 @@ module RelatonNist
|
|
50
50
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
51
51
|
|
52
52
|
# Fetch docid.
|
53
|
-
# @param
|
53
|
+
# @param hit [RelatonNist::Hit]
|
54
54
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
55
|
-
def fetch_docid(
|
56
|
-
item_ref = docid
|
55
|
+
def fetch_docid(hit)
|
56
|
+
# item_ref = docid
|
57
|
+
# json["docidentifier"]
|
57
58
|
# item_ref ||= "?"
|
58
|
-
item_ref.sub!(/\sAddendum$/, "-Add")
|
59
|
-
[RelatonBib::DocumentIdentifier.new(id:
|
59
|
+
# item_ref.sub!(/\sAddendum$/, "-Add")
|
60
|
+
ids = [RelatonBib::DocumentIdentifier.new(id: hit[:code], type: "NIST", primary: true)]
|
61
|
+
doi = hit[:json]["doi"]&.split("/")&.last
|
62
|
+
ids << RelatonBib::DocumentIdentifier.new(id: doi, type: "DOI") if doi
|
63
|
+
ids
|
60
64
|
end
|
61
65
|
|
62
66
|
# Fetch status.
|
data/lib/relaton_nist/version.rb
CHANGED
data/relaton_nist.gemspec
CHANGED