relaton-nist 1.8.0 → 1.9.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/README.adoc +30 -0
- data/bin/rspec +29 -0
- data/grammars/biblio.rng +1 -0
- data/grammars/isodoc.rng +72 -10
- data/grammars/nist.rng +3 -0
- data/lib/relaton_nist/data_fetcher.rb +217 -0
- data/lib/relaton_nist/hit.rb +3 -1
- data/lib/relaton_nist/hit_collection.rb +71 -58
- data/lib/relaton_nist/nist_bibliographic_item.rb +1 -1
- data/lib/relaton_nist/nist_bibliography.rb +16 -6
- data/lib/relaton_nist/processor.rb +14 -1
- data/lib/relaton_nist/scrapper.rb +45 -301
- data/lib/relaton_nist/version.rb +1 -1
- data/lib/relaton_nist/xml_parser.rb +1 -1
- data/lib/relaton_nist.rb +1 -0
- data/relaton_nist.gemspec +3 -5
- metadata +9 -7
@@ -13,75 +13,90 @@ module RelatonNist
|
|
13
13
|
PUBS_EXPORT = URI.join(DOMAIN, "/CSRC/media/feeds/metanorma/pubs-export")
|
14
14
|
DATAFILEDIR = File.expand_path ".relaton/nist", Dir.home
|
15
15
|
DATAFILE = File.expand_path "pubs-export.zip", DATAFILEDIR
|
16
|
+
GHNISTDATA = "https://raw.githubusercontent.com/relaton/relaton-data-nist/main/data/"
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
def
|
22
|
-
|
18
|
+
def self.search(text, year = nil, opts = {})
|
19
|
+
new(text, year).search(opts)
|
20
|
+
end
|
21
|
+
|
22
|
+
def search(opts)
|
23
|
+
@array = from_json(**opts)
|
24
|
+
@array = from_ga unless @array.any?
|
25
|
+
sort_hits!
|
26
|
+
end
|
23
27
|
|
24
|
-
|
25
|
-
@array = docid ? from_json(docid, **opts) : from_csrc(**opts)
|
26
|
-
@array = from_csrc(**opts) unless @array.any?
|
28
|
+
private
|
27
29
|
|
30
|
+
def sort_hits!
|
28
31
|
@array.sort! do |a, b|
|
29
|
-
if a.sort_value
|
30
|
-
b.sort_value - a.sort_value
|
31
|
-
else
|
32
|
+
if a.sort_value == b.sort_value
|
32
33
|
(b.hit[:release_date] - a.hit[:release_date]).to_i
|
34
|
+
else
|
35
|
+
b.sort_value - a.sort_value
|
33
36
|
end
|
34
37
|
end
|
38
|
+
self
|
35
39
|
end
|
36
40
|
|
37
|
-
|
41
|
+
def from_ga # rubocop:disable Metrics/AbcSize
|
42
|
+
fn = text.gsub(%r{[/\s:.]}, "_").upcase
|
43
|
+
yaml = OpenURI.open_uri "#{GHNISTDATA}#{fn}.yaml"
|
44
|
+
hash = YAML.safe_load yaml
|
45
|
+
bib = RelatonNist::NistBibliographicItem.from_hash hash
|
46
|
+
hit = Hit.new({ code: text }, self)
|
47
|
+
hit.fetch = bib
|
48
|
+
[hit]
|
49
|
+
rescue OpenURI::HTTPError => e
|
50
|
+
return [] if e.io.status[0] == "404"
|
51
|
+
|
52
|
+
raise e
|
53
|
+
end
|
38
54
|
|
39
55
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
40
56
|
|
41
57
|
# @param stage [String]
|
42
58
|
# @return [Array<RelatonNist::Hit>]
|
43
|
-
def from_csrc(**opts)
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
end
|
59
|
+
# def from_csrc(**opts)
|
60
|
+
# from, to = nil
|
61
|
+
# if year
|
62
|
+
# d = Date.strptime year, "%Y"
|
63
|
+
# from = d.strftime "%m/%d/%Y"
|
64
|
+
# to = d.next_year.prev_day.strftime "%m/%d/%Y"
|
65
|
+
# end
|
66
|
+
# url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"\
|
67
|
+
# "&sortBy-lg=relevence"
|
68
|
+
# url += "&dateFrom-lg=#{from}" if from
|
69
|
+
# url += "&dateTo-lg=#{to}" if to
|
70
|
+
# url += if /PD/.match? opts[:stage]
|
71
|
+
# "&status-lg=Draft,Retired Draft,Withdrawn"
|
72
|
+
# else
|
73
|
+
# "&status-lg=Final,Withdrawn"
|
74
|
+
# end
|
75
|
+
|
76
|
+
# doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
|
77
|
+
# doc.css("table.publications-table > tbody > tr").map do |h|
|
78
|
+
# link = h.at("td/div/strong/a")
|
79
|
+
# serie = h.at("td[1]").text.strip
|
80
|
+
# code = h.at("td[2]").text.strip
|
81
|
+
# title = link.text
|
82
|
+
# doc_url = DOMAIN + link[:href]
|
83
|
+
# status = h.at("td[4]").text.strip.downcase
|
84
|
+
# release_date = Date.strptime h.at("td[5]").text.strip, "%m/%d/%Y"
|
85
|
+
# Hit.new(
|
86
|
+
# {
|
87
|
+
# code: code, serie: serie, title: title, url: doc_url,
|
88
|
+
# status: status, release_date: release_date
|
89
|
+
# }, self
|
90
|
+
# )
|
91
|
+
# end
|
92
|
+
# end
|
77
93
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
78
94
|
|
79
95
|
# Fetches data from json
|
80
|
-
# @param docid [String]
|
81
96
|
# @param stage [String]
|
82
97
|
# @return [Array<RelatonNist::Hit>]
|
83
|
-
def from_json(
|
84
|
-
select_data(
|
98
|
+
def from_json(**opts)
|
99
|
+
select_data(**opts).map do |h|
|
85
100
|
/(?<serie>(?<=-)\w+$)/ =~ h["series"]
|
86
101
|
title = [h["title-main"], h["title-sub"]].compact.join " - "
|
87
102
|
release_date = RelatonBib.parse_date h["published-date"], false
|
@@ -91,22 +106,20 @@ module RelatonNist
|
|
91
106
|
end
|
92
107
|
end
|
93
108
|
|
94
|
-
# @param docid [String]
|
95
109
|
# @param stage [String]
|
96
110
|
# @return [Array<Hash>]
|
97
|
-
def select_data(
|
98
|
-
# ref = docid.sub(/(?<=\d{3}-\d{2})r(\d+)/, ' Rev. \1')
|
111
|
+
def select_data(**opts) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength,Metrics/PerceivedComplexity
|
99
112
|
d = Date.strptime year, "%Y" if year
|
100
|
-
|
113
|
+
statuses = %w[draft-public draft-prelim]
|
101
114
|
data.select do |doc|
|
102
115
|
next unless match_year?(doc, d)
|
103
116
|
|
104
117
|
if /PD/.match? opts[:stage]
|
105
|
-
next unless
|
118
|
+
next unless statuses.include? doc["status"]
|
106
119
|
else
|
107
120
|
next unless doc["status"] == "final"
|
108
121
|
end
|
109
|
-
doc["docidentifier"].include?
|
122
|
+
doc["docidentifier"].include? text
|
110
123
|
end
|
111
124
|
end
|
112
125
|
|
@@ -134,8 +147,8 @@ module RelatonNist
|
|
134
147
|
#
|
135
148
|
# @param ctime [Time, NilClass]
|
136
149
|
def fetch_data(ctime)
|
137
|
-
resp = OpenURI.open_uri("#{PUBS_EXPORT}.meta")
|
138
|
-
if !ctime || ctime <
|
150
|
+
# resp = OpenURI.open_uri("#{PUBS_EXPORT}.meta")
|
151
|
+
if !ctime || ctime < OpenURI.open_uri("#{PUBS_EXPORT}.meta").last_modified
|
139
152
|
@data = nil
|
140
153
|
uri_open = URI.method(:open) || Kernel.method(:open)
|
141
154
|
FileUtils.mkdir_p DATAFILEDIR unless Dir.exist? DATAFILEDIR
|
@@ -14,9 +14,9 @@ module RelatonNist
|
|
14
14
|
# @param text [String]
|
15
15
|
# @return [RelatonNist::HitCollection]
|
16
16
|
def search(text, year = nil, opts = {})
|
17
|
-
HitCollection.
|
18
|
-
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError
|
19
|
-
raise RelatonBib::RequestError,
|
17
|
+
HitCollection.search text, year, opts
|
18
|
+
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError => e
|
19
|
+
raise RelatonBib::RequestError, e.message
|
20
20
|
end
|
21
21
|
|
22
22
|
# @param code [String] the NIST standard Code to look up (e.g. "8200")
|
@@ -174,7 +174,17 @@ module RelatonNist
|
|
174
174
|
}
|
175
175
|
ref = matches[:code] ? "#{matches[:serie]} #{matches[:code]}" : code
|
176
176
|
result = search(ref, year, opts)
|
177
|
-
result.select { |i| search_filter i, matches, code }
|
177
|
+
selected_result = result.select { |i| search_filter i, matches, code }
|
178
|
+
return selected_result if selected_result.any? || !matches[:code]
|
179
|
+
|
180
|
+
search full_ref(matches)
|
181
|
+
end
|
182
|
+
|
183
|
+
def full_ref(matches)
|
184
|
+
ref = "#{matches[:serie]} #{matches[:code]}"
|
185
|
+
ref += long_to_short(matches[:prt1], matches[:prt2]).to_s
|
186
|
+
ref += long_to_short(matches[:vol1], matches[:vol2]).to_s
|
187
|
+
ref
|
178
188
|
end
|
179
189
|
|
180
190
|
def match(regex, code)
|
@@ -192,11 +202,11 @@ module RelatonNist
|
|
192
202
|
(?<code>[0-9-]{3,}[A-Z]?)
|
193
203
|
(?<prt1>pt\d+)?
|
194
204
|
(?<vol1>v\d+)?
|
195
|
-
(?<ver1>ver[\d
|
205
|
+
(?<ver1>ver[\d.]+)?
|
196
206
|
(?<rev1>r\d+)?
|
197
207
|
(\s(?<prt2>Part\s\d+))?
|
198
208
|
(\s(?<vol2>Vol\.\s\d+))?
|
199
|
-
(\s(?<ver2>(Ver\.|Version)\s[\d
|
209
|
+
(\s(?<ver2>(Ver\.|Version)\s[\d.]+))?
|
200
210
|
(\s(?<rev2>Rev\.\s\d+))?
|
201
211
|
(\s(?<add>Add)endum)?
|
202
212
|
}x =~ item.hit[:code]
|
@@ -2,11 +2,12 @@ require "relaton/processor"
|
|
2
2
|
|
3
3
|
module RelatonNist
|
4
4
|
class Processor < Relaton::Processor
|
5
|
-
def initialize
|
5
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
6
6
|
@short = :relaton_nist
|
7
7
|
@prefix = "NIST"
|
8
8
|
@defaultprefix = %r{^(NIST|NISTGCR|ITL Bulletin|JPCRD|NISTIR|CSRC|FIPS)(/[^\s])?\s}
|
9
9
|
@idtype = "NIST"
|
10
|
+
@datasets = %w[nist-tech-pubs]
|
10
11
|
end
|
11
12
|
|
12
13
|
# @param code [String]
|
@@ -17,6 +18,18 @@ module RelatonNist
|
|
17
18
|
::RelatonNist::NistBibliography.get(code, date, opts)
|
18
19
|
end
|
19
20
|
|
21
|
+
#
|
22
|
+
# Fetch all the documents from a source
|
23
|
+
#
|
24
|
+
# @param [String] _source source name
|
25
|
+
# @param [Hash] opts
|
26
|
+
# @option opts [String] :output directory to output documents
|
27
|
+
# @option opts [String] :format
|
28
|
+
#
|
29
|
+
def fetch_data(_source, opts)
|
30
|
+
DataFetcher.fetch(**opts)
|
31
|
+
end
|
32
|
+
|
20
33
|
# @param xml [String]
|
21
34
|
# @return [RelatonNist::GbBibliographicItem]
|
22
35
|
def from_xml(xml)
|