relaton-nist 1.8.0 → 1.9.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,75 +13,90 @@ module RelatonNist
13
13
  PUBS_EXPORT = URI.join(DOMAIN, "/CSRC/media/feeds/metanorma/pubs-export")
14
14
  DATAFILEDIR = File.expand_path ".relaton/nist", Dir.home
15
15
  DATAFILE = File.expand_path "pubs-export.zip", DATAFILEDIR
16
+ GHNISTDATA = "https://raw.githubusercontent.com/relaton/relaton-data-nist/main/data/"
16
17
 
17
- # @param ref_nbr [String]
18
- # @param year [String]
19
- # @param opts [Hash]
20
- # @option opts [String] :stage
21
- def initialize(ref_nbr, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize
22
- super ref_nbr, year
18
+ def self.search(text, year = nil, opts = {})
19
+ new(text, year).search(opts)
20
+ end
21
+
22
+ def search(opts)
23
+ @array = from_json(**opts)
24
+ @array = from_ga unless @array.any?
25
+ sort_hits!
26
+ end
23
27
 
24
- /(?<docid>(SP|FIPS)\s[0-9-]+)/ =~ text
25
- @array = docid ? from_json(docid, **opts) : from_csrc(**opts)
26
- @array = from_csrc(**opts) unless @array.any?
28
+ private
27
29
 
30
+ def sort_hits!
28
31
  @array.sort! do |a, b|
29
- if a.sort_value != b.sort_value
30
- b.sort_value - a.sort_value
31
- else
32
+ if a.sort_value == b.sort_value
32
33
  (b.hit[:release_date] - a.hit[:release_date]).to_i
34
+ else
35
+ b.sort_value - a.sort_value
33
36
  end
34
37
  end
38
+ self
35
39
  end
36
40
 
37
- private
41
+ def from_ga # rubocop:disable Metrics/AbcSize
42
+ fn = text.gsub(%r{[/\s:.]}, "_").upcase
43
+ yaml = OpenURI.open_uri "#{GHNISTDATA}#{fn}.yaml"
44
+ hash = YAML.safe_load yaml
45
+ bib = RelatonNist::NistBibliographicItem.from_hash hash
46
+ hit = Hit.new({ code: text }, self)
47
+ hit.fetch = bib
48
+ [hit]
49
+ rescue OpenURI::HTTPError => e
50
+ return [] if e.io.status[0] == "404"
51
+
52
+ raise e
53
+ end
38
54
 
39
55
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
40
56
 
41
57
  # @param stage [String]
42
58
  # @return [Array<RelatonNist::Hit>]
43
- def from_csrc(**opts)
44
- from, to = nil
45
- if year
46
- d = Date.strptime year, "%Y"
47
- from = d.strftime "%m/%d/%Y"
48
- to = d.next_year.prev_day.strftime "%m/%d/%Y"
49
- end
50
- url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"\
51
- "&sortBy-lg=relevence"
52
- url += "&dateFrom-lg=#{from}" if from
53
- url += "&dateTo-lg=#{to}" if to
54
- url += if /PD/.match? opts[:stage]
55
- "&status-lg=Draft,Retired Draft,Withdrawn"
56
- else
57
- "&status-lg=Final,Withdrawn"
58
- end
59
-
60
- doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
61
- doc.css("table.publications-table > tbody > tr").map do |h|
62
- link = h.at("td/div/strong/a")
63
- serie = h.at("td[1]").text.strip
64
- code = h.at("td[2]").text.strip
65
- title = link.text
66
- doc_url = DOMAIN + link[:href]
67
- status = h.at("td[4]").text.strip.downcase
68
- release_date = Date.strptime h.at("td[5]").text.strip, "%m/%d/%Y"
69
- Hit.new(
70
- {
71
- code: code, serie: serie, title: title, url: doc_url,
72
- status: status, release_date: release_date
73
- }, self
74
- )
75
- end
76
- end
59
+ # def from_csrc(**opts)
60
+ # from, to = nil
61
+ # if year
62
+ # d = Date.strptime year, "%Y"
63
+ # from = d.strftime "%m/%d/%Y"
64
+ # to = d.next_year.prev_day.strftime "%m/%d/%Y"
65
+ # end
66
+ # url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"\
67
+ # "&sortBy-lg=relevence"
68
+ # url += "&dateFrom-lg=#{from}" if from
69
+ # url += "&dateTo-lg=#{to}" if to
70
+ # url += if /PD/.match? opts[:stage]
71
+ # "&status-lg=Draft,Retired Draft,Withdrawn"
72
+ # else
73
+ # "&status-lg=Final,Withdrawn"
74
+ # end
75
+
76
+ # doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
77
+ # doc.css("table.publications-table > tbody > tr").map do |h|
78
+ # link = h.at("td/div/strong/a")
79
+ # serie = h.at("td[1]").text.strip
80
+ # code = h.at("td[2]").text.strip
81
+ # title = link.text
82
+ # doc_url = DOMAIN + link[:href]
83
+ # status = h.at("td[4]").text.strip.downcase
84
+ # release_date = Date.strptime h.at("td[5]").text.strip, "%m/%d/%Y"
85
+ # Hit.new(
86
+ # {
87
+ # code: code, serie: serie, title: title, url: doc_url,
88
+ # status: status, release_date: release_date
89
+ # }, self
90
+ # )
91
+ # end
92
+ # end
77
93
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
78
94
 
79
95
  # Fetches data from json
80
- # @param docid [String]
81
96
  # @param stage [String]
82
97
  # @return [Array<RelatonNist::Hit>]
83
- def from_json(docid, **opts)
84
- select_data(docid, **opts).map do |h|
98
+ def from_json(**opts)
99
+ select_data(**opts).map do |h|
85
100
  /(?<serie>(?<=-)\w+$)/ =~ h["series"]
86
101
  title = [h["title-main"], h["title-sub"]].compact.join " - "
87
102
  release_date = RelatonBib.parse_date h["published-date"], false
@@ -91,22 +106,20 @@ module RelatonNist
91
106
  end
92
107
  end
93
108
 
94
- # @param docid [String]
95
109
  # @param stage [String]
96
110
  # @return [Array<Hash>]
97
- def select_data(docid, **opts) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength,Metrics/PerceivedComplexity
98
- # ref = docid.sub(/(?<=\d{3}-\d{2})r(\d+)/, ' Rev. \1')
111
+ def select_data(**opts) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength,Metrics/PerceivedComplexity
99
112
  d = Date.strptime year, "%Y" if year
100
- # didrx = Regexp.new(docid)
113
+ statuses = %w[draft-public draft-prelim]
101
114
  data.select do |doc|
102
115
  next unless match_year?(doc, d)
103
116
 
104
117
  if /PD/.match? opts[:stage]
105
- next unless %w[draft-public draft-prelim].include? doc["status"]
118
+ next unless statuses.include? doc["status"]
106
119
  else
107
120
  next unless doc["status"] == "final"
108
121
  end
109
- doc["docidentifier"].include? docid
122
+ doc["docidentifier"].include? text
110
123
  end
111
124
  end
112
125
 
@@ -134,8 +147,8 @@ module RelatonNist
134
147
  #
135
148
  # @param ctime [Time, NilClass]
136
149
  def fetch_data(ctime)
137
- resp = OpenURI.open_uri("#{PUBS_EXPORT}.meta")
138
- if !ctime || ctime < resp.last_modified
150
+ # resp = OpenURI.open_uri("#{PUBS_EXPORT}.meta")
151
+ if !ctime || ctime < OpenURI.open_uri("#{PUBS_EXPORT}.meta").last_modified
139
152
  @data = nil
140
153
  uri_open = URI.method(:open) || Kernel.method(:open)
141
154
  FileUtils.mkdir_p DATAFILEDIR unless Dir.exist? DATAFILEDIR
@@ -67,7 +67,7 @@ module RelatonNist
67
67
  # @return [RelatonNist::GbBibliographicItem]
68
68
  def self.from_hash(hash)
69
69
  item_hash = RelatonNist::HashConverter.hash_to_bib(hash)
70
- new **item_hash
70
+ new(**item_hash)
71
71
  end
72
72
 
73
73
  # @param opts [Hash]
@@ -14,9 +14,9 @@ module RelatonNist
14
14
  # @param text [String]
15
15
  # @return [RelatonNist::HitCollection]
16
16
  def search(text, year = nil, opts = {})
17
- HitCollection.new text, year, opts
18
- rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError
19
- raise RelatonBib::RequestError, "Could not access https://www.nist.gov"
17
+ HitCollection.search text, year, opts
18
+ rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError => e
19
+ raise RelatonBib::RequestError, e.message
20
20
  end
21
21
 
22
22
  # @param code [String] the NIST standard Code to look up (e.g. "8200")
@@ -174,7 +174,17 @@ module RelatonNist
174
174
  }
175
175
  ref = matches[:code] ? "#{matches[:serie]} #{matches[:code]}" : code
176
176
  result = search(ref, year, opts)
177
- result.select { |i| search_filter i, matches, code }
177
+ selected_result = result.select { |i| search_filter i, matches, code }
178
+ return selected_result if selected_result.any? || !matches[:code]
179
+
180
+ search full_ref(matches)
181
+ end
182
+
183
+ def full_ref(matches)
184
+ ref = "#{matches[:serie]} #{matches[:code]}"
185
+ ref += long_to_short(matches[:prt1], matches[:prt2]).to_s
186
+ ref += long_to_short(matches[:vol1], matches[:vol2]).to_s
187
+ ref
178
188
  end
179
189
 
180
190
  def match(regex, code)
@@ -192,11 +202,11 @@ module RelatonNist
192
202
  (?<code>[0-9-]{3,}[A-Z]?)
193
203
  (?<prt1>pt\d+)?
194
204
  (?<vol1>v\d+)?
195
- (?<ver1>ver[\d\.]+)?
205
+ (?<ver1>ver[\d.]+)?
196
206
  (?<rev1>r\d+)?
197
207
  (\s(?<prt2>Part\s\d+))?
198
208
  (\s(?<vol2>Vol\.\s\d+))?
199
- (\s(?<ver2>(Ver\.|Version)\s[\d\.]+))?
209
+ (\s(?<ver2>(Ver\.|Version)\s[\d.]+))?
200
210
  (\s(?<rev2>Rev\.\s\d+))?
201
211
  (\s(?<add>Add)endum)?
202
212
  }x =~ item.hit[:code]
@@ -2,11 +2,12 @@ require "relaton/processor"
2
2
 
3
3
  module RelatonNist
4
4
  class Processor < Relaton::Processor
5
- def initialize
5
+ def initialize # rubocop:disable Lint/MissingSuper
6
6
  @short = :relaton_nist
7
7
  @prefix = "NIST"
8
8
  @defaultprefix = %r{^(NIST|NISTGCR|ITL Bulletin|JPCRD|NISTIR|CSRC|FIPS)(/[^\s])?\s}
9
9
  @idtype = "NIST"
10
+ @datasets = %w[nist-tech-pubs]
10
11
  end
11
12
 
12
13
  # @param code [String]
@@ -17,6 +18,18 @@ module RelatonNist
17
18
  ::RelatonNist::NistBibliography.get(code, date, opts)
18
19
  end
19
20
 
21
+ #
22
+ # Fetch all the documents from a source
23
+ #
24
+ # @param [String] _source source name
25
+ # @param [Hash] opts
26
+ # @option opts [String] :output directory to output documents
27
+ # @option opts [String] :format
28
+ #
29
+ def fetch_data(_source, opts)
30
+ DataFetcher.fetch(**opts)
31
+ end
32
+
20
33
  # @param xml [String]
21
34
  # @return [RelatonNist::GbBibliographicItem]
22
35
  def from_xml(xml)