relaton-nist 1.9.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/rspec +29 -0
- data/lib/relaton_nist/hit.rb +3 -1
- data/lib/relaton_nist/hit_collection.rb +56 -43
- data/lib/relaton_nist/nist_bibliographic_item.rb +1 -1
- data/lib/relaton_nist/nist_bibliography.rb +15 -5
- data/lib/relaton_nist/scrapper.rb +198 -265
- data/lib/relaton_nist/version.rb +1 -1
- data/relaton_nist.gemspec +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d440504b749834c27875dc8a1ee84346451578db5dd1588b75eafdb31087b64c
|
|
4
|
+
data.tar.gz: ecfb305d172a8afa171fa4493b1ffec2dd458fd413143a7f197034a392ddf451
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7af96feb236bddbabd7e90982637b608813d5c2163c7453f217c658035cf34b0ef80adc6441d6be213b77e62b5625d95fb10a30914fc94a1b6cf966b1c921bf6
|
|
7
|
+
data.tar.gz: 9b9daa02fc2ec7df33ad5c8cdf768c711b1bac0122191e7cfc79ee7c8ed2b7de00fbe6509b3347f387cf4868b24448f91f1a687e45b80fe65ce31d3f7a46cad5
|
data/bin/rspec
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
#
|
|
5
|
+
# This file was generated by Bundler.
|
|
6
|
+
#
|
|
7
|
+
# The application 'rspec' is installed as part of a gem, and
|
|
8
|
+
# this file is here to facilitate running it.
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
require "pathname"
|
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
|
13
|
+
Pathname.new(__FILE__).realpath)
|
|
14
|
+
|
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
|
16
|
+
|
|
17
|
+
if File.file?(bundle_binstub)
|
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
|
19
|
+
load(bundle_binstub)
|
|
20
|
+
else
|
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
require "rubygems"
|
|
27
|
+
require "bundler/setup"
|
|
28
|
+
|
|
29
|
+
load Gem.bin_path("rspec-core", "rspec")
|
data/lib/relaton_nist/hit.rb
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
module RelatonNist
|
|
4
4
|
# Hit.
|
|
5
5
|
class Hit < RelatonBib::Hit
|
|
6
|
+
attr_writer :fetch
|
|
7
|
+
|
|
6
8
|
# Parse page.
|
|
7
9
|
# @return [RelatonNist::NistBibliographicItem]
|
|
8
10
|
def fetch
|
|
@@ -10,7 +12,7 @@ module RelatonNist
|
|
|
10
12
|
end
|
|
11
13
|
|
|
12
14
|
# @return [Integer]
|
|
13
|
-
def sort_value
|
|
15
|
+
def sort_value # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
|
14
16
|
@sort_value ||= begin
|
|
15
17
|
sort_phrase = [hit[:serie], hit[:code], hit[:title]].join " "
|
|
16
18
|
corr = hit_collection&.text&.split&.map do |w|
|
|
@@ -13,6 +13,7 @@ module RelatonNist
|
|
|
13
13
|
PUBS_EXPORT = URI.join(DOMAIN, "/CSRC/media/feeds/metanorma/pubs-export")
|
|
14
14
|
DATAFILEDIR = File.expand_path ".relaton/nist", Dir.home
|
|
15
15
|
DATAFILE = File.expand_path "pubs-export.zip", DATAFILEDIR
|
|
16
|
+
GHNISTDATA = "https://raw.githubusercontent.com/relaton/relaton-data-nist/main/data/"
|
|
16
17
|
|
|
17
18
|
# @param ref_nbr [String]
|
|
18
19
|
# @param year [String]
|
|
@@ -21,9 +22,9 @@ module RelatonNist
|
|
|
21
22
|
def initialize(ref_nbr, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
22
23
|
super ref_nbr, year
|
|
23
24
|
|
|
24
|
-
/(?<docid>(?:SP|FIPS)\s[0-9-]+)/ =~ text
|
|
25
|
-
@array =
|
|
26
|
-
@array =
|
|
25
|
+
# /(?<docid>(?:SP|FIPS)\s[0-9-]+)/ =~ text
|
|
26
|
+
@array = from_json(**opts)
|
|
27
|
+
@array = from_ga unless @array.any?
|
|
27
28
|
|
|
28
29
|
@array.sort! do |a, b|
|
|
29
30
|
if a.sort_value == b.sort_value
|
|
@@ -36,52 +37,65 @@ module RelatonNist
|
|
|
36
37
|
|
|
37
38
|
private
|
|
38
39
|
|
|
40
|
+
def from_ga # rubocop:disable Metrics/AbcSize
|
|
41
|
+
fn = text.gsub(%r{[/\s:.]}, "_").upcase
|
|
42
|
+
yaml = OpenURI.open_uri "#{GHNISTDATA}#{fn}.yaml"
|
|
43
|
+
hash = YAML.safe_load yaml
|
|
44
|
+
bib = RelatonNist::NistBibliographicItem.from_hash hash
|
|
45
|
+
hit = Hit.new({ code: text }, self)
|
|
46
|
+
hit.fetch = bib
|
|
47
|
+
[hit]
|
|
48
|
+
rescue OpenURI::HTTPError => e
|
|
49
|
+
return [] if e.io.status[0] == "404"
|
|
50
|
+
|
|
51
|
+
raise e
|
|
52
|
+
end
|
|
53
|
+
|
|
39
54
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
40
55
|
|
|
41
56
|
# @param stage [String]
|
|
42
57
|
# @return [Array<RelatonNist::Hit>]
|
|
43
|
-
def from_csrc(**opts)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
end
|
|
58
|
+
# def from_csrc(**opts)
|
|
59
|
+
# from, to = nil
|
|
60
|
+
# if year
|
|
61
|
+
# d = Date.strptime year, "%Y"
|
|
62
|
+
# from = d.strftime "%m/%d/%Y"
|
|
63
|
+
# to = d.next_year.prev_day.strftime "%m/%d/%Y"
|
|
64
|
+
# end
|
|
65
|
+
# url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"\
|
|
66
|
+
# "&sortBy-lg=relevence"
|
|
67
|
+
# url += "&dateFrom-lg=#{from}" if from
|
|
68
|
+
# url += "&dateTo-lg=#{to}" if to
|
|
69
|
+
# url += if /PD/.match? opts[:stage]
|
|
70
|
+
# "&status-lg=Draft,Retired Draft,Withdrawn"
|
|
71
|
+
# else
|
|
72
|
+
# "&status-lg=Final,Withdrawn"
|
|
73
|
+
# end
|
|
74
|
+
|
|
75
|
+
# doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
|
|
76
|
+
# doc.css("table.publications-table > tbody > tr").map do |h|
|
|
77
|
+
# link = h.at("td/div/strong/a")
|
|
78
|
+
# serie = h.at("td[1]").text.strip
|
|
79
|
+
# code = h.at("td[2]").text.strip
|
|
80
|
+
# title = link.text
|
|
81
|
+
# doc_url = DOMAIN + link[:href]
|
|
82
|
+
# status = h.at("td[4]").text.strip.downcase
|
|
83
|
+
# release_date = Date.strptime h.at("td[5]").text.strip, "%m/%d/%Y"
|
|
84
|
+
# Hit.new(
|
|
85
|
+
# {
|
|
86
|
+
# code: code, serie: serie, title: title, url: doc_url,
|
|
87
|
+
# status: status, release_date: release_date
|
|
88
|
+
# }, self
|
|
89
|
+
# )
|
|
90
|
+
# end
|
|
91
|
+
# end
|
|
77
92
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
|
78
93
|
|
|
79
94
|
# Fetches data from json
|
|
80
|
-
# @param docid [String]
|
|
81
95
|
# @param stage [String]
|
|
82
96
|
# @return [Array<RelatonNist::Hit>]
|
|
83
|
-
def from_json(
|
|
84
|
-
select_data(
|
|
97
|
+
def from_json(**opts)
|
|
98
|
+
select_data(**opts).map do |h|
|
|
85
99
|
/(?<serie>(?<=-)\w+$)/ =~ h["series"]
|
|
86
100
|
title = [h["title-main"], h["title-sub"]].compact.join " - "
|
|
87
101
|
release_date = RelatonBib.parse_date h["published-date"], false
|
|
@@ -91,10 +105,9 @@ module RelatonNist
|
|
|
91
105
|
end
|
|
92
106
|
end
|
|
93
107
|
|
|
94
|
-
# @param docid [String]
|
|
95
108
|
# @param stage [String]
|
|
96
109
|
# @return [Array<Hash>]
|
|
97
|
-
def select_data(
|
|
110
|
+
def select_data(**opts) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength,Metrics/PerceivedComplexity
|
|
98
111
|
d = Date.strptime year, "%Y" if year
|
|
99
112
|
statuses = %w[draft-public draft-prelim]
|
|
100
113
|
data.select do |doc|
|
|
@@ -105,7 +118,7 @@ module RelatonNist
|
|
|
105
118
|
else
|
|
106
119
|
next unless doc["status"] == "final"
|
|
107
120
|
end
|
|
108
|
-
doc["docidentifier"].include?
|
|
121
|
+
doc["docidentifier"].include? text
|
|
109
122
|
end
|
|
110
123
|
end
|
|
111
124
|
|
|
@@ -15,8 +15,8 @@ module RelatonNist
|
|
|
15
15
|
# @return [RelatonNist::HitCollection]
|
|
16
16
|
def search(text, year = nil, opts = {})
|
|
17
17
|
HitCollection.new text, year, opts
|
|
18
|
-
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError
|
|
19
|
-
raise RelatonBib::RequestError,
|
|
18
|
+
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError => e
|
|
19
|
+
raise RelatonBib::RequestError, e.message
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
# @param code [String] the NIST standard Code to look up (e.g. "8200")
|
|
@@ -174,7 +174,17 @@ module RelatonNist
|
|
|
174
174
|
}
|
|
175
175
|
ref = matches[:code] ? "#{matches[:serie]} #{matches[:code]}" : code
|
|
176
176
|
result = search(ref, year, opts)
|
|
177
|
-
result.select { |i| search_filter i, matches, code }
|
|
177
|
+
selected_result = result.select { |i| search_filter i, matches, code }
|
|
178
|
+
return selected_result if selected_result.any? || !matches[:code]
|
|
179
|
+
|
|
180
|
+
search full_ref(matches)
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
def full_ref(matches)
|
|
184
|
+
ref = "#{matches[:serie]} #{matches[:code]}"
|
|
185
|
+
ref += long_to_short(matches[:prt1], matches[:prt2]).to_s
|
|
186
|
+
ref += long_to_short(matches[:vol1], matches[:vol2]).to_s
|
|
187
|
+
ref
|
|
178
188
|
end
|
|
179
189
|
|
|
180
190
|
def match(regex, code)
|
|
@@ -192,11 +202,11 @@ module RelatonNist
|
|
|
192
202
|
(?<code>[0-9-]{3,}[A-Z]?)
|
|
193
203
|
(?<prt1>pt\d+)?
|
|
194
204
|
(?<vol1>v\d+)?
|
|
195
|
-
(?<ver1>ver[\d
|
|
205
|
+
(?<ver1>ver[\d.]+)?
|
|
196
206
|
(?<rev1>r\d+)?
|
|
197
207
|
(\s(?<prt2>Part\s\d+))?
|
|
198
208
|
(\s(?<vol2>Vol\.\s\d+))?
|
|
199
|
-
(\s(?<ver2>(Ver\.|Version)\s[\d
|
|
209
|
+
(\s(?<ver2>(Ver\.|Version)\s[\d.]+))?
|
|
200
210
|
(\s(?<rev2>Rev\.\s\d+))?
|
|
201
211
|
(\s(?<add>Add)endum)?
|
|
202
212
|
}x =~ item.hit[:code]
|
|
@@ -11,23 +11,17 @@ module RelatonNist
|
|
|
11
11
|
# @param hit_data [Hash]
|
|
12
12
|
# @return [Hash]
|
|
13
13
|
def parse_page(hit_data)
|
|
14
|
-
item_data =
|
|
15
|
-
from_json hit_data
|
|
16
|
-
else
|
|
17
|
-
from_csrs hit_data
|
|
18
|
-
end
|
|
19
|
-
# doctype = "standard"
|
|
14
|
+
item_data = from_json hit_data
|
|
20
15
|
titles = fetch_titles(hit_data)
|
|
21
16
|
unless /^(SP|NISTIR|FIPS) /.match? item_data[:docid][0].id
|
|
22
|
-
# doctype = id_cleanup(item_data[:docid][0].id)
|
|
23
17
|
item_data[:docid][0] = RelatonBib::DocumentIdentifier.new(
|
|
24
|
-
id: titles[0][:content].upcase, type: "NIST"
|
|
18
|
+
id: titles[0][:content].upcase, type: "NIST",
|
|
25
19
|
)
|
|
26
20
|
end
|
|
27
21
|
item_data[:fetched] = Date.today.to_s
|
|
28
22
|
item_data[:type] = "standard"
|
|
29
23
|
item_data[:title] = titles
|
|
30
|
-
item_data[:doctype] = "standard"
|
|
24
|
+
item_data[:doctype] = "standard"
|
|
31
25
|
|
|
32
26
|
NistBibliographicItem.new(**item_data)
|
|
33
27
|
end
|
|
@@ -44,7 +38,7 @@ module RelatonNist
|
|
|
44
38
|
edition: fetch_edition(json),
|
|
45
39
|
language: [json["language"]],
|
|
46
40
|
script: [json["script"]],
|
|
47
|
-
docstatus: fetch_status(json, hit_data[:status]),
|
|
41
|
+
docstatus: fetch_status(json), # hit_data[:status]),
|
|
48
42
|
copyright: fetch_copyright(json["published-date"]),
|
|
49
43
|
relation: fetch_relations_json(json),
|
|
50
44
|
place: ["Gaithersburg, MD"],
|
|
@@ -53,119 +47,58 @@ module RelatonNist
|
|
|
53
47
|
}
|
|
54
48
|
end
|
|
55
49
|
|
|
56
|
-
def from_csrs(hit_data)
|
|
57
|
-
doc = get_page hit_data[:url]
|
|
58
|
-
{
|
|
59
|
-
# id: fetch_id(doc),
|
|
60
|
-
link: fetch_link(doc),
|
|
61
|
-
docid: fetch_docid(doc),
|
|
62
|
-
date: fetch_dates(doc, hit_data[:release_date]),
|
|
63
|
-
contributor: fetch_contributors(doc),
|
|
64
|
-
edition: fetch_edition(hit_data[:code]),
|
|
65
|
-
language: ["en"],
|
|
66
|
-
script: ["Latn"],
|
|
67
|
-
abstract: fetch_abstract(doc),
|
|
68
|
-
docstatus: fetch_status(doc, hit_data[:status]),
|
|
69
|
-
copyright: fetch_copyright(doc),
|
|
70
|
-
relation: fetch_relations(doc),
|
|
71
|
-
series: fetch_series(doc),
|
|
72
|
-
keyword: fetch_keywords(doc),
|
|
73
|
-
commentperiod: fetch_commentperiod(doc),
|
|
74
|
-
}
|
|
75
|
-
end
|
|
76
50
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
|
77
51
|
|
|
78
|
-
# Strip status from doc id
|
|
79
|
-
# @param id String
|
|
80
|
-
# @return String
|
|
81
|
-
# def id_cleanup(id)
|
|
82
|
-
# id.sub(/ \(WITHDRAWN\)/, "").sub(/ \(([^) ]+ )?DRAFT\)/i, "")
|
|
83
|
-
# end
|
|
84
|
-
|
|
85
|
-
# Get page.
|
|
86
|
-
# @param path [String] page's path
|
|
87
|
-
# @return [Array<Nokogiri::HTML::Document, String>]
|
|
88
|
-
def get_page(url)
|
|
89
|
-
uri = URI url
|
|
90
|
-
resp = Net::HTTP.get_response(uri)
|
|
91
|
-
%r{(?<=newLocation = 'https://' \+ window.location.hostname \+ ')(?<path>[^']+)} =~ resp.body
|
|
92
|
-
if path
|
|
93
|
-
uri = URI HitCollection::DOMAIN + path
|
|
94
|
-
resp = Net::HTTP.get_response(uri)
|
|
95
|
-
end
|
|
96
|
-
Nokogiri::HTML(resp.body)
|
|
97
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
|
98
|
-
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
|
99
|
-
Net::ProtocolError, OpenSSL::SSL::SSLError
|
|
100
|
-
raise RelatonBib::RequestError, "Could not access #{url}"
|
|
101
|
-
end
|
|
102
|
-
|
|
103
52
|
# Fetch docid.
|
|
104
|
-
# @param
|
|
53
|
+
# @param docid [String]
|
|
105
54
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
|
106
|
-
def fetch_docid(
|
|
107
|
-
item_ref =
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
"//div[contains(@class, 'publications-detail')]/h3"
|
|
111
|
-
)&.text&.strip&.sub(/(?<=\w)\([^\)]+\)$/) do |m|
|
|
112
|
-
" " + m.upcase
|
|
113
|
-
end&.squeeze(" ")&.gsub(/ |\n|\r/, "")
|
|
114
|
-
end
|
|
115
|
-
item_ref ||= "?"
|
|
116
|
-
item_ref.sub! /\sAddendum$/, "-Add"
|
|
55
|
+
def fetch_docid(docid)
|
|
56
|
+
item_ref = docid
|
|
57
|
+
# item_ref ||= "?"
|
|
58
|
+
item_ref.sub!(/\sAddendum$/, "-Add")
|
|
117
59
|
[RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
|
|
118
60
|
end
|
|
119
61
|
|
|
120
|
-
# Fetch id.
|
|
121
|
-
# @param doc [Nokogiri::HTML::Document]
|
|
122
|
-
# @return [String]
|
|
123
|
-
# def fetch_id(doc)
|
|
124
|
-
# doc.at("//div[contains(@class, 'publications-detail')]/h3").text.
|
|
125
|
-
# strip.gsub(/\s/, "")
|
|
126
|
-
# end
|
|
127
|
-
|
|
128
62
|
# Fetch status.
|
|
129
|
-
# @param doc [
|
|
130
|
-
# @param status [String]
|
|
63
|
+
# @param doc [Hash]
|
|
131
64
|
# @return [RelatonNist::DocumentStatus]
|
|
132
|
-
def fetch_status(doc, status)
|
|
133
|
-
if doc.is_a? Hash
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
else
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
end
|
|
65
|
+
def fetch_status(doc) # , status)
|
|
66
|
+
# if doc.is_a? Hash
|
|
67
|
+
stage = doc["status"]
|
|
68
|
+
subst = doc["substage"]
|
|
69
|
+
iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
|
|
70
|
+
# else
|
|
71
|
+
# case status
|
|
72
|
+
# when "draft (obsolete)"
|
|
73
|
+
# stage = "draft-public"
|
|
74
|
+
# subst = "withdrawn"
|
|
75
|
+
# when "retired draft"
|
|
76
|
+
# stage = "draft-public"
|
|
77
|
+
# subst = "retired"
|
|
78
|
+
# when "withdrawn"
|
|
79
|
+
# stage = "final"
|
|
80
|
+
# subst = "withdrawn"
|
|
81
|
+
# when /^draft/
|
|
82
|
+
# stage = "draft-public"
|
|
83
|
+
# subst = "active"
|
|
84
|
+
# else
|
|
85
|
+
# stage = status
|
|
86
|
+
# subst = "active"
|
|
87
|
+
# end
|
|
88
|
+
|
|
89
|
+
# iter = nil
|
|
90
|
+
# if stage.include? "draft"
|
|
91
|
+
# iter = 1
|
|
92
|
+
# history = doc.xpath("//span[@id='pub-history-container']/a"\
|
|
93
|
+
# "|//span[@id='pub-history-container']/span")
|
|
94
|
+
# history.each_with_index do |h, idx|
|
|
95
|
+
# next if h.name == "a"
|
|
96
|
+
|
|
97
|
+
# iter = idx + 1 if idx.positive?
|
|
98
|
+
# break
|
|
99
|
+
# end
|
|
100
|
+
# end
|
|
101
|
+
# end
|
|
169
102
|
|
|
170
103
|
RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter.to_s
|
|
171
104
|
end
|
|
@@ -179,49 +112,49 @@ module RelatonNist
|
|
|
179
112
|
end
|
|
180
113
|
|
|
181
114
|
# Fetch dates
|
|
182
|
-
# @param doc [
|
|
115
|
+
# @param doc [Hash]
|
|
183
116
|
# @param release_date [Date]
|
|
184
117
|
# @return [Array<Hash>]
|
|
185
118
|
def fetch_dates(doc, release_date) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
186
119
|
dates = [{ type: "published", on: release_date.to_s }]
|
|
187
120
|
|
|
188
|
-
if doc.is_a? Hash
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
else
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
end
|
|
121
|
+
# if doc.is_a? Hash
|
|
122
|
+
issued = RelatonBib.parse_date doc["issued-date"]
|
|
123
|
+
updated = RelatonBib.parse_date doc["updated-date"]
|
|
124
|
+
dates << { type: "updated", on: updated.to_s } if updated
|
|
125
|
+
obsoleted = RelatonBib.parse_date doc["obsoleted-date"]
|
|
126
|
+
dates << { type: "obsoleted", on: obsoleted.to_s } if obsoleted
|
|
127
|
+
# else
|
|
128
|
+
# d = doc.at("//span[@id='pub-release-date']")&.text&.strip
|
|
129
|
+
# issued = RelatonBib.parse_date d
|
|
130
|
+
# end
|
|
198
131
|
dates << { type: "issued", on: issued.to_s }
|
|
199
132
|
dates
|
|
200
133
|
end
|
|
201
134
|
|
|
202
135
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
203
|
-
# @param doc [
|
|
136
|
+
# @param doc [Hash]
|
|
204
137
|
# @return [Array<RelatonBib::ContributionInfo>]
|
|
205
138
|
def fetch_contributors(doc)
|
|
206
139
|
contribs = []
|
|
207
|
-
if doc.is_a? Hash
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
else
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
end
|
|
140
|
+
# if doc.is_a? Hash
|
|
141
|
+
contribs += contributors_json(
|
|
142
|
+
doc["authors"], "author", doc["language"], doc["script"]
|
|
143
|
+
)
|
|
144
|
+
contribs + contributors_json(
|
|
145
|
+
doc["editors"], "editor", doc["language"], doc["script"]
|
|
146
|
+
)
|
|
147
|
+
# else
|
|
148
|
+
# name = "National Institute of Standards and Technology"
|
|
149
|
+
# org = RelatonBib::Organization.new(
|
|
150
|
+
# name: name, url: "www.nist.gov", abbreviation: "NIST",
|
|
151
|
+
# )
|
|
152
|
+
# contribs << RelatonBib::ContributionInfo.new(entity: org, role: [type: "publisher"])
|
|
153
|
+
# authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
|
|
154
|
+
# contribs += contributors(authors, "author")
|
|
155
|
+
# editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
|
|
156
|
+
# contribs + contributors(editors, "editor")
|
|
157
|
+
# end
|
|
225
158
|
end
|
|
226
159
|
|
|
227
160
|
# @param doc [Array<Hash>]
|
|
@@ -256,37 +189,37 @@ module RelatonNist
|
|
|
256
189
|
# @param doc [Nokogiri::HTML::Element, Array<Hash>]
|
|
257
190
|
# @param role [String]
|
|
258
191
|
# @return [Array<RelatonBib::ContributionInfo>]
|
|
259
|
-
def contributors(doc, role, lang = "en", script = "Latn")
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
end
|
|
192
|
+
# def contributors(doc, role, lang = "en", script = "Latn")
|
|
193
|
+
# return [] if doc.nil?
|
|
194
|
+
|
|
195
|
+
# doc.text.split(", ").map do |contr|
|
|
196
|
+
# /(?<an>.+?)(\s+\((?<abbrev>.+?)\))?$/ =~ contr.strip
|
|
197
|
+
# if abbrev && an.downcase !~ /(task|force|group)/ && an.split.size.between?(2, 3)
|
|
198
|
+
# fullname = RelatonBib::FullName.new(
|
|
199
|
+
# completename: RelatonBib::LocalizedString.new(an, lang, script)
|
|
200
|
+
# )
|
|
201
|
+
# case abbrev
|
|
202
|
+
# when "NIST"
|
|
203
|
+
# org_name = "National Institute of Standards and Technology"
|
|
204
|
+
# url = "www.nist.gov"
|
|
205
|
+
# when "MITRE"
|
|
206
|
+
# org_name = abbrev
|
|
207
|
+
# url = "www.mitre.org"
|
|
208
|
+
# else
|
|
209
|
+
# org_name = abbrev
|
|
210
|
+
# url = nil
|
|
211
|
+
# end
|
|
212
|
+
# org = RelatonBib::Organization.new name: org_name, url: url, abbreviation: abbrev
|
|
213
|
+
# affiliation = RelatonBib::Affiliation.new organization: org
|
|
214
|
+
# entity = RelatonBib::Person.new(
|
|
215
|
+
# name: fullname, affiliation: [affiliation],
|
|
216
|
+
# )
|
|
217
|
+
# else
|
|
218
|
+
# entity = RelatonBib::Organization.new name: an, abbreviation: abbrev
|
|
219
|
+
# end
|
|
220
|
+
# RelatonBib::ContributionInfo.new entity: entity, role: [type: role]
|
|
221
|
+
# end
|
|
222
|
+
# end
|
|
290
223
|
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/MethodLength
|
|
291
224
|
|
|
292
225
|
# @param name [Hash]
|
|
@@ -313,16 +246,16 @@ module RelatonNist
|
|
|
313
246
|
[RelatonBib::LocalizedString.new(part, lang, script)]
|
|
314
247
|
end
|
|
315
248
|
|
|
316
|
-
# @param doc [
|
|
249
|
+
# @param doc [Hash]
|
|
317
250
|
# @return [String, NilClass]
|
|
318
251
|
def fetch_edition(doc)
|
|
319
|
-
if doc.is_a? Hash
|
|
320
|
-
|
|
252
|
+
# if doc.is_a? Hash
|
|
253
|
+
return unless doc["edition"]
|
|
321
254
|
|
|
322
|
-
|
|
323
|
-
else
|
|
324
|
-
|
|
325
|
-
end
|
|
255
|
+
rev = doc["edition"]
|
|
256
|
+
# else
|
|
257
|
+
# return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ doc
|
|
258
|
+
# end
|
|
326
259
|
|
|
327
260
|
"Revision #{rev}"
|
|
328
261
|
end
|
|
@@ -330,17 +263,17 @@ module RelatonNist
|
|
|
330
263
|
# Fetch abstracts.
|
|
331
264
|
# @param doc [Nokogiri::HTML::Document]
|
|
332
265
|
# @return [Array<Hash>]
|
|
333
|
-
def fetch_abstract(doc)
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
end
|
|
266
|
+
# def fetch_abstract(doc)
|
|
267
|
+
# abstract_content = doc.xpath(
|
|
268
|
+
# '//div[contains(@class, "pub-abstract-callout")]/div[1]/p',
|
|
269
|
+
# ).text
|
|
270
|
+
# [{
|
|
271
|
+
# content: abstract_content,
|
|
272
|
+
# language: "en",
|
|
273
|
+
# script: "Latn",
|
|
274
|
+
# format: "text/plain",
|
|
275
|
+
# }]
|
|
276
|
+
# end
|
|
344
277
|
|
|
345
278
|
# Fetch copyright.
|
|
346
279
|
# @param doc [Nokogiri::HTML::Document, String]
|
|
@@ -348,30 +281,30 @@ module RelatonNist
|
|
|
348
281
|
def fetch_copyright(doc)
|
|
349
282
|
name = "National Institute of Standards and Technology"
|
|
350
283
|
url = "www.nist.gov"
|
|
351
|
-
d = if doc.is_a? String then doc
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
from =
|
|
284
|
+
# d = if doc.is_a? String then doc
|
|
285
|
+
# else
|
|
286
|
+
# doc.at("//span[@id='pub-release-date']")&.text&.strip
|
|
287
|
+
# end
|
|
288
|
+
from = doc&.match(/\d{4}/)&.to_s
|
|
356
289
|
[{ owner: [{ name: name, abbreviation: "NIST", url: url }], from: from }]
|
|
357
290
|
end
|
|
358
291
|
|
|
359
292
|
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
|
360
293
|
|
|
361
294
|
# Fetch links.
|
|
362
|
-
# @param doc [
|
|
295
|
+
# @param doc [Hash]
|
|
363
296
|
# @return [Array<Hash>]
|
|
364
297
|
def fetch_link(doc)
|
|
365
298
|
links = []
|
|
366
|
-
if doc.is_a? Hash
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
else
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
end
|
|
299
|
+
# if doc.is_a? Hash
|
|
300
|
+
links << { type: "uri", content: doc["uri"] } if doc["uri"]
|
|
301
|
+
doi = "https://doi.org/" + doc["doi"] if doc["doi"]
|
|
302
|
+
# else
|
|
303
|
+
# pub = doc.at "//p/strong[contains(., 'Publication:')]"
|
|
304
|
+
# pdf = pub&.at "./following-sibling::a[.=' Local Download']"
|
|
305
|
+
# doi = pub&.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
|
|
306
|
+
# links << { type: "pdf", content: pdf[:href] } if pdf
|
|
307
|
+
# end
|
|
375
308
|
links << { type: "doi", content: doi } if doi
|
|
376
309
|
links
|
|
377
310
|
end
|
|
@@ -380,19 +313,19 @@ module RelatonNist
|
|
|
380
313
|
# Fetch relations.
|
|
381
314
|
# @param doc [Nokogiri::HTML::Document]
|
|
382
315
|
# @return [Array<RelatonNist::DocumentRelation>]
|
|
383
|
-
def fetch_relations(doc)
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
end
|
|
316
|
+
# def fetch_relations(doc)
|
|
317
|
+
# relations = doc.xpath('//span[@id="pub-supersedes-container"]/a').map do |r|
|
|
318
|
+
# doc_relation "supersedes", r.text, DOMAIN + r[:href]
|
|
319
|
+
# end
|
|
320
|
+
|
|
321
|
+
# relations += doc.xpath('//span[@id="pub-part-container"]/a').map do |r|
|
|
322
|
+
# doc_relation "partOf", r.text, DOMAIN + r[:href]
|
|
323
|
+
# end
|
|
324
|
+
|
|
325
|
+
# relations + doc.xpath('//span[@id="pub-related-container"]/a').map do |r|
|
|
326
|
+
# doc_relation "updates", r.text, DOMAIN + r[:href]
|
|
327
|
+
# end
|
|
328
|
+
# end
|
|
396
329
|
# rubocop:enable Metrics/AbcSize
|
|
397
330
|
|
|
398
331
|
def fetch_relations_json(doc)
|
|
@@ -425,62 +358,62 @@ module RelatonNist
|
|
|
425
358
|
|
|
426
359
|
# @param doc [Nokogiri::HTML::Document]
|
|
427
360
|
# @return [Array<RelatonBib::Series>]
|
|
428
|
-
def fetch_series(doc)
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
end
|
|
361
|
+
# def fetch_series(doc)
|
|
362
|
+
# series = doc.xpath "//span[@id='pub-history-container']/a"\
|
|
363
|
+
# "|//span[@id='pub-history-container']/span"
|
|
364
|
+
# series.map.with_index do |s, idx|
|
|
365
|
+
# next if s.name == "span"
|
|
366
|
+
|
|
367
|
+
# iter = if idx.zero? then "I"
|
|
368
|
+
# else idx + 1
|
|
369
|
+
# end
|
|
370
|
+
|
|
371
|
+
# content = s.text.match(/^[^\(]+/).to_s.strip.squeeze " "
|
|
372
|
+
|
|
373
|
+
# ref = case s.text
|
|
374
|
+
# when /^Draft/
|
|
375
|
+
# content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
|
|
376
|
+
# when /\(Draft\)/ then content + " (#{iter}PD)"
|
|
377
|
+
# else content
|
|
378
|
+
# end
|
|
379
|
+
|
|
380
|
+
# fref = RelatonBib::FormattedRef.new(
|
|
381
|
+
# content: ref, language: "en", script: "Latn", format: "text/plain",
|
|
382
|
+
# )
|
|
383
|
+
# RelatonBib::Series.new(formattedref: fref)
|
|
384
|
+
# end.select { |s| s }
|
|
385
|
+
# end
|
|
453
386
|
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
|
454
387
|
|
|
455
|
-
# @param doc [
|
|
388
|
+
# @param doc [Hash]
|
|
456
389
|
# @return [Array<RelatonNist::Keyword>]
|
|
457
390
|
def fetch_keywords(doc)
|
|
458
|
-
kws = if doc.is_a? Hash
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
391
|
+
# kws = if doc.is_a? Hash
|
|
392
|
+
# doc["keywords"]
|
|
393
|
+
# else
|
|
394
|
+
# doc.xpath "//span[@id='pub-keywords-container']/span"
|
|
395
|
+
# end
|
|
396
|
+
doc["keywords"].map { |kw| kw.is_a?(String) ? kw : kw.text }
|
|
464
397
|
end
|
|
465
398
|
|
|
466
399
|
# rubocop:disable Metrics/AbcSize
|
|
467
400
|
# @param doc [Nokogiri::HTML::Document]
|
|
468
401
|
# @return [RelatonNist::CommentPeriod, NilClass]
|
|
469
|
-
def fetch_commentperiod(doc)
|
|
470
|
-
|
|
471
|
-
|
|
402
|
+
# def fetch_commentperiod(doc)
|
|
403
|
+
# cp = doc.at "//span[@id='pub-comments-due']"
|
|
404
|
+
# return unless cp
|
|
472
405
|
|
|
473
|
-
|
|
406
|
+
# to = Date.strptime cp.text.strip, "%B %d, %Y"
|
|
474
407
|
|
|
475
|
-
|
|
476
|
-
|
|
408
|
+
# d = doc.at("//span[@id='pub-release-date']").text.strip
|
|
409
|
+
# from = Date.strptime(d, "%B %Y").to_s
|
|
477
410
|
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
end
|
|
411
|
+
# ex = doc.at "//strong[contains(.,'The comment closing date has been "\
|
|
412
|
+
# "extended to')]"
|
|
413
|
+
# ext = ex&.text&.match(/\w+\s\d{2},\s\d{4}/).to_s
|
|
414
|
+
# extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
|
|
415
|
+
# CommentPeriod.new from: from, to: to, extended: extended
|
|
416
|
+
# end
|
|
484
417
|
# rubocop:enable Metrics/AbcSize
|
|
485
418
|
|
|
486
419
|
# @param json [Hash]
|
data/lib/relaton_nist/version.rb
CHANGED
data/relaton_nist.gemspec
CHANGED
|
@@ -25,7 +25,7 @@ Gem::Specification.new do |spec|
|
|
|
25
25
|
|
|
26
26
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
|
27
27
|
spec.add_development_dependency "pry-byebug"
|
|
28
|
-
spec.add_development_dependency "rake", "~>
|
|
28
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
|
29
29
|
spec.add_development_dependency "rspec", "~> 3.0"
|
|
30
30
|
spec.add_development_dependency "ruby-jing"
|
|
31
31
|
spec.add_development_dependency "simplecov"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-nist
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.9.
|
|
4
|
+
version: 1.9.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-08-
|
|
11
|
+
date: 2021-08-30 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: equivalent-xml
|
|
@@ -44,14 +44,14 @@ dependencies:
|
|
|
44
44
|
requirements:
|
|
45
45
|
- - "~>"
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: '
|
|
47
|
+
version: '13.0'
|
|
48
48
|
type: :development
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: '
|
|
54
|
+
version: '13.0'
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: rspec
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -166,6 +166,7 @@ files:
|
|
|
166
166
|
- README.adoc
|
|
167
167
|
- Rakefile
|
|
168
168
|
- bin/console
|
|
169
|
+
- bin/rspec
|
|
169
170
|
- bin/setup
|
|
170
171
|
- grammars/basicdoc.rng
|
|
171
172
|
- grammars/biblio.rng
|