relaton-nist 1.9.0 → 1.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +2 -0
- data/README.adoc +30 -0
- data/bin/rspec +29 -0
- data/lib/relaton_nist/data_fetcher.rb +217 -0
- data/lib/relaton_nist/hit.rb +3 -1
- data/lib/relaton_nist/hit_collection.rb +64 -50
- data/lib/relaton_nist/nist_bibliographic_item.rb +1 -1
- data/lib/relaton_nist/nist_bibliography.rb +16 -6
- data/lib/relaton_nist/processor.rb +14 -1
- data/lib/relaton_nist/scrapper.rb +45 -301
- data/lib/relaton_nist/version.rb +1 -1
- data/lib/relaton_nist.rb +1 -0
- data/relaton_nist.gemspec +2 -2
- metadata +10 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 847b161ffdf47b18befc71b090e2397e815594f4c20820b057c93608d171487f
|
4
|
+
data.tar.gz: 73748bbb1ff975876b6346f24a89ec235f4224d61aa4f714e13b8b981ece81cd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 57640ab1c61feddcf2ac798128d46aff89b190670efa11b624afe9270c94c832c4f53b1cae07dc44f2d189d14b828c1ef1cd1b55d40ac5b94bd65954cd875efa
|
7
|
+
data.tar.gz: 8df687fcfae413e7efa74d6fbe1fbda4835d61b2c6e3751629365497e83e5f288ec5dcebce0b4472193b81a734d5f401babf1472c212370cfc817b8bc7d130bc
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/README.adoc
CHANGED
@@ -147,6 +147,17 @@ item.docidentifier.first.id
|
|
147
147
|
=> "SP 800-38A-Add"
|
148
148
|
----
|
149
149
|
|
150
|
+
=== Typed links
|
151
|
+
|
152
|
+
NIST documents may have `src` and `doi` link types.
|
153
|
+
|
154
|
+
[source,ruby]
|
155
|
+
----
|
156
|
+
item.link
|
157
|
+
=> [#<RelatonBib::TypedUri:0x00007f901971dc10 @content=#<Addressable::URI:0x62c URI:https://csrc.nist.gov/publications/detail/sp/800-67/rev-2/final>, @type="src">,
|
158
|
+
#<RelatonBib::TypedUri:0x00007f901971d6e8 @content=#<Addressable::URI:0x640 URI:https://doi.org/10.6028/NIST.SP.800-67r2>, @type="doi">]
|
159
|
+
----
|
160
|
+
|
150
161
|
=== Create bibliographic item from YAML
|
151
162
|
[source,ruby]
|
152
163
|
----
|
@@ -159,6 +170,25 @@ RelatonNist::NistBibliographicItem.from_hash hash
|
|
159
170
|
...
|
160
171
|
----
|
161
172
|
|
173
|
+
=== Fetch data
|
174
|
+
|
175
|
+
This gem uses the https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml dataset as one of its data sources.
|
176
|
+
|
177
|
+
The method `RelatonNist::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
178
|
+
Arguments:
|
179
|
+
|
180
|
+
- `output` - folder to save documents (default './data').
|
181
|
+
- `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
|
182
|
+
|
183
|
+
[source,ruby]
|
184
|
+
----
|
185
|
+
RelatonNist::DataFetcher.fetch
|
186
|
+
Started at: 2021-09-01 18:01:01 +0200
|
187
|
+
Stopped at: 2021-09-01 18:01:43 +0200
|
188
|
+
Done in: 42 sec.
|
189
|
+
=> nil
|
190
|
+
----
|
191
|
+
|
162
192
|
== Development
|
163
193
|
|
164
194
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/bin/rspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rspec' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("rspec-core", "rspec")
|
@@ -0,0 +1,217 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "yaml"
|
4
|
+
|
5
|
+
module RelatonNist
|
6
|
+
class DataFetcher
|
7
|
+
RELATION_TYPES = {
|
8
|
+
"replaces" => "obsoletes",
|
9
|
+
"isVersionOf" => "editionOf",
|
10
|
+
"hasTranslation" => "hasTranslation",
|
11
|
+
"isTranslationOf" => "translatedFrom",
|
12
|
+
"hasPreprint" => "hasReprint",
|
13
|
+
"isSupplementTo" => "complements",
|
14
|
+
}.freeze
|
15
|
+
URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"
|
16
|
+
|
17
|
+
def initialize(output, format)
|
18
|
+
@output = output
|
19
|
+
@format = format
|
20
|
+
@ext = format.sub(/^bib/, "")
|
21
|
+
end
|
22
|
+
|
23
|
+
def parse_docid(doc)
|
24
|
+
doi = doc.at("doi_data/doi").text
|
25
|
+
id = doc.at("publisher_item/item_number", "publisher_item/identifier").text.sub(%r{^/}, "")
|
26
|
+
case doi
|
27
|
+
when "10.6028/NBS.CIRC.12e2revjune" then id.sub!("13e", "12e")
|
28
|
+
when "10.6028/NBS.CIRC.36e2" then id.sub!("46e", "36e")
|
29
|
+
when "10.6028/NBS.HB.67suppJune1967" then id.sub!("1965", "1967")
|
30
|
+
when "10.6028/NBS.HB.105-1r1990" then id.sub!("105-1-1990", "105-1r1990")
|
31
|
+
when "10.6028/NIST.HB.150-10-1995" then id.sub!(/150-10$/, "150-10-1995")
|
32
|
+
end
|
33
|
+
[{ type: "NIST", id: id }, { type: "DOI", id: doi }]
|
34
|
+
end
|
35
|
+
|
36
|
+
# @param doc [Nokogiri::XML::Element]
|
37
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
38
|
+
def fetch_docid(doc)
|
39
|
+
parse_docid(doc).map do |id|
|
40
|
+
RelatonBib::DocumentIdentifier.new(type: id[:type], id: id[:id])
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# @param doc [Nokogiri::XML::Element]
|
45
|
+
# @return [RelatonBib::TypedTitleStringCollection, Array]
|
46
|
+
def fetch_title(doc)
|
47
|
+
t = doc.xpath("titles/title|titles/subtitle")
|
48
|
+
return [] unless t.any?
|
49
|
+
|
50
|
+
RelatonBib::TypedTitleString.from_string t.map(&:text).join(" "), "en", "Latn"
|
51
|
+
end
|
52
|
+
|
53
|
+
# @param doc [Nokogiri::XML::Element]
|
54
|
+
# @return [Array<RelatonBib::BibliographicDate>]
|
55
|
+
def fetch_date(doc)
|
56
|
+
doc.xpath("publication_date|approval_date").map do |dt|
|
57
|
+
on = dt.at("year").text
|
58
|
+
if (m = dt.at "month")
|
59
|
+
on += "-#{m.text}"
|
60
|
+
d = dt.at "day"
|
61
|
+
on += "-#{d.text}" if d
|
62
|
+
end
|
63
|
+
type = dt.name == "publication_date" ? "published" : "confirmed"
|
64
|
+
RelatonBib::BibliographicDate.new(type: type, on: on)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# @param doc [Nokogiri::XML::Element]
|
69
|
+
# @return [String]
|
70
|
+
def fetch_edition(doc)
|
71
|
+
doc.at("edition_number")&.text
|
72
|
+
end
|
73
|
+
|
74
|
+
# @param doc [Nokogiri::XML::Element]
|
75
|
+
# @return [Array<Hash>]
|
76
|
+
def fetch_relation(doc)
|
77
|
+
ns = "http://www.crossref.org/relations.xsd"
|
78
|
+
doc.xpath("./ns:program/ns:related_item", ns: ns).map do |rel|
|
79
|
+
doi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: ns)
|
80
|
+
# ref = doi_to_id doi.text
|
81
|
+
# ref, = parse_docid doc
|
82
|
+
fref = RelatonBib::FormattedRef.new content: doi.text
|
83
|
+
bibitem = RelatonBib::BibliographicItem.new formattedref: fref
|
84
|
+
type = RELATION_TYPES[doi["relationship-type"]]
|
85
|
+
{ type: type, bibitem: bibitem }
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# @param doc [Nokogiri::XML::Element]
|
90
|
+
# @return [Array<RelatonBib::TypedUri>]
|
91
|
+
def fetch_link(doc)
|
92
|
+
url = doc.at("doi_data/resource").text
|
93
|
+
[RelatonBib::TypedUri.new(type: "doi", content: url)]
|
94
|
+
end
|
95
|
+
|
96
|
+
# @param doc [Nokogiri::XML::Element]
|
97
|
+
# @return [Array<RelatonBib::FormattedString>]
|
98
|
+
def fetch_abstract(doc)
|
99
|
+
doc.xpath("jats:abstract/jats:p", "jats" => "http://www.ncbi.nlm.nih.gov/JATS1").map do |a|
|
100
|
+
RelatonBib::FormattedString.new(content: a.text, language: doc["language"], script: "Latn")
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# @param doc [Nokogiri::XML::Element]
|
105
|
+
# @return [Array<Hash>]
|
106
|
+
def fetch_contributor(doc) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
107
|
+
contribs = doc.xpath("contributors/person_name").map do |p|
|
108
|
+
forename = []
|
109
|
+
initial = []
|
110
|
+
p.at("given_name")&.text&.split&.each do |fn|
|
111
|
+
if /^(?<init>\w)\.?$/ =~ fn
|
112
|
+
initial << RelatonBib::LocalizedString.new(init, doc["language"], "Latn")
|
113
|
+
else
|
114
|
+
forename << RelatonBib::LocalizedString.new(fn, doc["language"], "Latn")
|
115
|
+
end
|
116
|
+
end
|
117
|
+
sname = p.at("surname").text
|
118
|
+
surname = RelatonBib::LocalizedString.new sname, doc["language"], "Latn"
|
119
|
+
initial = []
|
120
|
+
ident = p.xpath("ORCID").map do |id|
|
121
|
+
RelatonBib::PersonIdentifier.new "orcid", id.text
|
122
|
+
end
|
123
|
+
fullname = RelatonBib::FullName.new(
|
124
|
+
surname: surname, forename: forename, initial: initial, identifier: ident,
|
125
|
+
)
|
126
|
+
person = RelatonBib::Person.new name: fullname
|
127
|
+
{ entity: person, role: [{ type: p["contributor_role"] }] }
|
128
|
+
end
|
129
|
+
contribs + doc.xpath("publisher").map do |p|
|
130
|
+
abbr = p.at("../institution/institution_acronym")&.text
|
131
|
+
org = RelatonBib::Organization.new(name: p.at("publisher_name").text, abbreviation: abbr)
|
132
|
+
{ entity: org, role: [{ type: "publisher" }] }
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
# @param doc [Nokogiri::XML::Element]
|
137
|
+
# @return [Array<String>]
|
138
|
+
def fetch_place(doc)
|
139
|
+
doc.xpath("institution/institution_place").map(&:text)
|
140
|
+
end
|
141
|
+
|
142
|
+
#
|
143
|
+
# Save document
|
144
|
+
#
|
145
|
+
# @param bib [RelatonNist::NistBibliographicItem]
|
146
|
+
#
|
147
|
+
def write_file(bib) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
148
|
+
id = bib.docidentifier[0].id.gsub(%r{[/\s:.]}, "_").upcase.sub(/^NIST_IR/, "NISTIR")
|
149
|
+
file = File.join(@output, "#{id}.#{@ext}")
|
150
|
+
if File.exist? file
|
151
|
+
warn "File #{file} exists. Docid: #{bib.docidentifier[0].id}"
|
152
|
+
# warn "Link: #{bib.link.detect { |l| l.type == 'src' }.content}"
|
153
|
+
else
|
154
|
+
output = case @format
|
155
|
+
when "yaml" then bib.to_hash.to_yaml
|
156
|
+
when "xml" then bib.to_xml bibdata: true
|
157
|
+
else bib.send "to_#{@format}"
|
158
|
+
end
|
159
|
+
File.write file, output, encoding: "UTF-8"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
#
|
164
|
+
# Create a document instance and save it.
|
165
|
+
#
|
166
|
+
# @param doc [Nokogiri::XML::Element]
|
167
|
+
#
|
168
|
+
# @raise [StandardError]
|
169
|
+
#
|
170
|
+
def parse_doc(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
171
|
+
# mtd = doc.at('doi_record/report-paper/report-paper_metadata')
|
172
|
+
item = RelatonNist::NistBibliographicItem.new(
|
173
|
+
type: "standard", docid: fetch_docid(doc), title: fetch_title(doc),
|
174
|
+
link: fetch_link(doc), abstract: fetch_abstract(doc),
|
175
|
+
date: fetch_date(doc), edition: fetch_edition(doc),
|
176
|
+
contributor: fetch_contributor(doc), relation: fetch_relation(doc),
|
177
|
+
place: fetch_place(doc),
|
178
|
+
language: [doc["language"]], script: ["Latn"], doctype: "standard"
|
179
|
+
)
|
180
|
+
write_file item
|
181
|
+
rescue StandardError => e
|
182
|
+
warn "Document: #{doc.at('doi').text}"
|
183
|
+
warn e.message
|
184
|
+
raise e
|
185
|
+
end
|
186
|
+
|
187
|
+
#
|
188
|
+
# Fetch all the documents from the dataset
|
189
|
+
#
|
190
|
+
def fetch # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
191
|
+
t1 = Time.now
|
192
|
+
puts "Started at: #{t1}"
|
193
|
+
|
194
|
+
docs = Nokogiri::XML OpenURI.open_uri URL
|
195
|
+
FileUtils.mkdir @output unless Dir.exist? @output
|
196
|
+
FileUtils.rm Dir[File.join(@output, "*.#{@ext}")]
|
197
|
+
docs.xpath("/body/query/doi_record/report-paper/report-paper_metadata")
|
198
|
+
.each { |doc| parse_doc doc }
|
199
|
+
|
200
|
+
t2 = Time.now
|
201
|
+
puts "Stopped at: #{t2}"
|
202
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
203
|
+
rescue StandardError => e
|
204
|
+
warn e.message
|
205
|
+
end
|
206
|
+
|
207
|
+
#
|
208
|
+
# Fetch all the documents from the dataset
|
209
|
+
#
|
210
|
+
# @param [String] output folder name to save the documents
|
211
|
+
# @param [String] format format to save the documents (yaml, xml, bibxml)
|
212
|
+
#
|
213
|
+
def self.fetch(output: "data", format: "yaml")
|
214
|
+
new(output, format).fetch
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|
data/lib/relaton_nist/hit.rb
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
module RelatonNist
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
|
+
attr_writer :fetch
|
7
|
+
|
6
8
|
# Parse page.
|
7
9
|
# @return [RelatonNist::NistBibliographicItem]
|
8
10
|
def fetch
|
@@ -10,7 +12,7 @@ module RelatonNist
|
|
10
12
|
end
|
11
13
|
|
12
14
|
# @return [Integer]
|
13
|
-
def sort_value
|
15
|
+
def sort_value # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
14
16
|
@sort_value ||= begin
|
15
17
|
sort_phrase = [hit[:serie], hit[:code], hit[:title]].join " "
|
16
18
|
corr = hit_collection&.text&.split&.map do |w|
|
@@ -13,18 +13,21 @@ module RelatonNist
|
|
13
13
|
PUBS_EXPORT = URI.join(DOMAIN, "/CSRC/media/feeds/metanorma/pubs-export")
|
14
14
|
DATAFILEDIR = File.expand_path ".relaton/nist", Dir.home
|
15
15
|
DATAFILE = File.expand_path "pubs-export.zip", DATAFILEDIR
|
16
|
+
GHNISTDATA = "https://raw.githubusercontent.com/relaton/relaton-data-nist/main/data/"
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
# @option opts [String] :stage
|
21
|
-
def initialize(ref_nbr, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
22
|
-
super ref_nbr, year
|
18
|
+
def self.search(text, year = nil, opts = {})
|
19
|
+
new(text, year).search(opts)
|
20
|
+
end
|
23
21
|
|
24
|
-
|
25
|
-
@array =
|
26
|
-
@array =
|
22
|
+
def search(opts)
|
23
|
+
@array = from_json(**opts)
|
24
|
+
@array = from_ga unless @array.any?
|
25
|
+
sort_hits!
|
26
|
+
end
|
27
27
|
|
28
|
+
private
|
29
|
+
|
30
|
+
def sort_hits!
|
28
31
|
@array.sort! do |a, b|
|
29
32
|
if a.sort_value == b.sort_value
|
30
33
|
(b.hit[:release_date] - a.hit[:release_date]).to_i
|
@@ -32,56 +35,68 @@ module RelatonNist
|
|
32
35
|
b.sort_value - a.sort_value
|
33
36
|
end
|
34
37
|
end
|
38
|
+
self
|
35
39
|
end
|
36
40
|
|
37
|
-
|
41
|
+
def from_ga # rubocop:disable Metrics/AbcSize
|
42
|
+
fn = text.gsub(%r{[/\s:.]}, "_").upcase
|
43
|
+
yaml = OpenURI.open_uri "#{GHNISTDATA}#{fn}.yaml"
|
44
|
+
hash = YAML.safe_load yaml
|
45
|
+
bib = RelatonNist::NistBibliographicItem.from_hash hash
|
46
|
+
hit = Hit.new({ code: text }, self)
|
47
|
+
hit.fetch = bib
|
48
|
+
[hit]
|
49
|
+
rescue OpenURI::HTTPError => e
|
50
|
+
return [] if e.io.status[0] == "404"
|
51
|
+
|
52
|
+
raise e
|
53
|
+
end
|
38
54
|
|
39
55
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
40
56
|
|
41
57
|
# @param stage [String]
|
42
58
|
# @return [Array<RelatonNist::Hit>]
|
43
|
-
def from_csrc(**opts)
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
end
|
59
|
+
# def from_csrc(**opts)
|
60
|
+
# from, to = nil
|
61
|
+
# if year
|
62
|
+
# d = Date.strptime year, "%Y"
|
63
|
+
# from = d.strftime "%m/%d/%Y"
|
64
|
+
# to = d.next_year.prev_day.strftime "%m/%d/%Y"
|
65
|
+
# end
|
66
|
+
# url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"\
|
67
|
+
# "&sortBy-lg=relevence"
|
68
|
+
# url += "&dateFrom-lg=#{from}" if from
|
69
|
+
# url += "&dateTo-lg=#{to}" if to
|
70
|
+
# url += if /PD/.match? opts[:stage]
|
71
|
+
# "&status-lg=Draft,Retired Draft,Withdrawn"
|
72
|
+
# else
|
73
|
+
# "&status-lg=Final,Withdrawn"
|
74
|
+
# end
|
75
|
+
|
76
|
+
# doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
|
77
|
+
# doc.css("table.publications-table > tbody > tr").map do |h|
|
78
|
+
# link = h.at("td/div/strong/a")
|
79
|
+
# serie = h.at("td[1]").text.strip
|
80
|
+
# code = h.at("td[2]").text.strip
|
81
|
+
# title = link.text
|
82
|
+
# doc_url = DOMAIN + link[:href]
|
83
|
+
# status = h.at("td[4]").text.strip.downcase
|
84
|
+
# release_date = Date.strptime h.at("td[5]").text.strip, "%m/%d/%Y"
|
85
|
+
# Hit.new(
|
86
|
+
# {
|
87
|
+
# code: code, serie: serie, title: title, url: doc_url,
|
88
|
+
# status: status, release_date: release_date
|
89
|
+
# }, self
|
90
|
+
# )
|
91
|
+
# end
|
92
|
+
# end
|
77
93
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
78
94
|
|
79
95
|
# Fetches data from JSON
|
80
|
-
# @param docid [String]
|
81
96
|
# @param stage [String]
|
82
97
|
# @return [Array<RelatonNist::Hit>]
|
83
|
-
def from_json(
|
84
|
-
select_data(
|
98
|
+
def from_json(**opts)
|
99
|
+
select_data(**opts).map do |h|
|
85
100
|
/(?<serie>(?<=-)\w+$)/ =~ h["series"]
|
86
101
|
title = [h["title-main"], h["title-sub"]].compact.join " - "
|
87
102
|
release_date = RelatonBib.parse_date h["published-date"], false
|
@@ -91,10 +106,9 @@ module RelatonNist
|
|
91
106
|
end
|
92
107
|
end
|
93
108
|
|
94
|
-
# @param docid [String]
|
95
109
|
# @param stage [String]
|
96
110
|
# @return [Array<Hash>]
|
97
|
-
def select_data(
|
111
|
+
def select_data(**opts) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength,Metrics/PerceivedComplexity
|
98
112
|
d = Date.strptime year, "%Y" if year
|
99
113
|
statuses = %w[draft-public draft-prelim]
|
100
114
|
data.select do |doc|
|
@@ -105,7 +119,7 @@ module RelatonNist
|
|
105
119
|
else
|
106
120
|
next unless doc["status"] == "final"
|
107
121
|
end
|
108
|
-
doc["docidentifier"].include?
|
122
|
+
doc["docidentifier"].include? text
|
109
123
|
end
|
110
124
|
end
|
111
125
|
|
@@ -14,9 +14,9 @@ module RelatonNist
|
|
14
14
|
# @param text [String]
|
15
15
|
# @return [RelatonNist::HitCollection]
|
16
16
|
def search(text, year = nil, opts = {})
|
17
|
-
HitCollection.
|
18
|
-
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError
|
19
|
-
raise RelatonBib::RequestError,
|
17
|
+
HitCollection.search text, year, opts
|
18
|
+
rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError => e
|
19
|
+
raise RelatonBib::RequestError, e.message
|
20
20
|
end
|
21
21
|
|
22
22
|
# @param code [String] the NIST standard Code to look up (e.g. "8200")
|
@@ -174,7 +174,17 @@ module RelatonNist
|
|
174
174
|
}
|
175
175
|
ref = matches[:code] ? "#{matches[:serie]} #{matches[:code]}" : code
|
176
176
|
result = search(ref, year, opts)
|
177
|
-
result.select { |i| search_filter i, matches, code }
|
177
|
+
selected_result = result.select { |i| search_filter i, matches, code }
|
178
|
+
return selected_result if selected_result.any? || !matches[:code]
|
179
|
+
|
180
|
+
search full_ref(matches)
|
181
|
+
end
|
182
|
+
|
183
|
+
def full_ref(matches)
|
184
|
+
ref = "#{matches[:serie]} #{matches[:code]}"
|
185
|
+
ref += long_to_short(matches[:prt1], matches[:prt2]).to_s
|
186
|
+
ref += long_to_short(matches[:vol1], matches[:vol2]).to_s
|
187
|
+
ref
|
178
188
|
end
|
179
189
|
|
180
190
|
def match(regex, code)
|
@@ -192,11 +202,11 @@ module RelatonNist
|
|
192
202
|
(?<code>[0-9-]{3,}[A-Z]?)
|
193
203
|
(?<prt1>pt\d+)?
|
194
204
|
(?<vol1>v\d+)?
|
195
|
-
(?<ver1>ver[\d
|
205
|
+
(?<ver1>ver[\d.]+)?
|
196
206
|
(?<rev1>r\d+)?
|
197
207
|
(\s(?<prt2>Part\s\d+))?
|
198
208
|
(\s(?<vol2>Vol\.\s\d+))?
|
199
|
-
(\s(?<ver2>(Ver\.|Version)\s[\d
|
209
|
+
(\s(?<ver2>(Ver\.|Version)\s[\d.]+))?
|
200
210
|
(\s(?<rev2>Rev\.\s\d+))?
|
201
211
|
(\s(?<add>Add)endum)?
|
202
212
|
}x =~ item.hit[:code]
|
@@ -2,11 +2,12 @@ require "relaton/processor"
|
|
2
2
|
|
3
3
|
module RelatonNist
|
4
4
|
class Processor < Relaton::Processor
|
5
|
-
def initialize
|
5
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
6
6
|
@short = :relaton_nist
|
7
7
|
@prefix = "NIST"
|
8
8
|
@defaultprefix = %r{^(NIST|NISTGCR|ITL Bulletin|JPCRD|NISTIR|CSRC|FIPS)(/[^\s])?\s}
|
9
9
|
@idtype = "NIST"
|
10
|
+
@datasets = %w[nist-tech-pubs]
|
10
11
|
end
|
11
12
|
|
12
13
|
# @param code [String]
|
@@ -17,6 +18,18 @@ module RelatonNist
|
|
17
18
|
::RelatonNist::NistBibliography.get(code, date, opts)
|
18
19
|
end
|
19
20
|
|
21
|
+
#
|
22
|
+
# Fetch all the documents from a source
|
23
|
+
#
|
24
|
+
# @param [String] _source source name
|
25
|
+
# @param [Hash] opts
|
26
|
+
# @option opts [String] :output directory to output documents
|
27
|
+
# @option opts [String] :format
|
28
|
+
#
|
29
|
+
def fetch_data(_source, opts)
|
30
|
+
DataFetcher.fetch(**opts)
|
31
|
+
end
|
32
|
+
|
20
33
|
# @param xml [String]
|
21
34
|
# @return [RelatonNist::GbBibliographicItem]
|
22
35
|
def from_xml(xml)
|
@@ -11,23 +11,17 @@ module RelatonNist
|
|
11
11
|
# @param hit_data [Hash]
|
12
12
|
# @return [Hash]
|
13
13
|
def parse_page(hit_data)
|
14
|
-
item_data =
|
15
|
-
from_json hit_data
|
16
|
-
else
|
17
|
-
from_csrs hit_data
|
18
|
-
end
|
19
|
-
# doctype = "standard"
|
14
|
+
item_data = from_json hit_data
|
20
15
|
titles = fetch_titles(hit_data)
|
21
16
|
unless /^(SP|NISTIR|FIPS) /.match? item_data[:docid][0].id
|
22
|
-
# doctype = id_cleanup(item_data[:docid][0].id)
|
23
17
|
item_data[:docid][0] = RelatonBib::DocumentIdentifier.new(
|
24
|
-
id: titles[0][:content].upcase, type: "NIST"
|
18
|
+
id: titles[0][:content].upcase, type: "NIST",
|
25
19
|
)
|
26
20
|
end
|
27
21
|
item_data[:fetched] = Date.today.to_s
|
28
22
|
item_data[:type] = "standard"
|
29
23
|
item_data[:title] = titles
|
30
|
-
item_data[:doctype] = "standard"
|
24
|
+
item_data[:doctype] = "standard"
|
31
25
|
|
32
26
|
NistBibliographicItem.new(**item_data)
|
33
27
|
end
|
@@ -44,7 +38,7 @@ module RelatonNist
|
|
44
38
|
edition: fetch_edition(json),
|
45
39
|
language: [json["language"]],
|
46
40
|
script: [json["script"]],
|
47
|
-
docstatus: fetch_status(json, hit_data[:status]),
|
41
|
+
docstatus: fetch_status(json), # hit_data[:status]),
|
48
42
|
copyright: fetch_copyright(json["published-date"]),
|
49
43
|
relation: fetch_relations_json(json),
|
50
44
|
place: ["Gaithersburg, MD"],
|
@@ -53,120 +47,25 @@ module RelatonNist
|
|
53
47
|
}
|
54
48
|
end
|
55
49
|
|
56
|
-
def from_csrs(hit_data)
|
57
|
-
doc = get_page hit_data[:url]
|
58
|
-
{
|
59
|
-
# id: fetch_id(doc),
|
60
|
-
link: fetch_link(doc),
|
61
|
-
docid: fetch_docid(doc),
|
62
|
-
date: fetch_dates(doc, hit_data[:release_date]),
|
63
|
-
contributor: fetch_contributors(doc),
|
64
|
-
edition: fetch_edition(hit_data[:code]),
|
65
|
-
language: ["en"],
|
66
|
-
script: ["Latn"],
|
67
|
-
abstract: fetch_abstract(doc),
|
68
|
-
docstatus: fetch_status(doc, hit_data[:status]),
|
69
|
-
copyright: fetch_copyright(doc),
|
70
|
-
relation: fetch_relations(doc),
|
71
|
-
series: fetch_series(doc),
|
72
|
-
keyword: fetch_keywords(doc),
|
73
|
-
commentperiod: fetch_commentperiod(doc),
|
74
|
-
}
|
75
|
-
end
|
76
50
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
77
51
|
|
78
|
-
# Strip status from doc id
|
79
|
-
# @param id String
|
80
|
-
# @return String
|
81
|
-
# def id_cleanup(id)
|
82
|
-
# id.sub(/ \(WITHDRAWN\)/, "").sub(/ \(([^) ]+ )?DRAFT\)/i, "")
|
83
|
-
# end
|
84
|
-
|
85
|
-
# Get page.
|
86
|
-
# @param path [String] page's path
|
87
|
-
# @return [Array<Nokogiri::HTML::Document, String>]
|
88
|
-
def get_page(url)
|
89
|
-
uri = URI url
|
90
|
-
resp = Net::HTTP.get_response(uri)
|
91
|
-
%r{(?<=newLocation = 'https://' \+ window.location.hostname \+ ')(?<path>[^']+)} =~ resp.body
|
92
|
-
if path
|
93
|
-
uri = URI HitCollection::DOMAIN + path
|
94
|
-
resp = Net::HTTP.get_response(uri)
|
95
|
-
end
|
96
|
-
Nokogiri::HTML(resp.body)
|
97
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
98
|
-
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
99
|
-
Net::ProtocolError, OpenSSL::SSL::SSLError
|
100
|
-
raise RelatonBib::RequestError, "Could not access #{url}"
|
101
|
-
end
|
102
|
-
|
103
52
|
# Fetch docid.
|
104
|
-
# @param
|
53
|
+
# @param docid [String]
|
105
54
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
106
|
-
def fetch_docid(
|
107
|
-
item_ref =
|
108
|
-
|
109
|
-
|
110
|
-
"//div[contains(@class, 'publications-detail')]/h3"
|
111
|
-
)&.text&.strip&.sub(/(?<=\w)\([^\)]+\)$/) do |m|
|
112
|
-
" " + m.upcase
|
113
|
-
end&.squeeze(" ")&.gsub(/ |\n|\r/, "")
|
114
|
-
end
|
115
|
-
item_ref ||= "?"
|
116
|
-
item_ref.sub! /\sAddendum$/, "-Add"
|
55
|
+
def fetch_docid(docid)
|
56
|
+
item_ref = docid
|
57
|
+
# item_ref ||= "?"
|
58
|
+
item_ref.sub!(/\sAddendum$/, "-Add")
|
117
59
|
[RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
|
118
60
|
end
|
119
61
|
|
120
|
-
# Fetch id.
|
121
|
-
# @param doc [Nokogiri::HTML::Document]
|
122
|
-
# @return [String]
|
123
|
-
# def fetch_id(doc)
|
124
|
-
# doc.at("//div[contains(@class, 'publications-detail')]/h3").text.
|
125
|
-
# strip.gsub(/\s/, "")
|
126
|
-
# end
|
127
|
-
|
128
62
|
# Fetch status.
|
129
|
-
# @param doc [
|
130
|
-
# @param status [String]
|
63
|
+
# @param doc [Hash]
|
131
64
|
# @return [RelatonNist::DocumentStatus]
|
132
|
-
def fetch_status(doc
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
|
137
|
-
else
|
138
|
-
case status
|
139
|
-
when "draft (obsolete)"
|
140
|
-
stage = "draft-public"
|
141
|
-
subst = "withdrawn"
|
142
|
-
when "retired draft"
|
143
|
-
stage = "draft-public"
|
144
|
-
subst = "retired"
|
145
|
-
when "withdrawn"
|
146
|
-
stage = "final"
|
147
|
-
subst = "withdrawn"
|
148
|
-
when /^draft/
|
149
|
-
stage = "draft-public"
|
150
|
-
subst = "active"
|
151
|
-
else
|
152
|
-
stage = status
|
153
|
-
subst = "active"
|
154
|
-
end
|
155
|
-
|
156
|
-
iter = nil
|
157
|
-
if stage.include? "draft"
|
158
|
-
iter = 1
|
159
|
-
history = doc.xpath("//span[@id='pub-history-container']/a"\
|
160
|
-
"|//span[@id='pub-history-container']/span")
|
161
|
-
history.each_with_index do |h, idx|
|
162
|
-
next if h.name == "a"
|
163
|
-
|
164
|
-
iter = idx + 1 if idx.positive?
|
165
|
-
break
|
166
|
-
end
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
65
|
+
def fetch_status(doc)
|
66
|
+
stage = doc["status"]
|
67
|
+
subst = doc["substage"]
|
68
|
+
iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
|
170
69
|
RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter.to_s
|
171
70
|
end
|
172
71
|
|
@@ -179,55 +78,43 @@ module RelatonNist
|
|
179
78
|
end
|
180
79
|
|
181
80
|
# Fetch dates
|
182
|
-
# @param doc [
|
81
|
+
# @param doc [Hash]
|
183
82
|
# @param release_date [Date]
|
184
83
|
# @return [Array<Hash>]
|
185
84
|
def fetch_dates(doc, release_date) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
186
85
|
dates = [{ type: "published", on: release_date.to_s }]
|
187
86
|
|
188
|
-
if doc.is_a? Hash
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
else
|
195
|
-
|
196
|
-
|
197
|
-
end
|
87
|
+
# if doc.is_a? Hash
|
88
|
+
issued = RelatonBib.parse_date doc["issued-date"]
|
89
|
+
updated = RelatonBib.parse_date doc["updated-date"]
|
90
|
+
dates << { type: "updated", on: updated.to_s } if updated
|
91
|
+
obsoleted = RelatonBib.parse_date doc["obsoleted-date"]
|
92
|
+
dates << { type: "obsoleted", on: obsoleted.to_s } if obsoleted
|
93
|
+
# else
|
94
|
+
# d = doc.at("//span[@id='pub-release-date']")&.text&.strip
|
95
|
+
# issued = RelatonBib.parse_date d
|
96
|
+
# end
|
198
97
|
dates << { type: "issued", on: issued.to_s }
|
199
98
|
dates
|
200
99
|
end
|
201
100
|
|
202
|
-
#
|
203
|
-
# @param doc [Nokogiri::HTML::Document, Hash]
|
101
|
+
# @param doc [Hash]
|
204
102
|
# @return [Array<RelatonBib::ContributionInfo>]
|
205
103
|
def fetch_contributors(doc)
|
206
104
|
contribs = []
|
207
|
-
if doc.is_a? Hash
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
else
|
215
|
-
name = "National Institute of Standards and Technology"
|
216
|
-
org = RelatonBib::Organization.new(
|
217
|
-
name: name, url: "www.nist.gov", abbreviation: "NIST",
|
218
|
-
)
|
219
|
-
contribs << RelatonBib::ContributionInfo.new(entity: org, role: [type: "publisher"])
|
220
|
-
authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
|
221
|
-
contribs += contributors(authors, "author")
|
222
|
-
editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
|
223
|
-
contribs + contributors(editors, "editor")
|
224
|
-
end
|
105
|
+
# if doc.is_a? Hash
|
106
|
+
contribs += contributors_json(
|
107
|
+
doc["authors"], "author", doc["language"], doc["script"]
|
108
|
+
)
|
109
|
+
contribs + contributors_json(
|
110
|
+
doc["editors"], "editor", doc["language"], doc["script"]
|
111
|
+
)
|
225
112
|
end
|
226
113
|
|
227
114
|
# @param doc [Array<Hash>]
|
228
115
|
# @param role [String]
|
229
116
|
# @return [Array<RelatonBib::ContributionInfo>]
|
230
|
-
def contributors_json(doc, role, lang = "en", script = "Latn")
|
117
|
+
def contributors_json(doc, role, lang = "en", script = "Latn") # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
231
118
|
doc.map do |contr|
|
232
119
|
if contr["affiliation"]
|
233
120
|
if contr["affiliation"]["acronym"]
|
@@ -252,43 +139,6 @@ module RelatonNist
|
|
252
139
|
end.compact
|
253
140
|
end
|
254
141
|
|
255
|
-
# rubocop:disable Metrics/CyclomaticComplexity
|
256
|
-
# @param doc [Nokogiri::HTML::Element, Array<Hash>]
|
257
|
-
# @param role [String]
|
258
|
-
# @return [Array<RelatonBib::ContributionInfo>]
|
259
|
-
def contributors(doc, role, lang = "en", script = "Latn")
|
260
|
-
return [] if doc.nil?
|
261
|
-
|
262
|
-
doc.text.split(", ").map do |contr|
|
263
|
-
/(?<an>.+?)(\s+\((?<abbrev>.+?)\))?$/ =~ contr.strip
|
264
|
-
if abbrev && an.downcase !~ /(task|force|group)/ && an.split.size.between?(2, 3)
|
265
|
-
fullname = RelatonBib::FullName.new(
|
266
|
-
completename: RelatonBib::LocalizedString.new(an, lang, script)
|
267
|
-
)
|
268
|
-
case abbrev
|
269
|
-
when "NIST"
|
270
|
-
org_name = "National Institute of Standards and Technology"
|
271
|
-
url = "www.nist.gov"
|
272
|
-
when "MITRE"
|
273
|
-
org_name = abbrev
|
274
|
-
url = "www.mitre.org"
|
275
|
-
else
|
276
|
-
org_name = abbrev
|
277
|
-
url = nil
|
278
|
-
end
|
279
|
-
org = RelatonBib::Organization.new name: org_name, url: url, abbreviation: abbrev
|
280
|
-
affiliation = RelatonBib::Affiliation.new organization: org
|
281
|
-
entity = RelatonBib::Person.new(
|
282
|
-
name: fullname, affiliation: [affiliation],
|
283
|
-
)
|
284
|
-
else
|
285
|
-
entity = RelatonBib::Organization.new name: an, abbreviation: abbrev
|
286
|
-
end
|
287
|
-
RelatonBib::ContributionInfo.new entity: entity, role: [type: role]
|
288
|
-
end
|
289
|
-
end
|
290
|
-
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/MethodLength
|
291
|
-
|
292
142
|
# @param name [Hash]
|
293
143
|
# @param lang [String]
|
294
144
|
# @param script [String]
|
@@ -313,87 +163,37 @@ module RelatonNist
|
|
313
163
|
[RelatonBib::LocalizedString.new(part, lang, script)]
|
314
164
|
end
|
315
165
|
|
316
|
-
# @param doc [
|
166
|
+
# @param doc [Hash]
|
317
167
|
# @return [String, NilClass]
|
318
168
|
def fetch_edition(doc)
|
319
|
-
if doc.is_a? Hash
|
320
|
-
|
321
|
-
|
322
|
-
rev = doc["edition"]
|
323
|
-
else
|
324
|
-
return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ doc
|
325
|
-
end
|
169
|
+
# if doc.is_a? Hash
|
170
|
+
return unless doc["edition"]
|
326
171
|
|
172
|
+
rev = doc["edition"]
|
327
173
|
"Revision #{rev}"
|
328
174
|
end
|
329
175
|
|
330
|
-
# Fetch abstracts.
|
331
|
-
# @param doc [Nokogiri::HTML::Document]
|
332
|
-
# @return [Array<Hash>]
|
333
|
-
def fetch_abstract(doc)
|
334
|
-
abstract_content = doc.xpath(
|
335
|
-
'//div[contains(@class, "pub-abstract-callout")]/div[1]/p',
|
336
|
-
).text
|
337
|
-
[{
|
338
|
-
content: abstract_content,
|
339
|
-
language: "en",
|
340
|
-
script: "Latn",
|
341
|
-
format: "text/plain",
|
342
|
-
}]
|
343
|
-
end
|
344
|
-
|
345
176
|
# Fetch copyright.
|
346
177
|
# @param doc [Nokogiri::HTML::Document, String]
|
347
178
|
# @return [Array<Hash>]
|
348
179
|
def fetch_copyright(doc)
|
349
180
|
name = "National Institute of Standards and Technology"
|
350
181
|
url = "www.nist.gov"
|
351
|
-
|
352
|
-
else
|
353
|
-
doc.at("//span[@id='pub-release-date']")&.text&.strip
|
354
|
-
end
|
355
|
-
from = d&.match(/\d{4}/)&.to_s
|
182
|
+
from = doc&.match(/\d{4}/)&.to_s
|
356
183
|
[{ owner: [{ name: name, abbreviation: "NIST", url: url }], from: from }]
|
357
184
|
end
|
358
185
|
|
359
|
-
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
360
|
-
|
361
186
|
# Fetch links.
|
362
|
-
# @param doc [
|
187
|
+
# @param doc [Hash]
|
363
188
|
# @return [Array<Hash>]
|
364
189
|
def fetch_link(doc)
|
365
190
|
links = []
|
366
|
-
if doc
|
367
|
-
|
368
|
-
doi
|
369
|
-
else
|
370
|
-
pub = doc.at "//p/strong[contains(., 'Publication:')]"
|
371
|
-
pdf = pub&.at "./following-sibling::a[.=' Local Download']"
|
372
|
-
doi = pub&.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
|
373
|
-
links << { type: "pdf", content: pdf[:href] } if pdf
|
191
|
+
links << { type: "src", content: doc["uri"] } if doc["uri"]
|
192
|
+
if doc["doi"]
|
193
|
+
links << { type: "doi", content: "https://doi.org/#{doc['doi']}" }
|
374
194
|
end
|
375
|
-
links << { type: "doi", content: doi } if doi
|
376
195
|
links
|
377
196
|
end
|
378
|
-
# rubocop:enable Metrics/MethodLength
|
379
|
-
|
380
|
-
# Fetch relations.
|
381
|
-
# @param doc [Nokogiri::HTML::Document]
|
382
|
-
# @return [Array<RelatonNist::DocumentRelation>]
|
383
|
-
def fetch_relations(doc)
|
384
|
-
relations = doc.xpath('//span[@id="pub-supersedes-container"]/a').map do |r|
|
385
|
-
doc_relation "supersedes", r.text, DOMAIN + r[:href]
|
386
|
-
end
|
387
|
-
|
388
|
-
relations += doc.xpath('//span[@id="pub-part-container"]/a').map do |r|
|
389
|
-
doc_relation "partOf", r.text, DOMAIN + r[:href]
|
390
|
-
end
|
391
|
-
|
392
|
-
relations + doc.xpath('//span[@id="pub-related-container"]/a').map do |r|
|
393
|
-
doc_relation "updates", r.text, DOMAIN + r[:href]
|
394
|
-
end
|
395
|
-
end
|
396
|
-
# rubocop:enable Metrics/AbcSize
|
397
197
|
|
398
198
|
def fetch_relations_json(doc)
|
399
199
|
relations = doc["supersedes"].map do |r|
|
@@ -421,67 +221,11 @@ module RelatonNist
|
|
421
221
|
)
|
422
222
|
end
|
423
223
|
|
424
|
-
#
|
425
|
-
|
426
|
-
# @param doc [Nokogiri::HTML::Document]
|
427
|
-
# @return [Array<RelatonBib::Series>]
|
428
|
-
def fetch_series(doc)
|
429
|
-
series = doc.xpath "//span[@id='pub-history-container']/a"\
|
430
|
-
"|//span[@id='pub-history-container']/span"
|
431
|
-
series.map.with_index do |s, idx|
|
432
|
-
next if s.name == "span"
|
433
|
-
|
434
|
-
iter = if idx.zero? then "I"
|
435
|
-
else idx + 1
|
436
|
-
end
|
437
|
-
|
438
|
-
content = s.text.match(/^[^\(]+/).to_s.strip.squeeze " "
|
439
|
-
|
440
|
-
ref = case s.text
|
441
|
-
when /^Draft/
|
442
|
-
content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
|
443
|
-
when /\(Draft\)/ then content + " (#{iter}PD)"
|
444
|
-
else content
|
445
|
-
end
|
446
|
-
|
447
|
-
fref = RelatonBib::FormattedRef.new(
|
448
|
-
content: ref, language: "en", script: "Latn", format: "text/plain",
|
449
|
-
)
|
450
|
-
RelatonBib::Series.new(formattedref: fref)
|
451
|
-
end.select { |s| s }
|
452
|
-
end
|
453
|
-
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
454
|
-
|
455
|
-
# @param doc [Nokogiri::HTML::Document, Hash]
|
224
|
+
# @param doc [Hash]
|
456
225
|
# @return [Array<RelatonNist::Keyword>]
|
457
226
|
def fetch_keywords(doc)
|
458
|
-
|
459
|
-
doc["keywords"]
|
460
|
-
else
|
461
|
-
doc.xpath "//span[@id='pub-keywords-container']/span"
|
462
|
-
end
|
463
|
-
kws.map { |kw| kw.is_a?(String) ? kw : kw.text }
|
464
|
-
end
|
465
|
-
|
466
|
-
# rubocop:disable Metrics/AbcSize
|
467
|
-
# @param doc [Nokogiri::HTML::Document]
|
468
|
-
# @return [RelatonNist::CommentPeriod, NilClass]
|
469
|
-
def fetch_commentperiod(doc)
|
470
|
-
cp = doc.at "//span[@id='pub-comments-due']"
|
471
|
-
return unless cp
|
472
|
-
|
473
|
-
to = Date.strptime cp.text.strip, "%B %d, %Y"
|
474
|
-
|
475
|
-
d = doc.at("//span[@id='pub-release-date']").text.strip
|
476
|
-
from = Date.strptime(d, "%B %Y").to_s
|
477
|
-
|
478
|
-
ex = doc.at "//strong[contains(.,'The comment closing date has been "\
|
479
|
-
"extended to')]"
|
480
|
-
ext = ex&.text&.match(/\w+\s\d{2},\s\d{4}/).to_s
|
481
|
-
extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
|
482
|
-
CommentPeriod.new from: from, to: to, extended: extended
|
227
|
+
doc["keywords"].map { |kw| kw.is_a?(String) ? kw : kw.text }
|
483
228
|
end
|
484
|
-
# rubocop:enable Metrics/AbcSize
|
485
229
|
|
486
230
|
# @param json [Hash]
|
487
231
|
# @return [RelatonNist::CommentPeriod, NilClass]
|
data/lib/relaton_nist/version.rb
CHANGED
data/lib/relaton_nist.rb
CHANGED
data/relaton_nist.gemspec
CHANGED
@@ -25,13 +25,13 @@ Gem::Specification.new do |spec|
|
|
25
25
|
|
26
26
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
27
27
|
spec.add_development_dependency "pry-byebug"
|
28
|
-
spec.add_development_dependency "rake", "~>
|
28
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
29
29
|
spec.add_development_dependency "rspec", "~> 3.0"
|
30
30
|
spec.add_development_dependency "ruby-jing"
|
31
31
|
spec.add_development_dependency "simplecov"
|
32
32
|
spec.add_development_dependency "vcr"
|
33
33
|
spec.add_development_dependency "webmock"
|
34
34
|
|
35
|
-
spec.add_dependency "relaton-bib", "
|
35
|
+
spec.add_dependency "relaton-bib", ">= 1.9.19"
|
36
36
|
spec.add_dependency "rubyzip"
|
37
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.
|
4
|
+
version: 1.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '13.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '13.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rspec
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -126,16 +126,16 @@ dependencies:
|
|
126
126
|
name: relaton-bib
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
|
-
- - "
|
129
|
+
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 1.9.
|
131
|
+
version: 1.9.19
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- - "
|
136
|
+
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 1.9.
|
138
|
+
version: 1.9.19
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: rubyzip
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -166,6 +166,7 @@ files:
|
|
166
166
|
- README.adoc
|
167
167
|
- Rakefile
|
168
168
|
- bin/console
|
169
|
+
- bin/rspec
|
169
170
|
- bin/setup
|
170
171
|
- grammars/basicdoc.rng
|
171
172
|
- grammars/biblio.rng
|
@@ -174,6 +175,7 @@ files:
|
|
174
175
|
- grammars/reqt.rng
|
175
176
|
- lib/relaton_nist.rb
|
176
177
|
- lib/relaton_nist/comment_period.rb
|
178
|
+
- lib/relaton_nist/data_fetcher.rb
|
177
179
|
- lib/relaton_nist/document_relation.rb
|
178
180
|
- lib/relaton_nist/document_status.rb
|
179
181
|
- lib/relaton_nist/hash_converter.rb
|