relaton-nist 1.9.2 → 1.9.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/README.adoc +12 -1
- data/lib/relaton_nist/data_fetcher.rb +65 -31
- data/lib/relaton_nist/nist_bibliography.rb +6 -5
- data/lib/relaton_nist/processor.rb +9 -1
- data/lib/relaton_nist/scrapper.rb +6 -195
- data/lib/relaton_nist/version.rb +1 -1
- data/relaton_nist.gemspec +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67f9b42b4407dc8a63c3e83f75e863d260302fb3bcb61d4983cc12dff591a01b
|
4
|
+
data.tar.gz: 910b5878d79dc0f4406954e2460d39ec4ded01fe75ceb8835430f140941d6dba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ccc8b6077c52845934359b45a33a8f4991b8dec44b68ca1704fbfd35f1a72a0e42ed319d967727963d1c99d2874e4991d581291dc6e66d8557a57f3e2093b04
|
7
|
+
data.tar.gz: 2f10467a456fb8eaa30def322fa8dfcda3968b5adc18c176cccabec22a71d391c183429773d983d25f4214507684a8b674d1cf862f429d51116ed324193a1fc0
|
data/.rubocop.yml
CHANGED
data/README.adoc
CHANGED
@@ -147,6 +147,17 @@ item.docidentifier.first.id
|
|
147
147
|
=> "SP 800-38A-Add"
|
148
148
|
----
|
149
149
|
|
150
|
+
=== Typed links
|
151
|
+
|
152
|
+
NIST documents may have `src` and `doi` link types.
|
153
|
+
|
154
|
+
[source,ruby]
|
155
|
+
----
|
156
|
+
item.link
|
157
|
+
=> [#<RelatonBib::TypedUri:0x00007f901971dc10 @content=#<Addressable::URI:0x62c URI:https://csrc.nist.gov/publications/detail/sp/800-67/rev-2/final>, @type="src">,
|
158
|
+
#<RelatonBib::TypedUri:0x00007f901971d6e8 @content=#<Addressable::URI:0x640 URI:https://doi.org/10.6028/NIST.SP.800-67r2>, @type="doi">]
|
159
|
+
----
|
160
|
+
|
150
161
|
=== Create bibliographic item from YAML
|
151
162
|
[source,ruby]
|
152
163
|
----
|
@@ -167,7 +178,7 @@ The method `RelatonNist::DataFetcher.fetch(output: "data", format: "yaml")` fetc
|
|
167
178
|
Arguments:
|
168
179
|
|
169
180
|
- `output` - folder to save documents (default './data').
|
170
|
-
- `format` - format in which the documents are saved. Possimle formats are: `yaml`, `xml` (default `yaml`).
|
181
|
+
- `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
|
171
182
|
|
172
183
|
[source,ruby]
|
173
184
|
----
|
@@ -1,12 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# require 'English'
|
4
|
-
# require 'mechanize'
|
5
|
-
# require "fileutils"
|
6
3
|
require "yaml"
|
7
|
-
# require "open-uri"
|
8
|
-
# require "nokogiri"
|
9
|
-
# require "relaton_nist"
|
10
4
|
|
11
5
|
module RelatonNist
|
12
6
|
class DataFetcher
|
@@ -17,32 +11,50 @@ module RelatonNist
|
|
17
11
|
"isTranslationOf" => "translatedFrom",
|
18
12
|
"hasPreprint" => "hasReprint",
|
19
13
|
"isSupplementTo" => "complements",
|
14
|
+
"isPartOf" => "partOf",
|
15
|
+
"hasPart" => "hasPart",
|
20
16
|
}.freeze
|
21
17
|
URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"
|
22
18
|
|
23
19
|
def initialize(output, format)
|
24
20
|
@output = output
|
25
21
|
@format = format
|
22
|
+
@ext = format.sub(/^bib/, "")
|
26
23
|
end
|
27
24
|
|
28
|
-
def parse_docid(doc)
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
when "10.6028/NBS.
|
33
|
-
when "10.6028/NBS.
|
34
|
-
when "10.6028/
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
25
|
+
def parse_docid(doc) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
26
|
+
# case doi
|
27
|
+
# when "10.6028/NBS.CIRC.12e2revjune" then doi.sub!("13e", "12e")
|
28
|
+
# when "10.6028/NBS.CIRC.36e2" then doi.sub!("46e", "36e")
|
29
|
+
# when "10.6028/NBS.HB.67suppJune1967" then doi.sub!("1965", "1967")
|
30
|
+
# when "10.6028/NBS.HB.105-1r1990" then doi.sub!("105-1-1990", "105-1r1990")
|
31
|
+
# when "10.6028/NIST.HB.150-10-1995" then doi.sub!(/150-10$/, "150-10-1995")
|
32
|
+
# end
|
33
|
+
# anchor = doi.split("/")[1..-1].join "/"
|
34
|
+
[
|
35
|
+
{ type: "NIST", id: pub_id(doc) },
|
36
|
+
{ type: "DOI", id: doi(doc) },
|
37
|
+
{ type: "NIST", id: anchor(doc), scope: "anchor" },
|
38
|
+
]
|
39
|
+
end
|
40
|
+
|
41
|
+
def pub_id(doc)
|
42
|
+
anchor(doc).gsub(".", " ")
|
43
|
+
end
|
44
|
+
|
45
|
+
def doi(doc)
|
46
|
+
doc.at("doi_data/doi").text
|
47
|
+
end
|
48
|
+
|
49
|
+
def anchor(doc)
|
50
|
+
doi(doc).split("/")[1..-1].join "/"
|
39
51
|
end
|
40
52
|
|
41
53
|
# @param doc [Nokogiri::XML::Element]
|
42
54
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
43
55
|
def fetch_docid(doc)
|
44
56
|
parse_docid(doc).map do |id|
|
45
|
-
RelatonBib::DocumentIdentifier.new(
|
57
|
+
RelatonBib::DocumentIdentifier.new(**id)
|
46
58
|
end
|
47
59
|
end
|
48
60
|
|
@@ -52,7 +64,7 @@ module RelatonNist
|
|
52
64
|
t = doc.xpath("titles/title|titles/subtitle")
|
53
65
|
return [] unless t.any?
|
54
66
|
|
55
|
-
RelatonBib::TypedTitleString.from_string t.map(&:text).join
|
67
|
+
RelatonBib::TypedTitleString.from_string t.map(&:text).join, "en", "Latn"
|
56
68
|
end
|
57
69
|
|
58
70
|
# @param doc [Nokogiri::XML::Element]
|
@@ -81,12 +93,11 @@ module RelatonNist
|
|
81
93
|
def fetch_relation(doc)
|
82
94
|
ns = "http://www.crossref.org/relations.xsd"
|
83
95
|
doc.xpath("./ns:program/ns:related_item", ns: ns).map do |rel|
|
84
|
-
|
85
|
-
|
86
|
-
# ref, = parse_docid doc
|
87
|
-
fref = RelatonBib::FormattedRef.new content: doi.text
|
96
|
+
rdoi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: ns)
|
97
|
+
fref = RelatonBib::FormattedRef.new content: rdoi.text
|
88
98
|
bibitem = RelatonBib::BibliographicItem.new formattedref: fref
|
89
|
-
type = RELATION_TYPES[
|
99
|
+
type = RELATION_TYPES[rdoi["relationship-type"]]
|
100
|
+
warn "Relation type #{rdoi['relationship-type']} not found" unless type
|
90
101
|
{ type: type, bibitem: bibitem }
|
91
102
|
end
|
92
103
|
end
|
@@ -128,22 +139,42 @@ module RelatonNist
|
|
128
139
|
fullname = RelatonBib::FullName.new(
|
129
140
|
surname: surname, forename: forename, initial: initial, identifier: ident,
|
130
141
|
)
|
131
|
-
person = RelatonBib::Person.new name: fullname
|
142
|
+
person = RelatonBib::Person.new name: fullname, affiliation: affiliation(doc)
|
132
143
|
{ entity: person, role: [{ type: p["contributor_role"] }] }
|
133
144
|
end
|
134
145
|
contribs + doc.xpath("publisher").map do |p|
|
135
146
|
abbr = p.at("../institution/institution_acronym")&.text
|
136
|
-
|
147
|
+
place = p.at("./publisher_place")
|
148
|
+
cont = []
|
149
|
+
if place
|
150
|
+
city, state = place.text.split(", ")
|
151
|
+
cont << RelatonBib::Address.new(street: [], city: city, state: state, country: "US")
|
152
|
+
end
|
153
|
+
org = RelatonBib::Organization.new(
|
154
|
+
name: p.at("publisher_name").text, abbreviation: abbr, contact: cont,
|
155
|
+
)
|
137
156
|
{ entity: org, role: [{ type: "publisher" }] }
|
138
157
|
end
|
139
158
|
end
|
140
159
|
|
160
|
+
def affiliation(doc)
|
161
|
+
doc.xpath("./institution/institution_department").map do |id|
|
162
|
+
org = RelatonBib::Organization.new name: id.text
|
163
|
+
RelatonBib::Affiliation.new organization: org
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
141
167
|
# @param doc [Nokogiri::XML::Element]
|
142
168
|
# @return [Array<String>]
|
143
169
|
def fetch_place(doc)
|
144
170
|
doc.xpath("institution/institution_place").map(&:text)
|
145
171
|
end
|
146
172
|
|
173
|
+
def fetch_series(doc)
|
174
|
+
title = RelatonBib::TypedTitleString.new(content: "NIST")
|
175
|
+
[RelatonBib::Series.new(title: title, number: pub_id(doc))]
|
176
|
+
end
|
177
|
+
|
147
178
|
#
|
148
179
|
# Save document
|
149
180
|
#
|
@@ -151,7 +182,7 @@ module RelatonNist
|
|
151
182
|
#
|
152
183
|
def write_file(bib) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
153
184
|
id = bib.docidentifier[0].id.gsub(%r{[/\s:.]}, "_").upcase.sub(/^NIST_IR/, "NISTIR")
|
154
|
-
file = File.join(@output, "#{id}.#{@
|
185
|
+
file = File.join(@output, "#{id}.#{@ext}")
|
155
186
|
if File.exist? file
|
156
187
|
warn "File #{file} exists. Docid: #{bib.docidentifier[0].id}"
|
157
188
|
# warn "Link: #{bib.link.detect { |l| l.type == 'src' }.content}"
|
@@ -159,6 +190,7 @@ module RelatonNist
|
|
159
190
|
output = case @format
|
160
191
|
when "yaml" then bib.to_hash.to_yaml
|
161
192
|
when "xml" then bib.to_xml bibdata: true
|
193
|
+
else bib.send "to_#{@format}"
|
162
194
|
end
|
163
195
|
File.write file, output, encoding: "UTF-8"
|
164
196
|
end
|
@@ -178,14 +210,15 @@ module RelatonNist
|
|
178
210
|
link: fetch_link(doc), abstract: fetch_abstract(doc),
|
179
211
|
date: fetch_date(doc), edition: fetch_edition(doc),
|
180
212
|
contributor: fetch_contributor(doc), relation: fetch_relation(doc),
|
181
|
-
place: fetch_place(doc),
|
213
|
+
place: fetch_place(doc), series: fetch_series(doc),
|
182
214
|
language: [doc["language"]], script: ["Latn"], doctype: "standard"
|
183
215
|
)
|
184
216
|
write_file item
|
185
217
|
rescue StandardError => e
|
186
218
|
warn "Document: #{doc.at('doi').text}"
|
187
219
|
warn e.message
|
188
|
-
|
220
|
+
warn e.backtrace[0..5].join("\n")
|
221
|
+
# raise e
|
189
222
|
end
|
190
223
|
|
191
224
|
#
|
@@ -197,7 +230,7 @@ module RelatonNist
|
|
197
230
|
|
198
231
|
docs = Nokogiri::XML OpenURI.open_uri URL
|
199
232
|
FileUtils.mkdir @output unless Dir.exist? @output
|
200
|
-
FileUtils.rm Dir[File.join(@output, "*.#{@
|
233
|
+
FileUtils.rm Dir[File.join(@output, "*.#{@ext}")]
|
201
234
|
docs.xpath("/body/query/doi_record/report-paper/report-paper_metadata")
|
202
235
|
.each { |doc| parse_doc doc }
|
203
236
|
|
@@ -206,13 +239,14 @@ module RelatonNist
|
|
206
239
|
puts "Done in: #{(t2 - t1).round} sec."
|
207
240
|
rescue StandardError => e
|
208
241
|
warn e.message
|
242
|
+
warn e.backtrace[0..5].join("\n")
|
209
243
|
end
|
210
244
|
|
211
245
|
#
|
212
246
|
# Fetch all the documents from dataset
|
213
247
|
#
|
214
248
|
# @param [String] output folder name to save the documents
|
215
|
-
# @param [String] format format to save the documents
|
249
|
+
# @param [String] format format to save the documents (yaml, xml, bibxml)
|
216
250
|
#
|
217
251
|
def self.fetch(output: "data", format: "yaml")
|
218
252
|
new(output, format).fetch
|
@@ -28,17 +28,18 @@ module RelatonNist
|
|
28
28
|
# @option opts [TrueClass, FalseClass] :bibdata
|
29
29
|
#
|
30
30
|
# @return [String] Relaton XML serialisation of reference
|
31
|
-
def get(code, year = nil, opts = {})
|
32
|
-
return fetch_ref_err(code, year, []) if code.match?
|
31
|
+
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
32
|
+
return fetch_ref_err(code, year, []) if code.match?(/\sEP$/)
|
33
33
|
|
34
|
-
/^(?<code2>[
|
34
|
+
/^(?<code2>[^(]+)(?:\((?<date2>\w+\s(?:\d{2},\s)?\d{4})\))?\s?\(?(?:(?<=\()(?<stage>[^\)]+))?/ =~ code
|
35
35
|
stage ||= /(?<=\.)PD-\w+(?=\.)/.match(code)&.to_s
|
36
36
|
if code2
|
37
37
|
code = code2.strip
|
38
38
|
if date2
|
39
|
-
|
39
|
+
case date2
|
40
|
+
when /\w+\s\d{4}/
|
40
41
|
opts[:issued_date] = Date.strptime date2, "%B %Y"
|
41
|
-
|
42
|
+
when /\w+\s\d{2},\s\d{4}/
|
42
43
|
opts[:updated_date] = Date.strptime date2, "%B %d, %Y"
|
43
44
|
end
|
44
45
|
end
|
@@ -2,7 +2,7 @@ require "relaton/processor"
|
|
2
2
|
|
3
3
|
module RelatonNist
|
4
4
|
class Processor < Relaton::Processor
|
5
|
-
def initialize
|
5
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
6
6
|
@short = :relaton_nist
|
7
7
|
@prefix = "NIST"
|
8
8
|
@defaultprefix = %r{^(NIST|NISTGCR|ITL Bulletin|JPCRD|NISTIR|CSRC|FIPS)(/[^\s])?\s}
|
@@ -18,6 +18,14 @@ module RelatonNist
|
|
18
18
|
::RelatonNist::NistBibliography.get(code, date, opts)
|
19
19
|
end
|
20
20
|
|
21
|
+
#
|
22
|
+
# Fetch all the documents from a source
|
23
|
+
#
|
24
|
+
# @param [String] _source source name
|
25
|
+
# @param [Hash] opts
|
26
|
+
# @option opts [String] :output directory to output documents
|
27
|
+
# @option opts [String] :format
|
28
|
+
#
|
21
29
|
def fetch_data(_source, opts)
|
22
30
|
DataFetcher.fetch(**opts)
|
23
31
|
end
|
@@ -62,44 +62,10 @@ module RelatonNist
|
|
62
62
|
# Fetch status.
|
63
63
|
# @param doc [Hash]
|
64
64
|
# @return [RelatonNist::DocumentStatus]
|
65
|
-
def fetch_status(doc)
|
66
|
-
# if doc.is_a? Hash
|
65
|
+
def fetch_status(doc)
|
67
66
|
stage = doc["status"]
|
68
67
|
subst = doc["substage"]
|
69
68
|
iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
|
70
|
-
# else
|
71
|
-
# case status
|
72
|
-
# when "draft (obsolete)"
|
73
|
-
# stage = "draft-public"
|
74
|
-
# subst = "withdrawn"
|
75
|
-
# when "retired draft"
|
76
|
-
# stage = "draft-public"
|
77
|
-
# subst = "retired"
|
78
|
-
# when "withdrawn"
|
79
|
-
# stage = "final"
|
80
|
-
# subst = "withdrawn"
|
81
|
-
# when /^draft/
|
82
|
-
# stage = "draft-public"
|
83
|
-
# subst = "active"
|
84
|
-
# else
|
85
|
-
# stage = status
|
86
|
-
# subst = "active"
|
87
|
-
# end
|
88
|
-
|
89
|
-
# iter = nil
|
90
|
-
# if stage.include? "draft"
|
91
|
-
# iter = 1
|
92
|
-
# history = doc.xpath("//span[@id='pub-history-container']/a"\
|
93
|
-
# "|//span[@id='pub-history-container']/span")
|
94
|
-
# history.each_with_index do |h, idx|
|
95
|
-
# next if h.name == "a"
|
96
|
-
|
97
|
-
# iter = idx + 1 if idx.positive?
|
98
|
-
# break
|
99
|
-
# end
|
100
|
-
# end
|
101
|
-
# end
|
102
|
-
|
103
69
|
RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter.to_s
|
104
70
|
end
|
105
71
|
|
@@ -132,7 +98,6 @@ module RelatonNist
|
|
132
98
|
dates
|
133
99
|
end
|
134
100
|
|
135
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
136
101
|
# @param doc [Hash]
|
137
102
|
# @return [Array<RelatonBib::ContributionInfo>]
|
138
103
|
def fetch_contributors(doc)
|
@@ -144,23 +109,12 @@ module RelatonNist
|
|
144
109
|
contribs + contributors_json(
|
145
110
|
doc["editors"], "editor", doc["language"], doc["script"]
|
146
111
|
)
|
147
|
-
# else
|
148
|
-
# name = "National Institute of Standards and Technology"
|
149
|
-
# org = RelatonBib::Organization.new(
|
150
|
-
# name: name, url: "www.nist.gov", abbreviation: "NIST",
|
151
|
-
# )
|
152
|
-
# contribs << RelatonBib::ContributionInfo.new(entity: org, role: [type: "publisher"])
|
153
|
-
# authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
|
154
|
-
# contribs += contributors(authors, "author")
|
155
|
-
# editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
|
156
|
-
# contribs + contributors(editors, "editor")
|
157
|
-
# end
|
158
112
|
end
|
159
113
|
|
160
114
|
# @param doc [Array<Hash>]
|
161
115
|
# @param role [String]
|
162
116
|
# @return [Array<RelatonBib::ContributionInfo>]
|
163
|
-
def contributors_json(doc, role, lang = "en", script = "Latn")
|
117
|
+
def contributors_json(doc, role, lang = "en", script = "Latn") # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
164
118
|
doc.map do |contr|
|
165
119
|
if contr["affiliation"]
|
166
120
|
if contr["affiliation"]["acronym"]
|
@@ -185,43 +139,6 @@ module RelatonNist
|
|
185
139
|
end.compact
|
186
140
|
end
|
187
141
|
|
188
|
-
# rubocop:disable Metrics/CyclomaticComplexity
|
189
|
-
# @param doc [Nokogiri::HTML::Element, Array<Hash>]
|
190
|
-
# @param role [String]
|
191
|
-
# @return [Array<RelatonBib::ContributionInfo>]
|
192
|
-
# def contributors(doc, role, lang = "en", script = "Latn")
|
193
|
-
# return [] if doc.nil?
|
194
|
-
|
195
|
-
# doc.text.split(", ").map do |contr|
|
196
|
-
# /(?<an>.+?)(\s+\((?<abbrev>.+?)\))?$/ =~ contr.strip
|
197
|
-
# if abbrev && an.downcase !~ /(task|force|group)/ && an.split.size.between?(2, 3)
|
198
|
-
# fullname = RelatonBib::FullName.new(
|
199
|
-
# completename: RelatonBib::LocalizedString.new(an, lang, script)
|
200
|
-
# )
|
201
|
-
# case abbrev
|
202
|
-
# when "NIST"
|
203
|
-
# org_name = "National Institute of Standards and Technology"
|
204
|
-
# url = "www.nist.gov"
|
205
|
-
# when "MITRE"
|
206
|
-
# org_name = abbrev
|
207
|
-
# url = "www.mitre.org"
|
208
|
-
# else
|
209
|
-
# org_name = abbrev
|
210
|
-
# url = nil
|
211
|
-
# end
|
212
|
-
# org = RelatonBib::Organization.new name: org_name, url: url, abbreviation: abbrev
|
213
|
-
# affiliation = RelatonBib::Affiliation.new organization: org
|
214
|
-
# entity = RelatonBib::Person.new(
|
215
|
-
# name: fullname, affiliation: [affiliation],
|
216
|
-
# )
|
217
|
-
# else
|
218
|
-
# entity = RelatonBib::Organization.new name: an, abbreviation: abbrev
|
219
|
-
# end
|
220
|
-
# RelatonBib::ContributionInfo.new entity: entity, role: [type: role]
|
221
|
-
# end
|
222
|
-
# end
|
223
|
-
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/MethodLength
|
224
|
-
|
225
142
|
# @param name [Hash]
|
226
143
|
# @param lang [String]
|
227
144
|
# @param script [String]
|
@@ -253,80 +170,30 @@ module RelatonNist
|
|
253
170
|
return unless doc["edition"]
|
254
171
|
|
255
172
|
rev = doc["edition"]
|
256
|
-
# else
|
257
|
-
# return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ doc
|
258
|
-
# end
|
259
|
-
|
260
173
|
"Revision #{rev}"
|
261
174
|
end
|
262
175
|
|
263
|
-
# Fetch abstracts.
|
264
|
-
# @param doc [Nokigiri::HTML::Document]
|
265
|
-
# @return [Array<Hash>]
|
266
|
-
# def fetch_abstract(doc)
|
267
|
-
# abstract_content = doc.xpath(
|
268
|
-
# '//div[contains(@class, "pub-abstract-callout")]/div[1]/p',
|
269
|
-
# ).text
|
270
|
-
# [{
|
271
|
-
# content: abstract_content,
|
272
|
-
# language: "en",
|
273
|
-
# script: "Latn",
|
274
|
-
# format: "text/plain",
|
275
|
-
# }]
|
276
|
-
# end
|
277
|
-
|
278
176
|
# Fetch copyright.
|
279
177
|
# @param doc [Nokogiri::HTML::Document, String]
|
280
178
|
# @return [Array<Hash>]
|
281
179
|
def fetch_copyright(doc)
|
282
180
|
name = "National Institute of Standards and Technology"
|
283
181
|
url = "www.nist.gov"
|
284
|
-
# d = if doc.is_a? String then doc
|
285
|
-
# else
|
286
|
-
# doc.at("//span[@id='pub-release-date']")&.text&.strip
|
287
|
-
# end
|
288
182
|
from = doc&.match(/\d{4}/)&.to_s
|
289
183
|
[{ owner: [{ name: name, abbreviation: "NIST", url: url }], from: from }]
|
290
184
|
end
|
291
185
|
|
292
|
-
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
293
|
-
|
294
186
|
# Fetch links.
|
295
187
|
# @param doc [Hash]
|
296
188
|
# @return [Array<Hash>]
|
297
189
|
def fetch_link(doc)
|
298
190
|
links = []
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
# pub = doc.at "//p/strong[contains(., 'Publication:')]"
|
304
|
-
# pdf = pub&.at "./following-sibling::a[.=' Local Download']"
|
305
|
-
# doi = pub&.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
|
306
|
-
# links << { type: "pdf", content: pdf[:href] } if pdf
|
307
|
-
# end
|
308
|
-
links << { type: "doi", content: doi } if doi
|
191
|
+
links << { type: "src", content: doc["uri"] } if doc["uri"]
|
192
|
+
if doc["doi"]
|
193
|
+
links << { type: "doi", content: "https://doi.org/#{doc['doi']}" }
|
194
|
+
end
|
309
195
|
links
|
310
196
|
end
|
311
|
-
# rubocop:enable Metrics/MethodLength
|
312
|
-
|
313
|
-
# Fetch relations.
|
314
|
-
# @param doc [Nokogiri::HTML::Document]
|
315
|
-
# @return [Array<RelatonNist::DocumentRelation>]
|
316
|
-
# def fetch_relations(doc)
|
317
|
-
# relations = doc.xpath('//span[@id="pub-supersedes-container"]/a').map do |r|
|
318
|
-
# doc_relation "supersedes", r.text, DOMAIN + r[:href]
|
319
|
-
# end
|
320
|
-
|
321
|
-
# relations += doc.xpath('//span[@id="pub-part-container"]/a').map do |r|
|
322
|
-
# doc_relation "partOf", r.text, DOMAIN + r[:href]
|
323
|
-
# end
|
324
|
-
|
325
|
-
# relations + doc.xpath('//span[@id="pub-related-container"]/a').map do |r|
|
326
|
-
# doc_relation "updates", r.text, DOMAIN + r[:href]
|
327
|
-
# end
|
328
|
-
# end
|
329
|
-
# rubocop:enable Metrics/AbcSize
|
330
197
|
|
331
198
|
def fetch_relations_json(doc)
|
332
199
|
relations = doc["supersedes"].map do |r|
|
@@ -354,68 +221,12 @@ module RelatonNist
|
|
354
221
|
)
|
355
222
|
end
|
356
223
|
|
357
|
-
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
358
|
-
|
359
|
-
# @param doc [Nokogiri::HTML::Document]
|
360
|
-
# @return [Array<RelatonBib::Series>]
|
361
|
-
# def fetch_series(doc)
|
362
|
-
# series = doc.xpath "//span[@id='pub-history-container']/a"\
|
363
|
-
# "|//span[@id='pub-history-container']/span"
|
364
|
-
# series.map.with_index do |s, idx|
|
365
|
-
# next if s.name == "span"
|
366
|
-
|
367
|
-
# iter = if idx.zero? then "I"
|
368
|
-
# else idx + 1
|
369
|
-
# end
|
370
|
-
|
371
|
-
# content = s.text.match(/^[^\(]+/).to_s.strip.squeeze " "
|
372
|
-
|
373
|
-
# ref = case s.text
|
374
|
-
# when /^Draft/
|
375
|
-
# content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
|
376
|
-
# when /\(Draft\)/ then content + " (#{iter}PD)"
|
377
|
-
# else content
|
378
|
-
# end
|
379
|
-
|
380
|
-
# fref = RelatonBib::FormattedRef.new(
|
381
|
-
# content: ref, language: "en", script: "Latn", format: "text/plain",
|
382
|
-
# )
|
383
|
-
# RelatonBib::Series.new(formattedref: fref)
|
384
|
-
# end.select { |s| s }
|
385
|
-
# end
|
386
|
-
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
387
|
-
|
388
224
|
# @param doc [Hash]
|
389
225
|
# @return [Array<RelatonNist::Keyword>]
|
390
226
|
def fetch_keywords(doc)
|
391
|
-
# kws = if doc.is_a? Hash
|
392
|
-
# doc["keywords"]
|
393
|
-
# else
|
394
|
-
# doc.xpath "//span[@id='pub-keywords-container']/span"
|
395
|
-
# end
|
396
227
|
doc["keywords"].map { |kw| kw.is_a?(String) ? kw : kw.text }
|
397
228
|
end
|
398
229
|
|
399
|
-
# rubocop:disable Metrics/AbcSize
|
400
|
-
# @param doc [Nokogiri::HTML::Document]
|
401
|
-
# @return [RelatonNist::CommentPeriod, NilClass]
|
402
|
-
# def fetch_commentperiod(doc)
|
403
|
-
# cp = doc.at "//span[@id='pub-comments-due']"
|
404
|
-
# return unless cp
|
405
|
-
|
406
|
-
# to = Date.strptime cp.text.strip, "%B %d, %Y"
|
407
|
-
|
408
|
-
# d = doc.at("//span[@id='pub-release-date']").text.strip
|
409
|
-
# from = Date.strptime(d, "%B %Y").to_s
|
410
|
-
|
411
|
-
# ex = doc.at "//strong[contains(.,'The comment closing date has been "\
|
412
|
-
# "extended to')]"
|
413
|
-
# ext = ex&.text&.match(/\w+\s\d{2},\s\d{4}/).to_s
|
414
|
-
# extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
|
415
|
-
# CommentPeriod.new from: from, to: to, extended: extended
|
416
|
-
# end
|
417
|
-
# rubocop:enable Metrics/AbcSize
|
418
|
-
|
419
230
|
# @param json [Hash]
|
420
231
|
# @return [RelatonNist::CommentPeriod, NilClass]
|
421
232
|
def fetch_commentperiod_json(json)
|
data/lib/relaton_nist/version.rb
CHANGED
data/relaton_nist.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.
|
4
|
+
version: 1.9.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -126,16 +126,16 @@ dependencies:
|
|
126
126
|
name: relaton-bib
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
|
-
- - "
|
129
|
+
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 1.9.
|
131
|
+
version: 1.9.19
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- - "
|
136
|
+
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 1.9.
|
138
|
+
version: 1.9.19
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: rubyzip
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|