relaton-nist 1.9.3 → 1.9.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/grammars/biblio.rng +2 -2
- data/lib/relaton_nist/data_fetcher.rb +68 -21
- data/lib/relaton_nist/nist_bibliography.rb +6 -5
- data/lib/relaton_nist/scrapper.rb +8 -2
- data/lib/relaton_nist/version.rb +1 -1
- data/relaton_nist.gemspec +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b8d329e1c313005dc313d6a9f11581a3091c11357eaffaed991e70936d676884
|
4
|
+
data.tar.gz: ee7c8965f6ccec33db3be2a88c577fc85de894b0797c16d4b48eeb0533466fd8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f43154e131aafeb2ccebeff6444d3a5ea2a1194e010c05b72388deb118d3db92b42a51e20e71ce53872bdae88fa504f49e1b16917d6d5502f74a99c11d742af7
|
7
|
+
data.tar.gz: 9e4146500cb2724b7c42208cbb6432d1cb7b8d8b83156572e0dd3159d202504d707662995657cffb49ff6440a31d0de3398bd3ee35b744d6aaf636bdbc422154
|
data/.rubocop.yml
CHANGED
data/grammars/biblio.rng
CHANGED
@@ -11,6 +11,8 @@ module RelatonNist
|
|
11
11
|
"isTranslationOf" => "translatedFrom",
|
12
12
|
"hasPreprint" => "hasReprint",
|
13
13
|
"isSupplementTo" => "complements",
|
14
|
+
"isPartOf" => "partOf",
|
15
|
+
"hasPart" => "hasPart",
|
14
16
|
}.freeze
|
15
17
|
URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"
|
16
18
|
|
@@ -20,24 +22,48 @@ module RelatonNist
|
|
20
22
|
@ext = format.sub(/^bib/, "")
|
21
23
|
end
|
22
24
|
|
23
|
-
def parse_docid(doc)
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
when "10.6028/NBS.
|
28
|
-
when "10.6028/NBS.
|
29
|
-
when "10.6028/
|
30
|
-
|
31
|
-
|
25
|
+
def parse_docid(doc) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
26
|
+
# case doi
|
27
|
+
# when "10.6028/NBS.CIRC.12e2revjune" then doi.sub!("13e", "12e")
|
28
|
+
# when "10.6028/NBS.CIRC.36e2" then doi.sub!("46e", "36e")
|
29
|
+
# when "10.6028/NBS.HB.67suppJune1967" then doi.sub!("1965", "1967")
|
30
|
+
# when "10.6028/NBS.HB.105-1r1990" then doi.sub!("105-1-1990", "105-1r1990")
|
31
|
+
# when "10.6028/NIST.HB.150-10-1995" then doi.sub!(/150-10$/, "150-10-1995")
|
32
|
+
# end
|
33
|
+
# anchor = doi.split("/")[1..-1].join "/"
|
34
|
+
[
|
35
|
+
{ type: "NIST", id: pub_id(doc) },
|
36
|
+
{ type: "DOI", id: doi(doc) },
|
37
|
+
{ type: "NIST", id: anchor(doc), scope: "anchor" },
|
38
|
+
]
|
39
|
+
end
|
40
|
+
|
41
|
+
def pub_id(doc)
|
42
|
+
anchor(doc).gsub(".", " ")
|
43
|
+
end
|
44
|
+
|
45
|
+
def doi(doc) # rubocop:disable Metrics/CyclomaticComplexity
|
46
|
+
id = doc.at("doi_data/doi").text
|
47
|
+
case id
|
48
|
+
when "10.6028/NBS.CIRC.e2e" then "10.6028/NBS.CIRC.2e2"
|
49
|
+
when "10.6028/NBS.CIRC.sup" then "10.6028/NBS.CIRC.24e7sup"
|
50
|
+
when "10.6028/NBS.CIRC.supJun1925-Jun1926" then "10.6028/NBS.CIRC.24e7sup2"
|
51
|
+
when "10.6028/NBS.CIRC.supJun1925-Jun1927" then "10.6028/NBS.CIRC.24e7sup3"
|
52
|
+
when "10.6028/NBS.CIRC.24supJuly1922" then "10.6028/NBS.CIRC.24e6sup"
|
53
|
+
when "10.6028/NBS.CIRC.24supJan1924" then "10.6028/NBS.CIRC.24e6sup2"
|
54
|
+
else id
|
32
55
|
end
|
33
|
-
|
56
|
+
end
|
57
|
+
|
58
|
+
def anchor(doc)
|
59
|
+
doi(doc).split("/")[1..-1].join "/"
|
34
60
|
end
|
35
61
|
|
36
62
|
# @param doc [Nokogiri::XML::Element]
|
37
63
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
38
64
|
def fetch_docid(doc)
|
39
65
|
parse_docid(doc).map do |id|
|
40
|
-
RelatonBib::DocumentIdentifier.new(
|
66
|
+
RelatonBib::DocumentIdentifier.new(**id)
|
41
67
|
end
|
42
68
|
end
|
43
69
|
|
@@ -47,7 +73,7 @@ module RelatonNist
|
|
47
73
|
t = doc.xpath("titles/title|titles/subtitle")
|
48
74
|
return [] unless t.any?
|
49
75
|
|
50
|
-
RelatonBib::TypedTitleString.from_string t.map(&:text).join
|
76
|
+
RelatonBib::TypedTitleString.from_string t.map(&:text).join, "en", "Latn"
|
51
77
|
end
|
52
78
|
|
53
79
|
# @param doc [Nokogiri::XML::Element]
|
@@ -76,12 +102,11 @@ module RelatonNist
|
|
76
102
|
def fetch_relation(doc)
|
77
103
|
ns = "http://www.crossref.org/relations.xsd"
|
78
104
|
doc.xpath("./ns:program/ns:related_item", ns: ns).map do |rel|
|
79
|
-
|
80
|
-
|
81
|
-
# ref, = parse_docid doc
|
82
|
-
fref = RelatonBib::FormattedRef.new content: doi.text
|
105
|
+
rdoi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: ns)
|
106
|
+
fref = RelatonBib::FormattedRef.new content: rdoi.text
|
83
107
|
bibitem = RelatonBib::BibliographicItem.new formattedref: fref
|
84
|
-
type = RELATION_TYPES[
|
108
|
+
type = RELATION_TYPES[rdoi["relationship-type"]]
|
109
|
+
warn "Relation type #{rdoi['relationship-type']} not found" unless type
|
85
110
|
{ type: type, bibitem: bibitem }
|
86
111
|
end
|
87
112
|
end
|
@@ -123,22 +148,42 @@ module RelatonNist
|
|
123
148
|
fullname = RelatonBib::FullName.new(
|
124
149
|
surname: surname, forename: forename, initial: initial, identifier: ident,
|
125
150
|
)
|
126
|
-
person = RelatonBib::Person.new name: fullname
|
151
|
+
person = RelatonBib::Person.new name: fullname, affiliation: affiliation(doc)
|
127
152
|
{ entity: person, role: [{ type: p["contributor_role"] }] }
|
128
153
|
end
|
129
154
|
contribs + doc.xpath("publisher").map do |p|
|
130
155
|
abbr = p.at("../institution/institution_acronym")&.text
|
131
|
-
|
156
|
+
place = p.at("./publisher_place")
|
157
|
+
cont = []
|
158
|
+
if place
|
159
|
+
city, state = place.text.split(", ")
|
160
|
+
cont << RelatonBib::Address.new(street: [], city: city, state: state, country: "US")
|
161
|
+
end
|
162
|
+
org = RelatonBib::Organization.new(
|
163
|
+
name: p.at("publisher_name").text, abbreviation: abbr, contact: cont,
|
164
|
+
)
|
132
165
|
{ entity: org, role: [{ type: "publisher" }] }
|
133
166
|
end
|
134
167
|
end
|
135
168
|
|
169
|
+
def affiliation(doc)
|
170
|
+
doc.xpath("./institution/institution_department").map do |id|
|
171
|
+
org = RelatonBib::Organization.new name: id.text
|
172
|
+
RelatonBib::Affiliation.new organization: org
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
136
176
|
# @param doc [Nokogiri::XML::Element]
|
137
177
|
# @return [Array<String>]
|
138
178
|
def fetch_place(doc)
|
139
179
|
doc.xpath("institution/institution_place").map(&:text)
|
140
180
|
end
|
141
181
|
|
182
|
+
def fetch_series(doc)
|
183
|
+
title = RelatonBib::TypedTitleString.new(content: "NIST")
|
184
|
+
[RelatonBib::Series.new(title: title, number: pub_id(doc))]
|
185
|
+
end
|
186
|
+
|
142
187
|
#
|
143
188
|
# Save document
|
144
189
|
#
|
@@ -174,14 +219,15 @@ module RelatonNist
|
|
174
219
|
link: fetch_link(doc), abstract: fetch_abstract(doc),
|
175
220
|
date: fetch_date(doc), edition: fetch_edition(doc),
|
176
221
|
contributor: fetch_contributor(doc), relation: fetch_relation(doc),
|
177
|
-
place: fetch_place(doc),
|
222
|
+
place: fetch_place(doc), series: fetch_series(doc),
|
178
223
|
language: [doc["language"]], script: ["Latn"], doctype: "standard"
|
179
224
|
)
|
180
225
|
write_file item
|
181
226
|
rescue StandardError => e
|
182
227
|
warn "Document: #{doc.at('doi').text}"
|
183
228
|
warn e.message
|
184
|
-
|
229
|
+
warn e.backtrace[0..5].join("\n")
|
230
|
+
# raise e
|
185
231
|
end
|
186
232
|
|
187
233
|
#
|
@@ -202,6 +248,7 @@ module RelatonNist
|
|
202
248
|
puts "Done in: #{(t2 - t1).round} sec."
|
203
249
|
rescue StandardError => e
|
204
250
|
warn e.message
|
251
|
+
warn e.backtrace[0..5].join("\n")
|
205
252
|
end
|
206
253
|
|
207
254
|
#
|
@@ -28,17 +28,18 @@ module RelatonNist
|
|
28
28
|
# @option opts [TrueClass, FalseClass] :bibdata
|
29
29
|
#
|
30
30
|
# @return [String] Relaton XML serialisation of reference
|
31
|
-
def get(code, year = nil, opts = {})
|
32
|
-
return fetch_ref_err(code, year, []) if code.match?
|
31
|
+
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
32
|
+
return fetch_ref_err(code, year, []) if code.match?(/\sEP$/)
|
33
33
|
|
34
|
-
/^(?<code2>[
|
34
|
+
/^(?<code2>[^(]+)(?:\((?<date2>\w+\s(?:\d{2},\s)?\d{4})\))?\s?\(?(?:(?<=\()(?<stage>[^\)]+))?/ =~ code
|
35
35
|
stage ||= /(?<=\.)PD-\w+(?=\.)/.match(code)&.to_s
|
36
36
|
if code2
|
37
37
|
code = code2.strip
|
38
38
|
if date2
|
39
|
-
|
39
|
+
case date2
|
40
|
+
when /\w+\s\d{4}/
|
40
41
|
opts[:issued_date] = Date.strptime date2, "%B %Y"
|
41
|
-
|
42
|
+
when /\w+\s\d{2},\s\d{4}/
|
42
43
|
opts[:updated_date] = Date.strptime date2, "%B %d, %Y"
|
43
44
|
end
|
44
45
|
end
|
@@ -209,9 +209,15 @@ module RelatonNist
|
|
209
209
|
# @param ref [String]
|
210
210
|
# @param uri [String]
|
211
211
|
# @return [RelatonNist::DocumentRelation]
|
212
|
-
def doc_relation(type, ref, uri, lang = "en", script = "Latn")
|
212
|
+
def doc_relation(type, ref, uri, lang = "en", script = "Latn") # rubocop:disable Metrics/MethodLength
|
213
|
+
if type == "supersedes"
|
214
|
+
descr = RelatonBib::FormattedString.new(content: "supersedes", language: lang, script: script)
|
215
|
+
t = "obsoletes"
|
216
|
+
else t = type
|
217
|
+
end
|
213
218
|
DocumentRelation.new(
|
214
|
-
type:
|
219
|
+
type: t,
|
220
|
+
description: descr,
|
215
221
|
bibitem: RelatonBib::BibliographicItem.new(
|
216
222
|
formattedref: RelatonBib::FormattedRef.new(
|
217
223
|
content: ref, language: lang, script: script, format: "text/plain",
|
data/lib/relaton_nist/version.rb
CHANGED
data/relaton_nist.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.
|
4
|
+
version: 1.9.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -126,16 +126,16 @@ dependencies:
|
|
126
126
|
name: relaton-bib
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
|
-
- - "
|
129
|
+
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 1.9.
|
131
|
+
version: 1.9.19
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- - "
|
136
|
+
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 1.9.
|
138
|
+
version: 1.9.19
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: rubyzip
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|