relaton-nist 1.9.4 → 1.9.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/grammars/biblio.rng +2 -2
- data/lib/relaton_nist/data_fetcher.rb +68 -22
- data/lib/relaton_nist/nist_bibliography.rb +6 -5
- data/lib/relaton_nist/scrapper.rb +8 -2
- data/lib/relaton_nist/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e35a427023ba3ef9662b0e1c005717e82ea3086854c615d4a69302b62f20803f
|
4
|
+
data.tar.gz: 92ee933270aaabe628a8639e8d24cf550b031bb918c04a4aa72cc7e59ad5f314
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3d5a16eab60a743ee45012c494fb88af160498cb031fe66f176356dde77a29a80e87106d6c99d3cba9e70826ba47dbc987e2b0514d513c5470377492604325b4
|
7
|
+
data.tar.gz: 5ad90f3a527f7381e98510d71f2d0507569ce0b36bdf6f47b1b1e15510edbc54342ec53063e7e25ec36d97699ff586dbfc84b787efcaf546ba03e1d5fb3de2f9
|
data/grammars/biblio.rng
CHANGED
@@ -11,6 +11,8 @@ module RelatonNist
|
|
11
11
|
"isTranslationOf" => "translatedFrom",
|
12
12
|
"hasPreprint" => "hasReprint",
|
13
13
|
"isSupplementTo" => "complements",
|
14
|
+
"isPartOf" => "partOf",
|
15
|
+
"hasPart" => "hasPart",
|
14
16
|
}.freeze
|
15
17
|
URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"
|
16
18
|
|
@@ -20,24 +22,48 @@ module RelatonNist
|
|
20
22
|
@ext = format.sub(/^bib/, "")
|
21
23
|
end
|
22
24
|
|
23
|
-
def parse_docid(doc)
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
when "10.6028/NBS.
|
28
|
-
when "10.6028/NBS.
|
29
|
-
when "10.6028/
|
30
|
-
|
31
|
-
|
25
|
+
def parse_docid(doc) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
26
|
+
# case doi
|
27
|
+
# when "10.6028/NBS.CIRC.12e2revjune" then doi.sub!("13e", "12e")
|
28
|
+
# when "10.6028/NBS.CIRC.36e2" then doi.sub!("46e", "36e")
|
29
|
+
# when "10.6028/NBS.HB.67suppJune1967" then doi.sub!("1965", "1967")
|
30
|
+
# when "10.6028/NBS.HB.105-1r1990" then doi.sub!("105-1-1990", "105-1r1990")
|
31
|
+
# when "10.6028/NIST.HB.150-10-1995" then doi.sub!(/150-10$/, "150-10-1995")
|
32
|
+
# end
|
33
|
+
# anchor = doi.split("/")[1..-1].join "/"
|
34
|
+
[
|
35
|
+
{ type: "NIST", id: pub_id(doc) },
|
36
|
+
{ type: "DOI", id: doi(doc) },
|
37
|
+
{ type: "NIST", id: anchor(doc), scope: "anchor" },
|
38
|
+
]
|
39
|
+
end
|
40
|
+
|
41
|
+
def pub_id(doc)
|
42
|
+
anchor(doc).gsub(".", " ")
|
43
|
+
end
|
44
|
+
|
45
|
+
def doi(doc) # rubocop:disable Metrics/CyclomaticComplexity
|
46
|
+
id = doc.at("doi_data/doi").text
|
47
|
+
case id
|
48
|
+
when "10.6028/NBS.CIRC.e2e" then "10.6028/NBS.CIRC.2e2"
|
49
|
+
when "10.6028/NBS.CIRC.sup" then "10.6028/NBS.CIRC.24e7sup"
|
50
|
+
when "10.6028/NBS.CIRC.supJun1925-Jun1926" then "10.6028/NBS.CIRC.24e7sup2"
|
51
|
+
when "10.6028/NBS.CIRC.supJun1925-Jun1927" then "10.6028/NBS.CIRC.24e7sup3"
|
52
|
+
when "10.6028/NBS.CIRC.24supJuly1922" then "10.6028/NBS.CIRC.24e6sup"
|
53
|
+
when "10.6028/NBS.CIRC.24supJan1924" then "10.6028/NBS.CIRC.24e6sup2"
|
54
|
+
else id
|
32
55
|
end
|
33
|
-
|
56
|
+
end
|
57
|
+
|
58
|
+
def anchor(doc)
|
59
|
+
doi(doc).split("/")[1..-1].join "/"
|
34
60
|
end
|
35
61
|
|
36
62
|
# @param doc [Nokogiri::XML::Element]
|
37
63
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
38
64
|
def fetch_docid(doc)
|
39
65
|
parse_docid(doc).map do |id|
|
40
|
-
RelatonBib::DocumentIdentifier.new(
|
66
|
+
RelatonBib::DocumentIdentifier.new(**id)
|
41
67
|
end
|
42
68
|
end
|
43
69
|
|
@@ -47,7 +73,7 @@ module RelatonNist
|
|
47
73
|
t = doc.xpath("titles/title|titles/subtitle")
|
48
74
|
return [] unless t.any?
|
49
75
|
|
50
|
-
RelatonBib::TypedTitleString.from_string t.map(&:text).join
|
76
|
+
RelatonBib::TypedTitleString.from_string t.map(&:text).join, "en", "Latn"
|
51
77
|
end
|
52
78
|
|
53
79
|
# @param doc [Nokogiri::XML::Element]
|
@@ -76,12 +102,11 @@ module RelatonNist
|
|
76
102
|
def fetch_relation(doc)
|
77
103
|
ns = "http://www.crossref.org/relations.xsd"
|
78
104
|
doc.xpath("./ns:program/ns:related_item", ns: ns).map do |rel|
|
79
|
-
|
80
|
-
|
81
|
-
# ref, = parse_docid doc
|
82
|
-
fref = RelatonBib::FormattedRef.new content: doi.text
|
105
|
+
rdoi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: ns)
|
106
|
+
fref = RelatonBib::FormattedRef.new content: rdoi.text
|
83
107
|
bibitem = RelatonBib::BibliographicItem.new formattedref: fref
|
84
|
-
type = RELATION_TYPES[
|
108
|
+
type = RELATION_TYPES[rdoi["relationship-type"]]
|
109
|
+
warn "Relation type #{rdoi['relationship-type']} not found" unless type
|
85
110
|
{ type: type, bibitem: bibitem }
|
86
111
|
end
|
87
112
|
end
|
@@ -116,29 +141,48 @@ module RelatonNist
|
|
116
141
|
end
|
117
142
|
sname = p.at("surname").text
|
118
143
|
surname = RelatonBib::LocalizedString.new sname, doc["language"], "Latn"
|
119
|
-
initial = []
|
120
144
|
ident = p.xpath("ORCID").map do |id|
|
121
145
|
RelatonBib::PersonIdentifier.new "orcid", id.text
|
122
146
|
end
|
123
147
|
fullname = RelatonBib::FullName.new(
|
124
148
|
surname: surname, forename: forename, initial: initial, identifier: ident,
|
125
149
|
)
|
126
|
-
person = RelatonBib::Person.new name: fullname
|
150
|
+
person = RelatonBib::Person.new name: fullname, affiliation: affiliation(doc)
|
127
151
|
{ entity: person, role: [{ type: p["contributor_role"] }] }
|
128
152
|
end
|
129
153
|
contribs + doc.xpath("publisher").map do |p|
|
130
154
|
abbr = p.at("../institution/institution_acronym")&.text
|
131
|
-
|
155
|
+
place = p.at("./publisher_place")
|
156
|
+
cont = []
|
157
|
+
if place
|
158
|
+
city, state = place.text.split(", ")
|
159
|
+
cont << RelatonBib::Address.new(street: [], city: city, state: state, country: "US")
|
160
|
+
end
|
161
|
+
org = RelatonBib::Organization.new(
|
162
|
+
name: p.at("publisher_name").text, abbreviation: abbr, contact: cont,
|
163
|
+
)
|
132
164
|
{ entity: org, role: [{ type: "publisher" }] }
|
133
165
|
end
|
134
166
|
end
|
135
167
|
|
168
|
+
def affiliation(doc)
|
169
|
+
doc.xpath("./institution/institution_department").map do |id|
|
170
|
+
org = RelatonBib::Organization.new name: id.text
|
171
|
+
RelatonBib::Affiliation.new organization: org
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
136
175
|
# @param doc [Nokogiri::XML::Element]
|
137
176
|
# @return [Array<String>]
|
138
177
|
def fetch_place(doc)
|
139
178
|
doc.xpath("institution/institution_place").map(&:text)
|
140
179
|
end
|
141
180
|
|
181
|
+
def fetch_series(doc)
|
182
|
+
title = RelatonBib::TypedTitleString.new(content: "NIST")
|
183
|
+
[RelatonBib::Series.new(title: title, number: pub_id(doc))]
|
184
|
+
end
|
185
|
+
|
142
186
|
#
|
143
187
|
# Save document
|
144
188
|
#
|
@@ -174,14 +218,15 @@ module RelatonNist
|
|
174
218
|
link: fetch_link(doc), abstract: fetch_abstract(doc),
|
175
219
|
date: fetch_date(doc), edition: fetch_edition(doc),
|
176
220
|
contributor: fetch_contributor(doc), relation: fetch_relation(doc),
|
177
|
-
place: fetch_place(doc),
|
221
|
+
place: fetch_place(doc), series: fetch_series(doc),
|
178
222
|
language: [doc["language"]], script: ["Latn"], doctype: "standard"
|
179
223
|
)
|
180
224
|
write_file item
|
181
225
|
rescue StandardError => e
|
182
226
|
warn "Document: #{doc.at('doi').text}"
|
183
227
|
warn e.message
|
184
|
-
|
228
|
+
warn e.backtrace[0..5].join("\n")
|
229
|
+
# raise e
|
185
230
|
end
|
186
231
|
|
187
232
|
#
|
@@ -202,6 +247,7 @@ module RelatonNist
|
|
202
247
|
puts "Done in: #{(t2 - t1).round} sec."
|
203
248
|
rescue StandardError => e
|
204
249
|
warn e.message
|
250
|
+
warn e.backtrace[0..5].join("\n")
|
205
251
|
end
|
206
252
|
|
207
253
|
#
|
@@ -28,17 +28,18 @@ module RelatonNist
|
|
28
28
|
# @option opts [TrueClass, FalseClass] :bibdata
|
29
29
|
#
|
30
30
|
# @return [String] Relaton XML serialisation of reference
|
31
|
-
def get(code, year = nil, opts = {})
|
32
|
-
return fetch_ref_err(code, year, []) if code.match?
|
31
|
+
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
32
|
+
return fetch_ref_err(code, year, []) if code.match?(/\sEP$/)
|
33
33
|
|
34
|
-
/^(?<code2>[
|
34
|
+
/^(?<code2>[^(]+)(?:\((?<date2>\w+\s(?:\d{2},\s)?\d{4})\))?\s?\(?(?:(?<=\()(?<stage>[^\)]+))?/ =~ code
|
35
35
|
stage ||= /(?<=\.)PD-\w+(?=\.)/.match(code)&.to_s
|
36
36
|
if code2
|
37
37
|
code = code2.strip
|
38
38
|
if date2
|
39
|
-
|
39
|
+
case date2
|
40
|
+
when /\w+\s\d{4}/
|
40
41
|
opts[:issued_date] = Date.strptime date2, "%B %Y"
|
41
|
-
|
42
|
+
when /\w+\s\d{2},\s\d{4}/
|
42
43
|
opts[:updated_date] = Date.strptime date2, "%B %d, %Y"
|
43
44
|
end
|
44
45
|
end
|
@@ -209,9 +209,15 @@ module RelatonNist
|
|
209
209
|
# @param ref [String]
|
210
210
|
# @param uri [String]
|
211
211
|
# @return [RelatonNist::DocumentRelation]
|
212
|
-
def doc_relation(type, ref, uri, lang = "en", script = "Latn")
|
212
|
+
def doc_relation(type, ref, uri, lang = "en", script = "Latn") # rubocop:disable Metrics/MethodLength
|
213
|
+
if type == "supersedes"
|
214
|
+
descr = RelatonBib::FormattedString.new(content: "supersedes", language: lang, script: script)
|
215
|
+
t = "obsoletes"
|
216
|
+
else t = type
|
217
|
+
end
|
213
218
|
DocumentRelation.new(
|
214
|
-
type:
|
219
|
+
type: t,
|
220
|
+
description: descr,
|
215
221
|
bibitem: RelatonBib::BibliographicItem.new(
|
216
222
|
formattedref: RelatonBib::FormattedRef.new(
|
217
223
|
content: ref, language: lang, script: script, format: "text/plain",
|
data/lib/relaton_nist/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.
|
4
|
+
version: 1.9.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-01-
|
11
|
+
date: 2022-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|