relaton-nist 2.0.0.pre.alpha.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -0
- data/lib/relaton/nist/version.rb +1 -1
- data/relaton_nist.gemspec +1 -1
- metadata +3 -4
- data/lib/relaton/nist/tech_pubs_parser.rb +0 -321
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 44c34385848a8014460e7001487ae66c504889a55d4d43e7825436af1ed8b447
|
|
4
|
+
data.tar.gz: 5a8484fe85b73b539de8d247193359a0004586644163dea56c7d31aa67c15afd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 53e6e72ccf31a11324e4258bef3a0239820af979ecf415f366936f4c7a15cb1564d3a26dd91812bccba3b87c9163b2ce6e76f6a83342a1d017b48c0be4a6989d
|
|
7
|
+
data.tar.gz: a2c5b41f673d8c6efd526d57f5c12b79d9e2c36db3609bbdf9d4059baea2dd183914d5ea873d2524bed998e41837b605b51919e4f5bfeed3e4d2e93c37ced2aa
|
data/.github/workflows/rake.yml
CHANGED
data/lib/relaton/nist/version.rb
CHANGED
data/relaton_nist.gemspec
CHANGED
|
@@ -27,7 +27,7 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.add_dependency "mechanize", "~> 2.0"
|
|
28
28
|
spec.add_dependency "loc_mods", "~> 0.2.0"
|
|
29
29
|
spec.add_dependency "pubid", "~> 1.15.6"
|
|
30
|
-
spec.add_dependency "relaton-bib", "~> 2.0.0
|
|
30
|
+
spec.add_dependency "relaton-bib", "~> 2.0.0"
|
|
31
31
|
spec.add_dependency "relaton-core", "~> 0.0.13"
|
|
32
32
|
spec.add_dependency "relaton-index", "~> 0.2.0"
|
|
33
33
|
spec.add_dependency "rubyzip"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-nist
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.0.pre.alpha.3
|
|
4
|
+
version: 2.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
@@ -71,14 +71,14 @@ dependencies:
|
|
|
71
71
|
requirements:
|
|
72
72
|
- - "~>"
|
|
73
73
|
- !ruby/object:Gem::Version
|
|
74
|
-
version: 2.0.0
|
|
74
|
+
version: 2.0.0
|
|
75
75
|
type: :runtime
|
|
76
76
|
prerelease: false
|
|
77
77
|
version_requirements: !ruby/object:Gem::Requirement
|
|
78
78
|
requirements:
|
|
79
79
|
- - "~>"
|
|
80
80
|
- !ruby/object:Gem::Version
|
|
81
|
-
version: 2.0.0
|
|
81
|
+
version: 2.0.0
|
|
82
82
|
- !ruby/object:Gem::Dependency
|
|
83
83
|
name: relaton-core
|
|
84
84
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -165,7 +165,6 @@ files:
|
|
|
165
165
|
- lib/relaton/nist/relation.rb
|
|
166
166
|
- lib/relaton/nist/scraper.rb
|
|
167
167
|
- lib/relaton/nist/series.yaml
|
|
168
|
-
- lib/relaton/nist/tech_pubs_parser.rb
|
|
169
168
|
- lib/relaton/nist/util.rb
|
|
170
169
|
- lib/relaton/nist/version.rb
|
|
171
170
|
- relaton_nist.gemspec
|
|
@@ -1,321 +0,0 @@
|
|
|
1
|
-
module Relaton
|
|
2
|
-
module Nist
|
|
3
|
-
class TechPubsParser
|
|
4
|
-
RELATION_TYPES = {
|
|
5
|
-
"replaces" => "obsoletes",
|
|
6
|
-
"isVersionOf" => "editionOf",
|
|
7
|
-
"hasTranslation" => "hasTranslation",
|
|
8
|
-
"isTranslationOf" => "translatedFrom",
|
|
9
|
-
"hasPreprint" => "hasReprint",
|
|
10
|
-
"isPreprintOf" => "hasDraft",
|
|
11
|
-
"isSupplementTo" => "complements",
|
|
12
|
-
"isPartOf" => "partOf",
|
|
13
|
-
"hasPart" => "hasPart",
|
|
14
|
-
}.freeze
|
|
15
|
-
|
|
16
|
-
ATTRS = %i[docidentifier title source abstract date edition contributor
|
|
17
|
-
relation status place series].freeze
|
|
18
|
-
NS = "http://www.crossref.org/relations.xsd".freeze
|
|
19
|
-
|
|
20
|
-
def initialize(doc, series)
|
|
21
|
-
@doc = doc
|
|
22
|
-
@series = series
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
#
|
|
26
|
-
# Parse XML document
|
|
27
|
-
#
|
|
28
|
-
# @param doc [Nokogiri::XML::Element] XML document
|
|
29
|
-
# @param series [Hash] series hash map
|
|
30
|
-
#
|
|
31
|
-
# @return [Relaton::Nist::ItemData] bibliographic item
|
|
32
|
-
#
|
|
33
|
-
def self.parse(doc, series)
|
|
34
|
-
new(doc, series).parse
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
#
|
|
38
|
-
# Create document instance
|
|
39
|
-
#
|
|
40
|
-
# @return [Relaton::Nist::ItemData] bibliographic item
|
|
41
|
-
#
|
|
42
|
-
def parse
|
|
43
|
-
ItemData.new(
|
|
44
|
-
type: "standard", language: [@doc["language"]], script: ["Latn"],
|
|
45
|
-
ext: Ext.new(doctype: parse_doctype), **args
|
|
46
|
-
)
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def args
|
|
50
|
-
ATTRS.to_h { |a| [a, send("parse_#{a}")] }
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
# @return [Array<Bib::Docidentifier>]
|
|
54
|
-
def parse_docidentifier
|
|
55
|
-
[
|
|
56
|
-
{ type: "NIST", content: pub_id, primary: true },
|
|
57
|
-
{ type: "DOI", content: doi },
|
|
58
|
-
].map { |id| Bib::Docidentifier.new(**id) }
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
#
|
|
62
|
-
# Parse document's ID from XML
|
|
63
|
-
#
|
|
64
|
-
# @return [String] document's ID
|
|
65
|
-
#
|
|
66
|
-
def pub_id
|
|
67
|
-
if doi
|
|
68
|
-
doi.split("/")[1..].join("/").gsub(".", " ").sub(/^[\D]+/, &:upcase)
|
|
69
|
-
else
|
|
70
|
-
@doc.at("publisher_item/item_number").text
|
|
71
|
-
end
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
def doi # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
|
|
75
|
-
return @doi if defined? @doi
|
|
76
|
-
|
|
77
|
-
@doi = begin
|
|
78
|
-
id = @doc.at("doi_data/doi")&.text
|
|
79
|
-
case id
|
|
80
|
-
when "10.6028/NBS.CIRC.e2e" then "10.6028/NBS.CIRC.2e2"
|
|
81
|
-
when "10.6028/NBS.CIRC.sup" then "10.6028/NBS.CIRC.24e7sup"
|
|
82
|
-
when "10.6028/NBS.CIRC.supJun1925-Jun1926" then "10.6028/NBS.CIRC.24e7sup2"
|
|
83
|
-
when "10.6028/NBS.CIRC.supJun1925-Jun1927" then "10.6028/NBS.CIRC.24e7sup3"
|
|
84
|
-
when "10.6028/NBS.CIRC.24supJuly1922" then "10.6028/NBS.CIRC.24e6sup"
|
|
85
|
-
when "10.6028/NBS.CIRC.24supJan1924" then "10.6028/NBS.CIRC.24e6sup2"
|
|
86
|
-
else id
|
|
87
|
-
end
|
|
88
|
-
end
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
# @return [Array<Bib::Title>]
|
|
92
|
-
def parse_title
|
|
93
|
-
t = @doc.xpath("titles/title|titles/subtitle")
|
|
94
|
-
return [] unless t.any?
|
|
95
|
-
|
|
96
|
-
[Bib::Title.new(content: t.map(&:text).join("\n"), language: "en", script: "Latn")]
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
# @return [Array<Bib::Uri>]
|
|
100
|
-
def parse_source
|
|
101
|
-
pdf_url = @doc.at("doi_data/resource").text
|
|
102
|
-
doi_url = "https://doi.org/#{doi}"
|
|
103
|
-
[
|
|
104
|
-
Bib::Uri.new(type: "doi", content: doi_url),
|
|
105
|
-
Bib::Uri.new(type: "pdf", content: pdf_url),
|
|
106
|
-
]
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
# @return [Array<Bib::LocalizedMarkedUpString>]
|
|
110
|
-
def parse_abstract
|
|
111
|
-
@doc.xpath(
|
|
112
|
-
"jats:abstract/jats:p", "jats" => "http://www.ncbi.nlm.nih.gov/JATS1"
|
|
113
|
-
).each_with_object([]) do |a, m|
|
|
114
|
-
next if a.text.empty?
|
|
115
|
-
|
|
116
|
-
m << Bib::Abstract.new(
|
|
117
|
-
content: a.text, language: @doc["language"], script: "Latn",
|
|
118
|
-
)
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
# @return [Array<Bib::Date>]
|
|
123
|
-
def parse_date
|
|
124
|
-
@doc.xpath("publication_date|approval_date").map do |dt|
|
|
125
|
-
on = dt.at("year").text
|
|
126
|
-
if (m = dt.at "month")
|
|
127
|
-
on += "-#{m.text}"
|
|
128
|
-
d = dt.at "day"
|
|
129
|
-
on += "-#{d.text}" if d
|
|
130
|
-
end
|
|
131
|
-
type = dt.name == "publication_date" ? "published" : "confirmed"
|
|
132
|
-
Bib::Date.new(type: type, at: on)
|
|
133
|
-
end
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
def parse_doctype
|
|
137
|
-
Doctype.new(content: "standard")
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
# @return [String]
|
|
141
|
-
def parse_edition
|
|
142
|
-
@doc.at("edition_number")&.text
|
|
143
|
-
end
|
|
144
|
-
|
|
145
|
-
# @return [Array<Bib::Contributor>]
|
|
146
|
-
def parse_contributor # rubocop:disable Metrics/AbcSize
|
|
147
|
-
contribs = @doc.xpath("contributors/person_name").map do |p|
|
|
148
|
-
person = Bib::Person.new(
|
|
149
|
-
name: fullname(p), affiliation: affiliation, identifier: identifier(p),
|
|
150
|
-
)
|
|
151
|
-
Bib::Contributor.new(
|
|
152
|
-
person: person,
|
|
153
|
-
role: [Bib::Contributor::Role.new(type: p["contributor_role"])],
|
|
154
|
-
)
|
|
155
|
-
end
|
|
156
|
-
contribs + @doc.xpath("publisher").map do |p|
|
|
157
|
-
Bib::Contributor.new(
|
|
158
|
-
organization: create_org(p),
|
|
159
|
-
role: [Bib::Contributor::Role.new(type: "publisher")],
|
|
160
|
-
)
|
|
161
|
-
end
|
|
162
|
-
end
|
|
163
|
-
|
|
164
|
-
def identifier(person)
|
|
165
|
-
person.xpath("ORCID").map do |id|
|
|
166
|
-
Bib::Person::Identifier.new(type: "orcid", content: id.text)
|
|
167
|
-
end
|
|
168
|
-
end
|
|
169
|
-
|
|
170
|
-
#
|
|
171
|
-
# Create full name object from person name element.
|
|
172
|
-
#
|
|
173
|
-
# @param [Nokogiri::XML::Element] person name element
|
|
174
|
-
#
|
|
175
|
-
# @return [Bib::FullName] full name object
|
|
176
|
-
#
|
|
177
|
-
def fullname(person)
|
|
178
|
-
fname, initials = forename_initial person
|
|
179
|
-
surname = localized_string person.at("surname")&.text
|
|
180
|
-
completename = localized_string person.text unless surname
|
|
181
|
-
Bib::FullName.new(
|
|
182
|
-
surname: surname, forename: fname, formatted_initials: initials,
|
|
183
|
-
completename: completename,
|
|
184
|
-
)
|
|
185
|
-
end
|
|
186
|
-
|
|
187
|
-
#
|
|
188
|
-
# Create affiliation organization
|
|
189
|
-
#
|
|
190
|
-
# @return [Array<Bib::Affiliation>] affiliation
|
|
191
|
-
#
|
|
192
|
-
def affiliation
|
|
193
|
-
@doc.xpath("./institution/institution_department").map do |id|
|
|
194
|
-
org = Bib::Organization.new(
|
|
195
|
-
name: [Bib::TypedLocalizedString.new(content: id.text)],
|
|
196
|
-
)
|
|
197
|
-
Bib::Affiliation.new(organization: org)
|
|
198
|
-
end
|
|
199
|
-
end
|
|
200
|
-
|
|
201
|
-
#
|
|
202
|
-
# Create forename and initials objects from person name element.
|
|
203
|
-
#
|
|
204
|
-
# @param [Nokogiri::XML::Element] person person name element
|
|
205
|
-
#
|
|
206
|
-
# @return [Array<Array<Bib::FullNameType::Forename>, Bib::LocalizedString>]
|
|
207
|
-
#
|
|
208
|
-
def forename_initial(person) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
209
|
-
fnames = []
|
|
210
|
-
fname = person.at("given_name")&.text
|
|
211
|
-
if fname
|
|
212
|
-
if /^(?:(?<name>\w+)\s)?(?<inits>(?:\w(?:\.|\b)\s?)+)/ =~ fname
|
|
213
|
-
ints = inits.split(/[.\s]*/)
|
|
214
|
-
fnames << forename(name, ints.shift)
|
|
215
|
-
ints.each { |i| fnames << forename(nil, i) }
|
|
216
|
-
else
|
|
217
|
-
fn = forename(fname)
|
|
218
|
-
fnames << fn if fn
|
|
219
|
-
end
|
|
220
|
-
end
|
|
221
|
-
initials = localized_string inits unless inits.nil? || inits.empty?
|
|
222
|
-
[fnames, initials]
|
|
223
|
-
end
|
|
224
|
-
|
|
225
|
-
#
|
|
226
|
-
# Create forename object
|
|
227
|
-
#
|
|
228
|
-
# @param [String, nil] cnt forename content
|
|
229
|
-
# @param [String, nil] init initial content
|
|
230
|
-
#
|
|
231
|
-
# @return [Bib::FullNameType::Forename] forename object
|
|
232
|
-
#
|
|
233
|
-
def forename(cnt, init = nil)
|
|
234
|
-
return if (cnt.nil? || cnt.empty?) && (init.nil? || init.empty?)
|
|
235
|
-
|
|
236
|
-
Bib::FullNameType::Forename.new(
|
|
237
|
-
content: cnt, language: @doc["language"], script: "Latn", initial: init,
|
|
238
|
-
)
|
|
239
|
-
end
|
|
240
|
-
|
|
241
|
-
#
|
|
242
|
-
# Create publisher organization
|
|
243
|
-
#
|
|
244
|
-
# @param [Nokogiri::XML::Element] pub publisher element
|
|
245
|
-
#
|
|
246
|
-
# @return [Bib::Organization] publisher organization
|
|
247
|
-
#
|
|
248
|
-
def create_org(pub) # rubocop:disable Metrics/AbcSize
|
|
249
|
-
name = pub.at("publisher_name").text
|
|
250
|
-
abbr = pub.at("../institution[institution_name[.='#{name}']]/institution_acronym")&.text
|
|
251
|
-
place = pub.at("./publisher_place") ||
|
|
252
|
-
pub.at("../institution[institution_name[.='#{name}']]/institution_place")
|
|
253
|
-
cont = []
|
|
254
|
-
if place
|
|
255
|
-
city, state = place.text.split(", ")
|
|
256
|
-
cont << Bib::Address.new(street: [], city: city, state: state, country: "US")
|
|
257
|
-
end
|
|
258
|
-
Bib::Organization.new(
|
|
259
|
-
name: [Bib::TypedLocalizedString.new(content: name)],
|
|
260
|
-
abbreviation: abbr ? Bib::LocalizedString.new(content: abbr) : nil,
|
|
261
|
-
address: cont,
|
|
262
|
-
)
|
|
263
|
-
end
|
|
264
|
-
|
|
265
|
-
# @return [Array<Nist::Relation>]
|
|
266
|
-
def parse_relation # rubocop:disable Metrics/AbcSize
|
|
267
|
-
@doc.xpath("./ns:program/ns:related_item", ns: NS).map do |rel|
|
|
268
|
-
rdoi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: NS)
|
|
269
|
-
id = rdoi.text.split("/")[1..].join("/").gsub(".", " ")
|
|
270
|
-
fref = Bib::Formattedref.new(content: id)
|
|
271
|
-
docid = Bib::Docidentifier.new(type: "NIST", content: id, primary: true)
|
|
272
|
-
bibitem = ItemData.new(formattedref: fref, docidentifier: [docid])
|
|
273
|
-
type = RELATION_TYPES[rdoi["relationship-type"]]
|
|
274
|
-
warn "Relation type #{rdoi['relationship-type']} not found" unless type
|
|
275
|
-
Relation.new(type: type, bibitem: bibitem)
|
|
276
|
-
end
|
|
277
|
-
end
|
|
278
|
-
|
|
279
|
-
def parse_status
|
|
280
|
-
s = @doc.at("./ns:program/ns:related_item/ns:*[@relationship-type='isPreprintOf']", ns: NS)
|
|
281
|
-
return unless s
|
|
282
|
-
|
|
283
|
-
Bib::Status.new(stage: Bib::Status::Stage.new(content: "preprint"))
|
|
284
|
-
end
|
|
285
|
-
|
|
286
|
-
# @return [Array<Bib::Place>]
|
|
287
|
-
def parse_place
|
|
288
|
-
@doc.xpath("institution/institution_place").map do |p|
|
|
289
|
-
city, state = p.text.split(", ")
|
|
290
|
-
Bib::Place.new(city: city, region: [Bib::Place::RegionType.new(iso: state)])
|
|
291
|
-
end
|
|
292
|
-
end
|
|
293
|
-
|
|
294
|
-
#
|
|
295
|
-
# Fetches series
|
|
296
|
-
#
|
|
297
|
-
# @return [Array<Bib::Series>] series
|
|
298
|
-
#
|
|
299
|
-
def parse_series
|
|
300
|
-
prf, srs, num = pub_id.split
|
|
301
|
-
sname = @series[srs] || srs
|
|
302
|
-
title = Bib::Title.new(content: "#{prf} #{sname}")
|
|
303
|
-
abbr = Bib::LocalizedString.new(content: srs)
|
|
304
|
-
[Bib::Series.new(title: [title], abbreviation: abbr, number: num)]
|
|
305
|
-
end
|
|
306
|
-
|
|
307
|
-
#
|
|
308
|
-
# Create localized string
|
|
309
|
-
#
|
|
310
|
-
# @param [String] content content of string
|
|
311
|
-
#
|
|
312
|
-
# @return [Bib::LocalizedString] localized string
|
|
313
|
-
#
|
|
314
|
-
def localized_string(content)
|
|
315
|
-
return unless content
|
|
316
|
-
|
|
317
|
-
Bib::LocalizedString.new(content: content, language: @doc["language"], script: "Latn")
|
|
318
|
-
end
|
|
319
|
-
end
|
|
320
|
-
end
|
|
321
|
-
end
|