relaton-nist 1.16.1 → 1.16.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +20 -9
- data/lib/relaton_nist/config.rb +10 -0
- data/lib/relaton_nist/data_fetcher.rb +25 -307
- data/lib/relaton_nist/hit_collection.rb +4 -2
- data/lib/relaton_nist/nist_bibliography.rb +9 -10
- data/lib/relaton_nist/tech_pubs_parser.rb +293 -0
- data/lib/relaton_nist/util.rb +9 -0
- data/lib/relaton_nist/version.rb +1 -1
- data/lib/relaton_nist.rb +8 -4
- metadata +5 -3
- data/resp.html +0 -665
@@ -0,0 +1,293 @@
|
|
1
|
+
module RelatonNist
  #
  # Converts a single XML publication record into a
  # RelatonNist::NistBibliographicItem. Every attribute listed in ATTRS is
  # produced by the matching `parse_<attr>` method.
  #
  class TechPubsParser
    # Maps the `relationship-type` attribute values read from the XML to the
    # relation type names used in the resulting bibliographic item.
    RELATION_TYPES = {
      "replaces" => "obsoletes",
      "isVersionOf" => "editionOf",
      "hasTranslation" => "hasTranslation",
      "isTranslationOf" => "translatedFrom",
      "hasPreprint" => "hasReprint",
      "isPreprintOf" => "hasDraft",
      "isSupplementTo" => "complements",
      "isPartOf" => "partOf",
      "hasPart" => "hasPart",
    }.freeze

    # Bibliographic item attributes; each one is filled by `parse_<name>`.
    ATTRS = %i[docid title link abstract date edition contributor relation docstatus place series].freeze

    # XML namespace of the `program/related_item` relation elements.
    NS = "http://www.crossref.org/relations.xsd".freeze

    # DOIs that are rewritten to a corrected value before any further use.
    DOI_FIXES = {
      "10.6028/NBS.CIRC.e2e" => "10.6028/NBS.CIRC.2e2",
      "10.6028/NBS.CIRC.sup" => "10.6028/NBS.CIRC.24e7sup",
      "10.6028/NBS.CIRC.supJun1925-Jun1926" => "10.6028/NBS.CIRC.24e7sup2",
      "10.6028/NBS.CIRC.supJun1925-Jun1927" => "10.6028/NBS.CIRC.24e7sup3",
      "10.6028/NBS.CIRC.24supJuly1922" => "10.6028/NBS.CIRC.24e6sup",
      "10.6028/NBS.CIRC.24supJan1924" => "10.6028/NBS.CIRC.24e6sup2",
    }.freeze

    #
    # @param doc [Nokogiri::XML::Element] XML document
    # @param series [Hash] series hash map (key: series abbreviation, value: series name)
    #
    def initialize(doc, series)
      @doc = doc
      @series = series
    end

    #
    # Parse XML document
    #
    # @param doc [Nokogiri::XML::Element] XML document
    # @param series [Hash] series hash map (key: series abbreviation, value: series name)
    #
    # @return [RelatonNist::NistBibliographicItem, nil] bibliographic item,
    #   or nil when the document could not be parsed
    #
    def self.parse(doc, series)
      new(doc, series).parse
    end

    #
    # Create document instance. Any StandardError raised while building the
    # item is reported with `warn` (document DOI, message, first six backtrace
    # lines) and swallowed.
    #
    # @return [RelatonNist::NistBibliographicItem, nil] bibliographic item,
    #   or nil when an error was rescued
    #
    def parse
      RelatonNist::NistBibliographicItem.new(
        type: "standard", language: [@doc["language"]], script: ["Latn"],
        doctype: "standard", **args
      )
    rescue StandardError => e
      # The reporting code itself must not raise: the `doi` node or the
      # backtrace may be missing.
      warn "Document: `#{@doc.at('doi')&.text}`"
      warn e.message
      warn e.backtrace.first(6).join("\n") if e.backtrace
      nil
    end

    #
    # Collect the keyword arguments for the bibliographic item.
    #
    # @return [Hash{Symbol=>Object}] one entry per ATTRS element
    #
    def args
      ATTRS.to_h { |a| [a, send("parse_#{a}")] }
    end

    # def anchor(doc)
    #   fetch_doi(doc).split("/")[1..-1].join "/"
    # end

    # @return [Array<RelatonBib::DocumentIdentifier>]
    def parse_docid
      [
        { type: "NIST", id: pub_id, primary: true },
        { type: "DOI", id: doi },
        # { type: "NIST", id: anchor(doc), scope: "anchor" },
      ].map { |id| RelatonBib::DocumentIdentifier.new(**id) }
    end

    #
    # Parse document's ID from XML: the DOI without its first `/`-separated
    # segment, dots replaced by spaces, and a leading "nist ir" upcased.
    #
    # @return [String] document's ID
    #
    def pub_id
      # anchor(doc).gsub(".", " ")
      doi.split("/")[1..].join("/").gsub(".", " ").sub(/^nist\sir/, "NIST IR")
    end

    #
    # Read the DOI from `doi_data/doi`, replacing the values listed in
    # DOI_FIXES. The result is memoized.
    #
    # @return [String] DOI
    #
    def doi
      @doi ||= begin
        id = @doc.at("doi_data/doi").text
        DOI_FIXES.fetch(id, id)
      end
    end

    # @return [Array<Hash>] empty when the document has no title elements
    def parse_title
      t = @doc.xpath("titles/title|titles/subtitle")
      return [] unless t.any?

      # RelatonBib::TypedTitleString.from_string t.map(&:text).join, "en", "Latn"
      [{ content: t.map(&:text).join("\n"), language: "en", script: "Latn",
         format: "text/plain" }]
    end

    # @return [Array<RelatonBib::TypedUri>] DOI link followed by the PDF link
    def parse_link
      pdf_url = @doc.at("doi_data/resource").text
      doi_url = "https://doi.org/#{doi}"
      [{ type: "doi", content: doi_url }, { type: "pdf", content: pdf_url }]
        .map { |l| RelatonBib::TypedUri.new(**l) }
    end

    # @return [Array<RelatonBib::FormattedString>] one entry per non-empty paragraph
    def parse_abstract
      paragraphs = @doc.xpath(
        "jats:abstract/jats:p", "jats" => "http://www.ncbi.nlm.nih.gov/JATS1"
      )
      paragraphs.reject { |a| a.text.empty? }.map do |a|
        RelatonBib::FormattedString.new(content: a.text, language: @doc["language"], script: "Latn")
      end
    end

    # @return [Array<RelatonBib::BibliographicDate>]
    def parse_date
      @doc.xpath("publication_date|approval_date").map do |dt|
        on = dt.at("year").text
        # The day is only appended when a month is present.
        if (m = dt.at "month")
          on += "-#{m.text}"
          d = dt.at "day"
          on += "-#{d.text}" if d
        end
        type = dt.name == "publication_date" ? "published" : "confirmed"
        RelatonBib::BibliographicDate.new(type: type, on: on)
      end
    end

    # @return [String, nil]
    def parse_edition
      @doc.at("edition_number")&.text
    end

    # @return [Array<Hash>] persons first, then publishers
    def parse_contributor
      persons = @doc.xpath("contributors/person_name").map do |p|
        person = RelatonBib::Person.new(name: fullname(p), affiliation: affiliation)
        { entity: person, role: [{ type: p["contributor_role"] }] }
      end
      publishers = @doc.xpath("publisher").map do |p|
        { entity: create_org(p), role: [{ type: "publisher" }] }
      end
      persons + publishers
    end

    #
    # Create full name object from person name element.
    #
    # @param [Nokogiri::XML::Element] person name element
    #
    # @return [RelatonBib::FullName] full name object
    #
    def fullname(person)
      fname, initials = forename_initial person
      surname = localized_string person.at("surname").text
      ident = person.xpath("ORCID").map do |id|
        RelatonBib::PersonIdentifier.new "orcid", id.text
      end
      RelatonBib::FullName.new(surname: surname, forename: fname,
                               initials: initials, identifier: ident)
    end

    #
    # Create affiliation organization
    #
    # @return [Array<RelatonBib::Affiliation>] affiliation
    #
    def affiliation
      @doc.xpath("./institution/institution_department").map do |id|
        org = RelatonBib::Organization.new name: id.text
        RelatonBib::Affiliation.new organization: org
      end
    end

    #
    # Create forename and initials objects from person name element.
    #
    # @param [Nokogiri::XML::Element] person person name element
    #
    # @return [Array<Array<RelatonBib::Forename>, RelatonBib::LocalizedString, nil>]
    #   forenames and initials (initials are nil when none were recognized)
    #
    def forename_initial(person) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
      fnames = []
      fname = person.at("given_name")&.text
      if fname
        # The regexp literal must stay on the left of `=~` so that the named
        # captures `name` and `inits` become local variables.
        if /^(?:(?<name>\w+)\s)?(?<inits>(?:\w(?:\.|\b)\s?)+)/ =~ fname
          ints = inits.split(/[.\s]*/)
          fnames << forename(name, ints.shift)
          ints.each { |i| fnames << forename(nil, i) }
        else
          fn = forename(fname)
          fnames << fn if fn
        end
      end
      initials = localized_string inits unless inits.nil? || inits.empty?
      [fnames, initials]
    end

    #
    # Create forename object
    #
    # @param [String, nil] cnt forename content
    # @param [String, nil] init initial content
    #
    # @return [RelatonBib::Forename, nil] forename object, nil when both
    #   content and initial are blank
    #
    def forename(cnt, init = nil)
      return if (cnt.nil? || cnt.empty?) && (init.nil? || init.empty?)

      RelatonBib::Forename.new(
        content: cnt, language: @doc["language"], script: "Latn", initial: init,
      )
    end

    #
    # Create publisher organization. The abbreviation and (when the publisher
    # has no own place) the place are taken from a sibling `institution`
    # element whose `institution_name` equals the publisher name.
    #
    # @param [Nokogiri::XML::Element] pub publisher element
    #
    # @return [RelatonBib::Organization] publisher organization
    #
    def create_org(pub)
      name = pub.at("publisher_name").text
      abbr = institution_child(pub, name, "institution_acronym")&.text
      place = pub.at("./publisher_place") ||
        institution_child(pub, name, "institution_place")
      cont = []
      if place
        city, state = place.text.split(", ")
        cont << RelatonBib::Address.new(street: [], city: city, state: state, country: "US")
      end
      RelatonBib::Organization.new name: name, abbreviation: abbr, contact: cont
    end

    # @return [Array<Hash>]
    def parse_relation # rubocop:disable Metrics/AbcSize
      @doc.xpath("./ns:program/ns:related_item", ns: NS).map do |rel|
        rdoi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: NS)
        id = rdoi.text.split("/")[1..].join("/").gsub(".", " ")
        fref = RelatonBib::FormattedRef.new content: id
        docid = RelatonBib::DocumentIdentifier.new(type: "NIST", id: id, primary: true)
        bibitem = RelatonBib::BibliographicItem.new formattedref: fref, docid: [docid]
        type = RELATION_TYPES[rdoi["relationship-type"]]
        warn "Relation type #{rdoi['relationship-type']} not found" unless type
        { type: type, bibitem: bibitem }
      end
    end

    # @return [RelatonBib::DocumentStatus, nil] "preprint" status when the
    #   document has an `isPreprintOf` relation, nil otherwise
    def parse_docstatus
      s = @doc.at("./ns:program/ns:related_item/ns:*[@relationship-type='isPreprintOf']", ns: NS)
      return unless s

      RelatonBib::DocumentStatus.new stage: "preprint"
    end

    # @return [Array<String>]
    def parse_place
      @doc.xpath("institution/institution_place").map(&:text)
    end

    #
    # Fetches series. The first three whitespace-separated parts of the
    # publication ID are used as prefix, series abbreviation and number.
    #
    # @return [Array<RelatonBib::Series>] series
    #
    def parse_series
      prf, srs, num = pub_id.split
      sname = @series[srs] || srs
      title = RelatonBib::TypedTitleString.new(content: "#{prf} #{sname}")
      abbr = RelatonBib::LocalizedString.new srs
      [RelatonBib::Series.new(title: title, abbreviation: abbr, number: num)]
    end

    #
    # Create localized string
    #
    # @param [String] content content of string
    #
    # @return [RelatonBib::LocalizedString] localized string
    #
    def localized_string(content)
      RelatonBib::LocalizedString.new content, @doc["language"], "Latn"
    end

    private

    #
    # Find a child element of the sibling `institution` whose
    # `institution_name` equals the given name. The name is passed as an
    # XPath variable instead of being interpolated into the expression, so a
    # name containing quotes cannot break the query.
    #
    # @param [Nokogiri::XML::Element] pub publisher element
    # @param [String] name institution name to match
    # @param [String] child name of the child element to return
    #
    # @return [Nokogiri::XML::Element, nil] first matching element
    #
    def institution_child(pub, name, child)
      pub.at_xpath("../institution[institution_name[.=$name]]/#{child}", nil, { name: name })
    end
  end
end
|
data/lib/relaton_nist/version.rb
CHANGED
data/lib/relaton_nist.rb
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
require "singleton"
|
2
2
|
require "relaton/index"
|
3
|
+
require "relaton_bib"
|
3
4
|
require "relaton_nist/version"
|
5
|
+
require "relaton_nist/config"
|
6
|
+
require "relaton_nist/util"
|
4
7
|
require "relaton_nist/nist_bibliography"
|
5
8
|
require "relaton_nist/data_fetcher"
|
6
9
|
require "relaton_nist/pubs_export"
|
10
|
+
require "relaton_nist/tech_pubs_parser"
|
7
11
|
|
8
12
|
# if defined? Relaton
|
9
13
|
# require_relative "relaton/processor"
|
@@ -16,9 +20,9 @@ module RelatonNist
|
|
16
20
|
# Returns hash of XML grammar
|
17
21
|
# @return [String]
|
18
22
|
def self.grammar_hash
|
19
|
-
gem_path = File.expand_path "..", __dir__
|
20
|
-
grammars_path = File.join gem_path, "grammars", "*"
|
21
|
-
grammars = Dir[grammars_path].sort.map { |gp| File.read gp }.join
|
22
|
-
Digest::MD5.hexdigest grammars
|
23
|
+
# gem_path = File.expand_path "..", __dir__
|
24
|
+
# grammars_path = File.join gem_path, "grammars", "*"
|
25
|
+
# grammars = Dir[grammars_path].sort.map { |gp| File.read gp }.join
|
26
|
+
Digest::MD5.hexdigest RelatonNist::VERSION + RelatonBib::VERSION # grammars
|
23
27
|
end
|
24
28
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-nist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.16.
|
4
|
+
version: 1.16.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: relaton-bib
|
@@ -78,6 +78,7 @@ files:
|
|
78
78
|
- grammars/relaton-nist.rng
|
79
79
|
- lib/relaton_nist.rb
|
80
80
|
- lib/relaton_nist/comment_period.rb
|
81
|
+
- lib/relaton_nist/config.rb
|
81
82
|
- lib/relaton_nist/data_fetcher.rb
|
82
83
|
- lib/relaton_nist/document_relation.rb
|
83
84
|
- lib/relaton_nist/document_status.rb
|
@@ -90,10 +91,11 @@ files:
|
|
90
91
|
- lib/relaton_nist/pubs_export.rb
|
91
92
|
- lib/relaton_nist/scrapper.rb
|
92
93
|
- lib/relaton_nist/series.yaml
|
94
|
+
- lib/relaton_nist/tech_pubs_parser.rb
|
95
|
+
- lib/relaton_nist/util.rb
|
93
96
|
- lib/relaton_nist/version.rb
|
94
97
|
- lib/relaton_nist/xml_parser.rb
|
95
98
|
- relaton_nist.gemspec
|
96
|
-
- resp.html
|
97
99
|
homepage: https://github.com/metanorma/relaton-nist
|
98
100
|
licenses:
|
99
101
|
- MIT
|