relaton-nist 1.16.1 → 1.16.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,293 @@
1
+ module RelatonNist
2
+ class TechPubsParser
3
+ RELATION_TYPES = {
4
+ "replaces" => "obsoletes",
5
+ "isVersionOf" => "editionOf",
6
+ "hasTranslation" => "hasTranslation",
7
+ "isTranslationOf" => "translatedFrom",
8
+ "hasPreprint" => "hasReprint",
9
+ "isPreprintOf" => "hasDraft",
10
+ "isSupplementTo" => "complements",
11
+ "isPartOf" => "partOf",
12
+ "hasPart" => "hasPart",
13
+ }.freeze
14
+
15
+ ATTRS = %i[docid title link abstract date edition contributor relation docstatus place series].freeze
16
+ NS = "http://www.crossref.org/relations.xsd".freeze
17
+
18
+ def initialize(doc, series)
19
+ @doc = doc
20
+ @series = series
21
+ end
22
+
23
+ #
24
+ # Parse XML document
25
+ #
26
+ # @param doc [Nokogiri::XML::Element] XML document
27
+ # @param series [Hash] series hash map (key: series abbreviation, value: series name)
28
+ #
29
+ # @return [RelatonNist::NistBibliographicItem] bibliographic item
30
+ #
31
+ def self.parse(doc, series)
32
+ new(doc, series).parse
33
+ end
34
+
35
+ #
36
+ # Create document instance
37
+ #
38
+ # @raise [StandardError]
39
+ #
40
+ # @return [RelatonNist::NistBibliographicItem, nil] bibliographic item, or nil when parsing fails and the error is only logged
41
+ #
42
+ def parse
43
+ RelatonNist::NistBibliographicItem.new(
44
+ type: "standard", language: [@doc["language"]], script: ["Latn"],
45
+ doctype: "standard", **args
46
+ )
47
+ rescue StandardError => e
48
+ warn "Document: `#{@doc.at('doi').text}`"
49
+ warn e.message
50
+ warn e.backtrace[0..5].join("\n")
51
+ end
52
+
53
+ def args
54
+ ATTRS.to_h { |a| [a, send("parse_#{a}")] }
55
+ end
56
+
57
+ # def anchor(doc)
58
+ # fetch_doi(doc).split("/")[1..-1].join "/"
59
+ # end
60
+
61
+ # @return [Array<RelatonBib::DocumentIdentifier>]
62
+ def parse_docid
63
+ [
64
+ { type: "NIST", id: pub_id, primary: true },
65
+ { type: "DOI", id: doi },
66
+ # { type: "NIST", id: anchor(doc), scope: "anchor" },
67
+ ].map { |id| RelatonBib::DocumentIdentifier.new(**id) }
68
+ end
69
+
70
+ #
71
+ # Parse document's ID from XML
72
+ #
73
+ # @return [String] document's ID
74
+ #
75
+ def pub_id
76
+ # anchor(doc).gsub(".", " ")
77
+ doi.split("/")[1..].join("/").gsub(".", " ").sub(/^nist\sir/, "NIST IR")
78
+ end
79
+
80
+ def doi # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
81
+ @doi ||= begin
82
+ id = @doc.at("doi_data/doi").text
83
+ case id
84
+ when "10.6028/NBS.CIRC.e2e" then "10.6028/NBS.CIRC.2e2"
85
+ when "10.6028/NBS.CIRC.sup" then "10.6028/NBS.CIRC.24e7sup"
86
+ when "10.6028/NBS.CIRC.supJun1925-Jun1926" then "10.6028/NBS.CIRC.24e7sup2"
87
+ when "10.6028/NBS.CIRC.supJun1925-Jun1927" then "10.6028/NBS.CIRC.24e7sup3"
88
+ when "10.6028/NBS.CIRC.24supJuly1922" then "10.6028/NBS.CIRC.24e6sup"
89
+ when "10.6028/NBS.CIRC.24supJan1924" then "10.6028/NBS.CIRC.24e6sup2"
90
+ else id
91
+ end
92
+ end
93
+ end
94
+
95
+ # @return [RelatonBib::TypedTitleStringCollection, Array]
96
+ def parse_title
97
+ t = @doc.xpath("titles/title|titles/subtitle")
98
+ return [] unless t.any?
99
+
100
+ # RelatonBib::TypedTitleString.from_string t.map(&:text).join, "en", "Latn"
101
+ [{ content: t.map(&:text).join("\n"), language: "en", script: "Latn",
102
+ format: "text/plain" }]
103
+ end
104
+
105
+ # @return [Array<RelatonBib::TypedUri>]
106
+ def parse_link
107
+ pdf_url = @doc.at("doi_data/resource").text
108
+ doi_url = "https://doi.org/#{doi}"
109
+ [{ type: "doi", content: doi_url }, { type: "pdf", content: pdf_url }]
110
+ .map { |l| RelatonBib::TypedUri.new(**l) }
111
+ end
112
+
113
+ # @return [Array<RelatonBib::FormattedString>]
114
+ def parse_abstract
115
+ @doc.xpath(
116
+ "jats:abstract/jats:p", "jats" => "http://www.ncbi.nlm.nih.gov/JATS1"
117
+ ).each_with_object([]) do |a, m|
118
+ next if a.text.empty?
119
+
120
+ m << RelatonBib::FormattedString.new(content: a.text, language: @doc["language"], script: "Latn")
121
+ end
122
+ end
123
+
124
+ # @return [Array<RelatonBib::BibliographicDate>]
125
+ def parse_date
126
+ @doc.xpath("publication_date|approval_date").map do |dt|
127
+ on = dt.at("year").text
128
+ if (m = dt.at "month")
129
+ on += "-#{m.text}"
130
+ d = dt.at "day"
131
+ on += "-#{d.text}" if d
132
+ end
133
+ type = dt.name == "publication_date" ? "published" : "confirmed"
134
+ RelatonBib::BibliographicDate.new(type: type, on: on)
135
+ end
136
+ end
137
+
138
+ # @return [String]
139
+ def parse_edition
140
+ @doc.at("edition_number")&.text
141
+ end
142
+
143
+ # @return [Array<Hash>]
144
+ def parse_contributor
145
+ contribs = @doc.xpath("contributors/person_name").map do |p|
146
+ person = RelatonBib::Person.new(name: fullname(p), affiliation: affiliation)
147
+ { entity: person, role: [{ type: p["contributor_role"] }] }
148
+ end
149
+ contribs + @doc.xpath("publisher").map do |p|
150
+ { entity: create_org(p), role: [{ type: "publisher" }] }
151
+ end
152
+ end
153
+
154
+ #
155
+ # Create full name object from person name element.
156
+ #
157
+ # @param [Nokogiri::XML::Element] person person name element
158
+ #
159
+ # @return [RelatonBib::FullName] full name object
160
+ #
161
+ def fullname(person)
162
+ fname, initials = forename_initial person
163
+ surname = localized_string person.at("surname").text
164
+ ident = person.xpath("ORCID").map do |id|
165
+ RelatonBib::PersonIdentifier.new "orcid", id.text
166
+ end
167
+ RelatonBib::FullName.new(surname: surname, forename: fname,
168
+ initials: initials, identifier: ident)
169
+ end
170
+
171
+ #
172
+ # Create affiliation organization
173
+ #
174
+ # @return [Array<RelatonBib::Affiliation>] affiliation
175
+ #
176
+ def affiliation
177
+ @doc.xpath("./institution/institution_department").map do |id|
178
+ org = RelatonBib::Organization.new name: id.text
179
+ RelatonBib::Affiliation.new organization: org
180
+ end
181
+ end
182
+
183
+ #
184
+ # Create forename and initials objects from person name element.
185
+ #
186
+ # @param [Nokogiri::XML::Element] person person name element
187
+ #
188
+ # @return [Array<Array<RelatonBib::Forename>, RelatonBib::LocalizedString>] forenames and initials
189
+ #
190
+ def forename_initial(person) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
191
+ fnames = []
192
+ fname = person.at("given_name")&.text
193
+ if fname
194
+ if /^(?:(?<name>\w+)\s)?(?<inits>(?:\w(?:\.|\b)\s?)+)/ =~ fname
195
+ ints = inits.split(/[.\s]*/)
196
+ fnames << forename(name, ints.shift)
197
+ ints.each { |i| fnames << forename(nil, i) }
198
+ else
199
+ fn = forename(fname)
200
+ fnames << fn if fn
201
+ end
202
+ end
203
+ initials = localized_string inits unless inits.nil? || inits.empty?
204
+ [fnames, initials]
205
+ end
206
+
207
+ #
208
+ # Create forename object
209
+ #
210
+ # @param [String, nil] cnt forename content
211
+ # @param [String, nil] init initial content
212
+ #
213
+ # @return [RelatonBib::Forename] forename object
214
+ #
215
+ def forename(cnt, init = nil)
216
+ return if (cnt.nil? || cnt.empty?) && (init.nil? || init.empty?)
217
+
218
+ RelatonBib::Forename.new(
219
+ content: cnt, language: @doc["language"], script: "Latn", initial: init,
220
+ )
221
+ end
222
+
223
+ #
224
+ # Create publisher organization
225
+ #
226
+ # @param [Nokogiri::XML::Element] pub publisher element
227
+ #
228
+ # @return [RelatonBib::Organization] publisher organization
229
+ #
230
+ def create_org(pub)
231
+ name = pub.at("publisher_name").text
232
+ abbr = pub.at("../institution[institution_name[.='#{name}']]/institution_acronym")&.text
233
+ place = pub.at("./publisher_place") ||
234
+ pub.at("../institution[institution_name[.='#{name}']]/institution_place")
235
+ cont = []
236
+ if place
237
+ city, state = place.text.split(", ")
238
+ cont << RelatonBib::Address.new(street: [], city: city, state: state, country: "US")
239
+ end
240
+ RelatonBib::Organization.new name: name, abbreviation: abbr, contact: cont
241
+ end
242
+
243
+ # @return [Array<Hash>]
244
+ def parse_relation # rubocop:disable Metrics/AbcSize
245
+ @doc.xpath("./ns:program/ns:related_item", ns: NS).map do |rel|
246
+ rdoi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: NS)
247
+ id = rdoi.text.split("/")[1..].join("/").gsub(".", " ")
248
+ fref = RelatonBib::FormattedRef.new content: id
249
+ docid = RelatonBib::DocumentIdentifier.new(type: "NIST", id: id, primary: true)
250
+ bibitem = RelatonBib::BibliographicItem.new formattedref: fref, docid: [docid]
251
+ type = RELATION_TYPES[rdoi["relationship-type"]]
252
+ warn "Relation type #{rdoi['relationship-type']} not found" unless type
253
+ { type: type, bibitem: bibitem }
254
+ end
255
+ end
256
+
257
+ def parse_docstatus
258
+ s = @doc.at("./ns:program/ns:related_item/ns:*[@relationship-type='isPreprintOf']", ns: NS)
259
+ return unless s
260
+
261
+ RelatonBib::DocumentStatus.new stage: "preprint"
262
+ end
263
+
264
+ # @return [Array<String>]
265
+ def parse_place
266
+ @doc.xpath("institution/institution_place").map(&:text)
267
+ end
268
+
269
+ #
270
+ # Fetches series
271
+ #
272
+ # @return [Array<RelatonBib::Series>] series
273
+ #
274
+ def parse_series
275
+ prf, srs, num = pub_id.split
276
+ sname = @series[srs] || srs
277
+ title = RelatonBib::TypedTitleString.new(content: "#{prf} #{sname}")
278
+ abbr = RelatonBib::LocalizedString.new srs
279
+ [RelatonBib::Series.new(title: title, abbreviation: abbr, number: num)]
280
+ end
281
+
282
+ #
283
+ # Create localized string
284
+ #
285
+ # @param [String] content content of string
286
+ #
287
+ # @return [RelatonBib::LocalizedString] localized string
288
+ #
289
+ def localized_string(content)
290
+ RelatonBib::LocalizedString.new content, @doc["language"], "Latn"
291
+ end
292
+ end
293
+ end
@@ -0,0 +1,9 @@
1
+ module RelatonNist
2
+ module Util
3
+ extend RelatonBib::Util
4
+
5
+ def self.logger
6
+ RelatonNist.configuration.logger
7
+ end
8
+ end
9
+ end
@@ -1,3 +1,3 @@
1
1
  module RelatonNist
2
- VERSION = "1.16.1".freeze
2
+ VERSION = "1.16.3".freeze
3
3
  end
data/lib/relaton_nist.rb CHANGED
@@ -1,9 +1,13 @@
1
1
  require "singleton"
2
2
  require "relaton/index"
3
+ require "relaton_bib"
3
4
  require "relaton_nist/version"
5
+ require "relaton_nist/config"
6
+ require "relaton_nist/util"
4
7
  require "relaton_nist/nist_bibliography"
5
8
  require "relaton_nist/data_fetcher"
6
9
  require "relaton_nist/pubs_export"
10
+ require "relaton_nist/tech_pubs_parser"
7
11
 
8
12
  # if defined? Relaton
9
13
  # require_relative "relaton/processor"
@@ -16,9 +20,9 @@ module RelatonNist
16
20
  # Returns hash of XML grammar
17
21
  # @return [String]
18
22
  def self.grammar_hash
19
- gem_path = File.expand_path "..", __dir__
20
- grammars_path = File.join gem_path, "grammars", "*"
21
- grammars = Dir[grammars_path].sort.map { |gp| File.read gp }.join
22
- Digest::MD5.hexdigest grammars
23
+ # gem_path = File.expand_path "..", __dir__
24
+ # grammars_path = File.join gem_path, "grammars", "*"
25
+ # grammars = Dir[grammars_path].sort.map { |gp| File.read gp }.join
26
+ Digest::MD5.hexdigest RelatonNist::VERSION + RelatonBib::VERSION # grammars
23
27
  end
24
28
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-nist
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.1
4
+ version: 1.16.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-06 00:00:00.000000000 Z
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: relaton-bib
@@ -78,6 +78,7 @@ files:
78
78
  - grammars/relaton-nist.rng
79
79
  - lib/relaton_nist.rb
80
80
  - lib/relaton_nist/comment_period.rb
81
+ - lib/relaton_nist/config.rb
81
82
  - lib/relaton_nist/data_fetcher.rb
82
83
  - lib/relaton_nist/document_relation.rb
83
84
  - lib/relaton_nist/document_status.rb
@@ -90,10 +91,11 @@ files:
90
91
  - lib/relaton_nist/pubs_export.rb
91
92
  - lib/relaton_nist/scrapper.rb
92
93
  - lib/relaton_nist/series.yaml
94
+ - lib/relaton_nist/tech_pubs_parser.rb
95
+ - lib/relaton_nist/util.rb
93
96
  - lib/relaton_nist/version.rb
94
97
  - lib/relaton_nist/xml_parser.rb
95
98
  - relaton_nist.gemspec
96
- - resp.html
97
99
  homepage: https://github.com/metanorma/relaton-nist
98
100
  licenses:
99
101
  - MIT