relaton-nist 1.9.1 → 1.9.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d440504b749834c27875dc8a1ee84346451578db5dd1588b75eafdb31087b64c
4
- data.tar.gz: ecfb305d172a8afa171fa4493b1ffec2dd458fd413143a7f197034a392ddf451
3
+ metadata.gz: c8d36c40a2b5307192f8d25cc103cbb5f08e0b21a0a88940c8e7688417c01f4d
4
+ data.tar.gz: '048f67e6cb6835f382973576d8c97048a7462a512d7b5f8469bed3af07ec594f'
5
5
  SHA512:
6
- metadata.gz: 7af96feb236bddbabd7e90982637b608813d5c2163c7453f217c658035cf34b0ef80adc6441d6be213b77e62b5625d95fb10a30914fc94a1b6cf966b1c921bf6
7
- data.tar.gz: 9b9daa02fc2ec7df33ad5c8cdf768c711b1bac0122191e7cfc79ee7c8ed2b7de00fbe6509b3347f387cf4868b24448f91f1a687e45b80fe65ce31d3f7a46cad5
6
+ metadata.gz: e70ce8ef01f291b254e095c33ba639cfc8fcf23736357b1eed465498fc989f101fe1d05fc031726498b917f6bd227a6a3d7e1171d18ef455f693452da48a3495
7
+ data.tar.gz: f9e38480b0c3d73d48485988f86350cd50378489814328622cd7cf6da592056870ca74779feae53a607f737aa4fa2f5f8ae1323450f358d782133d9f48b6c6a1
data/.gitignore CHANGED
@@ -7,6 +7,7 @@
7
7
  /spec/reports/
8
8
  /tmp/
9
9
  .vscode/
10
+ /data/
10
11
  .rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-master-ci-rubocop-yml
11
12
 
12
13
  # rspec failure tracking
data/.rubocop.yml CHANGED
@@ -2,6 +2,8 @@
2
2
  # https://github.com/riboseinc/oss-guides
3
3
  # All project-specific additions and overrides should be specified in this file.
4
4
 
5
+ require: rubocop-rails
6
+
5
7
  inherit_from:
6
8
  - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
7
9
  AllCops:
data/README.adoc CHANGED
@@ -147,6 +147,17 @@ item.docidentifier.first.id
147
147
  => "SP 800-38A-Add"
148
148
  ----
149
149
 
150
+ === Typed links
151
+
152
+ NIST documents may have `src` and `doi` link types.
153
+
154
+ [source,ruby]
155
+ ----
156
+ item.link
157
+ => [#<RelatonBib::TypedUri:0x00007f901971dc10 @content=#<Addressable::URI:0x62c URI:https://csrc.nist.gov/publications/detail/sp/800-67/rev-2/final>, @type="src">,
158
+ #<RelatonBib::TypedUri:0x00007f901971d6e8 @content=#<Addressable::URI:0x640 URI:https://doi.org/10.6028/NIST.SP.800-67r2>, @type="doi">]
159
+ ----
160
+
150
161
  === Create bibliographic item from YAML
151
162
  [source,ruby]
152
163
  ----
@@ -159,6 +170,25 @@ RelatonNist::NistBibliographicItem.from_hash hash
159
170
  ...
160
171
  ----
161
172
 
173
+ === Fetch data
174
+
175
+ This gem uses the https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml dataset as one of its data sources.
176
+
177
+ The method `RelatonNist::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
178
+ Arguments:
179
+
180
+ - `output` - folder to save documents (default './data').
181
+ - `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
182
+
183
+ [source,ruby]
184
+ ----
185
+ RelatonNist::DataFetcher.fetch
186
+ Started at: 2021-09-01 18:01:01 +0200
187
+ Stopped at: 2021-09-01 18:01:43 +0200
188
+ Done in: 42 sec.
189
+ => nil
190
+ ----
191
+
162
192
  == Development
163
193
 
164
194
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -0,0 +1,255 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "yaml"
4
+
5
module RelatonNist
  #
  # Downloads the NIST Tech Pubs dataset (see URL) and saves every
  # `report-paper_metadata` record as a separate bibliographic file.
  #
  class DataFetcher
    # Maps `relationship-type` attribute values found in the dataset to the
    # relation types stored in the generated documents.
    RELATION_TYPES = {
      "replaces" => "obsoletes",
      "isVersionOf" => "editionOf",
      "hasTranslation" => "hasTranslation",
      "isTranslationOf" => "translatedFrom",
      "hasPreprint" => "hasReprint",
      "isSupplementTo" => "complements",
      "isPartOf" => "partOf",
      "hasPart" => "hasPart",
    }.freeze
    URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"

    #
    # @param output [String] directory to save the documents to
    # @param format [String] format to save the documents in (yaml, xml, bibxml)
    #
    def initialize(output, format)
      @output = output
      @format = format
      # A leading "bib" is dropped, so "bibxml" files get the "xml" extension.
      @ext = format.sub(/^bib/, "")
    end

    #
    # Build raw document identifiers for a record.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>] hashes with :type, :id and (for the anchor) :scope
    #
    def parse_docid(doc) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      # NOTE(review): disabled DOI corrections, kept for reference.
      # case doi
      # when "10.6028/NBS.CIRC.12e2revjune" then doi.sub!("13e", "12e")
      # when "10.6028/NBS.CIRC.36e2" then doi.sub!("46e", "36e")
      # when "10.6028/NBS.HB.67suppJune1967" then doi.sub!("1965", "1967")
      # when "10.6028/NBS.HB.105-1r1990" then doi.sub!("105-1-1990", "105-1r1990")
      # when "10.6028/NIST.HB.150-10-1995" then doi.sub!(/150-10$/, "150-10-1995")
      # end
      # anchor = doi.split("/")[1..-1].join "/"
      [
        { type: "NIST", id: pub_id(doc) },
        { type: "DOI", id: doi(doc) },
        { type: "NIST", id: anchor(doc), scope: "anchor" },
      ]
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [String] the anchor with every "." replaced by a space
    def pub_id(doc)
      anchor(doc).tr(".", " ")
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [String] text of the `doi_data/doi` element
    def doi(doc)
      doc.at("doi_data/doi").text
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [String] the DOI without its first "/"-separated segment
    def anchor(doc)
      doi(doc).split("/")[1..-1].join "/"
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::DocumentIdentifier>]
    def fetch_docid(doc)
      # NOTE(review): the :scope key produced by parse_docid is not passed on
      # here - confirm whether that is intended.
      parse_docid(doc).map do |id|
        RelatonBib::DocumentIdentifier.new(type: id[:type], id: id[:id])
      end
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [RelatonBib::TypedTitleStringCollection, Array]
    def fetch_title(doc)
      t = doc.xpath("titles/title|titles/subtitle")
      return [] unless t.any?

      # NOTE(review): title and subtitle are joined without a separator -
      # confirm this is what `from_string` expects.
      RelatonBib::TypedTitleString.from_string t.map(&:text).join, "en", "Latn"
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::BibliographicDate>]
    def fetch_date(doc)
      doc.xpath("publication_date|approval_date").map do |dt|
        on = dt.at("year").text
        # The day is only appended when a month is present.
        if (m = dt.at "month")
          on += "-#{m.text}"
          d = dt.at "day"
          on += "-#{d.text}" if d
        end
        type = dt.name == "publication_date" ? "published" : "confirmed"
        RelatonBib::BibliographicDate.new(type: type, on: on)
      end
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [String, nil] nil when the record has no `edition_number`
    def fetch_edition(doc)
      doc.at("edition_number")&.text
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>]
    def fetch_relation(doc)
      ns = "http://www.crossref.org/relations.xsd"
      doc.xpath("./ns:program/ns:related_item", ns: ns).map do |rel|
        rdoi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: ns)
        fref = RelatonBib::FormattedRef.new content: rdoi.text
        bibitem = RelatonBib::BibliographicItem.new formattedref: fref
        type = RELATION_TYPES[rdoi["relationship-type"]]
        # Unknown types are reported; the relation is still returned with a nil type.
        warn "Relation type #{rdoi['relationship-type']} not found" unless type
        { type: type, bibitem: bibitem }
      end
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::TypedUri>]
    def fetch_link(doc)
      url = doc.at("doi_data/resource").text
      [RelatonBib::TypedUri.new(type: "doi", content: url)]
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::FormattedString>]
    def fetch_abstract(doc)
      doc.xpath("jats:abstract/jats:p", "jats" => "http://www.ncbi.nlm.nih.gov/JATS1").map do |a|
        RelatonBib::FormattedString.new(content: a.text, language: doc["language"], script: "Latn")
      end
    end

    #
    # Build person contributors followed by publisher contributors.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>] hashes with :entity and :role
    #
    def fetch_contributor(doc) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
      lang = doc["language"]
      contribs = doc.xpath("contributors/person_name").map do |p|
        forename = []
        initial = []
        # A single word character with an optional dot counts as an initial;
        # any other given-name token is a forename.
        p.at("given_name")&.text&.split&.each do |fn|
          if /^(?<init>\w)\.?$/ =~ fn
            initial << RelatonBib::LocalizedString.new(init, lang, "Latn")
          else
            forename << RelatonBib::LocalizedString.new(fn, lang, "Latn")
          end
        end
        sname = p.at("surname").text
        surname = RelatonBib::LocalizedString.new sname, lang, "Latn"
        # `initial` must not be reset here, otherwise the initials collected
        # above are lost.
        ident = p.xpath("ORCID").map do |id|
          RelatonBib::PersonIdentifier.new "orcid", id.text
        end
        fullname = RelatonBib::FullName.new(
          surname: surname, forename: forename, initial: initial, identifier: ident,
        )
        person = RelatonBib::Person.new name: fullname, affiliation: affiliation(doc)
        { entity: person, role: [{ type: p["contributor_role"] }] }
      end
      publishers = doc.xpath("publisher").map do |p|
        abbr = p.at("../institution/institution_acronym")&.text
        place = p.at("./publisher_place")
        cont = []
        if place
          city, state = place.text.split(", ")
          cont << RelatonBib::Address.new(street: [], city: city, state: state, country: "US")
        end
        org = RelatonBib::Organization.new(
          name: p.at("publisher_name").text, abbreviation: abbr, contact: cont,
        )
        { entity: org, role: [{ type: "publisher" }] }
      end
      contribs + publishers
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::Affiliation>] one per `institution_department`
    def affiliation(doc)
      doc.xpath("./institution/institution_department").map do |id|
        org = RelatonBib::Organization.new name: id.text
        RelatonBib::Affiliation.new organization: org
      end
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<String>]
    def fetch_place(doc)
      doc.xpath("institution/institution_place").map(&:text)
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::Series>]
    def fetch_series(doc)
      title = RelatonBib::TypedTitleString.new(content: "NIST")
      [RelatonBib::Series.new(title: title, number: pub_id(doc))]
    end

    #
    # Save document. An existing file is reported and left untouched.
    #
    # @param bib [RelatonNist::NistBibliographicItem]
    #
    def write_file(bib) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      id = bib.docidentifier[0].id.gsub(%r{[/\s:.]}, "_").upcase.sub(/^NIST_IR/, "NISTIR")
      file = File.join(@output, "#{id}.#{@ext}")
      if File.exist? file
        warn "File #{file} exists. Docid: #{bib.docidentifier[0].id}"
        # warn "Link: #{bib.link.detect { |l| l.type == 'src' }.content}"
      else
        output = case @format
                 when "yaml" then bib.to_hash.to_yaml
                 when "xml" then bib.to_xml bibdata: true
                 else bib.send "to_#{@format}"
                 end
        File.write file, output, encoding: "UTF-8"
      end
    end

    #
    # Create a document instance and save it.
    #
    # Any StandardError raised while building or saving the document is
    # reported with `warn` and not re-raised, so one bad record does not stop
    # the whole run.
    #
    # @param doc [Nokogiri::XML::Element]
    #
    def parse_doc(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
      # mtd = doc.at('doi_record/report-paper/report-paper_metadata')
      item = RelatonNist::NistBibliographicItem.new(
        type: "standard", docid: fetch_docid(doc), title: fetch_title(doc),
        link: fetch_link(doc), abstract: fetch_abstract(doc),
        date: fetch_date(doc), edition: fetch_edition(doc),
        contributor: fetch_contributor(doc), relation: fetch_relation(doc),
        place: fetch_place(doc), series: fetch_series(doc),
        language: [doc["language"]], script: ["Latn"], doctype: "standard"
      )
      write_file item
    rescue StandardError => e
      # The DOI itself may be the missing piece, so it is read nil-safely.
      warn "Document: #{doc.at('doi')&.text}"
      warn e.message
      warn e.backtrace[0..5].join("\n")
      # raise e
    end

    #
    # Fetch all the documents from the dataset.
    #
    # Existing files with the current extension are removed from the output
    # directory first. Errors are reported with `warn` and not re-raised.
    #
    def fetch # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      t1 = Time.now
      puts "Started at: #{t1}"

      docs = Nokogiri::XML OpenURI.open_uri URL
      # mkdir_p also creates missing parent directories and is a no-op when
      # the directory already exists.
      FileUtils.mkdir_p @output
      FileUtils.rm Dir[File.join(@output, "*.#{@ext}")]
      docs.xpath("/body/query/doi_record/report-paper/report-paper_metadata")
        .each { |doc| parse_doc doc }

      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    rescue StandardError => e
      warn e.message
      warn e.backtrace[0..5].join("\n")
    end

    #
    # Fetch all the documents from the dataset.
    #
    # @param [String] output folder name to save the documents
    # @param [String] format format to save the documents (yaml, xml, bibxml)
    #
    def self.fetch(output: "data", format: "yaml")
      new(output, format).fetch
    end
  end
end
@@ -15,17 +15,19 @@ module RelatonNist
15
15
  DATAFILE = File.expand_path "pubs-export.zip", DATAFILEDIR
16
16
  GHNISTDATA = "https://raw.githubusercontent.com/relaton/relaton-data-nist/main/data/"
17
17
 
18
- # @param ref_nbr [String]
19
- # @param year [String]
20
- # @param opts [Hash]
21
- # @option opts [String] :stage
22
- def initialize(ref_nbr, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
23
- super ref_nbr, year
24
-
25
- # /(?<docid>(?:SP|FIPS)\s[0-9-]+)/ =~ text
18
+ def self.search(text, year = nil, opts = {})
19
+ new(text, year).search(opts)
20
+ end
21
+
22
+ def search(opts)
26
23
  @array = from_json(**opts)
27
24
  @array = from_ga unless @array.any?
25
+ sort_hits!
26
+ end
28
27
 
28
+ private
29
+
30
+ def sort_hits!
29
31
  @array.sort! do |a, b|
30
32
  if a.sort_value == b.sort_value
31
33
  (b.hit[:release_date] - a.hit[:release_date]).to_i
@@ -33,10 +35,9 @@ module RelatonNist
33
35
  b.sort_value - a.sort_value
34
36
  end
35
37
  end
38
+ self
36
39
  end
37
40
 
38
- private
39
-
40
41
  def from_ga # rubocop:disable Metrics/AbcSize
41
42
  fn = text.gsub(%r{[/\s:.]}, "_").upcase
42
43
  yaml = OpenURI.open_uri "#{GHNISTDATA}#{fn}.yaml"
@@ -14,7 +14,7 @@ module RelatonNist
14
14
  # @param text [String]
15
15
  # @return [RelatonNist::HitCollection]
16
16
  def search(text, year = nil, opts = {})
17
- HitCollection.new text, year, opts
17
+ HitCollection.search text, year, opts
18
18
  rescue OpenURI::HTTPError, SocketError, OpenSSL::SSL::SSLError => e
19
19
  raise RelatonBib::RequestError, e.message
20
20
  end
@@ -28,17 +28,18 @@ module RelatonNist
28
28
  # @option opts [TrueClass, FalseClass] :bibdata
29
29
  #
30
30
  # @return [String] Relaton XML serialisation of reference
31
- def get(code, year = nil, opts = {})
32
- return fetch_ref_err(code, year, []) if code.match? /\sEP$/
31
+ def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
32
+ return fetch_ref_err(code, year, []) if code.match?(/\sEP$/)
33
33
 
34
- /^(?<code2>[^\(]+)(\((?<date2>\w+\s(\d{2},\s)?\d{4})\))?\s?\(?((?<=\()(?<stage>[^\)]+))?/ =~ code
34
+ /^(?<code2>[^(]+)(?:\((?<date2>\w+\s(?:\d{2},\s)?\d{4})\))?\s?\(?(?:(?<=\()(?<stage>[^\)]+))?/ =~ code
35
35
  stage ||= /(?<=\.)PD-\w+(?=\.)/.match(code)&.to_s
36
36
  if code2
37
37
  code = code2.strip
38
38
  if date2
39
- if /\w+\s\d{4}/.match? date2
39
+ case date2
40
+ when /\w+\s\d{4}/
40
41
  opts[:issued_date] = Date.strptime date2, "%B %Y"
41
- elsif /\w+\s\d{2},\s\d{4}/.match? date2
42
+ when /\w+\s\d{2},\s\d{4}/
42
43
  opts[:updated_date] = Date.strptime date2, "%B %d, %Y"
43
44
  end
44
45
  end
@@ -2,11 +2,12 @@ require "relaton/processor"
2
2
 
3
3
  module RelatonNist
4
4
  class Processor < Relaton::Processor
5
- def initialize
5
+ def initialize # rubocop:disable Lint/MissingSuper
6
6
  @short = :relaton_nist
7
7
  @prefix = "NIST"
8
8
  @defaultprefix = %r{^(NIST|NISTGCR|ITL Bulletin|JPCRD|NISTIR|CSRC|FIPS)(/[^\s])?\s}
9
9
  @idtype = "NIST"
10
+ @datasets = %w[nist-tech-pubs]
10
11
  end
11
12
 
12
13
  # @param code [String]
@@ -17,6 +18,18 @@ module RelatonNist
17
18
  ::RelatonNist::NistBibliography.get(code, date, opts)
18
19
  end
19
20
 
21
+ #
22
+ # Fetch all the documents from a source
23
+ #
24
+ # @param [String] _source source name
25
+ # @param [Hash] opts
26
+ # @option opts [String] :output directory to output documents
27
+ # @option opts [String] :format
28
+ #
29
+ def fetch_data(_source, opts)
30
+ DataFetcher.fetch(**opts)
31
+ end
32
+
20
33
  # @param xml [String]
21
34
  # @return [RelatonNist::GbBibliographicItem]
22
35
  def from_xml(xml)
@@ -62,44 +62,10 @@ module RelatonNist
62
62
  # Fetch status.
63
63
  # @param doc [Hash]
64
64
  # @return [RelatonNist::DocumentStatus]
65
- def fetch_status(doc) # , status)
66
- # if doc.is_a? Hash
65
+ def fetch_status(doc)
67
66
  stage = doc["status"]
68
67
  subst = doc["substage"]
69
68
  iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
70
- # else
71
- # case status
72
- # when "draft (obsolete)"
73
- # stage = "draft-public"
74
- # subst = "withdrawn"
75
- # when "retired draft"
76
- # stage = "draft-public"
77
- # subst = "retired"
78
- # when "withdrawn"
79
- # stage = "final"
80
- # subst = "withdrawn"
81
- # when /^draft/
82
- # stage = "draft-public"
83
- # subst = "active"
84
- # else
85
- # stage = status
86
- # subst = "active"
87
- # end
88
-
89
- # iter = nil
90
- # if stage.include? "draft"
91
- # iter = 1
92
- # history = doc.xpath("//span[@id='pub-history-container']/a"\
93
- # "|//span[@id='pub-history-container']/span")
94
- # history.each_with_index do |h, idx|
95
- # next if h.name == "a"
96
-
97
- # iter = idx + 1 if idx.positive?
98
- # break
99
- # end
100
- # end
101
- # end
102
-
103
69
  RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter.to_s
104
70
  end
105
71
 
@@ -132,7 +98,6 @@ module RelatonNist
132
98
  dates
133
99
  end
134
100
 
135
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
136
101
  # @param doc [Hash]
137
102
  # @return [Array<RelatonBib::ContributionInfo>]
138
103
  def fetch_contributors(doc)
@@ -144,23 +109,12 @@ module RelatonNist
144
109
  contribs + contributors_json(
145
110
  doc["editors"], "editor", doc["language"], doc["script"]
146
111
  )
147
- # else
148
- # name = "National Institute of Standards and Technology"
149
- # org = RelatonBib::Organization.new(
150
- # name: name, url: "www.nist.gov", abbreviation: "NIST",
151
- # )
152
- # contribs << RelatonBib::ContributionInfo.new(entity: org, role: [type: "publisher"])
153
- # authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
154
- # contribs += contributors(authors, "author")
155
- # editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
156
- # contribs + contributors(editors, "editor")
157
- # end
158
112
  end
159
113
 
160
114
  # @param doc [Array<Hash>]
161
115
  # @param role [String]
162
116
  # @return [Array<RelatonBib::ContributionInfo>]
163
- def contributors_json(doc, role, lang = "en", script = "Latn")
117
+ def contributors_json(doc, role, lang = "en", script = "Latn") # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
164
118
  doc.map do |contr|
165
119
  if contr["affiliation"]
166
120
  if contr["affiliation"]["acronym"]
@@ -185,43 +139,6 @@ module RelatonNist
185
139
  end.compact
186
140
  end
187
141
 
188
- # rubocop:disable Metrics/CyclomaticComplexity
189
- # @param doc [Nokogiri::HTML::Element, Array<Hash>]
190
- # @param role [String]
191
- # @return [Array<RelatonBib::ContributionInfo>]
192
- # def contributors(doc, role, lang = "en", script = "Latn")
193
- # return [] if doc.nil?
194
-
195
- # doc.text.split(", ").map do |contr|
196
- # /(?<an>.+?)(\s+\((?<abbrev>.+?)\))?$/ =~ contr.strip
197
- # if abbrev && an.downcase !~ /(task|force|group)/ && an.split.size.between?(2, 3)
198
- # fullname = RelatonBib::FullName.new(
199
- # completename: RelatonBib::LocalizedString.new(an, lang, script)
200
- # )
201
- # case abbrev
202
- # when "NIST"
203
- # org_name = "National Institute of Standards and Technology"
204
- # url = "www.nist.gov"
205
- # when "MITRE"
206
- # org_name = abbrev
207
- # url = "www.mitre.org"
208
- # else
209
- # org_name = abbrev
210
- # url = nil
211
- # end
212
- # org = RelatonBib::Organization.new name: org_name, url: url, abbreviation: abbrev
213
- # affiliation = RelatonBib::Affiliation.new organization: org
214
- # entity = RelatonBib::Person.new(
215
- # name: fullname, affiliation: [affiliation],
216
- # )
217
- # else
218
- # entity = RelatonBib::Organization.new name: an, abbreviation: abbrev
219
- # end
220
- # RelatonBib::ContributionInfo.new entity: entity, role: [type: role]
221
- # end
222
- # end
223
- # rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/MethodLength
224
-
225
142
  # @param name [Hash]
226
143
  # @param lang [String]
227
144
  # @param script [String]
@@ -253,80 +170,30 @@ module RelatonNist
253
170
  return unless doc["edition"]
254
171
 
255
172
  rev = doc["edition"]
256
- # else
257
- # return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ doc
258
- # end
259
-
260
173
  "Revision #{rev}"
261
174
  end
262
175
 
263
- # Fetch abstracts.
264
- # @param doc [Nokigiri::HTML::Document]
265
- # @return [Array<Hash>]
266
- # def fetch_abstract(doc)
267
- # abstract_content = doc.xpath(
268
- # '//div[contains(@class, "pub-abstract-callout")]/div[1]/p',
269
- # ).text
270
- # [{
271
- # content: abstract_content,
272
- # language: "en",
273
- # script: "Latn",
274
- # format: "text/plain",
275
- # }]
276
- # end
277
-
278
176
  # Fetch copyright.
279
177
  # @param doc [Nokogiri::HTML::Document, String]
280
178
  # @return [Array<Hash>]
281
179
  def fetch_copyright(doc)
282
180
  name = "National Institute of Standards and Technology"
283
181
  url = "www.nist.gov"
284
- # d = if doc.is_a? String then doc
285
- # else
286
- # doc.at("//span[@id='pub-release-date']")&.text&.strip
287
- # end
288
182
  from = doc&.match(/\d{4}/)&.to_s
289
183
  [{ owner: [{ name: name, abbreviation: "NIST", url: url }], from: from }]
290
184
  end
291
185
 
292
- # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
293
-
294
186
  # Fetch links.
295
187
  # @param doc [Hash]
296
188
  # @return [Array<Hash>]
297
189
  def fetch_link(doc)
298
190
  links = []
299
- # if doc.is_a? Hash
300
- links << { type: "uri", content: doc["uri"] } if doc["uri"]
301
- doi = "https://doi.org/" + doc["doi"] if doc["doi"]
302
- # else
303
- # pub = doc.at "//p/strong[contains(., 'Publication:')]"
304
- # pdf = pub&.at "./following-sibling::a[.=' Local Download']"
305
- # doi = pub&.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
306
- # links << { type: "pdf", content: pdf[:href] } if pdf
307
- # end
308
- links << { type: "doi", content: doi } if doi
191
+ links << { type: "src", content: doc["uri"] } if doc["uri"]
192
+ if doc["doi"]
193
+ links << { type: "doi", content: "https://doi.org/#{doc['doi']}" }
194
+ end
309
195
  links
310
196
  end
311
- # rubocop:enable Metrics/MethodLength
312
-
313
- # Fetch relations.
314
- # @param doc [Nokogiri::HTML::Document]
315
- # @return [Array<RelatonNist::DocumentRelation>]
316
- # def fetch_relations(doc)
317
- # relations = doc.xpath('//span[@id="pub-supersedes-container"]/a').map do |r|
318
- # doc_relation "supersedes", r.text, DOMAIN + r[:href]
319
- # end
320
-
321
- # relations += doc.xpath('//span[@id="pub-part-container"]/a').map do |r|
322
- # doc_relation "partOf", r.text, DOMAIN + r[:href]
323
- # end
324
-
325
- # relations + doc.xpath('//span[@id="pub-related-container"]/a').map do |r|
326
- # doc_relation "updates", r.text, DOMAIN + r[:href]
327
- # end
328
- # end
329
- # rubocop:enable Metrics/AbcSize
330
197
 
331
198
  def fetch_relations_json(doc)
332
199
  relations = doc["supersedes"].map do |r|
@@ -354,68 +221,12 @@ module RelatonNist
354
221
  )
355
222
  end
356
223
 
357
- # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
358
-
359
- # @param doc [Nokogiri::HTML::Document]
360
- # @return [Array<RelatonBib::Series>]
361
- # def fetch_series(doc)
362
- # series = doc.xpath "//span[@id='pub-history-container']/a"\
363
- # "|//span[@id='pub-history-container']/span"
364
- # series.map.with_index do |s, idx|
365
- # next if s.name == "span"
366
-
367
- # iter = if idx.zero? then "I"
368
- # else idx + 1
369
- # end
370
-
371
- # content = s.text.match(/^[^\(]+/).to_s.strip.squeeze " "
372
-
373
- # ref = case s.text
374
- # when /^Draft/
375
- # content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
376
- # when /\(Draft\)/ then content + " (#{iter}PD)"
377
- # else content
378
- # end
379
-
380
- # fref = RelatonBib::FormattedRef.new(
381
- # content: ref, language: "en", script: "Latn", format: "text/plain",
382
- # )
383
- # RelatonBib::Series.new(formattedref: fref)
384
- # end.select { |s| s }
385
- # end
386
- # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
387
-
388
224
  # @param doc [Hash]
389
225
  # @return [Array<RelatonNist::Keyword>]
390
226
  def fetch_keywords(doc)
391
- # kws = if doc.is_a? Hash
392
- # doc["keywords"]
393
- # else
394
- # doc.xpath "//span[@id='pub-keywords-container']/span"
395
- # end
396
227
  doc["keywords"].map { |kw| kw.is_a?(String) ? kw : kw.text }
397
228
  end
398
229
 
399
- # rubocop:disable Metrics/AbcSize
400
- # @param doc [Nokogiri::HTML::Document]
401
- # @return [RelatonNist::CommentPeriod, NilClass]
402
- # def fetch_commentperiod(doc)
403
- # cp = doc.at "//span[@id='pub-comments-due']"
404
- # return unless cp
405
-
406
- # to = Date.strptime cp.text.strip, "%B %d, %Y"
407
-
408
- # d = doc.at("//span[@id='pub-release-date']").text.strip
409
- # from = Date.strptime(d, "%B %Y").to_s
410
-
411
- # ex = doc.at "//strong[contains(.,'The comment closing date has been "\
412
- # "extended to')]"
413
- # ext = ex&.text&.match(/\w+\s\d{2},\s\d{4}/).to_s
414
- # extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
415
- # CommentPeriod.new from: from, to: to, extended: extended
416
- # end
417
- # rubocop:enable Metrics/AbcSize
418
-
419
230
  # @param json [Hash]
420
231
  # @return [RelatonNist::CommentPeriod, NilClass]
421
232
  def fetch_commentperiod_json(json)
@@ -1,3 +1,3 @@
1
1
  module RelatonNist
2
- VERSION = "1.9.1".freeze
2
+ VERSION = "1.9.6".freeze
3
3
  end
data/lib/relaton_nist.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "relaton_nist/version"
2
2
  require "relaton_nist/nist_bibliography"
3
+ require "relaton_nist/data_fetcher"
3
4
 
4
5
  # if defined? Relaton
5
6
  # require_relative "relaton/processor"
data/relaton_nist.gemspec CHANGED
@@ -32,6 +32,6 @@ Gem::Specification.new do |spec|
32
32
  spec.add_development_dependency "vcr"
33
33
  spec.add_development_dependency "webmock"
34
34
 
35
- spec.add_dependency "relaton-bib", "~> 1.9.0"
35
+ spec.add_dependency "relaton-bib", ">= 1.9.19"
36
36
  spec.add_dependency "rubyzip"
37
37
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-nist
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.1
4
+ version: 1.9.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-08-30 00:00:00.000000000 Z
11
+ date: 2022-01-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -126,16 +126,16 @@ dependencies:
126
126
  name: relaton-bib
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
- - - "~>"
129
+ - - ">="
130
130
  - !ruby/object:Gem::Version
131
- version: 1.9.0
131
+ version: 1.9.19
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
- - - "~>"
136
+ - - ">="
137
137
  - !ruby/object:Gem::Version
138
- version: 1.9.0
138
+ version: 1.9.19
139
139
  - !ruby/object:Gem::Dependency
140
140
  name: rubyzip
141
141
  requirement: !ruby/object:Gem::Requirement
@@ -175,6 +175,7 @@ files:
175
175
  - grammars/reqt.rng
176
176
  - lib/relaton_nist.rb
177
177
  - lib/relaton_nist/comment_period.rb
178
+ - lib/relaton_nist/data_fetcher.rb
178
179
  - lib/relaton_nist/document_relation.rb
179
180
  - lib/relaton_nist/document_status.rb
180
181
  - lib/relaton_nist/hash_converter.rb