relaton-nist 1.8.0 → 1.9.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -11,23 +11,17 @@ module RelatonNist
11
11
  # @param hit_data [Hash]
12
12
  # @return [Hash]
13
13
  def parse_page(hit_data)
14
- item_data = if hit_data[:json]
15
- from_json hit_data
16
- else
17
- from_csrs hit_data
18
- end
19
- # doctype = "standard"
14
+ item_data = from_json hit_data
20
15
  titles = fetch_titles(hit_data)
21
16
  unless /^(SP|NISTIR|FIPS) /.match? item_data[:docid][0].id
22
- # doctype = id_cleanup(item_data[:docid][0].id)
23
17
  item_data[:docid][0] = RelatonBib::DocumentIdentifier.new(
24
- id: titles[0][:content].upcase, type: "NIST"
18
+ id: titles[0][:content].upcase, type: "NIST",
25
19
  )
26
20
  end
27
21
  item_data[:fetched] = Date.today.to_s
28
22
  item_data[:type] = "standard"
29
23
  item_data[:title] = titles
30
- item_data[:doctype] = "standard" # doctype
24
+ item_data[:doctype] = "standard"
31
25
 
32
26
  NistBibliographicItem.new(**item_data)
33
27
  end
@@ -44,7 +38,7 @@ module RelatonNist
44
38
  edition: fetch_edition(json),
45
39
  language: [json["language"]],
46
40
  script: [json["script"]],
47
- docstatus: fetch_status(json, hit_data[:status]),
41
+ docstatus: fetch_status(json), # hit_data[:status]),
48
42
  copyright: fetch_copyright(json["published-date"]),
49
43
  relation: fetch_relations_json(json),
50
44
  place: ["Gaithersburg, MD"],
@@ -53,120 +47,25 @@ module RelatonNist
53
47
  }
54
48
  end
55
49
 
56
- def from_csrs(hit_data)
57
- doc = get_page hit_data[:url]
58
- {
59
- # id: fetch_id(doc),
60
- link: fetch_link(doc),
61
- docid: fetch_docid(doc),
62
- date: fetch_dates(doc, hit_data[:release_date]),
63
- contributor: fetch_contributors(doc),
64
- edition: fetch_edition(hit_data[:code]),
65
- language: ["en"],
66
- script: ["Latn"],
67
- abstract: fetch_abstract(doc),
68
- docstatus: fetch_status(doc, hit_data[:status]),
69
- copyright: fetch_copyright(doc),
70
- relation: fetch_relations(doc),
71
- series: fetch_series(doc),
72
- keyword: fetch_keywords(doc),
73
- commentperiod: fetch_commentperiod(doc),
74
- }
75
- end
76
50
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
77
51
 
78
- # Strip status from doc id
79
- # @param id String
80
- # @return String
81
- # def id_cleanup(id)
82
- # id.sub(/ \(WITHDRAWN\)/, "").sub(/ \(([^) ]+ )?DRAFT\)/i, "")
83
- # end
84
-
85
- # Get page.
86
- # @param path [String] page's path
87
- # @return [Array<Nokogiri::HTML::Document, String>]
88
- def get_page(url)
89
- uri = URI url
90
- resp = Net::HTTP.get_response(uri)
91
- %r{(?<=newLocation = 'https://' \+ window.location.hostname \+ ')(?<path>[^']+)} =~ resp.body
92
- if path
93
- uri = URI HitCollection::DOMAIN + path
94
- resp = Net::HTTP.get_response(uri)
95
- end
96
- Nokogiri::HTML(resp.body)
97
- rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
98
- EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
99
- Net::ProtocolError, OpenSSL::SSL::SSLError
100
- raise RelatonBib::RequestError, "Could not access #{url}"
101
- end
102
-
103
52
  # Fetch docid.
104
- # @param doc [Nokogiri::HTML::Document, String]
53
+ # @param docid [String]
105
54
  # @return [Array<RelatonBib::DocumentIdentifier>]
106
- def fetch_docid(doc)
107
- item_ref = if doc.is_a? String then doc
108
- else
109
- doc.at(
110
- "//div[contains(@class, 'publications-detail')]/h3"
111
- )&.text&.strip&.sub(/(?<=\w)\([^\)]+\)$/) do |m|
112
- " " + m.upcase
113
- end&.squeeze(" ")&.gsub(/&#13;|\n|\r/, "")
114
- end
115
- item_ref ||= "?"
116
- item_ref.sub! /\sAddendum$/, "-Add"
55
+ def fetch_docid(docid)
56
+ item_ref = docid
57
+ # item_ref ||= "?"
58
+ item_ref.sub!(/\sAddendum$/, "-Add")
117
59
  [RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
118
60
  end
119
61
 
120
- # Fetch id.
121
- # @param doc [Nokogiri::HTML::Document]
122
- # @return [String]
123
- # def fetch_id(doc)
124
- # doc.at("//div[contains(@class, 'publications-detail')]/h3").text.
125
- # strip.gsub(/\s/, "")
126
- # end
127
-
128
62
  # Fetch status.
129
- # @param doc [Nokogiri::HTML::Document, Hash]
130
- # @param status [String]
63
+ # @param doc [Hash]
131
64
  # @return [RelatonNist::DocumentStatus]
132
- def fetch_status(doc, status)
133
- if doc.is_a? Hash
134
- stage = doc["status"]
135
- subst = doc["substage"]
136
- iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
137
- else
138
- case status
139
- when "draft (obsolete)"
140
- stage = "draft-public"
141
- subst = "withdrawn"
142
- when "retired draft"
143
- stage = "draft-public"
144
- subst = "retired"
145
- when "withdrawn"
146
- stage = "final"
147
- subst = "withdrawn"
148
- when /^draft/
149
- stage = "draft-public"
150
- subst = "active"
151
- else
152
- stage = status
153
- subst = "active"
154
- end
155
-
156
- iter = nil
157
- if stage.include? "draft"
158
- iter = 1
159
- history = doc.xpath("//span[@id='pub-history-container']/a"\
160
- "|//span[@id='pub-history-container']/span")
161
- history.each_with_index do |h, idx|
162
- next if h.name == "a"
163
-
164
- iter = idx + 1 if idx.positive?
165
- break
166
- end
167
- end
168
- end
169
-
65
+ def fetch_status(doc)
66
+ stage = doc["status"]
67
+ subst = doc["substage"]
68
+ iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
170
69
  RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter.to_s
171
70
  end
172
71
 
@@ -179,55 +78,43 @@ module RelatonNist
179
78
  end
180
79
 
181
80
  # Fetch dates
182
- # @param doc [Nokogiri::HTML::Document]
81
+ # @param doc [Hash]
183
82
  # @param release_date [Date]
184
83
  # @return [Array<Hash>]
185
84
  def fetch_dates(doc, release_date) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
186
85
  dates = [{ type: "published", on: release_date.to_s }]
187
86
 
188
- if doc.is_a? Hash
189
- issued = RelatonBib.parse_date doc["issued-date"]
190
- updated = RelatonBib.parse_date doc["updated-date"]
191
- dates << { type: "updated", on: updated.to_s } if updated
192
- obsoleted = RelatonBib.parse_date doc["obsoleted-date"]
193
- dates << { type: "obsoleted", on: obsoleted.to_s } if obsoleted
194
- else
195
- d = doc.at("//span[@id='pub-release-date']")&.text&.strip
196
- issued = RelatonBib.parse_date d
197
- end
87
+ # if doc.is_a? Hash
88
+ issued = RelatonBib.parse_date doc["issued-date"]
89
+ updated = RelatonBib.parse_date doc["updated-date"]
90
+ dates << { type: "updated", on: updated.to_s } if updated
91
+ obsoleted = RelatonBib.parse_date doc["obsoleted-date"]
92
+ dates << { type: "obsoleted", on: obsoleted.to_s } if obsoleted
93
+ # else
94
+ # d = doc.at("//span[@id='pub-release-date']")&.text&.strip
95
+ # issued = RelatonBib.parse_date d
96
+ # end
198
97
  dates << { type: "issued", on: issued.to_s }
199
98
  dates
200
99
  end
201
100
 
202
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
203
- # @param doc [Nokogiri::HTML::Document, Hash]
101
+ # @param doc [Hash]
204
102
  # @return [Array<RelatonBib::ContributionInfo>]
205
103
  def fetch_contributors(doc)
206
104
  contribs = []
207
- if doc.is_a? Hash
208
- contribs += contributors_json(
209
- doc["authors"], "author", doc["language"], doc["script"]
210
- )
211
- contribs + contributors_json(
212
- doc["editors"], "editor", doc["language"], doc["script"]
213
- )
214
- else
215
- name = "National Institute of Standards and Technology"
216
- org = RelatonBib::Organization.new(
217
- name: name, url: "www.nist.gov", abbreviation: "NIST",
218
- )
219
- contribs << RelatonBib::ContributionInfo.new(entity: org, role: [type: "publisher"])
220
- authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
221
- contribs += contributors(authors, "author")
222
- editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
223
- contribs + contributors(editors, "editor")
224
- end
105
+ # if doc.is_a? Hash
106
+ contribs += contributors_json(
107
+ doc["authors"], "author", doc["language"], doc["script"]
108
+ )
109
+ contribs + contributors_json(
110
+ doc["editors"], "editor", doc["language"], doc["script"]
111
+ )
225
112
  end
226
113
 
227
114
  # @param doc [Array<Hash>]
228
115
  # @param role [String]
229
116
  # @return [Array<RelatonBib::ContributionInfo>]
230
- def contributors_json(doc, role, lang = "en", script = "Latn")
117
+ def contributors_json(doc, role, lang = "en", script = "Latn") # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
231
118
  doc.map do |contr|
232
119
  if contr["affiliation"]
233
120
  if contr["affiliation"]["acronym"]
@@ -252,43 +139,6 @@ module RelatonNist
252
139
  end.compact
253
140
  end
254
141
 
255
- # rubocop:disable Metrics/CyclomaticComplexity
256
- # @param doc [Nokogiri::HTML::Element, Array<Hash>]
257
- # @param role [String]
258
- # @return [Array<RelatonBib::ContributionInfo>]
259
- def contributors(doc, role, lang = "en", script = "Latn")
260
- return [] if doc.nil?
261
-
262
- doc.text.split(", ").map do |contr|
263
- /(?<an>.+?)(\s+\((?<abbrev>.+?)\))?$/ =~ contr.strip
264
- if abbrev && an.downcase !~ /(task|force|group)/ && an.split.size.between?(2, 3)
265
- fullname = RelatonBib::FullName.new(
266
- completename: RelatonBib::LocalizedString.new(an, lang, script)
267
- )
268
- case abbrev
269
- when "NIST"
270
- org_name = "National Institute of Standards and Technology"
271
- url = "www.nist.gov"
272
- when "MITRE"
273
- org_name = abbrev
274
- url = "www.mitre.org"
275
- else
276
- org_name = abbrev
277
- url = nil
278
- end
279
- org = RelatonBib::Organization.new name: org_name, url: url, abbreviation: abbrev
280
- affiliation = RelatonBib::Affiliation.new organization: org
281
- entity = RelatonBib::Person.new(
282
- name: fullname, affiliation: [affiliation],
283
- )
284
- else
285
- entity = RelatonBib::Organization.new name: an, abbreviation: abbrev
286
- end
287
- RelatonBib::ContributionInfo.new entity: entity, role: [type: role]
288
- end
289
- end
290
- # rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/MethodLength
291
-
292
142
  # @param name [Hash]
293
143
  # @param lang [String]
294
144
  # @param script [String]
@@ -313,87 +163,37 @@ module RelatonNist
313
163
  [RelatonBib::LocalizedString.new(part, lang, script)]
314
164
  end
315
165
 
316
- # @param doc [String, Hash]
166
+ # @param doc [Hash]
317
167
  # @return [String, NilClass]
318
168
  def fetch_edition(doc)
319
- if doc.is_a? Hash
320
- return unless doc["edition"]
321
-
322
- rev = doc["edition"]
323
- else
324
- return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ doc
325
- end
169
+ # if doc.is_a? Hash
170
+ return unless doc["edition"]
326
171
 
172
+ rev = doc["edition"]
327
173
  "Revision #{rev}"
328
174
  end
329
175
 
330
- # Fetch abstracts.
331
- # @param doc [Nokigiri::HTML::Document]
332
- # @return [Array<Hash>]
333
- def fetch_abstract(doc)
334
- abstract_content = doc.xpath(
335
- '//div[contains(@class, "pub-abstract-callout")]/div[1]/p',
336
- ).text
337
- [{
338
- content: abstract_content,
339
- language: "en",
340
- script: "Latn",
341
- format: "text/plain",
342
- }]
343
- end
344
-
345
176
  # Fetch copyright.
346
177
  # @param doc [Nokogiri::HTML::Document, String]
347
178
  # @return [Array<Hash>]
348
179
  def fetch_copyright(doc)
349
180
  name = "National Institute of Standards and Technology"
350
181
  url = "www.nist.gov"
351
- d = if doc.is_a? String then doc
352
- else
353
- doc.at("//span[@id='pub-release-date']")&.text&.strip
354
- end
355
- from = d&.match(/\d{4}/)&.to_s
182
+ from = doc&.match(/\d{4}/)&.to_s
356
183
  [{ owner: [{ name: name, abbreviation: "NIST", url: url }], from: from }]
357
184
  end
358
185
 
359
- # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
360
-
361
186
  # Fetch links.
362
- # @param doc [Nokogiri::HTML::Document, Hash]
187
+ # @param doc [Hash]
363
188
  # @return [Array<Hash>]
364
189
  def fetch_link(doc)
365
190
  links = []
366
- if doc.is_a? Hash
367
- links << { type: "uri", content: doc["uri"] } if doc["uri"]
368
- doi = "https://doi.org/" + doc["doi"] if doc["doi"]
369
- else
370
- pub = doc.at "//p/strong[contains(., 'Publication:')]"
371
- pdf = pub&.at "./following-sibling::a[.=' Local Download']"
372
- doi = pub&.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
373
- links << { type: "pdf", content: pdf[:href] } if pdf
191
+ links << { type: "src", content: doc["uri"] } if doc["uri"]
192
+ if doc["doi"]
193
+ links << { type: "doi", content: "https://doi.org/#{doc['doi']}" }
374
194
  end
375
- links << { type: "doi", content: doi } if doi
376
195
  links
377
196
  end
378
- # rubocop:enable Metrics/MethodLength
379
-
380
- # Fetch relations.
381
- # @param doc [Nokogiri::HTML::Document]
382
- # @return [Array<RelatonNist::DocumentRelation>]
383
- def fetch_relations(doc)
384
- relations = doc.xpath('//span[@id="pub-supersedes-container"]/a').map do |r|
385
- doc_relation "supersedes", r.text, DOMAIN + r[:href]
386
- end
387
-
388
- relations += doc.xpath('//span[@id="pub-part-container"]/a').map do |r|
389
- doc_relation "partOf", r.text, DOMAIN + r[:href]
390
- end
391
-
392
- relations + doc.xpath('//span[@id="pub-related-container"]/a').map do |r|
393
- doc_relation "updates", r.text, DOMAIN + r[:href]
394
- end
395
- end
396
- # rubocop:enable Metrics/AbcSize
397
197
 
398
198
  def fetch_relations_json(doc)
399
199
  relations = doc["supersedes"].map do |r|
@@ -421,67 +221,11 @@ module RelatonNist
421
221
  )
422
222
  end
423
223
 
424
- # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
425
-
426
- # @param doc [Nokogiri::HTML::Document]
427
- # @return [Array<RelatonBib::Series>]
428
- def fetch_series(doc)
429
- series = doc.xpath "//span[@id='pub-history-container']/a"\
430
- "|//span[@id='pub-history-container']/span"
431
- series.map.with_index do |s, idx|
432
- next if s.name == "span"
433
-
434
- iter = if idx.zero? then "I"
435
- else idx + 1
436
- end
437
-
438
- content = s.text.match(/^[^\(]+/).to_s.strip.squeeze " "
439
-
440
- ref = case s.text
441
- when /^Draft/
442
- content.match(/(?<=Draft\s).+/).to_s + " (#{iter}PD)"
443
- when /\(Draft\)/ then content + " (#{iter}PD)"
444
- else content
445
- end
446
-
447
- fref = RelatonBib::FormattedRef.new(
448
- content: ref, language: "en", script: "Latn", format: "text/plain",
449
- )
450
- RelatonBib::Series.new(formattedref: fref)
451
- end.select { |s| s }
452
- end
453
- # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
454
-
455
- # @param doc [Nokogiri::HTML::Document, Hash]
224
+ # @param doc [Hash]
456
225
  # @return [Array<RelatonNist::Keyword>]
457
226
  def fetch_keywords(doc)
458
- kws = if doc.is_a? Hash
459
- doc["keywords"]
460
- else
461
- doc.xpath "//span[@id='pub-keywords-container']/span"
462
- end
463
- kws.map { |kw| kw.is_a?(String) ? kw : kw.text }
464
- end
465
-
466
- # rubocop:disable Metrics/AbcSize
467
- # @param doc [Nokogiri::HTML::Document]
468
- # @return [RelatonNist::CommentPeriod, NilClass]
469
- def fetch_commentperiod(doc)
470
- cp = doc.at "//span[@id='pub-comments-due']"
471
- return unless cp
472
-
473
- to = Date.strptime cp.text.strip, "%B %d, %Y"
474
-
475
- d = doc.at("//span[@id='pub-release-date']").text.strip
476
- from = Date.strptime(d, "%B %Y").to_s
477
-
478
- ex = doc.at "//strong[contains(.,'The comment closing date has been "\
479
- "extended to')]"
480
- ext = ex&.text&.match(/\w+\s\d{2},\s\d{4}/).to_s
481
- extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
482
- CommentPeriod.new from: from, to: to, extended: extended
227
+ doc["keywords"].map { |kw| kw.is_a?(String) ? kw : kw.text }
483
228
  end
484
- # rubocop:enable Metrics/AbcSize
485
229
 
486
230
  # @param json [Hash]
487
231
  # @return [RelatonNist::CommentPeriod, NilClass]
@@ -1,3 +1,3 @@
1
1
  module RelatonNist
2
- VERSION = "1.8.0".freeze
2
+ VERSION = "1.9.3".freeze
3
3
  end
@@ -17,7 +17,7 @@ module RelatonNist
17
17
  # @param item_hash [Hash]
18
18
  # @return [RelatonNist::NistBibliographicItem]
19
19
  def bib_item(item_hash)
20
- NistBibliographicItem.new **item_hash
20
+ NistBibliographicItem.new(**item_hash)
21
21
  end
22
22
 
23
23
  def fetch_status(item)
data/lib/relaton_nist.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "relaton_nist/version"
2
2
  require "relaton_nist/nist_bibliography"
3
+ require "relaton_nist/data_fetcher"
3
4
 
4
5
  # if defined? Relaton
5
6
  # require_relative "relaton/processor"
data/relaton_nist.gemspec CHANGED
@@ -21,19 +21,17 @@ Gem::Specification.new do |spec|
21
21
  spec.bindir = "exe"
22
22
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
23
23
  spec.require_paths = ["lib"]
24
- spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
24
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
25
25
 
26
- # spec.add_development_dependency "debase"
27
26
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
28
27
  spec.add_development_dependency "pry-byebug"
29
- spec.add_development_dependency "rake", "~> 10.0"
28
+ spec.add_development_dependency "rake", "~> 13.0"
30
29
  spec.add_development_dependency "rspec", "~> 3.0"
31
- # spec.add_development_dependency "ruby-debug-ide"
32
30
  spec.add_development_dependency "ruby-jing"
33
31
  spec.add_development_dependency "simplecov"
34
32
  spec.add_development_dependency "vcr"
35
33
  spec.add_development_dependency "webmock"
36
34
 
37
- spec.add_dependency "relaton-bib", "~> 1.8.0"
35
+ spec.add_dependency "relaton-bib", "~> 1.9.0"
38
36
  spec.add_dependency "rubyzip"
39
37
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-nist
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0
4
+ version: 1.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-17 00:00:00.000000000 Z
11
+ date: 2021-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '10.0'
47
+ version: '13.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '10.0'
54
+ version: '13.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -128,14 +128,14 @@ dependencies:
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: 1.8.0
131
+ version: 1.9.0
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: 1.8.0
138
+ version: 1.9.0
139
139
  - !ruby/object:Gem::Dependency
140
140
  name: rubyzip
141
141
  requirement: !ruby/object:Gem::Requirement
@@ -166,6 +166,7 @@ files:
166
166
  - README.adoc
167
167
  - Rakefile
168
168
  - bin/console
169
+ - bin/rspec
169
170
  - bin/setup
170
171
  - grammars/basicdoc.rng
171
172
  - grammars/biblio.rng
@@ -174,6 +175,7 @@ files:
174
175
  - grammars/reqt.rng
175
176
  - lib/relaton_nist.rb
176
177
  - lib/relaton_nist/comment_period.rb
178
+ - lib/relaton_nist/data_fetcher.rb
177
179
  - lib/relaton_nist/document_relation.rb
178
180
  - lib/relaton_nist/document_status.rb
179
181
  - lib/relaton_nist/hash_converter.rb
@@ -199,7 +201,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
199
201
  requirements:
200
202
  - - ">="
201
203
  - !ruby/object:Gem::Version
202
- version: 2.4.0
204
+ version: 2.5.0
203
205
  required_rubygems_version: !ruby/object:Gem::Requirement
204
206
  requirements:
205
207
  - - ">="