relaton-nist 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 2ab3d1123704cbe701f2f8158ee2d60be05a762141a8c10f2d9f196f8b905dab
4
- data.tar.gz: f06320c0918cf4d5b6354015e370b2cc7bab93247bfd842875086005de9ca247
2
+ SHA1:
3
+ metadata.gz: 42ae6073ff5e1cbaba11be06d27c9e0da310bcf4
4
+ data.tar.gz: fd4e493fc0f7f3edefe2af58f26a76954c2b1e2c
5
5
  SHA512:
6
- metadata.gz: 6a1bb823322716ac5115428bedbcad2cee3667bc24b8f7fc02030856f5028b92377196eaa8b4c1c6afe6cd67357af4a9d34d4fe7d09b8313e3310f6cc02394b8
7
- data.tar.gz: 4fddf90e843ed03dc7d2dc281674a5ecf0ce69d58509682a36f39044dcfb7a769378153f194872f2199c1c8b702a6ca3c88be6f618d22a28283814ead733c37c
6
+ metadata.gz: 51ca4ccf407bb4355f669fc82ea2950a720f75af506174a58b807c86e109b21808fc4f380363258941c37328e544f9658c75d9ee24966124f3532f0abf8df421
7
+ data.tar.gz: e23643ecd8e7a685f2542660bf064ef30ff027460b9898aa6a1435f21f7a2ec4b311bdaf7306ed078e920b5b728d7b46402d368d9d6535bc968474793e30ad1a
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- relaton-nist (0.2.1)
4
+ relaton-nist (0.2.2)
5
5
  relaton-bib (~> 0.2.0)
6
+ rubyzip
6
7
 
7
8
  GEM
8
9
  remote: https://rubygems.org/
@@ -32,9 +33,9 @@ GEM
32
33
  pry-byebug (3.7.0)
33
34
  byebug (~> 11.0)
34
35
  pry (~> 0.10)
35
- public_suffix (3.1.0)
36
+ public_suffix (3.1.1)
36
37
  rake (10.5.0)
37
- relaton-bib (0.2.1)
38
+ relaton-bib (0.2.3)
38
39
  addressable
39
40
  nokogiri (~> 1.8.4)
40
41
  rspec (3.8.0)
@@ -52,6 +53,7 @@ GEM
52
53
  rspec-support (3.8.2)
53
54
  ruby-debug-ide (0.7.0)
54
55
  rake (>= 0.8.1)
56
+ rubyzip (1.2.3)
55
57
  safe_yaml (1.0.5)
56
58
  simplecov (0.16.1)
57
59
  docile (~> 1.1)
@@ -8,5 +8,20 @@ end
8
8
 
9
9
  module RelatonNist
10
10
  class Error < StandardError; end
11
- # Your code goes here...
11
+
12
+ class << self
13
+ # @param sdate [String]
14
+ # @return [Date, NilClass]
15
+ def parse_date(sdate)
16
+ if /(?<date>\w+\s\d{4})/ =~ sdate # February 2012
17
+ Date.strptime(date, "%B %Y")
18
+ elsif /(?<date>\w+\s\d{1,2},\s\d{4})/ =~ sdate # February 11, 2012
19
+ Date.strptime(date, "%B %d, %Y")
20
+ elsif /(?<date>\d{4}-\d{2}-\d{2})/ =~ sdate # 2012-02-11
21
+ Date.parse(date)
22
+ elsif /(?<date>\d{4}-\d{2})/ =~ sdate # 2012-02
23
+ Date.strptime date, "%Y-%m"
24
+ end
25
+ end
26
+ end
12
27
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "zip"
4
+ require "fileutils"
3
5
  require "relaton_nist/hit"
4
6
  require "addressable/uri"
5
7
  require "open-uri"
@@ -7,8 +9,8 @@ require "open-uri"
7
9
  module RelatonNist
8
10
  # Page of hit collection.
9
11
  class HitCollection < Array
10
-
11
12
  DOMAIN = "https://csrc.nist.gov"
13
+ DATAFILE = File.expand_path "data/pubs-export.zip", __dir__
12
14
 
13
15
  # @return [TrueClass, FalseClass]
14
16
  attr_reader :fetched
@@ -28,13 +30,58 @@ module RelatonNist
28
30
  def initialize(ref_nbr, year = nil, opts = {})
29
31
  @text = ref_nbr
30
32
  @year = year
33
+
34
+ /(?<docid>(SP|FIPS)\s[0-9-]+)/ =~ text
35
+ hits = docid ? from_json(docid, **opts) : from_csrc(**opts)
36
+
37
+ hits.sort! do |a, b|
38
+ if a.sort_value != b.sort_value
39
+ b.sort_value - a.sort_value
40
+ else
41
+ (b.hit[:release_date] - a.hit[:release_date]).to_i
42
+ end
43
+ end
44
+ concat hits
45
+ @fetched = false
46
+ end
47
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
48
+
49
+ # @return [RelatonNist::HitCollection]
50
+ def fetch
51
+ workers = RelatonBib::WorkersPool.new 4
52
+ workers.worker(&:fetch)
53
+ each do |hit|
54
+ workers << hit
55
+ end
56
+ workers.end
57
+ workers.result
58
+ @fetched = true
59
+ self
60
+ end
61
+
62
+ def to_s
63
+ inspect
64
+ end
65
+
66
+ # @return [String]
67
+ def inspect
68
+ "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
69
+ end
70
+
71
+ private
72
+
73
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
74
+
75
+ # @param opts [Hash] options; the :stage entry is matched against /PD/
76
+ # @return [Array<RelatonNist::Hit>]
77
+ def from_csrc(**opts)
31
78
  from, to = nil
32
79
  if year
33
- d = Date.strptime year, "%Y"
80
+ d = Date.strptime year, "%Y"
34
81
  from = d.strftime "%m/%d/%Y"
35
82
  to = d.next_year.prev_day.strftime "%m/%d/%Y"
36
83
  end
37
- url = "#{DOMAIN}/publications/search?keywords-lg=#{ref_nbr}"
84
+ url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"
38
85
  url += "&dateFrom-lg=#{from}" if from
39
86
  url += "&dateTo-lg=#{to}" if to
40
87
  url += if /PD/ =~ opts[:stage]
@@ -44,7 +91,7 @@ module RelatonNist
44
91
  end
45
92
 
46
93
  doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
47
- hits = doc.css("table.publications-table > tbody > tr").map do |h|
94
+ doc.css("table.publications-table > tbody > tr").map do |h|
48
95
  link = h.at("td/div/strong/a")
49
96
  serie = h.at("td[1]").text.strip
50
97
  code = h.at("td[2]").text.strip
@@ -59,39 +106,59 @@ module RelatonNist
59
106
  }, self
60
107
  )
61
108
  end
62
- hits.sort! do |a, b|
63
- if a.sort_value != b.sort_value
64
- b.sort_value - a.sort_value
109
+ end
110
+
111
+ # Fetches data from json
112
+ # @param docid [String]
113
+ def from_json(docid, **opts)
114
+ data.select do |doc|
115
+ if year
116
+ d = Date.strptime year, "%Y"
117
+ idate = RelatonNist.parse_date doc["issued-date"]
118
+ next unless idate.between? d, d.next_year.prev_day
119
+ end
120
+ if /PD/ =~ opts[:stage]
121
+ next unless %w[draft-public draft-prelim].include? doc["status"]
65
122
  else
66
- (b.hit[:release_date] - a.hit[:release_date]).to_i
123
+ next unless doc["status"] == "final"
67
124
  end
125
+ doc["docidentifier"] =~ Regexp.new(docid)
126
+ end.map do |h|
127
+ /(?<serie>(?<=-)\w+$)/ =~ h["series"]
128
+ title = [h["title-main"], h["title-sub"]].compact.join " - "
129
+ release_date = RelatonNist.parse_date h["published-date"]
130
+ Hit.new(
131
+ {
132
+ code: h["docidentifier"], serie: serie.upcase, title: title,
133
+ url: h["uri"], status: h["status"], release_date: release_date,
134
+ json: h
135
+ }, self
136
+ )
68
137
  end
69
- concat hits
70
- # concat(hits.map { |h| Hit.new(h, self) })
71
- @fetched = false
72
- # @hit_pages = hit_pages
73
138
  end
74
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
75
139
 
76
- # @return [Iecbib::HitCollection]
77
- def fetch
78
- workers = RelatonBib::WorkersPool.new 4
79
- workers.worker(&:fetch)
80
- each do |hit|
81
- workers << hit
140
+ # Fetches json data
141
+ # @return [Hash]
142
+ def data
143
+ ctime = File.ctime DATAFILE if File.exist? DATAFILE
144
+ if !ctime || ctime.to_date < Date.today
145
+ resp = OpenURI.open_uri("https://csrc.nist.gov/CSRC/media/feeds/metanorma/pubs-export.meta")
146
+ if !ctime || ctime < resp.last_modified
147
+ @data = nil
148
+ zip = OpenURI.open_uri "https://csrc.nist.gov/CSRC/media/feeds/metanorma/pubs-export.zip"
149
+ FileUtils.mv zip.path, DATAFILE
150
+ end
82
151
  end
83
- workers.end
84
- workers.result
85
- @fetched = true
86
- self
87
- end
152
+ return if @data
88
153
 
89
- def to_s
90
- inspect
91
- end
92
-
93
- def inspect
94
- "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
154
+ Zip::File.open(DATAFILE) do |zf|
155
+ zf.each do |f|
156
+ @data = JSON.parse f.get_input_stream.read
157
+ break
158
+ end
159
+ end
160
+ @data
95
161
  end
162
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
96
163
  end
97
164
  end
@@ -15,8 +15,7 @@ module RelatonNist
15
15
  def search(text, year = nil, opts = {})
16
16
  HitCollection.new text, year, opts
17
17
  rescue OpenURI::HTTPError, SocketError
18
- warn "Could not access https://www.nist.gov"
19
- []
18
+ raise RelatonBib::RequestError, "Could not access https://www.nist.gov"
20
19
  end
21
20
 
22
21
  # @param code [String] the NIST standard Code to look up (e.g. "8200")
@@ -83,26 +82,22 @@ module RelatonNist
83
82
  # @return [Hash]
84
83
  def nistbib_results_filter(result, year, opts)
85
84
  missed_years = []
85
+ iter = opts[:stage]&.slice(-3, 1)
86
+ iteration = case iter
87
+ when "I" then "1"
88
+ when "F" then "final"
89
+ else iter
90
+ end
86
91
  result.each_slice(3) do |s| # ISO website only allows 3 connections
87
92
  fetch_pages(s, 3).each_with_index do |r, _i|
88
93
  if opts[:issued_date]
89
- r.dates.select { |d| d.type == "issued" }.each do |d|
90
- next unless opts[:issued_date] == d.on
91
- end
94
+ ids = r.dates.select { |d| d.type == "issued" && d.on == opts[:issued_date] }
95
+ next if ids.empty?
92
96
  elsif opts[:updated_date]
93
- r.dates.select { |d| d.type == "published" }.each do |d|
94
- next unless opts[:updated_date] == d.on
95
- end
96
- end
97
- if opts[:stage]
98
- iter = opts[:stage][-3]
99
- iteration = case iter
100
- when "I" then 1
101
- when "F" then "final"
102
- else iter.to_i
103
- end
104
- next if iter && r.status.iteration != iteration
97
+ pds = r.dates.select { |d| d.type == "published" && d.on == opts[:updated_date] }
98
+ next if pds.empty?
105
99
  end
100
+ next if iter && r.status.iteration != iteration
106
101
  return { ret: r } if !year
107
102
 
108
103
  r.dates.select { |d| d.type == "published" }.each do |d|
@@ -11,23 +11,55 @@ module RelatonNist
11
11
  # @param hit_data [Hash]
12
12
  # @return [Hash]
13
13
  def parse_page(hit_data)
14
- doc = get_page hit_data[:url]
15
-
16
- docid = fetch_docid(doc)
14
+ item_data = if hit_data[:json]
15
+ from_json hit_data
16
+ else
17
+ from_csrs hit_data
18
+ end
17
19
  doctype = "standard"
18
20
  titles = fetch_titles(hit_data)
19
- unless /^(SP|NISTIR|FIPS) /.match docid[0].id
20
- doctype = id_cleanup(docid[0].id)
21
- docid[0] = RelatonBib::DocumentIdentifier.new(id: titles[0][:content], type: "NIST")
21
+ unless /^(SP|NISTIR|FIPS) / =~ item_data[:docid][0].id
22
+ doctype = id_cleanup(item_data[:docid][0].id)
23
+ item_data[:docid][0] = RelatonBib::DocumentIdentifier.new(
24
+ id: titles[0][:content], type: "NIST",
25
+ )
22
26
  end
27
+ item_data[:fetched] = Date.today.to_s
28
+ item_data[:type] = "standard"
29
+ item_data[:titles] = titles
30
+ item_data[:doctype] = doctype
31
+
32
+ NistBibliographicItem.new(**item_data)
33
+ end
34
+
35
+ private
36
+
37
+ def from_json(hit_data)
38
+ json = hit_data[:json]
39
+ {
40
+ link: fetch_link(json),
41
+ docid: fetch_docid(json["docidentifier"]),
42
+ dates: fetch_dates(json, hit_data[:release_date]),
43
+ contributors: fetch_contributors(json),
44
+ edition: fetch_edition(json),
45
+ language: [json["language"]],
46
+ script: [json["script"]],
47
+ # abstract: fetch_abstract(doc),
48
+ docstatus: fetch_status(json, hit_data[:status]),
49
+ copyright: fetch_copyright(json["published-date"]),
50
+ relations: fetch_relations_json(json),
51
+ # series: fetch_series(json),
52
+ keyword: fetch_keywords(json),
53
+ commentperiod: fetch_commentperiod_json(json),
54
+ }
55
+ end
23
56
 
24
- NistBibliographicItem.new(
25
- fetched: Date.today.to_s,
26
- type: "standard",
57
+ def from_csrs(hit_data)
58
+ doc = get_page hit_data[:url]
59
+ {
27
60
  # id: fetch_id(doc),
28
- titles: titles,
29
61
  link: fetch_link(doc),
30
- docid: docid,
62
+ docid: fetch_docid(doc),
31
63
  dates: fetch_dates(doc, hit_data[:release_date]),
32
64
  contributors: fetch_contributors(doc),
33
65
  edition: fetch_edition(hit_data[:code]),
@@ -40,8 +72,7 @@ module RelatonNist
40
72
  series: fetch_series(doc),
41
73
  keyword: fetch_keywords(doc),
42
74
  commentperiod: fetch_commentperiod(doc),
43
- doctype: doctype,
44
- )
75
+ }
45
76
  end
46
77
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
47
78
 
@@ -52,8 +83,6 @@ module RelatonNist
52
83
  id.sub(/ \(WITHDRAWN\)/, "").sub(/ \(([^) ]+ )?DRAFT\)/i, "")
53
84
  end
54
85
 
55
- private
56
-
57
86
  # Get page.
58
87
  # @param path [String] page's path
59
88
  # @return [Array<Nokogiri::HTML::Document, String>]
@@ -61,16 +90,23 @@ module RelatonNist
61
90
  uri = URI url
62
91
  resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
63
92
  Nokogiri::HTML(resp.body)
93
+ rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
94
+ Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError
95
+ raise RelatonBib::RequestError, "Could not access #{url}"
64
96
  end
65
97
 
66
98
  # Fetch docid.
67
- # @param doc [Nokogiri::HTML::Document]
99
+ # @param doc [Nokogiri::HTML::Document, String]
68
100
  # @return [Array<RelatonBib::DocumentIdentifier>]
69
101
  def fetch_docid(doc)
70
- item_ref = doc.at("//div[contains(@class, 'publications-detail')]/h3").
71
- text.strip
72
- return [RelatonBib::DocumentIdentifier.new(type: "NIST", id: "?")] unless item_ref
73
-
102
+ item_ref = if doc.is_a? String
103
+ doc
104
+ else
105
+ doc.at(
106
+ "//div[contains(@class, 'publications-detail')]/h3",
107
+ )&.text&.strip
108
+ end
109
+ item_ref ||= "?"
74
110
  [RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
75
111
  end
76
112
 
@@ -83,56 +119,48 @@ module RelatonNist
83
119
  # end
84
120
 
85
121
  # Fetch status.
86
- # @param doc [Nokogiri::HTML::Document]
122
+ # @param doc [Nokogiri::HTML::Document, Hash]
87
123
  # @param status [String]
88
- # @return [Hash]
124
+ # @return [RelatonNist::DocumentStatus]
89
125
  def fetch_status(doc, status)
90
- case status
91
- when "draft (withdrawn)"
92
- stage = "draft-public"
93
- subst = "withdrawn"
94
- when "retired draft"
95
- stage = "draft-public"
96
- subst = "retired"
97
- when "withdrawn"
98
- stage = "final"
99
- subst = "withdrawn"
100
- when "draft"
101
- stage = "draft-public"
102
- subst = "active"
126
+ if doc.is_a? Hash
127
+ stage = doc["status"]
128
+ subst = doc["substage"]
129
+ iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
103
130
  else
104
- stage = status
105
- subst = "active"
106
- end
131
+ case status
132
+ when "draft (withdrawn)"
133
+ stage = "draft-public"
134
+ subst = "withdrawn"
135
+ when "retired draft"
136
+ stage = "draft-public"
137
+ subst = "retired"
138
+ when "withdrawn"
139
+ stage = "final"
140
+ subst = "withdrawn"
141
+ when "draft"
142
+ stage = "draft-public"
143
+ subst = "active"
144
+ else
145
+ stage = status
146
+ subst = "active"
147
+ end
148
+
149
+ iter = nil
150
+ if stage.include? "draft"
151
+ iter = 1
152
+ history = doc.xpath("//span[@id='pub-history-container']/a"\
153
+ "|//span[@id='pub-history-container']/span")
154
+ history.each_with_index do |h, idx|
155
+ next if h.name == "a"
107
156
 
108
- iter = nil
109
- if stage.include? "draft"
110
- iter = 1
111
- history = doc.xpath("//span[@id='pub-history-container']/a"\
112
- "|//span[@id='pub-history-container']/span")
113
- history.each_with_index do |h, idx|
114
- next if h.name == "a"
115
-
116
- iter = idx + 1 if idx.positive?
117
- # iter = if lsif idx < (history.size - 1) && !history.last.text.include?("Draft")
118
- # "final"
119
- # elsif idx.positive? then idx + 1
120
- # end
121
- break
157
+ iter = idx + 1 if idx.positive?
158
+ break
159
+ end
122
160
  end
123
161
  end
124
162
 
125
- # if doc.at "//p/strong[text()='Withdrawn:']"
126
- # substage = "withdrawn"
127
- # else
128
- # substage = "active"
129
- # item_ref = doc.at(
130
- # "//div[contains(@class, 'publications-detail')]/h3",
131
- # ).text.strip
132
- # wip = item_ref.match(/(?<=\()\w+/).to_s
133
- # stage = "draft-public" if wip == "DRAFT"
134
- # end
135
- RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter
163
+ RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter.to_s
136
164
  end
137
165
 
138
166
  # Fetch titles.
@@ -144,46 +172,87 @@ module RelatonNist
144
172
 
145
173
  # Fetch dates
146
174
  # @param doc [Nokogiri::HTML::Document, Hash]
175
+ # @param release_date [Date]
147
176
  # @return [Array<Hash>]
148
177
  def fetch_dates(doc, release_date)
149
178
  dates = [{ type: "published", on: release_date.to_s }]
150
179
 
151
- d = doc.at("//span[@id='pub-release-date']").text.strip
152
- date = if /(?<date>\w+\s\d{4})/ =~ d
153
- Date.strptime(date, "%B %Y")
154
- elsif /(?<date>\w+\s\d{1,2},\s\d{4})/ =~ d
155
- Date.strptime(date, "%B %d, %Y")
156
- end
157
- dates << { type: "issued", on: date.to_s }
158
-
180
+ if doc.is_a? Hash
181
+ issued = RelatonNist.parse_date doc["issued-date"]
182
+ updated = RelatonNist.parse_date doc["updated-date"]
183
+ dates << { type: "updated", on: updated.to_s } if updated
184
+ obsoleted = RelatonNist.parse_date doc["obsoleted-date"]
185
+ dates << { type: "obsoleted", on: obsoleted.to_s } if obsoleted
186
+ else
187
+ d = doc.at("//span[@id='pub-release-date']").text.strip
188
+ issued = RelatonNist.parse_date d
189
+ end
190
+ dates << { type: "issued", on: issued.to_s }
159
191
  dates
160
192
  end
161
193
 
194
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
195
+ # @param doc [Nokogiri::HTML::Document, Hash]
196
+ # @return [Array<RelatonBib::ContributionInfo>]
162
197
  def fetch_contributors(doc)
163
- name = "National Institute of Standards and Technology"
164
- org = RelatonBib::Organization.new(
165
- name: name, url: "www.nist.gov", abbreviation: "NIST",
166
- )
167
- contribs = [
168
- RelatonBib::ContributionInfo.new(entity: org, role: ["publisher"]),
169
- ]
170
-
171
- authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
172
- contribs += contributors(authors, "author")
198
+ contribs = []
199
+ if doc.is_a? Hash
200
+ contribs += contributors_json(
201
+ doc["authors"], "author", doc["language"], doc["script"]
202
+ )
203
+ contribs + contributors_json(
204
+ doc["editors"], "editor", doc["language"], doc["script"]
205
+ )
206
+ else
207
+ name = "National Institute of Standards and Technology"
208
+ org = RelatonBib::Organization.new(
209
+ name: name, url: "www.nist.gov", abbreviation: "NIST",
210
+ )
211
+ contribs << RelatonBib::ContributionInfo.new(entity: org, role: ["publisher"])
212
+ authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
213
+ contribs += contributors(authors, "author")
214
+ editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
215
+ contribs + contributors(editors, "editor")
216
+ end
217
+ end
173
218
 
174
- editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
175
- contribs + contributors(editors, "editor")
219
+ # @param doc [Array<Hash>]
220
+ # @param role [String]
221
+ # @return [Array<RelatonBib::ContributionInfo>]
222
+ def contributors_json(doc, role, lang = "en", script = "Latn")
223
+ doc.map do |contr|
224
+ if contr["affiliation"]
225
+ if contr["affiliation"]["acronym"]
226
+ abbrev = RelatonBib::LocalizedString.new(contr["affiliation"]["acronym"])
227
+ end
228
+ org = RelatonBib::Organization.new(
229
+ name: contr["affiliation"]["name"], abbreviation: abbrev,
230
+ )
231
+ end
232
+ if contr["surname"]
233
+ affiliation = RelatonBib::Affilation.new org
234
+ entity = RelatonBib::Person.new(
235
+ name: full_name(contr, lang, script), affiliation: [affiliation],
236
+ )
237
+ else
238
+ entity = org
239
+ end
240
+ RelatonBib::ContributionInfo.new entity: entity, role: [role]
241
+ end
176
242
  end
177
243
 
178
244
  # rubocop:disable Metrics/CyclomaticComplexity
179
- def contributors(doc, role)
245
+ # @param doc [Nokogiri::HTML::Element, Array<Hash>]
246
+ # @param role [String]
247
+ # @return [Array<RelatonBib::ContributionInfo>]
248
+ def contributors(doc, role, lang = "en", script = "Latn")
180
249
  return [] if doc.nil?
181
250
 
182
251
  doc.text.split(", ").map do |contr|
183
252
  /(?<an>.+?)(\s+\((?<abbrev>.+?)\))?$/ =~ contr
184
253
  if abbrev && an.downcase !~ /(task|force|group)/ && an.split.size.between?(2, 3)
185
254
  fullname = RelatonBib::FullName.new(
186
- completename: RelatonBib::LocalizedString.new(an, "en", "Latn"),
255
+ completename: RelatonBib::LocalizedString.new(an, lang, script),
187
256
  )
188
257
  case abbrev
189
258
  when "NIST"
@@ -199,7 +268,7 @@ module RelatonNist
199
268
  org = RelatonBib::Organization.new name: org_name, url: url, abbreviation: abbrev
200
269
  affiliation = RelatonBib::Affilation.new org
201
270
  entity = RelatonBib::Person.new(
202
- name: fullname, affiliation: [affiliation], contacts: [],
271
+ name: fullname, affiliation: [affiliation],
203
272
  )
204
273
  else
205
274
  entity = RelatonBib::Organization.new name: an, abbreviation: abbrev
@@ -207,17 +276,49 @@ module RelatonNist
207
276
  RelatonBib::ContributionInfo.new entity: entity, role: [role]
208
277
  end
209
278
  end
210
- # rubocop:enable Metrics/CyclomaticComplexity
279
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/MethodLength
280
+
281
+ # @param name [Hash]
282
+ # @param lang [String]
283
+ # @param script [String]
284
+ # @return [RelatonBib::FullName]
285
+ def full_name(name, lang, script)
286
+ RelatonBib::FullName.new(
287
+ surname: RelatonBib::LocalizedString.new(name["surname"], lang, script),
288
+ forenames: name_parts(name["givenName"], lang, script),
289
+ additions: name_parts(name["suffix"], lang, script),
290
+ prefix: name_parts(name["title"], lang, script),
291
+ completename: RelatonBib::LocalizedString.new(name["fullName"], lang, script),
292
+ )
293
+ end
211
294
 
212
- def fetch_edition(code)
213
- return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ code
295
+ # @param part [String, NilClass]
296
+ # @param lang [Strong]
297
+ # @param script [String]
298
+ # @return [Array<RelatonBib::LocalizedString>]
299
+ def name_parts(part, lang, script)
300
+ return [] unless part
301
+
302
+ [RelatonBib::LocalizedString.new(name[part], lang, script)]
303
+ end
304
+
305
+ # @param doc [String, Hash]
306
+ # @return [String, NilClass]
307
+ def fetch_edition(doc)
308
+ if doc.is_a? Hash
309
+ return unless doc["edition"]
310
+
311
+ rev = doc["edition"]
312
+ else
313
+ return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ doc
314
+ end
214
315
 
215
316
  "Revision #{rev}"
216
317
  end
217
318
 
218
319
  # Fetch abstracts.
219
320
  # @param doc [Nokogiri::HTML::Document]
220
- # @return [Array<Array>]
321
+ # @return [Array<Hash>]
221
322
  def fetch_abstract(doc)
222
323
  abstract_content = doc.xpath('//div[contains(@class, "pub-abstract-callout")]/div[1]/p').text
223
324
  [{
@@ -229,58 +330,82 @@ module RelatonNist
229
330
  end
230
331
 
231
332
  # Fetch copyright.
232
- # @param title [String]
333
+ # @param doc [Nokogiri::HTML::Document, String]
233
334
  # @return [Hash]
234
335
  def fetch_copyright(doc)
235
336
  name = "National Institute of Standards and Technology"
236
337
  url = "www.nist.gov"
237
- d = doc.at("//span[@id='pub-release-date']").text.strip
338
+ d = if doc.is_a? String then doc
339
+ else
340
+ doc.at("//span[@id='pub-release-date']").text.strip
341
+ end
238
342
  from = d.match(/\d{4}/).to_s
239
343
  { owner: { name: name, abbreviation: "NIST", url: url }, from: from }
240
344
  end
241
345
 
242
346
  # Fetch links.
243
- # @param doc [Nokogiri::HTML::Document]
347
+ # @param doc [Nokogiri::HTML::Document, Hash]
244
348
  # @return [Array<Hash>]
245
349
  def fetch_link(doc)
246
- pub = doc.at "//p/strong[.='Publication:']"
247
350
  links = []
248
- pdf = pub.at "./following-sibling::a[.=' Local Download']"
249
- links << { type: "pdf", content: pdf[:href] } if pdf
250
- doi = pub.at("./following-sibling::a[contains(.,'(DOI)')]")
251
- links << { type: "doi", content: doi[:href] } if doi
351
+ if doc.is_a? Hash
352
+ links << { type: "uri", content: doc["uri"] } if doc["uri"]
353
+ doi = "https://doi.org/" + doc["doi"] if doc["doi"]
354
+ else
355
+ pub = doc.at "//p/strong[.='Publication:']"
356
+ pdf = pub.at "./following-sibling::a[.=' Local Download']"
357
+ doi = pub.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
358
+ links << { type: "pdf", content: pdf[:href] } if pdf
359
+ end
360
+ links << { type: "doi", content: doi } if doi
252
361
  links
253
362
  end
254
363
 
255
364
  # Fetch relations.
256
365
  # @param doc [Nokogiri::HTML::Document]
257
- # @return [Array<Hash>]
366
+ # @return [Array<RelatonBib::DocumentRelation>]
258
367
  def fetch_relations(doc)
259
368
  relations = doc.xpath('//span[@id="pub-supersedes-container"]/a').map do |r|
260
- doc_relation "supersedes", r
369
+ doc_relation "supersedes", r.text, DOMAIN + r[:href]
261
370
  end
262
371
 
263
372
  relations += doc.xpath('//span[@id="pub-part-container"]/a').map do |r|
264
- doc_relation "partOf", r
373
+ doc_relation "partOf", r.text, DOMAIN + r[:href]
265
374
  end
266
375
 
267
376
  relations + doc.xpath('//span[@id="pub-related-container"]/a').map do |r|
268
- doc_relation "updates", r
377
+ doc_relation "updates", r.text, DOMAIN + r[:href]
269
378
  end
270
379
  end
271
380
 
272
- def doc_relation(type, ref)
381
+ def fetch_relations_json(doc)
382
+ relations = doc["supersedes"].map do |r|
383
+ doc_relation "supersedes", r["docidentifier"], r["uri"]
384
+ end
385
+
386
+ relations + doc["superseded-by"].map do |r|
387
+ doc_relation "updates", r["docidentifier"], r["uri"]
388
+ end
389
+ end
390
+
391
+ # @param type [String]
392
+ # @param ref [String]
393
+ # @param uri [String]
394
+ # @return [RelatonBib::DocumentRelation]
395
+ def doc_relation(type, ref, uri, lang = "en", script = "Latn")
273
396
  RelatonBib::DocumentRelation.new(
274
397
  type: type,
275
398
  bibitem: RelatonBib::BibliographicItem.new(
276
399
  formattedref: RelatonBib::FormattedRef.new(
277
- content: ref.text, language: "en", script: "Latn", format: "text/plain",
400
+ content: ref, language: lang, script: script, format: "text/plain",
278
401
  ),
279
- link: [RelatonBib::TypedUri.new(type: "src", content: DOMAIN + ref[:href])],
402
+ link: [RelatonBib::TypedUri.new(type: "src", content: uri)],
280
403
  ),
281
404
  )
282
405
  end
283
406
 
407
+ # @param doc [Nokogiri::HTML::Document]
408
+ # @return [Array<RelatonBib::Series>]
284
409
  def fetch_series(doc)
285
410
  series = doc.xpath "//span[@id='pub-history-container']/a"\
286
411
  "|//span[@id='pub-history-container']/span"
@@ -305,11 +430,19 @@ module RelatonNist
305
430
  end.select { |s| s }
306
431
  end
307
432
 
433
+ # @param doc [Nokogiri::HTML::Document, Hash]
434
+ # @return [Array<RelatonNist::Keyword>]
308
435
  def fetch_keywords(doc)
309
- kws = doc.xpath "//span[@id='pub-keywords-container']/span"
310
- kws.map { |kw| Keyword.new kw.text }
436
+ kws = if doc.is_a? Hash
437
+ doc["keywords"]
438
+ else
439
+ doc.xpath "//span[@id='pub-keywords-container']/span"
440
+ end
441
+ kws.map { |kw| Keyword.new kw.is_a?(String) ? kw : kw.text }
311
442
  end
312
443
 
444
+ # @param doc [Nokogiri::HTML::Document]
445
+ # @return [RelatonNist::CommentPeriod, NilClass]
313
446
  def fetch_commentperiod(doc)
314
447
  cp = doc.at "//span[@id='pub-comments-due']"
315
448
  return unless cp
@@ -324,6 +457,12 @@ module RelatonNist
324
457
  extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
325
458
  CommentPeriod.new from, to, extended
326
459
  end
460
+
461
+ # @param json [Hash]
462
+ # @return [RelatonNist::CommentPeriod, NilClass]
463
+ def fetch_commentperiod_json(json)
464
+ CommentPeriod.new json["comment-from"], json["comment-to"] if json["comment-from"]
465
+ end
327
466
  end
328
467
  end
329
468
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonNist
2
- VERSION = "0.2.1".freeze
2
+ VERSION = "0.2.2".freeze
3
3
  end
@@ -36,4 +36,5 @@ Gem::Specification.new do |spec|
36
36
  spec.add_development_dependency "webmock"
37
37
 
38
38
  spec.add_dependency "relaton-bib", "~> 0.2.0"
39
+ spec.add_dependency "rubyzip"
39
40
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-nist
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-19 00:00:00.000000000 Z
11
+ date: 2019-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -178,6 +178,20 @@ dependencies:
178
178
  - - "~>"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 0.2.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: rubyzip
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
181
195
  description: 'RelatonNist: retrive NIST standards.'
182
196
  email:
183
197
  - open.source@ribose.com
@@ -200,6 +214,7 @@ files:
200
214
  - lib/relaton/processor.rb
201
215
  - lib/relaton_nist.rb
202
216
  - lib/relaton_nist/comment_period.rb
217
+ - lib/relaton_nist/data/pubs-export.zip
203
218
  - lib/relaton_nist/document_status.rb
204
219
  - lib/relaton_nist/hit.rb
205
220
  - lib/relaton_nist/hit_collection.rb
@@ -230,7 +245,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
230
245
  version: '0'
231
246
  requirements: []
232
247
  rubyforge_project:
233
- rubygems_version: 2.7.7
248
+ rubygems_version: 2.6.12
234
249
  signing_key:
235
250
  specification_version: 4
236
251
  summary: 'RelatonNist: retrive NIST standards.'