relaton-nist 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 2ab3d1123704cbe701f2f8158ee2d60be05a762141a8c10f2d9f196f8b905dab
4
- data.tar.gz: f06320c0918cf4d5b6354015e370b2cc7bab93247bfd842875086005de9ca247
2
+ SHA1:
3
+ metadata.gz: 42ae6073ff5e1cbaba11be06d27c9e0da310bcf4
4
+ data.tar.gz: fd4e493fc0f7f3edefe2af58f26a76954c2b1e2c
5
5
  SHA512:
6
- metadata.gz: 6a1bb823322716ac5115428bedbcad2cee3667bc24b8f7fc02030856f5028b92377196eaa8b4c1c6afe6cd67357af4a9d34d4fe7d09b8313e3310f6cc02394b8
7
- data.tar.gz: 4fddf90e843ed03dc7d2dc281674a5ecf0ce69d58509682a36f39044dcfb7a769378153f194872f2199c1c8b702a6ca3c88be6f618d22a28283814ead733c37c
6
+ metadata.gz: 51ca4ccf407bb4355f669fc82ea2950a720f75af506174a58b807c86e109b21808fc4f380363258941c37328e544f9658c75d9ee24966124f3532f0abf8df421
7
+ data.tar.gz: e23643ecd8e7a685f2542660bf064ef30ff027460b9898aa6a1435f21f7a2ec4b311bdaf7306ed078e920b5b728d7b46402d368d9d6535bc968474793e30ad1a
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- relaton-nist (0.2.1)
4
+ relaton-nist (0.2.2)
5
5
  relaton-bib (~> 0.2.0)
6
+ rubyzip
6
7
 
7
8
  GEM
8
9
  remote: https://rubygems.org/
@@ -32,9 +33,9 @@ GEM
32
33
  pry-byebug (3.7.0)
33
34
  byebug (~> 11.0)
34
35
  pry (~> 0.10)
35
- public_suffix (3.1.0)
36
+ public_suffix (3.1.1)
36
37
  rake (10.5.0)
37
- relaton-bib (0.2.1)
38
+ relaton-bib (0.2.3)
38
39
  addressable
39
40
  nokogiri (~> 1.8.4)
40
41
  rspec (3.8.0)
@@ -52,6 +53,7 @@ GEM
52
53
  rspec-support (3.8.2)
53
54
  ruby-debug-ide (0.7.0)
54
55
  rake (>= 0.8.1)
56
+ rubyzip (1.2.3)
55
57
  safe_yaml (1.0.5)
56
58
  simplecov (0.16.1)
57
59
  docile (~> 1.1)
@@ -8,5 +8,20 @@ end
8
8
 
9
9
  module RelatonNist
10
10
  class Error < StandardError; end
11
- # Your code goes here...
11
+
12
+ class << self
13
# Extracts a date from free-form text.
#
# Four notations are recognised, tried in this order: "February 2012",
# "February 11, 2012", "2012-02-11" and "2012-02". When a pattern matches
# but the matched text is not a real date (e.g. "Revised 2012" for the
# month-name pattern), the next pattern is tried instead of raising.
#
# @param sdate [String, NilClass] text that contains a date
# @return [Date, NilClass] the parsed date, or nil when nothing usable is found
def parse_date(sdate)
  return unless sdate

  [
    [/(?<date>\w+\s\d{4})/, "%B %Y"],               # February 2012
    [/(?<date>\w+\s\d{1,2},\s\d{4})/, "%B %d, %Y"], # February 11, 2012
    [/(?<date>\d{4}-\d{2}-\d{2})/, "%Y-%m-%d"],     # 2012-02-11
    [/(?<date>\d{4}-\d{2})/, "%Y-%m"],              # 2012-02
  ].each do |pattern, format|
    found = pattern.match(sdate)
    next unless found

    begin
      return Date.strptime(found[:date], format)
    rescue ArgumentError
      # The text looked like this notation but is not a valid date;
      # give the remaining notations a chance.
      next
    end
  end
  nil
end
26
+ end
12
27
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "zip"
4
+ require "fileutils"
3
5
  require "relaton_nist/hit"
4
6
  require "addressable/uri"
5
7
  require "open-uri"
@@ -7,8 +9,8 @@ require "open-uri"
7
9
  module RelatonNist
8
10
  # Page of hit collection.
9
11
  class HitCollection < Array
10
-
11
12
  DOMAIN = "https://csrc.nist.gov"
13
+ DATAFILE = File.expand_path "data/pubs-export.zip", __dir__
12
14
 
13
15
  # @return [TrueClass, FalseClass]
14
16
  attr_reader :fetched
@@ -28,13 +30,58 @@ module RelatonNist
28
30
  def initialize(ref_nbr, year = nil, opts = {})
29
31
  @text = ref_nbr
30
32
  @year = year
33
+
34
+ /(?<docid>(SP|FIPS)\s[0-9-]+)/ =~ text
35
+ hits = docid ? from_json(docid, **opts) : from_csrc(**opts)
36
+
37
+ hits.sort! do |a, b|
38
+ if a.sort_value != b.sort_value
39
+ b.sort_value - a.sort_value
40
+ else
41
+ (b.hit[:release_date] - a.hit[:release_date]).to_i
42
+ end
43
+ end
44
+ concat hits
45
+ @fetched = false
46
+ end
47
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
48
+
49
+ # @return [RelatonNist::HitCollection]
50
+ def fetch
51
+ workers = RelatonBib::WorkersPool.new 4
52
+ workers.worker(&:fetch)
53
+ each do |hit|
54
+ workers << hit
55
+ end
56
+ workers.end
57
+ workers.result
58
+ @fetched = true
59
+ self
60
+ end
61
+
62
+ def to_s
63
+ inspect
64
+ end
65
+
66
+ # @return [String]
67
+ def inspect
68
+ "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
69
+ end
70
+
71
+ private
72
+
73
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
74
+
75
+ # @param opts [Hash] search options; `:stage` is matched against /PD/
76
+ # @return [Array<RelatonNist::Hit>]
77
+ def from_csrc(**opts)
31
78
  from, to = nil
32
79
  if year
33
- d = Date.strptime year, "%Y"
80
+ d = Date.strptime year, "%Y"
34
81
  from = d.strftime "%m/%d/%Y"
35
82
  to = d.next_year.prev_day.strftime "%m/%d/%Y"
36
83
  end
37
- url = "#{DOMAIN}/publications/search?keywords-lg=#{ref_nbr}"
84
+ url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"
38
85
  url += "&dateFrom-lg=#{from}" if from
39
86
  url += "&dateTo-lg=#{to}" if to
40
87
  url += if /PD/ =~ opts[:stage]
@@ -44,7 +91,7 @@ module RelatonNist
44
91
  end
45
92
 
46
93
  doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
47
- hits = doc.css("table.publications-table > tbody > tr").map do |h|
94
+ doc.css("table.publications-table > tbody > tr").map do |h|
48
95
  link = h.at("td/div/strong/a")
49
96
  serie = h.at("td[1]").text.strip
50
97
  code = h.at("td[2]").text.strip
@@ -59,39 +106,59 @@ module RelatonNist
59
106
  }, self
60
107
  )
61
108
  end
62
- hits.sort! do |a, b|
63
- if a.sort_value != b.sort_value
64
- b.sort_value - a.sort_value
109
+ end
110
+
111
# Fetches data from json: selects the matching records of the publications
# export and wraps each of them in a hit.
#
# A record is kept when its "docidentifier" matches +docid+, its status
# fits the requested stage (drafts when `opts[:stage]` contains "PD",
# otherwise only "final"), and, if a year was given, its issued date falls
# within that year. Records whose issued date is missing or unparseable are
# skipped when a year filter is active.
#
# @param docid [String] identifier taken from the query, used as a regular expression source
# @param opts [Hash] lookup options; only `:stage` is read here
# @return [Array<RelatonNist::Hit>]
def from_json(docid, **opts)
  id_pattern = Regexp.new(docid)
  draft_wanted = /PD/ =~ opts[:stage]
  # The year window is the same for every record, so compute it once.
  if year
    period_start = Date.strptime year, "%Y"
    period_end = period_start.next_year.prev_day
  end

  matched = data.select do |doc|
    if period_start
      idate = RelatonNist.parse_date doc["issued-date"]
      next unless idate&.between?(period_start, period_end)
    end
    if draft_wanted
      next unless %w[draft-public draft-prelim].include? doc["status"]
    else
      next unless doc["status"] == "final"
    end
    doc["docidentifier"] =~ id_pattern
  end

  matched.map do |h|
    # The series code is the last dash-separated word of "series";
    # it stays nil when the field is absent or has no dash.
    /(?<serie>(?<=-)\w+$)/ =~ h["series"]
    title = [h["title-main"], h["title-sub"]].compact.join " - "
    release_date = RelatonNist.parse_date h["published-date"]
    Hit.new(
      {
        code: h["docidentifier"], serie: serie&.upcase, title: title,
        url: h["uri"], status: h["status"], release_date: release_date,
        json: h
      }, self
    )
  end
end
74
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
75
139
 
76
- # @return [Iecbib::HitCollection]
77
- def fetch
78
- workers = RelatonBib::WorkersPool.new 4
79
- workers.worker(&:fetch)
80
- each do |hit|
81
- workers << hit
140
# Fetches json data: returns the parsed publications export, refreshing
# the local archive first when it may be outdated.
#
# The remote ".meta" resource is consulted only when the archive is absent
# or its ctime is before today; the archive is replaced when it is absent
# or older than the remote Last-Modified time.
#
# @return [Object] the parsed JSON document; callers in this class iterate
#   it as a list of records (presumably Array<Hash> — confirm against the feed)
def data
  ctime = File.ctime DATAFILE if File.exist? DATAFILE
  if !ctime || ctime.to_date < Date.today
    resp = OpenURI.open_uri("https://csrc.nist.gov/CSRC/media/feeds/metanorma/pubs-export.meta")
    remote_mtime = resp.last_modified
    # Without a Last-Modified header there is no evidence the local copy is
    # current, so it is treated as stale rather than compared against nil.
    if !ctime || remote_mtime.nil? || ctime < remote_mtime
      @data = nil
      zip = OpenURI.open_uri "https://csrc.nist.gov/CSRC/media/feeds/metanorma/pubs-export.zip"
      FileUtils.mv zip.path, DATAFILE
    end
  end
  # Hand back the already parsed data (a bare `return` here would yield nil).
  return @data if @data

  Zip::File.open(DATAFILE) do |zf|
    # Only the first entry of the archive is read.
    zf.each do |f|
      @data = JSON.parse f.get_input_stream.read
      break
    end
  end
  @data
end
162
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
96
163
  end
97
164
  end
@@ -15,8 +15,7 @@ module RelatonNist
15
15
  def search(text, year = nil, opts = {})
16
16
  HitCollection.new text, year, opts
17
17
  rescue OpenURI::HTTPError, SocketError
18
- warn "Could not access https://www.nist.gov"
19
- []
18
+ raise RelatonBib::RequestError, "Could not access https://www.nist.gov"
20
19
  end
21
20
 
22
21
  # @param code [String] the NIST standard Code to look up (e.g. "8200")
@@ -83,26 +82,22 @@ module RelatonNist
83
82
  # @return [Hash]
84
83
  def nistbib_results_filter(result, year, opts)
85
84
  missed_years = []
85
+ iter = opts[:stage]&.slice(-3, 1)
86
+ iteration = case iter
87
+ when "I" then "1"
88
+ when "F" then "final"
89
+ else iter
90
+ end
86
91
  result.each_slice(3) do |s| # ISO website only allows 3 connections
87
92
  fetch_pages(s, 3).each_with_index do |r, _i|
88
93
  if opts[:issued_date]
89
- r.dates.select { |d| d.type == "issued" }.each do |d|
90
- next unless opts[:issued_date] == d.on
91
- end
94
+ ids = r.dates.select { |d| d.type == "issued" && d.on == opts[:issued_date] }
95
+ next if ids.empty?
92
96
  elsif opts[:updated_date]
93
- r.dates.select { |d| d.type == "published" }.each do |d|
94
- next unless opts[:updated_date] == d.on
95
- end
96
- end
97
- if opts[:stage]
98
- iter = opts[:stage][-3]
99
- iteration = case iter
100
- when "I" then 1
101
- when "F" then "final"
102
- else iter.to_i
103
- end
104
- next if iter && r.status.iteration != iteration
97
+ pds = r.dates.select { |d| d.type == "published" && d.on == opts[:updated_date] }
98
+ next if pds.empty?
105
99
  end
100
+ next if iter && r.status.iteration != iteration
106
101
  return { ret: r } if !year
107
102
 
108
103
  r.dates.select { |d| d.type == "published" }.each do |d|
@@ -11,23 +11,55 @@ module RelatonNist
11
11
  # @param hit_data [Hash]
12
12
  # @return [Hash]
13
13
  def parse_page(hit_data)
14
- doc = get_page hit_data[:url]
15
-
16
- docid = fetch_docid(doc)
14
+ item_data = if hit_data[:json]
15
+ from_json hit_data
16
+ else
17
+ from_csrs hit_data
18
+ end
17
19
  doctype = "standard"
18
20
  titles = fetch_titles(hit_data)
19
- unless /^(SP|NISTIR|FIPS) /.match docid[0].id
20
- doctype = id_cleanup(docid[0].id)
21
- docid[0] = RelatonBib::DocumentIdentifier.new(id: titles[0][:content], type: "NIST")
21
+ unless /^(SP|NISTIR|FIPS) / =~ item_data[:docid][0].id
22
+ doctype = id_cleanup(item_data[:docid][0].id)
23
+ item_data[:docid][0] = RelatonBib::DocumentIdentifier.new(
24
+ id: titles[0][:content], type: "NIST",
25
+ )
22
26
  end
27
+ item_data[:fetched] = Date.today.to_s
28
+ item_data[:type] = "standard"
29
+ item_data[:titles] = titles
30
+ item_data[:doctype] = doctype
31
+
32
+ NistBibliographicItem.new(**item_data)
33
+ end
34
+
35
+ private
36
+
37
+ def from_json(hit_data)
38
+ json = hit_data[:json]
39
+ {
40
+ link: fetch_link(json),
41
+ docid: fetch_docid(json["docidentifier"]),
42
+ dates: fetch_dates(json, hit_data[:release_date]),
43
+ contributors: fetch_contributors(json),
44
+ edition: fetch_edition(json),
45
+ language: [json["language"]],
46
+ script: [json["script"]],
47
+ # abstract: fetch_abstract(doc),
48
+ docstatus: fetch_status(json, hit_data[:status]),
49
+ copyright: fetch_copyright(json["published-date"]),
50
+ relations: fetch_relations_json(json),
51
+ # series: fetch_series(json),
52
+ keyword: fetch_keywords(json),
53
+ commentperiod: fetch_commentperiod_json(json),
54
+ }
55
+ end
23
56
 
24
- NistBibliographicItem.new(
25
- fetched: Date.today.to_s,
26
- type: "standard",
57
+ def from_csrs(hit_data)
58
+ doc = get_page hit_data[:url]
59
+ {
27
60
  # id: fetch_id(doc),
28
- titles: titles,
29
61
  link: fetch_link(doc),
30
- docid: docid,
62
+ docid: fetch_docid(doc),
31
63
  dates: fetch_dates(doc, hit_data[:release_date]),
32
64
  contributors: fetch_contributors(doc),
33
65
  edition: fetch_edition(hit_data[:code]),
@@ -40,8 +72,7 @@ module RelatonNist
40
72
  series: fetch_series(doc),
41
73
  keyword: fetch_keywords(doc),
42
74
  commentperiod: fetch_commentperiod(doc),
43
- doctype: doctype,
44
- )
75
+ }
45
76
  end
46
77
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
47
78
 
@@ -52,8 +83,6 @@ module RelatonNist
52
83
  id.sub(/ \(WITHDRAWN\)/, "").sub(/ \(([^) ]+ )?DRAFT\)/i, "")
53
84
  end
54
85
 
55
- private
56
-
57
86
  # Get page.
58
87
  # @param path [String] page's path
59
88
  # @return [Array<Nokogiri::HTML::Document, String>]
@@ -61,16 +90,23 @@ module RelatonNist
61
90
  uri = URI url
62
91
  resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
63
92
  Nokogiri::HTML(resp.body)
93
+ rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
94
+ Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError
95
+ raise RelatonBib::RequestError, "Could not access #{url}"
64
96
  end
65
97
 
66
98
  # Fetch docid.
67
- # @param doc [Nokogiri::HTML::Document]
99
+ # @param doc [Nokogiri::HTML::Document, String]
68
100
  # @return [Array<RelatonBib::DocumentIdentifier>]
69
101
  def fetch_docid(doc)
70
- item_ref = doc.at("//div[contains(@class, 'publications-detail')]/h3").
71
- text.strip
72
- return [RelatonBib::DocumentIdentifier.new(type: "NIST", id: "?")] unless item_ref
73
-
102
+ item_ref = if doc.is_a? String
103
+ doc
104
+ else
105
+ doc.at(
106
+ "//div[contains(@class, 'publications-detail')]/h3",
107
+ )&.text&.strip
108
+ end
109
+ item_ref ||= "?"
74
110
  [RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
75
111
  end
76
112
 
@@ -83,56 +119,48 @@ module RelatonNist
83
119
  # end
84
120
 
85
121
  # Fetch status.
86
- # @param doc [Nokogiri::HTML::Document]
122
+ # @param doc [Nokogiri::HTML::Document, Hash]
87
123
  # @param status [String]
88
- # @return [Hash]
124
+ # @return [RelatonNist::DocumentStatus]
89
125
  def fetch_status(doc, status)
90
- case status
91
- when "draft (withdrawn)"
92
- stage = "draft-public"
93
- subst = "withdrawn"
94
- when "retired draft"
95
- stage = "draft-public"
96
- subst = "retired"
97
- when "withdrawn"
98
- stage = "final"
99
- subst = "withdrawn"
100
- when "draft"
101
- stage = "draft-public"
102
- subst = "active"
126
+ if doc.is_a? Hash
127
+ stage = doc["status"]
128
+ subst = doc["substage"]
129
+ iter = doc["iteration"] == "initial" ? 1 : doc["iteration"]
103
130
  else
104
- stage = status
105
- subst = "active"
106
- end
131
+ case status
132
+ when "draft (withdrawn)"
133
+ stage = "draft-public"
134
+ subst = "withdrawn"
135
+ when "retired draft"
136
+ stage = "draft-public"
137
+ subst = "retired"
138
+ when "withdrawn"
139
+ stage = "final"
140
+ subst = "withdrawn"
141
+ when "draft"
142
+ stage = "draft-public"
143
+ subst = "active"
144
+ else
145
+ stage = status
146
+ subst = "active"
147
+ end
148
+
149
+ iter = nil
150
+ if stage.include? "draft"
151
+ iter = 1
152
+ history = doc.xpath("//span[@id='pub-history-container']/a"\
153
+ "|//span[@id='pub-history-container']/span")
154
+ history.each_with_index do |h, idx|
155
+ next if h.name == "a"
107
156
 
108
- iter = nil
109
- if stage.include? "draft"
110
- iter = 1
111
- history = doc.xpath("//span[@id='pub-history-container']/a"\
112
- "|//span[@id='pub-history-container']/span")
113
- history.each_with_index do |h, idx|
114
- next if h.name == "a"
115
-
116
- iter = idx + 1 if idx.positive?
117
- # iter = if lsif idx < (history.size - 1) && !history.last.text.include?("Draft")
118
- # "final"
119
- # elsif idx.positive? then idx + 1
120
- # end
121
- break
157
+ iter = idx + 1 if idx.positive?
158
+ break
159
+ end
122
160
  end
123
161
  end
124
162
 
125
- # if doc.at "//p/strong[text()='Withdrawn:']"
126
- # substage = "withdrawn"
127
- # else
128
- # substage = "active"
129
- # item_ref = doc.at(
130
- # "//div[contains(@class, 'publications-detail')]/h3",
131
- # ).text.strip
132
- # wip = item_ref.match(/(?<=\()\w+/).to_s
133
- # stage = "draft-public" if wip == "DRAFT"
134
- # end
135
- RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter
163
+ RelatonNist::DocumentStatus.new stage: stage, substage: subst, iteration: iter.to_s
136
164
  end
137
165
 
138
166
  # Fetch titles.
@@ -144,46 +172,87 @@ module RelatonNist
144
172
 
145
173
  # Fetch dates
146
174
  # @param doc [Nokogiri::HTML::Document]
175
+ # @param release_date [Date]
147
176
  # @return [Array<Hash>]
148
177
  def fetch_dates(doc, release_date)
149
178
  dates = [{ type: "published", on: release_date.to_s }]
150
179
 
151
- d = doc.at("//span[@id='pub-release-date']").text.strip
152
- date = if /(?<date>\w+\s\d{4})/ =~ d
153
- Date.strptime(date, "%B %Y")
154
- elsif /(?<date>\w+\s\d{1,2},\s\d{4})/ =~ d
155
- Date.strptime(date, "%B %d, %Y")
156
- end
157
- dates << { type: "issued", on: date.to_s }
158
-
180
+ if doc.is_a? Hash
181
+ issued = RelatonNist.parse_date doc["issued-date"]
182
+ updated = RelatonNist.parse_date doc["updated-date"]
183
+ dates << { type: "updated", on: updated.to_s } if updated
184
+ obsoleted = RelatonNist.parse_date doc["obsoleted-date"]
185
+ dates << { type: "obsoleted", on: obsoleted.to_s } if obsoleted
186
+ else
187
+ d = doc.at("//span[@id='pub-release-date']").text.strip
188
+ issued = RelatonNist.parse_date d
189
+ end
190
+ dates << { type: "issued", on: issued.to_s }
159
191
  dates
160
192
  end
161
193
 
194
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
195
+ # @param doc [Nokogiri::HTML::Document, Hash]
196
+ # @return [Array<RelatonBib::ContributionInfo>]
162
197
  def fetch_contributors(doc)
163
- name = "National Institute of Standards and Technology"
164
- org = RelatonBib::Organization.new(
165
- name: name, url: "www.nist.gov", abbreviation: "NIST",
166
- )
167
- contribs = [
168
- RelatonBib::ContributionInfo.new(entity: org, role: ["publisher"]),
169
- ]
170
-
171
- authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
172
- contribs += contributors(authors, "author")
198
+ contribs = []
199
+ if doc.is_a? Hash
200
+ contribs += contributors_json(
201
+ doc["authors"], "author", doc["language"], doc["script"]
202
+ )
203
+ contribs + contributors_json(
204
+ doc["editors"], "editor", doc["language"], doc["script"]
205
+ )
206
+ else
207
+ name = "National Institute of Standards and Technology"
208
+ org = RelatonBib::Organization.new(
209
+ name: name, url: "www.nist.gov", abbreviation: "NIST",
210
+ )
211
+ contribs << RelatonBib::ContributionInfo.new(entity: org, role: ["publisher"])
212
+ authors = doc.at('//h4[.="Author(s)"]/following-sibling::p')
213
+ contribs += contributors(authors, "author")
214
+ editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
215
+ contribs + contributors(editors, "editor")
216
+ end
217
+ end
173
218
 
174
- editors = doc.at('//h4[.="Editor(s)"]/following-sibling::p')
175
- contribs + contributors(editors, "editor")
219
# Builds contribution records from the contributor list of a JSON record.
#
# An entry with a "surname" becomes a person (affiliated with the entry's
# organization, if any); an entry without one becomes the organization
# itself. Entries that describe neither are skipped.
#
# @param doc [Array<Hash>, NilClass] contributor entries; nil is treated as empty
# @param role [String]
# @param lang [String]
# @param script [String]
# @return [Array<RelatonBib::ContributionInfo>]
def contributors_json(doc, role, lang = "en", script = "Latn")
  (doc || []).map do |contr|
    affil = contr["affiliation"]
    org = nil
    if affil
      abbrev = RelatonBib::LocalizedString.new(affil["acronym"]) if affil["acronym"]
      org = RelatonBib::Organization.new(name: affil["name"], abbreviation: abbrev)
    end

    if contr["surname"]
      # Do not wrap a missing organization in an affiliation.
      affiliation = org ? [RelatonBib::Affilation.new(org)] : []
      entity = RelatonBib::Person.new(
        name: full_name(contr, lang, script), affiliation: affiliation,
      )
    else
      entity = org
    end
    next unless entity

    RelatonBib::ContributionInfo.new entity: entity, role: [role]
  end.compact
end
177
243
 
178
244
  # rubocop:disable Metrics/CyclomaticComplexity
179
- def contributors(doc, role)
245
+ # @param doc [Nokogiri::HTML::Element, Array<Hash>]
246
+ # @param role [String]
247
+ # @return [Array<RelatonBib::ContributionInfo>]
248
+ def contributors(doc, role, lang = "en", script = "Latn")
180
249
  return [] if doc.nil?
181
250
 
182
251
  doc.text.split(", ").map do |contr|
183
252
  /(?<an>.+?)(\s+\((?<abbrev>.+?)\))?$/ =~ contr
184
253
  if abbrev && an.downcase !~ /(task|force|group)/ && an.split.size.between?(2, 3)
185
254
  fullname = RelatonBib::FullName.new(
186
- completename: RelatonBib::LocalizedString.new(an, "en", "Latn"),
255
+ completename: RelatonBib::LocalizedString.new(an, lang, script),
187
256
  )
188
257
  case abbrev
189
258
  when "NIST"
@@ -199,7 +268,7 @@ module RelatonNist
199
268
  org = RelatonBib::Organization.new name: org_name, url: url, abbreviation: abbrev
200
269
  affiliation = RelatonBib::Affilation.new org
201
270
  entity = RelatonBib::Person.new(
202
- name: fullname, affiliation: [affiliation], contacts: [],
271
+ name: fullname, affiliation: [affiliation],
203
272
  )
204
273
  else
205
274
  entity = RelatonBib::Organization.new name: an, abbreviation: abbrev
@@ -207,17 +276,49 @@ module RelatonNist
207
276
  RelatonBib::ContributionInfo.new entity: entity, role: [role]
208
277
  end
209
278
  end
210
- # rubocop:enable Metrics/CyclomaticComplexity
279
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/MethodLength
280
+
281
+ # @param name [Hash]
282
+ # @param lang [String]
283
+ # @param script [String]
284
+ # @return [RelatonBib::FullName]
285
+ def full_name(name, lang, script)
286
+ RelatonBib::FullName.new(
287
+ surname: RelatonBib::LocalizedString.new(name["surname"], lang, script),
288
+ forenames: name_parts(name["givenName"], lang, script),
289
+ additions: name_parts(name["suffix"], lang, script),
290
+ prefix: name_parts(name["title"], lang, script),
291
+ completename: RelatonBib::LocalizedString.new(name["fullName"], lang, script),
292
+ )
293
+ end
211
294
 
212
- def fetch_edition(code)
213
- return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ code
295
# Wraps one optional component of a personal name (given name, suffix,
# title, ...) in a list of localized strings.
#
# @param part [String, NilClass] the name component, if present
# @param lang [String]
# @param script [String]
# @return [Array<RelatonBib::LocalizedString>] empty when the component is missing
def name_parts(part, lang, script)
  return [] unless part

  # `part` already is the text to wrap; there is no `name` in this scope.
  [RelatonBib::LocalizedString.new(part, lang, script)]
end
304
+
305
+ # @param doc [String, Hash]
306
+ # @return [String, NilClass]
307
+ def fetch_edition(doc)
308
+ if doc.is_a? Hash
309
+ return unless doc["edition"]
310
+
311
+ rev = doc["edition"]
312
+ else
313
+ return unless /(?<=Rev\.\s)(?<rev>\d+)/ =~ doc
314
+ end
214
315
 
215
316
  "Revision #{rev}"
216
317
  end
217
318
 
218
319
  # Fetch abstracts.
219
320
  # @param doc [Nokogiri::HTML::Document]
220
- # @return [Array<Array>]
321
+ # @return [Array<Hash>]
221
322
  def fetch_abstract(doc)
222
323
  abstract_content = doc.xpath('//div[contains(@class, "pub-abstract-callout")]/div[1]/p').text
223
324
  [{
@@ -229,58 +330,82 @@ module RelatonNist
229
330
  end
230
331
 
231
332
  # Fetch copyright.
232
- # @param title [String]
333
+ # @param doc [Nokogiri::HTML::Document, String]
233
334
  # @return [Hash]
234
335
  def fetch_copyright(doc)
235
336
  name = "National Institute of Standards and Technology"
236
337
  url = "www.nist.gov"
237
- d = doc.at("//span[@id='pub-release-date']").text.strip
338
+ d = if doc.is_a? String then doc
339
+ else
340
+ doc.at("//span[@id='pub-release-date']").text.strip
341
+ end
238
342
  from = d.match(/\d{4}/).to_s
239
343
  { owner: { name: name, abbreviation: "NIST", url: url }, from: from }
240
344
  end
241
345
 
242
346
  # Fetch links.
243
- # @param doc [Nokogiri::HTML::Document]
347
+ # @param doc [Nokogiri::HTML::Document, Hash]
244
348
  # @return [Array<Hash>]
245
349
  def fetch_link(doc)
246
- pub = doc.at "//p/strong[.='Publication:']"
247
350
  links = []
248
- pdf = pub.at "./following-sibling::a[.=' Local Download']"
249
- links << { type: "pdf", content: pdf[:href] } if pdf
250
- doi = pub.at("./following-sibling::a[contains(.,'(DOI)')]")
251
- links << { type: "doi", content: doi[:href] } if doi
351
+ if doc.is_a? Hash
352
+ links << { type: "uri", content: doc["uri"] } if doc["uri"]
353
+ doi = "https://doi.org/" + doc["doi"] if doc["doi"]
354
+ else
355
+ pub = doc.at "//p/strong[.='Publication:']"
356
+ pdf = pub.at "./following-sibling::a[.=' Local Download']"
357
+ doi = pub.at("./following-sibling::a[contains(.,'(DOI)')]")&.attr :href
358
+ links << { type: "pdf", content: pdf[:href] } if pdf
359
+ end
360
+ links << { type: "doi", content: doi } if doi
252
361
  links
253
362
  end
254
363
 
255
364
  # Fetch relations.
256
365
  # @param doc [Nokogiri::HTML::Document]
257
- # @return [Array<Hash>]
366
+ # @return [Array<RelatonBib::DocumentRelation>]
258
367
  def fetch_relations(doc)
259
368
  relations = doc.xpath('//span[@id="pub-supersedes-container"]/a').map do |r|
260
- doc_relation "supersedes", r
369
+ doc_relation "supersedes", r.text, DOMAIN + r[:href]
261
370
  end
262
371
 
263
372
  relations += doc.xpath('//span[@id="pub-part-container"]/a').map do |r|
264
- doc_relation "partOf", r
373
+ doc_relation "partOf", r.text, DOMAIN + r[:href]
265
374
  end
266
375
 
267
376
  relations + doc.xpath('//span[@id="pub-related-container"]/a').map do |r|
268
- doc_relation "updates", r
377
+ doc_relation "updates", r.text, DOMAIN + r[:href]
269
378
  end
270
379
  end
271
380
 
272
- def doc_relation(type, ref)
381
# Builds document relations from the "supersedes" and "superseded-by"
# lists of a JSON record. A missing list is treated as empty.
#
# @param doc [Hash] JSON record
# @return [Array<RelatonBib::DocumentRelation>]
def fetch_relations_json(doc)
  supersedes = (doc["supersedes"] || []).map do |r|
    doc_relation "supersedes", r["docidentifier"], r["uri"]
  end

  # NOTE(review): "superseded-by" entries are recorded with the "updates"
  # relation type, as in the original code — confirm this is intended.
  superseded_by = (doc["superseded-by"] || []).map do |r|
    doc_relation "updates", r["docidentifier"], r["uri"]
  end

  supersedes + superseded_by
end
390
+
391
+ # @param type [String]
392
+ # @param ref [String]
393
+ # @param uri [String]
394
+ # @return [RelatonBib::DocumentRelation]
395
+ def doc_relation(type, ref, uri, lang = "en", script = "Latn")
273
396
  RelatonBib::DocumentRelation.new(
274
397
  type: type,
275
398
  bibitem: RelatonBib::BibliographicItem.new(
276
399
  formattedref: RelatonBib::FormattedRef.new(
277
- content: ref.text, language: "en", script: "Latn", format: "text/plain",
400
+ content: ref, language: lang, script: script, format: "text/plain",
278
401
  ),
279
- link: [RelatonBib::TypedUri.new(type: "src", content: DOMAIN + ref[:href])],
402
+ link: [RelatonBib::TypedUri.new(type: "src", content: uri)],
280
403
  ),
281
404
  )
282
405
  end
283
406
 
407
+ # @param doc [Nokogiri::HTML::Document]
408
+ # @return [Array<RelatonBib::Series>]
284
409
  def fetch_series(doc)
285
410
  series = doc.xpath "//span[@id='pub-history-container']/a"\
286
411
  "|//span[@id='pub-history-container']/span"
@@ -305,11 +430,19 @@ module RelatonNist
305
430
  end.select { |s| s }
306
431
  end
307
432
 
433
# Collects the keywords of a document.
#
# For a JSON record the "keywords" list is used (a missing list yields no
# keywords); for a CSRC page the keyword spans are used.
#
# @param doc [Nokogiri::HTML::Document, Hash]
# @return [Array<RelatonNist::Keyword>]
def fetch_keywords(doc)
  kws = if doc.is_a? Hash
          doc["keywords"] || []
        else
          doc.xpath "//span[@id='pub-keywords-container']/span"
        end
  # JSON keywords are plain strings, page keywords are nodes.
  kws.map { |kw| Keyword.new kw.is_a?(String) ? kw : kw.text }
end
312
443
 
444
+ # @param doc [Nokogiri::HTML::Document]
445
+ # @return [RelatonNist::CommentPeriod, NilClass]
313
446
  def fetch_commentperiod(doc)
314
447
  cp = doc.at "//span[@id='pub-comments-due']"
315
448
  return unless cp
@@ -324,6 +457,12 @@ module RelatonNist
324
457
  extended = ext.empty? ? nil : Date.strptime(ext, "%B %d, %Y")
325
458
  CommentPeriod.new from, to, extended
326
459
  end
460
+
461
+ # @param json [Hash]
462
+ # @return [RelatonNist::CommentPeriod, NilClass]
463
+ def fetch_commentperiod_json(json)
464
+ CommentPeriod.new json["comment-from"], json["comment-to"] if json["comment-from"]
465
+ end
327
466
  end
328
467
  end
329
468
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonNist
2
- VERSION = "0.2.1".freeze
2
+ VERSION = "0.2.2".freeze
3
3
  end
@@ -36,4 +36,5 @@ Gem::Specification.new do |spec|
36
36
  spec.add_development_dependency "webmock"
37
37
 
38
38
  spec.add_dependency "relaton-bib", "~> 0.2.0"
39
+ spec.add_dependency "rubyzip"
39
40
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-nist
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-19 00:00:00.000000000 Z
11
+ date: 2019-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -178,6 +178,20 @@ dependencies:
178
178
  - - "~>"
179
179
  - !ruby/object:Gem::Version
180
180
  version: 0.2.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: rubyzip
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :runtime
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
181
195
  description: 'RelatonNist: retrive NIST standards.'
182
196
  email:
183
197
  - open.source@ribose.com
@@ -200,6 +214,7 @@ files:
200
214
  - lib/relaton/processor.rb
201
215
  - lib/relaton_nist.rb
202
216
  - lib/relaton_nist/comment_period.rb
217
+ - lib/relaton_nist/data/pubs-export.zip
203
218
  - lib/relaton_nist/document_status.rb
204
219
  - lib/relaton_nist/hit.rb
205
220
  - lib/relaton_nist/hit_collection.rb
@@ -230,7 +245,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
230
245
  version: '0'
231
246
  requirements: []
232
247
  rubyforge_project:
233
- rubygems_version: 2.7.7
248
+ rubygems_version: 2.6.12
234
249
  signing_key:
235
250
  specification_version: 4
236
251
  summary: 'RelatonNist: retrive NIST standards.'