relaton-nist 1.7.4 → 1.9.2

Sign up to get free protection for your applications and to get access to all the features.
data/grammars/nist.rng CHANGED
@@ -65,6 +65,9 @@
65
65
  <optional>
66
66
  <ref name="doctype"/>
67
67
  </optional>
68
+ <optional>
69
+ <ref name="docsubtype"/>
70
+ </optional>
68
71
  <optional>
69
72
  <ref name="editorialgroup"/>
70
73
  </optional>
@@ -194,6 +197,9 @@
194
197
  <zeroOrMore>
195
198
  <ref name="termdocsource"/>
196
199
  </zeroOrMore>
200
+ <optional>
201
+ <ref name="misccontainer"/>
202
+ </optional>
197
203
  <optional>
198
204
  <ref name="boilerplate"/>
199
205
  </optional>
@@ -207,6 +213,9 @@
207
213
  <optional>
208
214
  <ref name="bibliography"/>
209
215
  </optional>
216
+ <zeroOrMore>
217
+ <ref name="indexsect"/>
218
+ </zeroOrMore>
210
219
  </element>
211
220
  </define>
212
221
  </grammar>
data/grammars/reqt.rng CHANGED
@@ -30,15 +30,34 @@
30
30
  <data type="boolean"/>
31
31
  </attribute>
32
32
  </optional>
33
+ <optional>
34
+ <attribute name="number"/>
35
+ </optional>
33
36
  <optional>
34
37
  <attribute name="subsequence"/>
35
38
  </optional>
39
+ <optional>
40
+ <attribute name="keep-with-next">
41
+ <data type="boolean"/>
42
+ </attribute>
43
+ </optional>
44
+ <optional>
45
+ <attribute name="keep-lines-together">
46
+ <data type="boolean"/>
47
+ </attribute>
48
+ </optional>
36
49
  <attribute name="id">
37
50
  <data type="ID"/>
38
51
  </attribute>
39
52
  <optional>
40
53
  <attribute name="filename"/>
41
54
  </optional>
55
+ <optional>
56
+ <attribute name="model"/>
57
+ </optional>
58
+ <optional>
59
+ <attribute name="type"/>
60
+ </optional>
42
61
  <optional>
43
62
  <ref name="reqtitle"/>
44
63
  </optional>
@@ -48,9 +67,9 @@
48
67
  <optional>
49
68
  <ref name="subject"/>
50
69
  </optional>
51
- <optional>
70
+ <zeroOrMore>
52
71
  <ref name="reqinherit"/>
53
- </optional>
72
+ </zeroOrMore>
54
73
  <zeroOrMore>
55
74
  <ref name="classification"/>
56
75
  </zeroOrMore>
@@ -135,6 +154,16 @@
135
154
  <data type="boolean"/>
136
155
  </attribute>
137
156
  </optional>
157
+ <optional>
158
+ <attribute name="keep-with-next">
159
+ <data type="boolean"/>
160
+ </attribute>
161
+ </optional>
162
+ <optional>
163
+ <attribute name="keep-lines-together">
164
+ <data type="boolean"/>
165
+ </attribute>
166
+ </optional>
138
167
  <oneOrMore>
139
168
  <ref name="BasicBlock"/>
140
169
  </oneOrMore>
@@ -0,0 +1,221 @@
1
+ # frozen_string_literal: true
2
+
3
+ # require 'English'
4
+ # require 'mechanize'
5
+ # require "fileutils"
6
+ require "yaml"
7
+ # require "open-uri"
8
+ # require "nokogiri"
9
+ # require "relaton_nist"
10
+
11
module RelatonNist
  #
  # Reads the NIST Tech Pubs `allrecords.xml` dataset, converts every
  # `report-paper_metadata` record into a NistBibliographicItem and saves each
  # item as a separate file in the output directory.
  #
  class DataFetcher
    # Maps the `relationship-type` attribute of a related item to the relation
    # type stored in the generated document.
    RELATION_TYPES = {
      "replaces" => "obsoletes",
      "isVersionOf" => "editionOf",
      "hasTranslation" => "hasTranslation",
      "isTranslationOf" => "translatedFrom",
      "hasPreprint" => "hasReprint",
      "isSupplementTo" => "complements",
    }.freeze
    URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"

    # @param output [String] directory the documents are saved to
    # @param format [String] output format; "yaml" and "xml" are handled
    def initialize(output, format)
      @output = output
      @format = format
    end

    #
    # Build the NIST and DOI identifiers of a record.
    #
    # For a few DOIs the item number is patched so that it lines up with the
    # value used in the DOI itself.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>] `[{ type: "NIST", id: ... }, { type: "DOI", id: ... }]`
    def parse_docid(doc)
      doi = doc.at("doi_data/doi").text
      id = doc.at("publisher_item/item_number", "publisher_item/identifier").text.sub(%r{^/}, "")
      case doi
      when "10.6028/NBS.CIRC.12e2revjune" then id.sub!("13e", "12e")
      when "10.6028/NBS.CIRC.36e2" then id.sub!("46e", "36e")
      when "10.6028/NBS.HB.67suppJune1967" then id.sub!("1965", "1967")
      when "10.6028/NBS.HB.105-1r1990" then id.sub!("105-1-1990", "105-1r1990")
      when "10.6028/NIST.HB.150-10-1995" then id.sub!(/150-10$/, "150-10-1995")
      end
      [{ type: "NIST", id: id }, { type: "DOI", id: doi }]
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::DocumentIdentifier>]
    def fetch_docid(doc)
      parse_docid(doc).map do |id|
        RelatonBib::DocumentIdentifier.new(type: id[:type], id: id[:id])
      end
    end

    #
    # Join all titles and subtitles of the record into a single title string.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [RelatonBib::TypedTitleStringCollection, Array] empty Array when
    #   the record has no title
    def fetch_title(doc)
      t = doc.xpath("titles/title|titles/subtitle")
      return [] unless t.any?

      RelatonBib::TypedTitleString.from_string t.map(&:text).join(" "), "en", "Latn"
    end

    #
    # Convert publication and approval dates. The date string is "YYYY",
    # "YYYY-MM" or "YYYY-MM-DD" depending on which elements are present; a day
    # is only used when a month is present.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::BibliographicDate>]
    def fetch_date(doc)
      doc.xpath("publication_date|approval_date").map do |dt|
        on = dt.at("year").text
        if (m = dt.at "month")
          on += "-#{m.text}"
          d = dt.at "day"
          on += "-#{d.text}" if d
        end
        type = dt.name == "publication_date" ? "published" : "confirmed"
        RelatonBib::BibliographicDate.new(type: type, on: on)
      end
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [String, nil] nil when the record has no `edition_number`
    def fetch_edition(doc)
      doc.at("edition_number")&.text
    end

    #
    # Convert related items. The related DOI text becomes the formatted
    # reference of the related bibitem; relationship types missing from
    # RELATION_TYPES yield a nil type.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>]
    def fetch_relation(doc)
      ns = "http://www.crossref.org/relations.xsd"
      doc.xpath("./ns:program/ns:related_item", ns: ns).map do |rel|
        doi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: ns)
        fref = RelatonBib::FormattedRef.new content: doi.text
        bibitem = RelatonBib::BibliographicItem.new formattedref: fref
        type = RELATION_TYPES[doi["relationship-type"]]
        { type: type, bibitem: bibitem }
      end
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::TypedUri>] a single link of type "doi"
    def fetch_link(doc)
      url = doc.at("doi_data/resource").text
      [RelatonBib::TypedUri.new(type: "doi", content: url)]
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::FormattedString>] one entry per abstract paragraph
    def fetch_abstract(doc)
      doc.xpath("jats:abstract/jats:p", "jats" => "http://www.ncbi.nlm.nih.gov/JATS1").map do |a|
        RelatonBib::FormattedString.new(content: a.text, language: doc["language"], script: "Latn")
      end
    end

    #
    # Collect persons (from `contributors/person_name`) followed by publishers.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>] hashes with :entity and :role keys
    def fetch_contributor(doc)
      lang = doc["language"]
      persons = doc.xpath("contributors/person_name").map do |p|
        { entity: fetch_person(p, lang), role: [{ type: p["contributor_role"] }] }
      end
      publishers = doc.xpath("publisher").map do |p|
        abbr = p.at("../institution/institution_acronym")&.text
        org = RelatonBib::Organization.new(name: p.at("publisher_name").text, abbreviation: abbr)
        { entity: org, role: [{ type: "publisher" }] }
      end
      persons + publishers
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<String>]
    def fetch_place(doc)
      doc.xpath("institution/institution_place").map(&:text)
    end

    #
    # Save document
    #
    # The file name is the first document identifier with `/`, whitespace, `:`
    # and `.` replaced by `_`, upcased, and a leading "NIST_IR" turned into
    # "NISTIR". An existing file is never overwritten; a warning is printed
    # instead.
    #
    # @param bib [RelatonNist::NistBibliographicItem]
    #
    def write_file(bib) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      id = bib.docidentifier[0].id.gsub(%r{[/\s:.]}, "_").upcase.sub(/^NIST_IR/, "NISTIR")
      file = File.join(@output, "#{id}.#{@format}")
      if File.exist? file
        warn "File #{file} exists. Docid: #{bib.docidentifier[0].id}"
      else
        output = case @format
                 when "yaml" then bib.to_hash.to_yaml
                 when "xml" then bib.to_xml bibdata: true
                 end
        File.write file, output, encoding: "UTF-8"
      end
    end

    #
    # Create a document instance and save it.
    #
    # @param doc [Nokogiri::XML::Element]
    #
    # @raise [StandardError] any error is reported with `warn` and re-raised
    #
    def parse_doc(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
      item = RelatonNist::NistBibliographicItem.new(
        type: "standard", docid: fetch_docid(doc), title: fetch_title(doc),
        link: fetch_link(doc), abstract: fetch_abstract(doc),
        date: fetch_date(doc), edition: fetch_edition(doc),
        contributor: fetch_contributor(doc), relation: fetch_relation(doc),
        place: fetch_place(doc),
        language: [doc["language"]], script: ["Latn"], doctype: "standard"
      )
      write_file item
    rescue StandardError => e
      # Safe navigation: a record without a DOI must not mask the original error.
      warn "Document: #{doc.at('doi')&.text}"
      warn e.message
      raise
    end

    #
    # Fetch all the documents from the dataset.
    #
    # Existing files of the selected format are removed from the output
    # directory first. Any StandardError is reported with `warn` and not
    # re-raised, so processing stops at the first failing record.
    #
    def fetch # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      t1 = Time.now
      puts "Started at: #{t1}"

      docs = Nokogiri::XML OpenURI.open_uri URL
      # mkdir_p also creates missing parent directories of a nested output path.
      FileUtils.mkdir_p @output
      FileUtils.rm Dir[File.join(@output, "*.#{@format}")]
      docs.xpath("/body/query/doi_record/report-paper/report-paper_metadata")
        .each { |doc| parse_doc doc }

      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    rescue StandardError => e
      warn e.message
    end

    #
    # Fetch all the documents from the dataset.
    #
    # @param [String] output folder name to save the documents
    # @param [String] format format to save the documents
    #
    def self.fetch(output: "data", format: "yaml")
      new(output, format).fetch
    end

    private

    #
    # Build a person from a `person_name` element.
    #
    # @param person [Nokogiri::XML::Element]
    # @param lang [String, nil] language of the record
    # @return [RelatonBib::Person]
    def fetch_person(person, lang)
      forename, initial = parse_given_name(person, lang)
      surname = RelatonBib::LocalizedString.new person.at("surname").text, lang, "Latn"
      ident = person.xpath("ORCID").map do |id|
        RelatonBib::PersonIdentifier.new "orcid", id.text
      end
      fullname = RelatonBib::FullName.new(
        surname: surname, forename: forename, initial: initial, identifier: ident,
      )
      RelatonBib::Person.new name: fullname
    end

    #
    # Split the given name into forenames and initials. A token that is a
    # single word character, optionally followed by a dot, is an initial
    # (stored without the dot); every other token is a forename.
    #
    # @param person [Nokogiri::XML::Element]
    # @param lang [String, nil]
    # @return [Array<Array<RelatonBib::LocalizedString>>] forenames and initials
    def parse_given_name(person, lang)
      forename = []
      initial = []
      person.at("given_name")&.text&.split&.each do |fn|
        if /^(?<init>\w)\.?$/ =~ fn
          initial << RelatonBib::LocalizedString.new(init, lang, "Latn")
        else
          forename << RelatonBib::LocalizedString.new(fn, lang, "Latn")
        end
      end
      [forename, initial]
    end
  end
end
@@ -3,6 +3,8 @@
3
3
  module RelatonNist
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
+ attr_writer :fetch
7
+
6
8
  # Parse page.
7
9
  # @return [RelatonNist::NistBibliographicItem]
8
10
  def fetch
@@ -10,7 +12,7 @@ module RelatonNist
10
12
  end
11
13
 
12
14
  # @return [Integer]
13
- def sort_value
15
+ def sort_value # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
14
16
  @sort_value ||= begin
15
17
  sort_phrase = [hit[:serie], hit[:code], hit[:title]].join " "
16
18
  corr = hit_collection&.text&.split&.map do |w|
@@ -13,75 +13,90 @@ module RelatonNist
13
13
  PUBS_EXPORT = URI.join(DOMAIN, "/CSRC/media/feeds/metanorma/pubs-export")
14
14
  DATAFILEDIR = File.expand_path ".relaton/nist", Dir.home
15
15
  DATAFILE = File.expand_path "pubs-export.zip", DATAFILEDIR
16
+ GHNISTDATA = "https://raw.githubusercontent.com/relaton/relaton-data-nist/main/data/"
16
17
 
17
- # @param ref_nbr [String]
18
- # @param year [String]
19
- # @param opts [Hash]
20
- # @option opts [String] :stage
21
- def initialize(ref_nbr, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize
22
- super ref_nbr, year
18
+ def self.search(text, year = nil, opts = {})
19
+ new(text, year).search(opts)
20
+ end
21
+
22
+ def search(opts)
23
+ @array = from_json(**opts)
24
+ @array = from_ga unless @array.any?
25
+ sort_hits!
26
+ end
23
27
 
24
- /(?<docid>(SP|FIPS)\s[0-9-]+)/ =~ text
25
- @array = docid ? from_json(docid, **opts) : from_csrc(**opts)
26
- @array = from_csrc(**opts) unless @array.any?
28
+ private
27
29
 
30
+ def sort_hits!
28
31
  @array.sort! do |a, b|
29
- if a.sort_value != b.sort_value
30
- b.sort_value - a.sort_value
31
- else
32
+ if a.sort_value == b.sort_value
32
33
  (b.hit[:release_date] - a.hit[:release_date]).to_i
34
+ else
35
+ b.sort_value - a.sort_value
33
36
  end
34
37
  end
38
+ self
35
39
  end
36
40
 
37
- private
41
+ def from_ga # rubocop:disable Metrics/AbcSize
42
+ fn = text.gsub(%r{[/\s:.]}, "_").upcase
43
+ yaml = OpenURI.open_uri "#{GHNISTDATA}#{fn}.yaml"
44
+ hash = YAML.safe_load yaml
45
+ bib = RelatonNist::NistBibliographicItem.from_hash hash
46
+ hit = Hit.new({ code: text }, self)
47
+ hit.fetch = bib
48
+ [hit]
49
+ rescue OpenURI::HTTPError => e
50
+ return [] if e.io.status[0] == "404"
51
+
52
+ raise e
53
+ end
38
54
 
39
55
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
40
56
 
41
57
  # @param stage [String]
42
58
  # @return [Array<RelatonNist::Hit>]
43
- def from_csrc(**opts)
44
- from, to = nil
45
- if year
46
- d = Date.strptime year, "%Y"
47
- from = d.strftime "%m/%d/%Y"
48
- to = d.next_year.prev_day.strftime "%m/%d/%Y"
49
- end
50
- url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"\
51
- "&sortBy-lg=relevence"
52
- url += "&dateFrom-lg=#{from}" if from
53
- url += "&dateTo-lg=#{to}" if to
54
- url += if /PD/.match? opts[:stage]
55
- "&status-lg=Draft,Retired Draft,Withdrawn"
56
- else
57
- "&status-lg=Final,Withdrawn"
58
- end
59
-
60
- doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
61
- doc.css("table.publications-table > tbody > tr").map do |h|
62
- link = h.at("td/div/strong/a")
63
- serie = h.at("td[1]").text.strip
64
- code = h.at("td[2]").text.strip
65
- title = link.text
66
- doc_url = DOMAIN + link[:href]
67
- status = h.at("td[4]").text.strip.downcase
68
- release_date = Date.strptime h.at("td[5]").text.strip, "%m/%d/%Y"
69
- Hit.new(
70
- {
71
- code: code, serie: serie, title: title, url: doc_url,
72
- status: status, release_date: release_date
73
- }, self
74
- )
75
- end
76
- end
59
+ # def from_csrc(**opts)
60
+ # from, to = nil
61
+ # if year
62
+ # d = Date.strptime year, "%Y"
63
+ # from = d.strftime "%m/%d/%Y"
64
+ # to = d.next_year.prev_day.strftime "%m/%d/%Y"
65
+ # end
66
+ # url = "#{DOMAIN}/publications/search?keywords-lg=#{text}"\
67
+ # "&sortBy-lg=relevence"
68
+ # url += "&dateFrom-lg=#{from}" if from
69
+ # url += "&dateTo-lg=#{to}" if to
70
+ # url += if /PD/.match? opts[:stage]
71
+ # "&status-lg=Draft,Retired Draft,Withdrawn"
72
+ # else
73
+ # "&status-lg=Final,Withdrawn"
74
+ # end
75
+
76
+ # doc = Nokogiri::HTML OpenURI.open_uri(::Addressable::URI.parse(url).normalize)
77
+ # doc.css("table.publications-table > tbody > tr").map do |h|
78
+ # link = h.at("td/div/strong/a")
79
+ # serie = h.at("td[1]").text.strip
80
+ # code = h.at("td[2]").text.strip
81
+ # title = link.text
82
+ # doc_url = DOMAIN + link[:href]
83
+ # status = h.at("td[4]").text.strip.downcase
84
+ # release_date = Date.strptime h.at("td[5]").text.strip, "%m/%d/%Y"
85
+ # Hit.new(
86
+ # {
87
+ # code: code, serie: serie, title: title, url: doc_url,
88
+ # status: status, release_date: release_date
89
+ # }, self
90
+ # )
91
+ # end
92
+ # end
77
93
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
78
94
 
79
95
  # Fetches data from JSON
80
- # @param docid [String]
81
96
  # @param stage [String]
82
97
  # @return [Array<RelatonNist::Hit>]
83
- def from_json(docid, **opts)
84
- select_data(docid, **opts).map do |h|
98
+ def from_json(**opts)
99
+ select_data(**opts).map do |h|
85
100
  /(?<serie>(?<=-)\w+$)/ =~ h["series"]
86
101
  title = [h["title-main"], h["title-sub"]].compact.join " - "
87
102
  release_date = RelatonBib.parse_date h["published-date"], false
@@ -91,22 +106,20 @@ module RelatonNist
91
106
  end
92
107
  end
93
108
 
94
- # @param docid [String]
95
109
  # @param stage [String]
96
110
  # @return [Array<Hash>]
97
- def select_data(docid, **opts) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength,Metrics/PerceivedComplexity
98
- # ref = docid.sub(/(?<=\d{3}-\d{2})r(\d+)/, ' Rev. \1')
111
+ def select_data(**opts) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength,Metrics/PerceivedComplexity
99
112
  d = Date.strptime year, "%Y" if year
100
- # didrx = Regexp.new(docid)
113
+ statuses = %w[draft-public draft-prelim]
101
114
  data.select do |doc|
102
115
  next unless match_year?(doc, d)
103
116
 
104
117
  if /PD/.match? opts[:stage]
105
- next unless %w[draft-public draft-prelim].include? doc["status"]
118
+ next unless statuses.include? doc["status"]
106
119
  else
107
120
  next unless doc["status"] == "final"
108
121
  end
109
- doc["docidentifier"].include? docid
122
+ doc["docidentifier"].include? text
110
123
  end
111
124
  end
112
125
 
@@ -134,8 +147,8 @@ module RelatonNist
134
147
  #
135
148
  # @param ctime [Time, NilClass]
136
149
  def fetch_data(ctime)
137
- resp = OpenURI.open_uri("#{PUBS_EXPORT}.meta")
138
- if !ctime || ctime < resp.last_modified
150
+ # resp = OpenURI.open_uri("#{PUBS_EXPORT}.meta")
151
+ if !ctime || ctime < OpenURI.open_uri("#{PUBS_EXPORT}.meta").last_modified
139
152
  @data = nil
140
153
  uri_open = URI.method(:open) || Kernel.method(:open)
141
154
  FileUtils.mkdir_p DATAFILEDIR unless Dir.exist? DATAFILEDIR