relaton-nist 1.8.0 → 1.9.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 708aed7e58956fbc6f13f3762f643c8dcd894d71d27019e640adf63b8b8b5215
4
- data.tar.gz: 55e8c9f4bfd97442aebb7a4ee782f5620fba41b4f89e33f44cf79f60df6c7f95
3
+ metadata.gz: ea210109d9e306b8c9c6b4395812b52454dad6b8fdbd87cc08ec750e9dd2d4ac
4
+ data.tar.gz: e2dad0af3e66b42b2b5d5f2ecf48653ea21b2662d9bed9f6003a5b7a6d953f1d
5
5
  SHA512:
6
- metadata.gz: 1a13a4b39a41bce75a8ce7d79f741b38954539701d5f2788b0beee850b4ff54e50055c9ab23a79e0df3b40e6e2dbded6cd4155ea227589d34147b1a0524ce99b
7
- data.tar.gz: 477d2f7d2cd8a3870a469e8e2c5ac12a8c68c3ff96fe0c6fea8bdf903d50c1287d63e0a10b7ca81987db3718330b687f3ebedfed149e6a8ee4e973bd0e03a3cb
6
+ metadata.gz: '08a75dfaf1616874bab1c6edd2eb9a1f80c64a2d8d052dd66a3f4ca4f5b8aac1ee520c397f62327d718865fbd5f476db0e9a4ebef445ae3ecbe6f0366ee75e66'
7
+ data.tar.gz: cae6a5c20af9c77ead564ee5337d3092356a1786337406c260e75a387cc0446bd9937a23b2a24d1eafcc5b0bb1e98a621288e091634b7d950d112d96f13019cf
@@ -16,19 +16,9 @@ jobs:
16
16
  strategy:
17
17
  fail-fast: false
18
18
  matrix:
19
- ruby: [ '2.7', '2.6', '2.5', '2.4' ]
19
+ ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
20
  os: [ ubuntu-latest, windows-latest, macos-latest ]
21
21
  experimental: [ false ]
22
- include:
23
- - ruby: '3.0'
24
- os: 'ubuntu-latest'
25
- experimental: true
26
- - ruby: '3.0'
27
- os: 'windows-latest'
28
- experimental: true
29
- - ruby: '3.0'
30
- os: 'macos-latest'
31
- experimental: true
32
22
  steps:
33
23
  - uses: actions/checkout@v2
34
24
  with:
data/.gitignore CHANGED
@@ -7,6 +7,7 @@
7
7
  /spec/reports/
8
8
  /tmp/
9
9
  .vscode/
10
+ /data/
10
11
  .rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-master-ci-rubocop-yml
11
12
 
12
13
  # rspec failure tracking
data/.rubocop.yml CHANGED
@@ -5,6 +5,6 @@
5
5
  inherit_from:
6
6
  - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
7
7
  AllCops:
8
- TargetRubyVersion: 2.4
8
+ TargetRubyVersion: 2.5
9
9
  Rails:
10
10
  Enabled: false
data/README.adoc CHANGED
@@ -147,6 +147,17 @@ item.docidentifier.first.id
147
147
  => "SP 800-38A-Add"
148
148
  ----
149
149
 
150
+ === Typed links
151
+
152
+ NIST documents may have `src` and `doi` link types.
153
+
154
+ [source,ruby]
155
+ ----
156
+ item.link
157
+ => [#<RelatonBib::TypedUri:0x00007f901971dc10 @content=#<Addressable::URI:0x62c URI:https://csrc.nist.gov/publications/detail/sp/800-67/rev-2/final>, @type="src">,
158
+ #<RelatonBib::TypedUri:0x00007f901971d6e8 @content=#<Addressable::URI:0x640 URI:https://doi.org/10.6028/NIST.SP.800-67r2>, @type="doi">]
159
+ ----
160
+
150
161
  === Create bibliographic item from YAML
151
162
  [source,ruby]
152
163
  ----
@@ -159,6 +170,25 @@ RelatonNist::NistBibliographicItem.from_hash hash
159
170
  ...
160
171
  ----
161
172
 
173
+ === Fetch data
174
+
175
+ This gem uses the https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml dataset as one of its data sources.
176
+
177
+ The method `RelatonNist::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
178
+ Arguments:
179
+
180
+ - `output` - folder to save documents (default './data').
181
+ - `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
182
+
183
+ [source,ruby]
184
+ ----
185
+ RelatonNist::DataFetcher.fetch
186
+ Started at: 2021-09-01 18:01:01 +0200
187
+ Stopped at: 2021-09-01 18:01:43 +0200
188
+ Done in: 42 sec.
189
+ => nil
190
+ ----
191
+
162
192
  == Development
163
193
 
164
194
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/bin/rspec ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rspec' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile", # `||=` keeps any BUNDLE_GEMFILE the caller already set
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__) # path of a `bundle` file located next to this script
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/ # only the first 300 bytes are read to look for the marker
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rspec-core", "rspec") # load the `rspec` executable that Gem.bin_path resolves from the rspec-core gem
data/grammars/biblio.rng CHANGED
@@ -787,6 +787,7 @@
787
787
  <value>adapted</value>
788
788
  <value>vote-started</value>
789
789
  <value>vote-ended</value>
790
+ <value>announced</value>
790
791
  </choice>
791
792
  </define>
792
793
  <define name="bdate">
data/grammars/isodoc.rng CHANGED
@@ -45,6 +45,11 @@
45
45
  <optional>
46
46
  <attribute name="alt"/>
47
47
  </optional>
48
+ <optional>
49
+ <attribute name="updatetype">
50
+ <data type="boolean"/>
51
+ </attribute>
52
+ </optional>
48
53
  <text/>
49
54
  </element>
50
55
  </define>
@@ -199,6 +204,18 @@
199
204
  </zeroOrMore>
200
205
  </element>
201
206
  </define>
207
+ <define name="dt">
208
+ <element name="dt">
209
+ <optional>
210
+ <attribute name="id">
211
+ <data type="ID"/>
212
+ </attribute>
213
+ </optional>
214
+ <zeroOrMore>
215
+ <ref name="TextElement"/>
216
+ </zeroOrMore>
217
+ </element>
218
+ </define>
202
219
  <define name="example">
203
220
  <element name="example">
204
221
  <attribute name="id">
@@ -543,6 +560,9 @@
543
560
  </define>
544
561
  <define name="BibDataExtensionType">
545
562
  <ref name="doctype"/>
563
+ <optional>
564
+ <ref name="docsubtype"/>
565
+ </optional>
546
566
  <optional>
547
567
  <ref name="editorialgroup"/>
548
568
  </optional>
@@ -890,6 +910,14 @@
890
910
  </define>
891
911
  </include>
892
912
  <!-- end overrides -->
913
+ <define name="docsubtype">
914
+ <element name="subdoctype">
915
+ <ref name="DocumentSubtype"/>
916
+ </element>
917
+ </define>
918
+ <define name="DocumentSubtype">
919
+ <text/>
920
+ </define>
893
921
  <define name="colgroup">
894
922
  <element name="colgroup">
895
923
  <oneOrMore>
@@ -939,7 +967,34 @@
939
967
  <define name="concept">
940
968
  <element name="concept">
941
969
  <optional>
942
- <attribute name="term"/>
970
+ <attribute name="ital">
971
+ <data type="boolean"/>
972
+ </attribute>
973
+ </optional>
974
+ <optional>
975
+ <attribute name="ref">
976
+ <data type="boolean"/>
977
+ </attribute>
978
+ </optional>
979
+ <optional>
980
+ <element name="refterm">
981
+ <zeroOrMore>
982
+ <choice>
983
+ <ref name="PureTextElement"/>
984
+ <ref name="stem"/>
985
+ </choice>
986
+ </zeroOrMore>
987
+ </element>
988
+ </optional>
989
+ <optional>
990
+ <element name="renderterm">
991
+ <zeroOrMore>
992
+ <choice>
993
+ <ref name="PureTextElement"/>
994
+ <ref name="stem"/>
995
+ </choice>
996
+ </zeroOrMore>
997
+ </element>
943
998
  </optional>
944
999
  <choice>
945
1000
  <ref name="eref"/>
@@ -965,6 +1020,9 @@
965
1020
  </attribute>
966
1021
  <attribute name="name"/>
967
1022
  <attribute name="action"/>
1023
+ <optional>
1024
+ <attribute name="class"/>
1025
+ </optional>
968
1026
  <zeroOrMore>
969
1027
  <choice>
970
1028
  <ref name="TextElement"/>
@@ -1191,13 +1249,17 @@
1191
1249
  </define>
1192
1250
  <define name="IsoWorkgroup">
1193
1251
  <optional>
1194
- <attribute name="number">
1195
- <data type="int"/>
1196
- </attribute>
1252
+ <attribute name="number"/>
1197
1253
  </optional>
1198
1254
  <optional>
1199
1255
  <attribute name="type"/>
1200
1256
  </optional>
1257
+ <optional>
1258
+ <attribute name="identifier"/>
1259
+ </optional>
1260
+ <optional>
1261
+ <attribute name="prefix"/>
1262
+ </optional>
1201
1263
  <text/>
1202
1264
  </define>
1203
1265
  <define name="ics">
@@ -1459,26 +1521,26 @@
1459
1521
  <optional>
1460
1522
  <ref name="section-title"/>
1461
1523
  </optional>
1462
- <group>
1524
+ <choice>
1463
1525
  <choice>
1464
1526
  <group>
1465
- <zeroOrMore>
1527
+ <oneOrMore>
1466
1528
  <ref name="BasicBlock"/>
1467
- </zeroOrMore>
1529
+ </oneOrMore>
1468
1530
  <zeroOrMore>
1469
1531
  <ref name="note"/>
1470
1532
  </zeroOrMore>
1471
1533
  </group>
1472
1534
  <ref name="amend"/>
1473
1535
  </choice>
1474
- <zeroOrMore>
1536
+ <oneOrMore>
1475
1537
  <choice>
1476
1538
  <ref name="clause-subsection"/>
1477
1539
  <ref name="terms"/>
1478
1540
  <ref name="definitions"/>
1479
1541
  </choice>
1480
- </zeroOrMore>
1481
- </group>
1542
+ </oneOrMore>
1543
+ </choice>
1482
1544
  </define>
1483
1545
  <define name="Annex-Section">
1484
1546
  <optional>
data/grammars/nist.rng CHANGED
@@ -65,6 +65,9 @@
65
65
  <optional>
66
66
  <ref name="doctype"/>
67
67
  </optional>
68
+ <optional>
69
+ <ref name="docsubtype"/>
70
+ </optional>
68
71
  <optional>
69
72
  <ref name="editorialgroup"/>
70
73
  </optional>
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "yaml"
4
+
5
module RelatonNist
  #
  # Downloads the NIST Tech Pubs `allrecords.xml` dataset, converts every
  # `report-paper_metadata` record into a NistBibliographicItem and writes each
  # item to a separate file in the output folder.
  #
  class DataFetcher
    # Maps the `relationship-type` attribute found in the dataset to the
    # relation type stored in the bibliographic item.
    # NOTE(review): "hasPreprint" is mapped to "hasReprint" - confirm that this
    # is intentional and not a typo.
    RELATION_TYPES = {
      "replaces" => "obsoletes",
      "isVersionOf" => "editionOf",
      "hasTranslation" => "hasTranslation",
      "isTranslationOf" => "translatedFrom",
      "hasPreprint" => "hasReprint",
      "isSupplementTo" => "complements",
    }.freeze
    URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"

    #
    # @param output [String] folder name to save the documents
    # @param format [String] format to save the documents (yaml, xml, bibxml)
    #
    def initialize(output, format)
      @output = output
      @format = format
      # The file extension is the format without a leading "bib"
      # ("bibxml" documents are saved as "*.xml").
      @ext = format.sub(/\Abib/, "")
    end

    #
    # Extract the NIST identifier and the DOI of a record.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>] two hashes with `:type` and `:id` keys,
    #   the NIST identifier first and the DOI second
    #
    def parse_docid(doc)
      doi = doc.at("doi_data/doi").text
      id = doc.at("publisher_item/item_number", "publisher_item/identifier").text.sub(%r{^/}, "")
      # For these DOIs the identifier is rewritten so that it agrees with the DOI.
      case doi
      when "10.6028/NBS.CIRC.12e2revjune" then id.sub!("13e", "12e")
      when "10.6028/NBS.CIRC.36e2" then id.sub!("46e", "36e")
      when "10.6028/NBS.HB.67suppJune1967" then id.sub!("1965", "1967")
      when "10.6028/NBS.HB.105-1r1990" then id.sub!("105-1-1990", "105-1r1990")
      when "10.6028/NIST.HB.150-10-1995" then id.sub!(/150-10$/, "150-10-1995")
      end
      [{ type: "NIST", id: id }, { type: "DOI", id: doi }]
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::DocumentIdentifier>]
    def fetch_docid(doc)
      parse_docid(doc).map do |id|
        RelatonBib::DocumentIdentifier.new(type: id[:type], id: id[:id])
      end
    end

    #
    # Build the title from the record's title and subtitle elements joined
    # with a space.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [RelatonBib::TypedTitleStringCollection, Array] empty array when
    #   the record has neither a title nor a subtitle
    #
    def fetch_title(doc)
      t = doc.xpath("titles/title|titles/subtitle")
      return [] unless t.any?

      RelatonBib::TypedTitleString.from_string t.map(&:text).join(" "), "en", "Latn"
    end

    #
    # Convert publication and approval dates. A publication date becomes a
    # "published" date, an approval date becomes a "confirmed" date.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::BibliographicDate>]
    #
    def fetch_date(doc)
      doc.xpath("publication_date|approval_date").map do |dt|
        on = dt.at("year").text
        # The day is appended only when the month is present.
        if (m = dt.at "month")
          on += "-#{m.text}"
          d = dt.at "day"
          on += "-#{d.text}" if d
        end
        type = dt.name == "publication_date" ? "published" : "confirmed"
        RelatonBib::BibliographicDate.new(type: type, on: on)
      end
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [String, nil] nil when the record has no edition number
    def fetch_edition(doc)
      doc.at("edition_number")&.text
    end

    #
    # Convert related items. The relation target is kept as a formatted
    # reference holding the text of the relation element.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>] hashes with `:type` and `:bibitem` keys; `:type`
    #   is nil when the relationship type is not listed in RELATION_TYPES
    #
    def fetch_relation(doc)
      ns = "http://www.crossref.org/relations.xsd"
      doc.xpath("./ns:program/ns:related_item", ns: ns).map do |rel|
        doi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: ns)
        fref = RelatonBib::FormattedRef.new content: doi.text
        bibitem = RelatonBib::BibliographicItem.new formattedref: fref
        type = RELATION_TYPES[doi["relationship-type"]]
        { type: type, bibitem: bibitem }
      end
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::TypedUri>] a single link of type "doi"
    def fetch_link(doc)
      url = doc.at("doi_data/resource").text
      [RelatonBib::TypedUri.new(type: "doi", content: url)]
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<RelatonBib::FormattedString>] one item per abstract paragraph
    def fetch_abstract(doc)
      doc.xpath("jats:abstract/jats:p", "jats" => "http://www.ncbi.nlm.nih.gov/JATS1").map do |a|
        RelatonBib::FormattedString.new(content: a.text, language: doc["language"], script: "Latn")
      end
    end

    #
    # Collect contributors: persons first, then publishers.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>] hashes with `:entity` and `:role` keys
    #
    def fetch_contributor(doc)
      person_contributors(doc) + publisher_contributors(doc)
    end

    # @param doc [Nokogiri::XML::Element]
    # @return [Array<String>]
    def fetch_place(doc)
      doc.xpath("institution/institution_place").map(&:text)
    end

    #
    # Save document. An existing file is never overwritten; a warning is
    # printed instead.
    #
    # @param bib [RelatonNist::NistBibliographicItem]
    #
    def write_file(bib) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      # The file name is derived from the first document identifier.
      id = bib.docidentifier[0].id.gsub(%r{[/\s:.]}, "_").upcase.sub(/^NIST_IR/, "NISTIR")
      file = File.join(@output, "#{id}.#{@ext}")
      if File.exist? file
        warn "File #{file} exists. Docid: #{bib.docidentifier[0].id}"
      else
        output = case @format
                 when "yaml" then bib.to_hash.to_yaml
                 when "xml" then bib.to_xml bibdata: true
                 else bib.send "to_#{@format}"
                 end
        File.write file, output, encoding: "UTF-8"
      end
    end

    #
    # Create a document instance and save it.
    #
    # @param doc [Nokogiri::XML::Element]
    #
    # @raise [StandardError] any error is reported with the DOI of the
    #   failing record and then re-raised
    #
    def parse_doc(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
      item = RelatonNist::NistBibliographicItem.new(
        type: "standard", docid: fetch_docid(doc), title: fetch_title(doc),
        link: fetch_link(doc), abstract: fetch_abstract(doc),
        date: fetch_date(doc), edition: fetch_edition(doc),
        contributor: fetch_contributor(doc), relation: fetch_relation(doc),
        place: fetch_place(doc),
        language: [doc["language"]], script: ["Latn"], doctype: "standard"
      )
      write_file item
    rescue StandardError => e
      # Safe navigation keeps a missing DOI element from masking the original error.
      warn "Document: #{doc.at('doi')&.text}"
      warn e.message
      raise
    end

    #
    # Fetch all the documents from the dataset. Files with the current
    # extension that already exist in the output folder are removed first.
    # Any error is printed with `warn` and stops the run.
    #
    def fetch # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
      t1 = Time.now
      puts "Started at: #{t1}"

      docs = Nokogiri::XML OpenURI.open_uri URL
      # mkdir_p also creates missing parent folders of a nested output path.
      FileUtils.mkdir_p @output unless Dir.exist? @output
      FileUtils.rm Dir[File.join(@output, "*.#{@ext}")]
      docs.xpath("/body/query/doi_record/report-paper/report-paper_metadata")
        .each { |doc| parse_doc doc }

      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    rescue StandardError => e
      warn e.message
    end

    #
    # Fetch all the documents from the dataset
    #
    # @param [String] output folder name to save the documents
    # @param [String] format format to save the documents (yaml, xml, bibxml)
    #
    def self.fetch(output: "data", format: "yaml")
      new(output, format).fetch
    end

    private

    # Build one contributor hash per `contributors/person_name` element.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>]
    def person_contributors(doc)
      lang = doc["language"]
      doc.xpath("contributors/person_name").map do |p|
        forename, initial = split_given_name(p.at("given_name")&.text, lang)
        surname = RelatonBib::LocalizedString.new p.at("surname").text, lang, "Latn"
        ident = p.xpath("ORCID").map do |id|
          RelatonBib::PersonIdentifier.new "orcid", id.text
        end
        fullname = RelatonBib::FullName.new(
          surname: surname, forename: forename, initial: initial, identifier: ident,
        )
        person = RelatonBib::Person.new name: fullname
        { entity: person, role: [{ type: p["contributor_role"] }] }
      end
    end

    # Split a given name into forenames and initials. A word made of a single
    # word character, optionally followed by a dot, is treated as an initial
    # and stored without the dot.
    #
    # @param given_name [String, nil]
    # @param lang [String, nil]
    # @return [Array<Array<RelatonBib::LocalizedString>>] forenames and initials
    def split_given_name(given_name, lang)
      forename = []
      initial = []
      given_name.to_s.split.each do |part|
        if /\A(?<init>\w)\.?\z/ =~ part
          initial << RelatonBib::LocalizedString.new(init, lang, "Latn")
        else
          forename << RelatonBib::LocalizedString.new(part, lang, "Latn")
        end
      end
      [forename, initial]
    end

    # Build one contributor hash per `publisher` element. The abbreviation is
    # taken from the sibling `institution/institution_acronym` element.
    #
    # @param doc [Nokogiri::XML::Element]
    # @return [Array<Hash>]
    def publisher_contributors(doc)
      doc.xpath("publisher").map do |p|
        abbr = p.at("../institution/institution_acronym")&.text
        org = RelatonBib::Organization.new(name: p.at("publisher_name").text, abbreviation: abbr)
        { entity: org, role: [{ type: "publisher" }] }
      end
    end
  end
end
@@ -3,6 +3,8 @@
3
3
  module RelatonNist
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
+ attr_writer :fetch
7
+
6
8
  # Parse page.
7
9
  # @return [RelatonNist::NistBibliographicItem]
8
10
  def fetch
@@ -10,7 +12,7 @@ module RelatonNist
10
12
  end
11
13
 
12
14
  # @return [Integer]
13
- def sort_value
15
+ def sort_value # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
14
16
  @sort_value ||= begin
15
17
  sort_phrase = [hit[:serie], hit[:code], hit[:title]].join " "
16
18
  corr = hit_collection&.text&.split&.map do |w|