relaton-nist 1.8.0 → 1.9.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/README.adoc +30 -0
- data/bin/rspec +29 -0
- data/grammars/biblio.rng +1 -0
- data/grammars/isodoc.rng +72 -10
- data/grammars/nist.rng +3 -0
- data/lib/relaton_nist/data_fetcher.rb +217 -0
- data/lib/relaton_nist/hit.rb +3 -1
- data/lib/relaton_nist/hit_collection.rb +71 -58
- data/lib/relaton_nist/nist_bibliographic_item.rb +1 -1
- data/lib/relaton_nist/nist_bibliography.rb +16 -6
- data/lib/relaton_nist/processor.rb +14 -1
- data/lib/relaton_nist/scrapper.rb +45 -301
- data/lib/relaton_nist/version.rb +1 -1
- data/lib/relaton_nist/xml_parser.rb +1 -1
- data/lib/relaton_nist.rb +1 -0
- data/relaton_nist.gemspec +3 -5
- metadata +9 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea210109d9e306b8c9c6b4395812b52454dad6b8fdbd87cc08ec750e9dd2d4ac
|
4
|
+
data.tar.gz: e2dad0af3e66b42b2b5d5f2ecf48653ea21b2662d9bed9f6003a5b7a6d953f1d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '08a75dfaf1616874bab1c6edd2eb9a1f80c64a2d8d052dd66a3f4ca4f5b8aac1ee520c397f62327d718865fbd5f476db0e9a4ebef445ae3ecbe6f0366ee75e66'
|
7
|
+
data.tar.gz: cae6a5c20af9c77ead564ee5337d3092356a1786337406c260e75a387cc0446bd9937a23b2a24d1eafcc5b0bb1e98a621288e091634b7d950d112d96f13019cf
|
data/.github/workflows/rake.yml
CHANGED
@@ -16,19 +16,9 @@ jobs:
|
|
16
16
|
strategy:
|
17
17
|
fail-fast: false
|
18
18
|
matrix:
|
19
|
-
ruby: [ '
|
19
|
+
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
20
|
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
21
|
experimental: [ false ]
|
22
|
-
include:
|
23
|
-
- ruby: '3.0'
|
24
|
-
os: 'ubuntu-latest'
|
25
|
-
experimental: true
|
26
|
-
- ruby: '3.0'
|
27
|
-
os: 'windows-latest'
|
28
|
-
experimental: true
|
29
|
-
- ruby: '3.0'
|
30
|
-
os: 'macos-latest'
|
31
|
-
experimental: true
|
32
22
|
steps:
|
33
23
|
- uses: actions/checkout@v2
|
34
24
|
with:
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/README.adoc
CHANGED
@@ -147,6 +147,17 @@ item.docidentifier.first.id
|
|
147
147
|
=> "SP 800-38A-Add"
|
148
148
|
----
|
149
149
|
|
150
|
+
=== Typed links
|
151
|
+
|
152
|
+
NIST documents may have `src` and `doi` link types.
|
153
|
+
|
154
|
+
[source,ruby]
|
155
|
+
----
|
156
|
+
item.link
|
157
|
+
=> [#<RelatonBib::TypedUri:0x00007f901971dc10 @content=#<Addressable::URI:0x62c URI:https://csrc.nist.gov/publications/detail/sp/800-67/rev-2/final>, @type="src">,
|
158
|
+
#<RelatonBib::TypedUri:0x00007f901971d6e8 @content=#<Addressable::URI:0x640 URI:https://doi.org/10.6028/NIST.SP.800-67r2>, @type="doi">]
|
159
|
+
----
|
160
|
+
|
150
161
|
=== Create bibliographic item from YAML
|
151
162
|
[source,ruby]
|
152
163
|
----
|
@@ -159,6 +170,25 @@ RelatonNist::NistBibliographicItem.from_hash hash
|
|
159
170
|
...
|
160
171
|
----
|
161
172
|
|
173
|
+
=== Fetch data
|
174
|
+
|
175
|
+
This gem uses the https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml dataset as one of its data sources.
|
176
|
+
|
177
|
+
The method `RelatonNist::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
178
|
+
Arguments:
|
179
|
+
|
180
|
+
- `output` - folder to save documents (default './data').
|
181
|
+
- `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
|
182
|
+
|
183
|
+
[source,ruby]
|
184
|
+
----
|
185
|
+
RelatonNist::DataFetcher.fetch
|
186
|
+
Started at: 2021-09-01 18:01:01 +0200
|
187
|
+
Stopped at: 2021-09-01 18:01:43 +0200
|
188
|
+
Done in: 42 sec.
|
189
|
+
=> nil
|
190
|
+
----
|
191
|
+
|
162
192
|
== Development
|
163
193
|
|
164
194
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/bin/rspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rspec' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("rspec-core", "rspec")
|
data/grammars/biblio.rng
CHANGED
data/grammars/isodoc.rng
CHANGED
@@ -45,6 +45,11 @@
|
|
45
45
|
<optional>
|
46
46
|
<attribute name="alt"/>
|
47
47
|
</optional>
|
48
|
+
<optional>
|
49
|
+
<attribute name="updatetype">
|
50
|
+
<data type="boolean"/>
|
51
|
+
</attribute>
|
52
|
+
</optional>
|
48
53
|
<text/>
|
49
54
|
</element>
|
50
55
|
</define>
|
@@ -199,6 +204,18 @@
|
|
199
204
|
</zeroOrMore>
|
200
205
|
</element>
|
201
206
|
</define>
|
207
|
+
<define name="dt">
|
208
|
+
<element name="dt">
|
209
|
+
<optional>
|
210
|
+
<attribute name="id">
|
211
|
+
<data type="ID"/>
|
212
|
+
</attribute>
|
213
|
+
</optional>
|
214
|
+
<zeroOrMore>
|
215
|
+
<ref name="TextElement"/>
|
216
|
+
</zeroOrMore>
|
217
|
+
</element>
|
218
|
+
</define>
|
202
219
|
<define name="example">
|
203
220
|
<element name="example">
|
204
221
|
<attribute name="id">
|
@@ -543,6 +560,9 @@
|
|
543
560
|
</define>
|
544
561
|
<define name="BibDataExtensionType">
|
545
562
|
<ref name="doctype"/>
|
563
|
+
<optional>
|
564
|
+
<ref name="docsubtype"/>
|
565
|
+
</optional>
|
546
566
|
<optional>
|
547
567
|
<ref name="editorialgroup"/>
|
548
568
|
</optional>
|
@@ -890,6 +910,14 @@
|
|
890
910
|
</define>
|
891
911
|
</include>
|
892
912
|
<!-- end overrides -->
|
913
|
+
<define name="docsubtype">
|
914
|
+
<element name="subdoctype">
|
915
|
+
<ref name="DocumentSubtype"/>
|
916
|
+
</element>
|
917
|
+
</define>
|
918
|
+
<define name="DocumentSubtype">
|
919
|
+
<text/>
|
920
|
+
</define>
|
893
921
|
<define name="colgroup">
|
894
922
|
<element name="colgroup">
|
895
923
|
<oneOrMore>
|
@@ -939,7 +967,34 @@
|
|
939
967
|
<define name="concept">
|
940
968
|
<element name="concept">
|
941
969
|
<optional>
|
942
|
-
<attribute name="
|
970
|
+
<attribute name="ital">
|
971
|
+
<data type="boolean"/>
|
972
|
+
</attribute>
|
973
|
+
</optional>
|
974
|
+
<optional>
|
975
|
+
<attribute name="ref">
|
976
|
+
<data type="boolean"/>
|
977
|
+
</attribute>
|
978
|
+
</optional>
|
979
|
+
<optional>
|
980
|
+
<element name="refterm">
|
981
|
+
<zeroOrMore>
|
982
|
+
<choice>
|
983
|
+
<ref name="PureTextElement"/>
|
984
|
+
<ref name="stem"/>
|
985
|
+
</choice>
|
986
|
+
</zeroOrMore>
|
987
|
+
</element>
|
988
|
+
</optional>
|
989
|
+
<optional>
|
990
|
+
<element name="renderterm">
|
991
|
+
<zeroOrMore>
|
992
|
+
<choice>
|
993
|
+
<ref name="PureTextElement"/>
|
994
|
+
<ref name="stem"/>
|
995
|
+
</choice>
|
996
|
+
</zeroOrMore>
|
997
|
+
</element>
|
943
998
|
</optional>
|
944
999
|
<choice>
|
945
1000
|
<ref name="eref"/>
|
@@ -965,6 +1020,9 @@
|
|
965
1020
|
</attribute>
|
966
1021
|
<attribute name="name"/>
|
967
1022
|
<attribute name="action"/>
|
1023
|
+
<optional>
|
1024
|
+
<attribute name="class"/>
|
1025
|
+
</optional>
|
968
1026
|
<zeroOrMore>
|
969
1027
|
<choice>
|
970
1028
|
<ref name="TextElement"/>
|
@@ -1191,13 +1249,17 @@
|
|
1191
1249
|
</define>
|
1192
1250
|
<define name="IsoWorkgroup">
|
1193
1251
|
<optional>
|
1194
|
-
<attribute name="number"
|
1195
|
-
<data type="int"/>
|
1196
|
-
</attribute>
|
1252
|
+
<attribute name="number"/>
|
1197
1253
|
</optional>
|
1198
1254
|
<optional>
|
1199
1255
|
<attribute name="type"/>
|
1200
1256
|
</optional>
|
1257
|
+
<optional>
|
1258
|
+
<attribute name="identifier"/>
|
1259
|
+
</optional>
|
1260
|
+
<optional>
|
1261
|
+
<attribute name="prefix"/>
|
1262
|
+
</optional>
|
1201
1263
|
<text/>
|
1202
1264
|
</define>
|
1203
1265
|
<define name="ics">
|
@@ -1459,26 +1521,26 @@
|
|
1459
1521
|
<optional>
|
1460
1522
|
<ref name="section-title"/>
|
1461
1523
|
</optional>
|
1462
|
-
<
|
1524
|
+
<choice>
|
1463
1525
|
<choice>
|
1464
1526
|
<group>
|
1465
|
-
<
|
1527
|
+
<oneOrMore>
|
1466
1528
|
<ref name="BasicBlock"/>
|
1467
|
-
</
|
1529
|
+
</oneOrMore>
|
1468
1530
|
<zeroOrMore>
|
1469
1531
|
<ref name="note"/>
|
1470
1532
|
</zeroOrMore>
|
1471
1533
|
</group>
|
1472
1534
|
<ref name="amend"/>
|
1473
1535
|
</choice>
|
1474
|
-
<
|
1536
|
+
<oneOrMore>
|
1475
1537
|
<choice>
|
1476
1538
|
<ref name="clause-subsection"/>
|
1477
1539
|
<ref name="terms"/>
|
1478
1540
|
<ref name="definitions"/>
|
1479
1541
|
</choice>
|
1480
|
-
</
|
1481
|
-
</
|
1542
|
+
</oneOrMore>
|
1543
|
+
</choice>
|
1482
1544
|
</define>
|
1483
1545
|
<define name="Annex-Section">
|
1484
1546
|
<optional>
|
data/grammars/nist.rng
CHANGED
@@ -0,0 +1,217 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "yaml"
|
4
|
+
|
5
|
+
module RelatonNist
|
6
|
+
class DataFetcher
|
7
|
+
RELATION_TYPES = {
|
8
|
+
"replaces" => "obsoletes",
|
9
|
+
"isVersionOf" => "editionOf",
|
10
|
+
"hasTranslation" => "hasTranslation",
|
11
|
+
"isTranslationOf" => "translatedFrom",
|
12
|
+
"hasPreprint" => "hasReprint",
|
13
|
+
"isSupplementTo" => "complements",
|
14
|
+
}.freeze
|
15
|
+
URL = "https://raw.githubusercontent.com/usnistgov/NIST-Tech-Pubs/nist-pages/xml/allrecords.xml"
|
16
|
+
|
17
|
+
def initialize(output, format)
|
18
|
+
@output = output
|
19
|
+
@format = format
|
20
|
+
@ext = format.sub(/^bib/, "")
|
21
|
+
end
|
22
|
+
|
23
|
+
def parse_docid(doc)
|
24
|
+
doi = doc.at("doi_data/doi").text
|
25
|
+
id = doc.at("publisher_item/item_number", "publisher_item/identifier").text.sub(%r{^/}, "")
|
26
|
+
case doi
|
27
|
+
when "10.6028/NBS.CIRC.12e2revjune" then id.sub!("13e", "12e")
|
28
|
+
when "10.6028/NBS.CIRC.36e2" then id.sub!("46e", "36e")
|
29
|
+
when "10.6028/NBS.HB.67suppJune1967" then id.sub!("1965", "1967")
|
30
|
+
when "10.6028/NBS.HB.105-1r1990" then id.sub!("105-1-1990", "105-1r1990")
|
31
|
+
when "10.6028/NIST.HB.150-10-1995" then id.sub!(/150-10$/, "150-10-1995")
|
32
|
+
end
|
33
|
+
[{ type: "NIST", id: id }, { type: "DOI", id: doi }]
|
34
|
+
end
|
35
|
+
|
36
|
+
# @param doc [Nokogiri::XML::Element]
|
37
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
38
|
+
def fetch_docid(doc)
|
39
|
+
parse_docid(doc).map do |id|
|
40
|
+
RelatonBib::DocumentIdentifier.new(type: id[:type], id: id[:id])
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# @param doc [Nokogiri::XML::Element]
|
45
|
+
# @return [RelatonBib::TypedTitleStringCollection, Array]
|
46
|
+
def fetch_title(doc)
|
47
|
+
t = doc.xpath("titles/title|titles/subtitle")
|
48
|
+
return [] unless t.any?
|
49
|
+
|
50
|
+
RelatonBib::TypedTitleString.from_string t.map(&:text).join(" "), "en", "Latn"
|
51
|
+
end
|
52
|
+
|
53
|
+
# @param doc [Nokogiri::XML::Element]
|
54
|
+
# @return [Array<RelatonBib::BibliographicDate>]
|
55
|
+
def fetch_date(doc)
|
56
|
+
doc.xpath("publication_date|approval_date").map do |dt|
|
57
|
+
on = dt.at("year").text
|
58
|
+
if (m = dt.at "month")
|
59
|
+
on += "-#{m.text}"
|
60
|
+
d = dt.at "day"
|
61
|
+
on += "-#{d.text}" if d
|
62
|
+
end
|
63
|
+
type = dt.name == "publication_date" ? "published" : "confirmed"
|
64
|
+
RelatonBib::BibliographicDate.new(type: type, on: on)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# @param doc [Nokogiri::XML::Element]
|
69
|
+
# @return [String]
|
70
|
+
def fetch_edition(doc)
|
71
|
+
doc.at("edition_number")&.text
|
72
|
+
end
|
73
|
+
|
74
|
+
# @param doc [Nokogiri::XML::Element]
|
75
|
+
# @return [Array<Hash>]
|
76
|
+
def fetch_relation(doc)
|
77
|
+
ns = "http://www.crossref.org/relations.xsd"
|
78
|
+
doc.xpath("./ns:program/ns:related_item", ns: ns).map do |rel|
|
79
|
+
doi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: ns)
|
80
|
+
# ref = doi_to_id doi.text
|
81
|
+
# ref, = parse_docid doc
|
82
|
+
fref = RelatonBib::FormattedRef.new content: doi.text
|
83
|
+
bibitem = RelatonBib::BibliographicItem.new formattedref: fref
|
84
|
+
type = RELATION_TYPES[doi["relationship-type"]]
|
85
|
+
{ type: type, bibitem: bibitem }
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# @param doc [Nokogiri::XML::Element]
|
90
|
+
# @return [Array<RelatonBib::TypedUri>]
|
91
|
+
def fetch_link(doc)
|
92
|
+
url = doc.at("doi_data/resource").text
|
93
|
+
[RelatonBib::TypedUri.new(type: "doi", content: url)]
|
94
|
+
end
|
95
|
+
|
96
|
+
# @param doc [Nokogiri::XML::Element]
|
97
|
+
# @return [Array<RelatonBib::FormattedString>]
|
98
|
+
def fetch_abstract(doc)
|
99
|
+
doc.xpath("jats:abstract/jats:p", "jats" => "http://www.ncbi.nlm.nih.gov/JATS1").map do |a|
|
100
|
+
RelatonBib::FormattedString.new(content: a.text, language: doc["language"], script: "Latn")
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# @param doc [Nokogiri::XML::Element]
|
105
|
+
# @return [Array<Hash>]
|
106
|
+
def fetch_contributor(doc) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
107
|
+
contribs = doc.xpath("contributors/person_name").map do |p|
|
108
|
+
forename = []
|
109
|
+
initial = []
|
110
|
+
p.at("given_name")&.text&.split&.each do |fn|
|
111
|
+
if /^(?<init>\w)\.?$/ =~ fn
|
112
|
+
initial << RelatonBib::LocalizedString.new(init, doc["language"], "Latn")
|
113
|
+
else
|
114
|
+
forename << RelatonBib::LocalizedString.new(fn, doc["language"], "Latn")
|
115
|
+
end
|
116
|
+
end
|
117
|
+
sname = p.at("surname").text
|
118
|
+
surname = RelatonBib::LocalizedString.new sname, doc["language"], "Latn"
|
119
|
+
initial = []
|
120
|
+
ident = p.xpath("ORCID").map do |id|
|
121
|
+
RelatonBib::PersonIdentifier.new "orcid", id.text
|
122
|
+
end
|
123
|
+
fullname = RelatonBib::FullName.new(
|
124
|
+
surname: surname, forename: forename, initial: initial, identifier: ident,
|
125
|
+
)
|
126
|
+
person = RelatonBib::Person.new name: fullname
|
127
|
+
{ entity: person, role: [{ type: p["contributor_role"] }] }
|
128
|
+
end
|
129
|
+
contribs + doc.xpath("publisher").map do |p|
|
130
|
+
abbr = p.at("../institution/institution_acronym")&.text
|
131
|
+
org = RelatonBib::Organization.new(name: p.at("publisher_name").text, abbreviation: abbr)
|
132
|
+
{ entity: org, role: [{ type: "publisher" }] }
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
# @param doc [Nokogiri::XML::Element]
|
137
|
+
# @return [Array<String>]
|
138
|
+
def fetch_place(doc)
|
139
|
+
doc.xpath("institution/institution_place").map(&:text)
|
140
|
+
end
|
141
|
+
|
142
|
+
#
|
143
|
+
# Save document
|
144
|
+
#
|
145
|
+
# @param bib [RelatonNist::NistBibliographicItem]
|
146
|
+
#
|
147
|
+
def write_file(bib) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
148
|
+
id = bib.docidentifier[0].id.gsub(%r{[/\s:.]}, "_").upcase.sub(/^NIST_IR/, "NISTIR")
|
149
|
+
file = File.join(@output, "#{id}.#{@ext}")
|
150
|
+
if File.exist? file
|
151
|
+
warn "File #{file} exists. Docid: #{bib.docidentifier[0].id}"
|
152
|
+
# warn "Link: #{bib.link.detect { |l| l.type == 'src' }.content}"
|
153
|
+
else
|
154
|
+
output = case @format
|
155
|
+
when "yaml" then bib.to_hash.to_yaml
|
156
|
+
when "xml" then bib.to_xml bibdata: true
|
157
|
+
else bib.send "to_#{@format}"
|
158
|
+
end
|
159
|
+
File.write file, output, encoding: "UTF-8"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
#
|
164
|
+
# Create a document instance and save it.
|
165
|
+
#
|
166
|
+
# @param doc [Nokogiri::XML::Element]
|
167
|
+
#
|
168
|
+
# @raise [StandardError]
|
169
|
+
#
|
170
|
+
def parse_doc(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
171
|
+
# mtd = doc.at('doi_record/report-paper/report-paper_metadata')
|
172
|
+
item = RelatonNist::NistBibliographicItem.new(
|
173
|
+
type: "standard", docid: fetch_docid(doc), title: fetch_title(doc),
|
174
|
+
link: fetch_link(doc), abstract: fetch_abstract(doc),
|
175
|
+
date: fetch_date(doc), edition: fetch_edition(doc),
|
176
|
+
contributor: fetch_contributor(doc), relation: fetch_relation(doc),
|
177
|
+
place: fetch_place(doc),
|
178
|
+
language: [doc["language"]], script: ["Latn"], doctype: "standard"
|
179
|
+
)
|
180
|
+
write_file item
|
181
|
+
rescue StandardError => e
|
182
|
+
warn "Document: #{doc.at('doi').text}"
|
183
|
+
warn e.message
|
184
|
+
raise e
|
185
|
+
end
|
186
|
+
|
187
|
+
#
|
188
|
+
# Fetch all the documents from the dataset
|
189
|
+
#
|
190
|
+
def fetch # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
191
|
+
t1 = Time.now
|
192
|
+
puts "Started at: #{t1}"
|
193
|
+
|
194
|
+
docs = Nokogiri::XML OpenURI.open_uri URL
|
195
|
+
FileUtils.mkdir @output unless Dir.exist? @output
|
196
|
+
FileUtils.rm Dir[File.join(@output, "*.#{@ext}")]
|
197
|
+
docs.xpath("/body/query/doi_record/report-paper/report-paper_metadata")
|
198
|
+
.each { |doc| parse_doc doc }
|
199
|
+
|
200
|
+
t2 = Time.now
|
201
|
+
puts "Stopped at: #{t2}"
|
202
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
203
|
+
rescue StandardError => e
|
204
|
+
warn e.message
|
205
|
+
end
|
206
|
+
|
207
|
+
#
|
208
|
+
# Fetch all the documents from the dataset
|
209
|
+
#
|
210
|
+
# @param [String] output folder name to save the documents
|
211
|
+
# @param [String] format format to save the documents (yaml, xml, bibxml)
|
212
|
+
#
|
213
|
+
def self.fetch(output: "data", format: "yaml")
|
214
|
+
new(output, format).fetch
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|
data/lib/relaton_nist/hit.rb
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
module RelatonNist
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
|
+
attr_writer :fetch
|
7
|
+
|
6
8
|
# Parse page.
|
7
9
|
# @return [RelatonNist::NistBibliographicItem]
|
8
10
|
def fetch
|
@@ -10,7 +12,7 @@ module RelatonNist
|
|
10
12
|
end
|
11
13
|
|
12
14
|
# @return [Integer]
|
13
|
-
def sort_value
|
15
|
+
def sort_value # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
14
16
|
@sort_value ||= begin
|
15
17
|
sort_phrase = [hit[:serie], hit[:code], hit[:title]].join " "
|
16
18
|
corr = hit_collection&.text&.split&.map do |w|
|