relaton-nist 2.0.0.pre.alpha.2 → 2.0.0.pre.alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/grammars/basicdoc.rng +14 -1
- data/grammars/biblio.rng +8 -8
- data/lib/relaton/nist/data_fetcher.rb +2 -5
- data/lib/relaton/nist/mods_parser.rb +50 -29
- data/lib/relaton/nist/scraper.rb +2 -2
- data/lib/relaton/nist/tech_pubs_parser.rb +2 -2
- data/lib/relaton/nist/version.rb +1 -1
- data/relaton_nist.gemspec +2 -2
- metadata +5 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8f241e09f50957db23bc91820f9bba18b14ed8fdf237646e0fdaa51f8ebc8f46
|
|
4
|
+
data.tar.gz: da8e0ac7ffc24a64f02624467c479369129125f6fdda3fe6da950e05fe2fa9bc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fd8257cfae0044309acc0b1818e903be60595e08722c961853f2f663f98395a18202749a89a96cbf587b9ed2b4cdf2a2d33a683fd65944dd5eefef06798e4fe3
|
|
7
|
+
data.tar.gz: 2087de5a12a59076d95eb2cc03f920d2dd9c39512ec8cb9e5392f7f0d60b19e7fb59949a4e2e3cca62d4d6e6abb22281098d32c83068646fb08c26b6b7da5c34
|
data/grammars/basicdoc.rng
CHANGED
|
@@ -187,6 +187,15 @@ Applicable to modify and delete</a:documentation>
|
|
|
187
187
|
<a:documentation>Optional caption of this block</a:documentation>
|
|
188
188
|
</attribute>
|
|
189
189
|
</optional>
|
|
190
|
+
<optional>
|
|
191
|
+
<attribute name="position">
|
|
192
|
+
<a:documentation>For an "add" change, whether the change is added before or after the location</a:documentation>
|
|
193
|
+
<choice>
|
|
194
|
+
<value>before</value>
|
|
195
|
+
<value>after</value>
|
|
196
|
+
</choice>
|
|
197
|
+
</attribute>
|
|
198
|
+
</optional>
|
|
190
199
|
<optional>
|
|
191
200
|
<element name="location">
|
|
192
201
|
<a:documentation>The location(s) in the original document which have undergone the change described in this block</a:documentation>
|
|
@@ -208,11 +217,15 @@ Applicable to modify and delete</a:documentation>
|
|
|
208
217
|
</zeroOrMore>
|
|
209
218
|
<optional>
|
|
210
219
|
<element name="newcontent">
|
|
211
|
-
<a:documentation>New content to be added to the document; applicable to add and modify
|
|
220
|
+
<a:documentation>New content to be added to the document; applicable to add and modify.
|
|
221
|
+
Can be blocks and/or sections</a:documentation>
|
|
212
222
|
<ref name="OptionalId"/>
|
|
213
223
|
<zeroOrMore>
|
|
214
224
|
<ref name="BasicBlock"/>
|
|
215
225
|
</zeroOrMore>
|
|
226
|
+
<zeroOrMore>
|
|
227
|
+
<ref name="section"/>
|
|
228
|
+
</zeroOrMore>
|
|
216
229
|
</element>
|
|
217
230
|
</optional>
|
|
218
231
|
<zeroOrMore>
|
data/grammars/biblio.rng
CHANGED
|
@@ -1142,11 +1142,11 @@ NOTE: This should preferably be encoded as a URI or short identifier, rather th
|
|
|
1142
1142
|
<a:documentation>Information about how long the current description of the bibliographic item is valid for</a:documentation>
|
|
1143
1143
|
</ref>
|
|
1144
1144
|
</optional>
|
|
1145
|
-
<
|
|
1145
|
+
<zeroOrMore>
|
|
1146
1146
|
<ref name="depiction">
|
|
1147
1147
|
<a:documentation>Depiction of the bibliographic item, typically an image</a:documentation>
|
|
1148
1148
|
</ref>
|
|
1149
|
-
</
|
|
1149
|
+
</zeroOrMore>
|
|
1150
1150
|
</define>
|
|
1151
1151
|
<define name="ReducedBibliographicItem">
|
|
1152
1152
|
<a:documentation>Reduced description of a bibliographic resource, without mandatory title and docidentifier, used for document relations
|
|
@@ -1939,10 +1939,10 @@ Detailed in https://www.relaton.org/model/relations/</a:documentation>
|
|
|
1939
1939
|
<value>hasAnnotation</value>
|
|
1940
1940
|
<value>draftOf</value>
|
|
1941
1941
|
<value>hasDraft</value>
|
|
1942
|
-
<value>
|
|
1943
|
-
<value>
|
|
1944
|
-
<value>
|
|
1945
|
-
<value>
|
|
1942
|
+
<value>predecessorDraftOf</value>
|
|
1943
|
+
<value>hasPredecessorDraft</value>
|
|
1944
|
+
<value>successorDraftOf</value>
|
|
1945
|
+
<value>hasSuccessorDraft</value>
|
|
1946
1946
|
<value>editionOf</value>
|
|
1947
1947
|
<value>hasEdition</value>
|
|
1948
1948
|
<value>updates</value>
|
|
@@ -2063,13 +2063,13 @@ provided that it is not the entire bibliographic item that is so related</a:docu
|
|
|
2063
2063
|
<ref name="LocalizedString"/>
|
|
2064
2064
|
</element>
|
|
2065
2065
|
</optional>
|
|
2066
|
-
<
|
|
2066
|
+
<zeroOrMore>
|
|
2067
2067
|
<element name="taxon">
|
|
2068
2068
|
<a:documentation>The keywords as a hierarchical taxonomy. For example, the sequence of `taxon` elements
|
|
2069
2069
|
`pump`, `centrifugal pump`, `line shaft pump` represents a taxonomic classification</a:documentation>
|
|
2070
2070
|
<ref name="LocalizedString"/>
|
|
2071
2071
|
</element>
|
|
2072
|
-
</
|
|
2072
|
+
</zeroOrMore>
|
|
2073
2073
|
<zeroOrMore>
|
|
2074
2074
|
<ref name="vocabid">
|
|
2075
2075
|
<a:documentation>Identifiers for the keyword as a controlled vocabulary</a:documentation>
|
|
@@ -16,12 +16,13 @@ module Relaton
|
|
|
16
16
|
fetch_tech_pubs
|
|
17
17
|
# add_static_files
|
|
18
18
|
index.save
|
|
19
|
+
report_errors
|
|
19
20
|
end
|
|
20
21
|
|
|
21
22
|
def fetch_tech_pubs
|
|
22
23
|
xml_data = Mechanize.new.get(URL).body
|
|
23
24
|
docs = LocMods::Collection.from_xml xml_data
|
|
24
|
-
docs.mods.each { |doc| write_file ModsParser.new(doc, series).parse }
|
|
25
|
+
docs.mods.each { |doc| write_file ModsParser.new(doc, series, @errors).parse }
|
|
25
26
|
end
|
|
26
27
|
|
|
27
28
|
def write_file(bib)
|
|
@@ -54,10 +55,6 @@ module Relaton
|
|
|
54
55
|
bib.to_rfcxml
|
|
55
56
|
end
|
|
56
57
|
|
|
57
|
-
def gh_issue_channel
|
|
58
|
-
["relaton/relaton-data-nist", "Error fetching NIST documents"]
|
|
59
|
-
end
|
|
60
|
-
|
|
61
58
|
def log_error(msg)
|
|
62
59
|
Util.error msg
|
|
63
60
|
end
|
|
@@ -15,9 +15,10 @@ module Relaton
|
|
|
15
15
|
ATTRS = %i[type docidentifier title source abstract date contributor
|
|
16
16
|
relation place series].freeze
|
|
17
17
|
|
|
18
|
-
def initialize(doc, series)
|
|
18
|
+
def initialize(doc, series, errors = {})
|
|
19
19
|
@doc = doc
|
|
20
20
|
@series = series
|
|
21
|
+
@errors = errors
|
|
21
22
|
end
|
|
22
23
|
|
|
23
24
|
# @return [Bib::ItemData]
|
|
@@ -29,24 +30,24 @@ module Relaton
|
|
|
29
30
|
ItemData.new(**args)
|
|
30
31
|
end
|
|
31
32
|
|
|
32
|
-
def parse_type
|
|
33
|
-
"standard"
|
|
34
|
-
end
|
|
33
|
+
def parse_type = "standard"
|
|
35
34
|
|
|
36
35
|
# @return [Array<Bib::Docidentifier>]
|
|
37
36
|
def parse_docidentifier
|
|
38
|
-
[
|
|
37
|
+
ids = [
|
|
39
38
|
{ type: "NIST", content: pub_id, primary: true },
|
|
40
39
|
{ type: "DOI", content: parse_doi },
|
|
41
|
-
].
|
|
40
|
+
].reject { |id| id[:content].nil? || id[:content].empty? }
|
|
41
|
+
@errors[:docidentifier] &&= ids.empty?
|
|
42
|
+
ids.map { |id| Bib::Docidentifier.new(**id) }
|
|
42
43
|
end
|
|
43
44
|
|
|
44
45
|
# @return [String]
|
|
45
|
-
def pub_id
|
|
46
|
-
get_id_from_str parse_doi
|
|
47
|
-
end
|
|
46
|
+
def pub_id = get_id_from_str parse_doi
|
|
48
47
|
|
|
49
48
|
def get_id_from_str(str)
|
|
49
|
+
return if str.nil? || str.empty?
|
|
50
|
+
|
|
50
51
|
::Pubid::Nist::Identifier.parse(str).to_s
|
|
51
52
|
rescue ::Pubid::Core::Errors::ParseError
|
|
52
53
|
str.gsub(".", " ").sub(/^[\D]+/, &:upcase)
|
|
@@ -66,13 +67,15 @@ module Relaton
|
|
|
66
67
|
|
|
67
68
|
def parse_doi
|
|
68
69
|
url = @doc.location.reduce(nil) { |m, l| m || l.url.detect { |u| u.usage == "primary display" } }
|
|
70
|
+
return if url.nil?
|
|
71
|
+
|
|
69
72
|
id = remove_doi_prefix(url.content)
|
|
73
|
+
return if id.nil?
|
|
74
|
+
|
|
70
75
|
replace_wrong_doi(id)
|
|
71
76
|
end
|
|
72
77
|
|
|
73
|
-
def remove_doi_prefix(id)
|
|
74
|
-
id.match(/10\.6028\/(.+)/)[1]
|
|
75
|
-
end
|
|
78
|
+
def remove_doi_prefix(id) = id.match(/10\.6028\/(.+)/)&.send(:[], 1)
|
|
76
79
|
|
|
77
80
|
# @return [Array<Bib::Title>]
|
|
78
81
|
def parse_title
|
|
@@ -90,6 +93,7 @@ module Relaton
|
|
|
90
93
|
elsif title.size == 1
|
|
91
94
|
title[0].instance_variable_set :@type, "main"
|
|
92
95
|
end
|
|
96
|
+
@errors[:title] &&= title.empty?
|
|
93
97
|
title
|
|
94
98
|
end
|
|
95
99
|
|
|
@@ -100,26 +104,31 @@ module Relaton
|
|
|
100
104
|
end
|
|
101
105
|
|
|
102
106
|
def parse_source
|
|
103
|
-
@doc.location.map do |location|
|
|
107
|
+
source = @doc.location.map do |location|
|
|
104
108
|
url = location.url.first
|
|
105
109
|
type = url.usage == "primary display" ? "doi" : "src"
|
|
106
110
|
Bib::Uri.new content: url.content, type: type
|
|
107
111
|
end
|
|
112
|
+
@errors[:source] &&= source.empty?
|
|
113
|
+
source
|
|
108
114
|
end
|
|
109
115
|
|
|
110
116
|
def parse_abstract
|
|
111
|
-
Array(@doc.abstract).map do |a|
|
|
117
|
+
abstract = Array(@doc.abstract).map do |a|
|
|
112
118
|
content = a.content.gsub("\n", " ").squeeze(" ").strip
|
|
113
|
-
Bib::
|
|
119
|
+
Bib::Abstract.new content: content, language: "en",
|
|
114
120
|
script: "Latn"
|
|
115
121
|
end
|
|
122
|
+
@errors[:abstract] &&= abstract.empty?
|
|
123
|
+
abstract
|
|
116
124
|
end
|
|
117
125
|
|
|
118
126
|
def parse_date
|
|
119
127
|
date = @doc.origin_info[0].date_issued.map do |di|
|
|
120
128
|
create_date(di, "issued")
|
|
121
|
-
end
|
|
122
|
-
date.
|
|
129
|
+
end.compact
|
|
130
|
+
@errors[:date] &&= date.empty?
|
|
131
|
+
date
|
|
123
132
|
end
|
|
124
133
|
|
|
125
134
|
def create_date(date, type)
|
|
@@ -136,13 +145,11 @@ module Relaton
|
|
|
136
145
|
end
|
|
137
146
|
end
|
|
138
147
|
|
|
139
|
-
def parse_doctype
|
|
140
|
-
Doctype.new(content: "standard")
|
|
141
|
-
end
|
|
148
|
+
def parse_doctype = Doctype.new(content: "standard")
|
|
142
149
|
|
|
143
150
|
def parse_contributor
|
|
144
151
|
# exclude primary contributors to avoid duplication
|
|
145
|
-
@doc.name.reject { |n| n.usage == "primary" }.map do |name|
|
|
152
|
+
contributor = @doc.name.reject { |n| n.usage == "primary" }.map do |name|
|
|
146
153
|
entity, default_role = create_entity(name)
|
|
147
154
|
next unless entity
|
|
148
155
|
|
|
@@ -152,6 +159,8 @@ module Relaton
|
|
|
152
159
|
role << Bib::Contributor::Role.new(type: default_role) if role.empty?
|
|
153
160
|
create_contributor(entity, role)
|
|
154
161
|
end.compact
|
|
162
|
+
@errors[:contributor] &&= contributor.empty?
|
|
163
|
+
contributor
|
|
155
164
|
end
|
|
156
165
|
|
|
157
166
|
def create_contributor(entity, role)
|
|
@@ -200,16 +209,21 @@ module Relaton
|
|
|
200
209
|
end
|
|
201
210
|
|
|
202
211
|
def parse_relation
|
|
203
|
-
Array(@doc.related_item).reject { |ri| ri.type == "series" }.
|
|
212
|
+
relations = Array(@doc.related_item).reject { |ri| ri.type == "series" }.filter_map do |ri|
|
|
204
213
|
type = RELATION_TYPES[ri.type]
|
|
205
|
-
|
|
214
|
+
bibitem = create_related_item(ri)
|
|
215
|
+
Relation.new(type: type, bibitem: bibitem) if bibitem
|
|
206
216
|
end
|
|
217
|
+
@errors[:relation] &&= relations.empty?
|
|
218
|
+
relations
|
|
207
219
|
end
|
|
208
220
|
|
|
209
221
|
def create_related_item(item)
|
|
210
222
|
item_id = get_id_from_str related_item_id(item)
|
|
223
|
+
return if item_id.nil? || item_id.empty?
|
|
224
|
+
|
|
211
225
|
docid = Bib::Docidentifier.new(type: "NIST", content: item_id)
|
|
212
|
-
fref = Bib::
|
|
226
|
+
fref = Bib::Formattedref.new(content: item_id)
|
|
213
227
|
ItemData.new(docidentifier: [docid], formattedref: fref)
|
|
214
228
|
end
|
|
215
229
|
|
|
@@ -219,15 +233,20 @@ module Relaton
|
|
|
219
233
|
else
|
|
220
234
|
item.name[0].name_part[0].content
|
|
221
235
|
end => id
|
|
222
|
-
|
|
236
|
+
doi = remove_doi_prefix(id)
|
|
237
|
+
return if doi.nil?
|
|
238
|
+
|
|
239
|
+
replace_wrong_doi(doi)
|
|
223
240
|
end
|
|
224
241
|
|
|
225
242
|
def parse_place
|
|
226
|
-
@doc.origin_info.select { |p| p.event_type == "publisher" }.map do |p|
|
|
227
|
-
|
|
228
|
-
/(?<city>\w+), (?<state>\w+)/ =~
|
|
243
|
+
place = @doc.origin_info.select { |p| p.event_type == "publisher" }.map do |p|
|
|
244
|
+
pl = p.place[0].place_term[0].content
|
|
245
|
+
/(?<city>\w+), (?<state>\w+)/ =~ pl
|
|
229
246
|
Bib::Place.new(city: city, region: create_region(state))
|
|
230
247
|
end
|
|
248
|
+
@errors[:place] &&= place.empty?
|
|
249
|
+
place
|
|
231
250
|
end
|
|
232
251
|
|
|
233
252
|
def create_region(state)
|
|
@@ -237,12 +256,14 @@ module Relaton
|
|
|
237
256
|
end
|
|
238
257
|
|
|
239
258
|
def parse_series
|
|
240
|
-
Array(@doc.related_item).select { |ri| ri.type == "series" }.map do |ri|
|
|
259
|
+
series = Array(@doc.related_item).select { |ri| ri.type == "series" }.map do |ri|
|
|
241
260
|
tinfo = ri.title_info[0]
|
|
242
261
|
tcontent = tinfo.title[0].strip
|
|
243
262
|
title = Bib::Title.new(content: tcontent)
|
|
244
263
|
Bib::Series.new(title: [title], number: tinfo.part_number&.first)
|
|
245
264
|
end
|
|
265
|
+
@errors[:series] &&= series.empty?
|
|
266
|
+
series
|
|
246
267
|
end
|
|
247
268
|
end
|
|
248
269
|
end
|
data/lib/relaton/nist/scraper.rb
CHANGED
|
@@ -264,7 +264,7 @@ module Relaton
|
|
|
264
264
|
end
|
|
265
265
|
ids = [Bib::Docidentifier.new(content: ref, type: "NIST", primary: true)]
|
|
266
266
|
link = [Bib::Uri.new(type: "src", content: uri)]
|
|
267
|
-
bib = ItemData.new(formattedref: ref, source: link, docidentifier: ids)
|
|
267
|
+
bib = ItemData.new(formattedref: Bib::Formattedref.new(content: ref), source: link, docidentifier: ids)
|
|
268
268
|
Relation.new(type: t, description: descr, bibitem: bib)
|
|
269
269
|
end
|
|
270
270
|
|
|
@@ -273,7 +273,7 @@ module Relaton
|
|
|
273
273
|
def fetch_keywords(doc)
|
|
274
274
|
doc["keywords"].map do |kw|
|
|
275
275
|
text = kw.is_a?(String) ? kw : kw.text
|
|
276
|
-
Bib::Keyword.new(
|
|
276
|
+
Bib::Keyword.new(vocab: Bib::LocalizedString.new(content: text))
|
|
277
277
|
end
|
|
278
278
|
end
|
|
279
279
|
|
|
@@ -113,7 +113,7 @@ module Relaton
|
|
|
113
113
|
).each_with_object([]) do |a, m|
|
|
114
114
|
next if a.text.empty?
|
|
115
115
|
|
|
116
|
-
m << Bib::
|
|
116
|
+
m << Bib::Abstract.new(
|
|
117
117
|
content: a.text, language: @doc["language"], script: "Latn",
|
|
118
118
|
)
|
|
119
119
|
end
|
|
@@ -267,7 +267,7 @@ module Relaton
|
|
|
267
267
|
@doc.xpath("./ns:program/ns:related_item", ns: NS).map do |rel|
|
|
268
268
|
rdoi = rel.at_xpath("ns:intra_work_relation|ns:inter_work_relation", ns: NS)
|
|
269
269
|
id = rdoi.text.split("/")[1..].join("/").gsub(".", " ")
|
|
270
|
-
fref = Bib::
|
|
270
|
+
fref = Bib::Formattedref.new(content: id)
|
|
271
271
|
docid = Bib::Docidentifier.new(type: "NIST", content: id, primary: true)
|
|
272
272
|
bibitem = ItemData.new(formattedref: fref, docidentifier: [docid])
|
|
273
273
|
type = RELATION_TYPES[rdoi["relationship-type"]]
|
data/lib/relaton/nist/version.rb
CHANGED
data/relaton_nist.gemspec
CHANGED
|
@@ -27,8 +27,8 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.add_dependency "mechanize", "~> 2.0"
|
|
28
28
|
spec.add_dependency "loc_mods", "~> 0.2.0"
|
|
29
29
|
spec.add_dependency "pubid", "~> 1.15.6"
|
|
30
|
-
spec.add_dependency "relaton-bib", "~> 2.0.0-alpha.
|
|
31
|
-
spec.add_dependency "relaton-core", "~> 0.0.
|
|
30
|
+
spec.add_dependency "relaton-bib", "~> 2.0.0-alpha.7"
|
|
31
|
+
spec.add_dependency "relaton-core", "~> 0.0.13"
|
|
32
32
|
spec.add_dependency "relaton-index", "~> 0.2.0"
|
|
33
33
|
spec.add_dependency "rubyzip"
|
|
34
34
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-nist
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.0.pre.alpha.
|
|
4
|
+
version: 2.0.0.pre.alpha.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
@@ -71,28 +71,28 @@ dependencies:
|
|
|
71
71
|
requirements:
|
|
72
72
|
- - "~>"
|
|
73
73
|
- !ruby/object:Gem::Version
|
|
74
|
-
version: 2.0.0.pre.alpha.
|
|
74
|
+
version: 2.0.0.pre.alpha.7
|
|
75
75
|
type: :runtime
|
|
76
76
|
prerelease: false
|
|
77
77
|
version_requirements: !ruby/object:Gem::Requirement
|
|
78
78
|
requirements:
|
|
79
79
|
- - "~>"
|
|
80
80
|
- !ruby/object:Gem::Version
|
|
81
|
-
version: 2.0.0.pre.alpha.
|
|
81
|
+
version: 2.0.0.pre.alpha.7
|
|
82
82
|
- !ruby/object:Gem::Dependency
|
|
83
83
|
name: relaton-core
|
|
84
84
|
requirement: !ruby/object:Gem::Requirement
|
|
85
85
|
requirements:
|
|
86
86
|
- - "~>"
|
|
87
87
|
- !ruby/object:Gem::Version
|
|
88
|
-
version: 0.0.
|
|
88
|
+
version: 0.0.13
|
|
89
89
|
type: :runtime
|
|
90
90
|
prerelease: false
|
|
91
91
|
version_requirements: !ruby/object:Gem::Requirement
|
|
92
92
|
requirements:
|
|
93
93
|
- - "~>"
|
|
94
94
|
- !ruby/object:Gem::Version
|
|
95
|
-
version: 0.0.
|
|
95
|
+
version: 0.0.13
|
|
96
96
|
- !ruby/object:Gem::Dependency
|
|
97
97
|
name: relaton-index
|
|
98
98
|
requirement: !ruby/object:Gem::Requirement
|