relaton-w3c 1.10.1 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/data/reference.W3C.DSig-label.xml +32 -32
- data/data/reference.W3C.P3P-rdfschema.xml +26 -26
- data/data/reference.W3C.P3P.xml +38 -38
- data/data/reference.W3C.PICS-labels.xml +43 -43
- data/data/reference.W3C.PICS-rules.xml +38 -38
- data/data/reference.W3C.PICS-services.xml +37 -37
- data/data/reference.W3C.daml-oil-reference.xml +39 -39
- data/data/reference.W3C.soap11.xml +56 -56
- data/data/reference.W3C.soap12-part1.xml +38 -38
- data/data/reference.W3C.soap12-part2.xml +38 -38
- data/data/reference.W3C.xkms.xml +50 -50
- data/data/reference.W3C.xml-c14n.xml +15 -15
- data/data/reference.W3C.xmldsig-core.xml +26 -26
- data/data/reference.W3C.xmlenc-core.xml +20 -20
- data/data/reference.W3C.xpath.xml +22 -22
- data/grammars/biblio.rng +24 -1
- data/grammars/isodoc.rng +73 -3
- data/lib/relaton_w3c/bibxml_parser.rb +7 -0
- data/lib/relaton_w3c/data_fetcher.rb +188 -0
- data/lib/relaton_w3c/data_index.rb +143 -0
- data/lib/relaton_w3c/data_parser.rb +171 -26
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliography.rb +9 -7
- data/lib/relaton_w3c/workgroups.yaml +7 -0
- data/lib/relaton_w3c.rb +5 -4
- data/relaton_w3c.gemspec +4 -1
- metadata +35 -6
- data/lib/relaton_w3c/data_fethcer.rb +0 -110
@@ -1,5 +1,24 @@
|
|
1
1
|
module RelatonW3c
|
2
2
|
class DataParser
|
3
|
+
USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
|
4
|
+
|
5
|
+
DOCTYPES = {
|
6
|
+
"TR" => "technicalReport",
|
7
|
+
"NOTE" => "groupNote",
|
8
|
+
}.freeze
|
9
|
+
|
10
|
+
STAGES = {
|
11
|
+
"RET" => "retired",
|
12
|
+
"SPSD" => "supersededRecommendation",
|
13
|
+
"OBSL" => "obsoletedRecommendation",
|
14
|
+
"WD" => "workingDraft",
|
15
|
+
"CRD" => "candidateRecommendationDraft",
|
16
|
+
"CR" => "candidateRecommendation",
|
17
|
+
"PR" => "proposedRecommendation",
|
18
|
+
"PER" => "proposedEditedRecommendation",
|
19
|
+
"REC" => "recommendation",
|
20
|
+
}.freeze
|
21
|
+
|
3
22
|
#
|
4
23
|
# Document parser initalization
|
5
24
|
#
|
@@ -29,7 +48,7 @@ module RelatonW3c
|
|
29
48
|
# @return [RelatonW3c:W3cBibliographicItem, nil] bibliographic item
|
30
49
|
#
|
31
50
|
def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
32
|
-
return
|
51
|
+
return if @sol.respond_to?(:link) && !types_stages.detect { |ts| USED_TYPES.include?(ts) }
|
33
52
|
|
34
53
|
RelatonW3c::W3cBibliographicItem.new(
|
35
54
|
type: "standard",
|
@@ -37,10 +56,12 @@ module RelatonW3c
|
|
37
56
|
fetched: Date.today.to_s,
|
38
57
|
language: ["en"],
|
39
58
|
script: ["Latn"],
|
59
|
+
docstatus: parse_docstatus,
|
40
60
|
title: parse_title,
|
41
61
|
link: parse_link,
|
42
62
|
docid: parse_docid,
|
43
|
-
|
63
|
+
formattedref: parse_formattedref,
|
64
|
+
docnumber: identifier,
|
44
65
|
series: parse_series,
|
45
66
|
date: parse_date,
|
46
67
|
relation: parse_relation,
|
@@ -49,12 +70,24 @@ module RelatonW3c
|
|
49
70
|
)
|
50
71
|
end
|
51
72
|
|
73
|
+
#
|
74
|
+
# Extract documetn status
|
75
|
+
#
|
76
|
+
# @return [RelatonBib::DocumentStatus, nil] dcoument status
|
77
|
+
#
|
78
|
+
def parse_docstatus
|
79
|
+
stage = types_stages&.detect { |st| STAGES.include?(st) }
|
80
|
+
RelatonBib::DocumentStatus.new stage: STAGES[stage] if stage
|
81
|
+
end
|
82
|
+
|
52
83
|
#
|
53
84
|
# Parse title
|
54
85
|
#
|
55
86
|
# @return [RelatonBib::TypedTitleStringCollection] title
|
56
87
|
#
|
57
88
|
def parse_title
|
89
|
+
return [] unless @sol.respond_to?(:title)
|
90
|
+
|
58
91
|
t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
|
59
92
|
RelatonBib::TypedTitleStringCollection.new [t]
|
60
93
|
end
|
@@ -65,7 +98,9 @@ module RelatonW3c
|
|
65
98
|
# @return [Array<RelatonBib::TypedUri>] link
|
66
99
|
#
|
67
100
|
def parse_link
|
68
|
-
|
101
|
+
link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
|
102
|
+
|
103
|
+
[RelatonBib::TypedUri.new(type: "src", content: link.to_s)]
|
69
104
|
end
|
70
105
|
|
71
106
|
#
|
@@ -74,23 +109,45 @@ module RelatonW3c
|
|
74
109
|
# @return [Arra<RelatonBib::DocumentIdentifier>] docidentifier
|
75
110
|
#
|
76
111
|
def parse_docid
|
77
|
-
|
112
|
+
return [] unless @sol.respond_to?(:link)
|
113
|
+
|
114
|
+
id = pub_id(@sol.link)
|
78
115
|
[RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)]
|
79
116
|
end
|
80
117
|
|
81
118
|
#
|
82
119
|
# Generate PubID
|
83
120
|
#
|
84
|
-
# @
|
85
|
-
#
|
86
|
-
# @return [String] PubID
|
121
|
+
# @return [RDF::URI] PubID
|
87
122
|
#
|
88
123
|
def pub_id(url)
|
89
124
|
"W3C #{identifier(url)}"
|
90
125
|
end
|
91
126
|
|
92
|
-
|
93
|
-
|
127
|
+
#
|
128
|
+
# Generate identifier from URL
|
129
|
+
#
|
130
|
+
# @param [RDF::URI, nil] link
|
131
|
+
#
|
132
|
+
# @return [String] identifier
|
133
|
+
#
|
134
|
+
def identifier(link = nil)
|
135
|
+
url = link || (@sol.respond_to?(:link) ? @sol.link : @sol.version_of)
|
136
|
+
self.class.parse_identifier(url.to_s)
|
137
|
+
end
|
138
|
+
|
139
|
+
#
|
140
|
+
# Parse identifier from URL
|
141
|
+
#
|
142
|
+
# @param [String] url URL
|
143
|
+
#
|
144
|
+
# @return [String] identifier
|
145
|
+
#
|
146
|
+
def self.parse_identifier(url)
|
147
|
+
if /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/ =~ url.to_s
|
148
|
+
$1.to_s
|
149
|
+
else url.to_s.split("/").last
|
150
|
+
end
|
94
151
|
end
|
95
152
|
|
96
153
|
#
|
@@ -99,12 +156,31 @@ module RelatonW3c
|
|
99
156
|
# @return [Array<RelatonBib::Series>] series
|
100
157
|
#
|
101
158
|
def parse_series
|
159
|
+
return [] unless type
|
160
|
+
|
102
161
|
title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
|
103
|
-
[RelatonBib::Series.new(title: title, number: identifier
|
162
|
+
[RelatonBib::Series.new(title: title, number: identifier)]
|
104
163
|
end
|
105
164
|
|
106
|
-
|
107
|
-
|
165
|
+
#
|
166
|
+
# Extract type
|
167
|
+
#
|
168
|
+
# @return [String] type
|
169
|
+
#
|
170
|
+
def type
|
171
|
+
# thre are many types, we need to find the right one
|
172
|
+
@type ||= types_stages&.detect { |t| USED_TYPES.include?(t) }
|
173
|
+
end
|
174
|
+
|
175
|
+
#
|
176
|
+
# Fetches types and stages
|
177
|
+
#
|
178
|
+
# @return [Array<String>] types and stages
|
179
|
+
#
|
180
|
+
def types_stages # rubocop:disable Metrics/MethodLength
|
181
|
+
return unless @sol.respond_to?(:link)
|
182
|
+
|
183
|
+
@types_stages ||= begin
|
108
184
|
sse = SPARQL.parse(%(
|
109
185
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
110
186
|
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
@@ -113,8 +189,7 @@ module RelatonW3c
|
|
113
189
|
{ <#{@sol.link}> rdf:type ?type }
|
114
190
|
}
|
115
191
|
))
|
116
|
-
|
117
|
-
tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
|
192
|
+
@fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
|
118
193
|
end
|
119
194
|
end
|
120
195
|
|
@@ -124,10 +199,17 @@ module RelatonW3c
|
|
124
199
|
# @return [Strinf] doctype
|
125
200
|
#
|
126
201
|
def parse_doctype
|
127
|
-
|
202
|
+
DOCTYPES[type] || "recommendation"
|
128
203
|
end
|
129
204
|
|
205
|
+
#
|
206
|
+
# Parse date
|
207
|
+
#
|
208
|
+
# @return [Array<RelatonBib::BibliographicDate>] date
|
209
|
+
#
|
130
210
|
def parse_date
|
211
|
+
return [] unless @sol.respond_to?(:date)
|
212
|
+
|
131
213
|
[RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
|
132
214
|
end
|
133
215
|
|
@@ -136,29 +218,90 @@ module RelatonW3c
|
|
136
218
|
#
|
137
219
|
# @return [Array<RelatonBib::DocumentRelation>] relation
|
138
220
|
#
|
139
|
-
def parse_relation
|
221
|
+
def parse_relation
|
222
|
+
if @sol.respond_to?(:link)
|
223
|
+
relations + editor_drafts
|
224
|
+
else document_versions
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
229
|
+
{
|
230
|
+
"doc:obsoletes" => { type: "obsoletes" },
|
231
|
+
"mat:hasErrata" => { type: "updatedBy", description: "errata" },
|
232
|
+
# "mat:hasTranslations" => "hasTranslation",
|
233
|
+
# "mat:hasImplReport" => "hasImpReport",
|
234
|
+
":previousEdition" => { type: "editionOf" },
|
235
|
+
}.reduce([]) do |acc, (predicate, tp)|
|
236
|
+
acc + relation_query(predicate).map do |r|
|
237
|
+
fr = RelatonBib::LocalizedString.new pub_id(r.rel.to_s)
|
238
|
+
bib = W3cBibliographicItem.new formattedref: fr
|
239
|
+
tp[:description] = RelatonBib::FormattedString.new content: tp[:description] if tp[:description]
|
240
|
+
RelatonBib::DocumentRelation.new(**tp, bibitem: bib)
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
def editor_drafts # rubocop:disable Metrics/MethodLength
|
140
246
|
sse = SPARQL.parse(%(
|
141
|
-
PREFIX
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
{ <#{@sol.link}> ?p ?obsoletes }
|
146
|
-
}
|
247
|
+
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
248
|
+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
249
|
+
SELECT ?rel
|
250
|
+
WHERE { <#{@sol.link}> :ED ?rel . }
|
147
251
|
))
|
148
|
-
@fetcher.data.query(sse).
|
149
|
-
|
150
|
-
fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
|
252
|
+
@fetcher.data.query(sse).map do |s|
|
253
|
+
fr = RelatonBib::LocalizedString.new pub_id(s.rel.to_s)
|
151
254
|
bib = W3cBibliographicItem.new formattedref: fr
|
152
|
-
RelatonBib::
|
255
|
+
desc = RelatonBib::FormattedString.new content: "Editor's draft"
|
256
|
+
RelatonBib::DocumentRelation.new(
|
257
|
+
type: "hasDraft", description: desc, bibitem: bib,
|
258
|
+
)
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
def relation_query(predicate)
|
263
|
+
sse = SPARQL.parse(%(
|
264
|
+
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
265
|
+
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
266
|
+
PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
267
|
+
SELECT ?rel
|
268
|
+
WHERE { <#{@sol.link}> #{predicate} ?rel . }
|
269
|
+
))
|
270
|
+
@fetcher.data.query(sse).order_by(:rel)
|
271
|
+
end
|
272
|
+
|
273
|
+
def document_versions
|
274
|
+
sse = SPARQL.parse(%(
|
275
|
+
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
276
|
+
SELECT ?link
|
277
|
+
WHERE { ?link doc:versionOf <#{@sol.version_of}> }
|
278
|
+
))
|
279
|
+
@fetcher.data.query(sse).map do |r|
|
280
|
+
fref = RelatonBib::FormattedRef.new content: pub_id(r.link)
|
281
|
+
bib = W3cBibliographicItem.new formattedref: fref
|
282
|
+
RelatonBib::DocumentRelation.new(type: "hasEdition", bibitem: bib)
|
153
283
|
end
|
154
284
|
end
|
155
285
|
|
286
|
+
#
|
287
|
+
# Parse formattedref
|
288
|
+
#
|
289
|
+
# @return [RelatonBib::FormattedRef] formattedref
|
290
|
+
#
|
291
|
+
def parse_formattedref
|
292
|
+
return if @sol.respond_to?(:link)
|
293
|
+
|
294
|
+
RelatonBib::FormattedRef.new(content: pub_id(@sol.version_of))
|
295
|
+
end
|
296
|
+
|
156
297
|
#
|
157
298
|
# Parse contributor
|
158
299
|
#
|
159
300
|
# @return [Array<RelatonBib::ContributionInfo>] contributor
|
160
301
|
#
|
161
302
|
def parse_contrib # rubocop:disable Metrics/MethodLength
|
303
|
+
return [] unless @sol.respond_to?(:link)
|
304
|
+
|
162
305
|
sse = SPARQL.parse(%(
|
163
306
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
164
307
|
PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
|
@@ -181,6 +324,8 @@ module RelatonW3c
|
|
181
324
|
# @return [RelatonBib::EditorialGroup] editorialgroup
|
182
325
|
#
|
183
326
|
def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
327
|
+
return unless @sol.respond_to?(:link)
|
328
|
+
|
184
329
|
sse = SPARQL.parse(%(
|
185
330
|
PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
|
186
331
|
PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
|
data/lib/relaton_w3c/version.rb
CHANGED
@@ -5,15 +5,17 @@ require "net/http"
|
|
5
5
|
module RelatonW3c
|
6
6
|
# Class methods for search W3C standards.
|
7
7
|
class W3cBibliography
|
8
|
-
SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/
|
8
|
+
SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/"
|
9
9
|
|
10
10
|
class << self
|
11
11
|
# @param text [String]
|
12
12
|
# @return [RelatonW3c::HitCollection]
|
13
13
|
def search(text) # rubocop:disable Metrics/MethodLength
|
14
|
-
|
15
|
-
file =
|
16
|
-
|
14
|
+
ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
|
15
|
+
file = DataIndex.create_from_repo.search(ref)
|
16
|
+
return unless file
|
17
|
+
|
18
|
+
url = "#{SOURCE}#{file}"
|
17
19
|
resp = Net::HTTP.get_response(URI.parse(url))
|
18
20
|
return unless resp.code == "200"
|
19
21
|
|
@@ -24,7 +26,7 @@ module RelatonW3c
|
|
24
26
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
25
27
|
Net::ProtocolError, Errno::ETIMEDOUT
|
26
28
|
raise RelatonBib::RequestError,
|
27
|
-
"Could not access #{
|
29
|
+
"Could not access #{url}"
|
28
30
|
end
|
29
31
|
|
30
32
|
# @param ref [String] the W3C standard Code to look up
|
@@ -39,8 +41,8 @@ module RelatonW3c
|
|
39
41
|
return
|
40
42
|
end
|
41
43
|
|
42
|
-
|
43
|
-
warn "[relaton-w3c] (\"#{ref}\") found #{
|
44
|
+
found = result.docnumber
|
45
|
+
warn "[relaton-w3c] (\"#{ref}\") found #{found}"
|
44
46
|
result
|
45
47
|
end
|
46
48
|
end
|
@@ -32,6 +32,9 @@
|
|
32
32
|
'https://www.w3.org/WAI/EO':
|
33
33
|
name: Education and Outreach Working Group
|
34
34
|
abbrev: EOWG
|
35
|
+
'https://www.w3.org/WAI/about/groups/eowg':
|
36
|
+
name: Education and Outreach Working Group
|
37
|
+
abbrev: EOWG
|
35
38
|
'https://www.w3.org/2001/sw/WebOnt':
|
36
39
|
name: Web-Ontology Working Group
|
37
40
|
'http://www.w3.org/MarkUp/Forms':
|
@@ -54,6 +57,8 @@
|
|
54
57
|
name: Web Applications Working Group
|
55
58
|
'https://www.w3.org/2008/webapps':
|
56
59
|
name: Web Applications Working Group
|
60
|
+
'https://www.w3.org/groups/wg/webapps':
|
61
|
+
name: Web Applications Working Group
|
57
62
|
'https://www.w3.org/das':
|
58
63
|
name: Devices and Sensors Working Group
|
59
64
|
abbrev: DAS WG
|
@@ -226,6 +231,8 @@
|
|
226
231
|
abbrev: ARIA WG
|
227
232
|
'https://www.w3.org/wasm':
|
228
233
|
name: WebAssembly Working Group
|
234
|
+
'https://www.w3.org/groups/wg/wasm':
|
235
|
+
name: WebAssembly Working Group
|
229
236
|
'https://www.w3.org/groups/wg/webediting':
|
230
237
|
name: Web Editing Working Group
|
231
238
|
'https://www.w3.org/2014/data-shapes':
|
data/lib/relaton_w3c.rb
CHANGED
@@ -2,13 +2,14 @@ require "relaton_bib"
|
|
2
2
|
require "relaton_w3c/version"
|
3
3
|
require "relaton_w3c/w3c_bibliography"
|
4
4
|
require "relaton_w3c/w3c_bibliographic_item"
|
5
|
-
require "relaton_w3c/hit_collection"
|
6
|
-
require "relaton_w3c/hit"
|
7
|
-
require "relaton_w3c/scrapper"
|
5
|
+
# require "relaton_w3c/hit_collection"
|
6
|
+
# require "relaton_w3c/hit"
|
7
|
+
# require "relaton_w3c/scrapper"
|
8
8
|
require "relaton_w3c/xml_parser"
|
9
9
|
require "relaton_w3c/bibxml_parser"
|
10
10
|
require "relaton_w3c/hash_converter"
|
11
|
-
require "relaton_w3c/
|
11
|
+
require "relaton_w3c/data_fetcher"
|
12
|
+
require "relaton_w3c/data_index"
|
12
13
|
|
13
14
|
module RelatonW3c
|
14
15
|
class Error < StandardError; end
|
data/relaton_w3c.gemspec
CHANGED
@@ -39,7 +39,10 @@ Gem::Specification.new do |spec|
|
|
39
39
|
|
40
40
|
spec.add_dependency "linkeddata", "~> 3.1.0"
|
41
41
|
spec.add_dependency "mechanize", "~> 2.8.0"
|
42
|
+
# spec.add_dependency "picky"
|
42
43
|
spec.add_dependency "rdf", "~> 3.1.0"
|
43
|
-
spec.add_dependency "
|
44
|
+
spec.add_dependency "rdf-normalize", "~> 0.4.0"
|
45
|
+
spec.add_dependency "relaton-bib", "~> 1.11.0"
|
46
|
+
spec.add_dependency "shex", "~> 0.6.0"
|
44
47
|
spec.add_dependency "sparql", "~> 3.1.0"
|
45
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-w3c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.11.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-04-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -122,20 +122,48 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: 3.1.0
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rdf-normalize
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.4.0
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.4.0
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: relaton-bib
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
128
142
|
requirements:
|
129
143
|
- - "~>"
|
130
144
|
- !ruby/object:Gem::Version
|
131
|
-
version: 1.
|
145
|
+
version: 1.11.0
|
146
|
+
type: :runtime
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 1.11.0
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: shex
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 0.6.0
|
132
160
|
type: :runtime
|
133
161
|
prerelease: false
|
134
162
|
version_requirements: !ruby/object:Gem::Requirement
|
135
163
|
requirements:
|
136
164
|
- - "~>"
|
137
165
|
- !ruby/object:Gem::Version
|
138
|
-
version:
|
166
|
+
version: 0.6.0
|
139
167
|
- !ruby/object:Gem::Dependency
|
140
168
|
name: sparql
|
141
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -201,7 +229,8 @@ files:
|
|
201
229
|
- grammars/reqt.rng
|
202
230
|
- lib/relaton_w3c.rb
|
203
231
|
- lib/relaton_w3c/bibxml_parser.rb
|
204
|
-
- lib/relaton_w3c/
|
232
|
+
- lib/relaton_w3c/data_fetcher.rb
|
233
|
+
- lib/relaton_w3c/data_index.rb
|
205
234
|
- lib/relaton_w3c/data_parser.rb
|
206
235
|
- lib/relaton_w3c/hash_converter.rb
|
207
236
|
- lib/relaton_w3c/hit.rb
|
@@ -234,7 +263,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
234
263
|
- !ruby/object:Gem::Version
|
235
264
|
version: '0'
|
236
265
|
requirements: []
|
237
|
-
rubygems_version: 3.
|
266
|
+
rubygems_version: 3.3.7
|
238
267
|
signing_key:
|
239
268
|
specification_version: 4
|
240
269
|
summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
|
@@ -1,110 +0,0 @@
|
|
1
|
-
require "rdf"
|
2
|
-
require "linkeddata"
|
3
|
-
require "sparql"
|
4
|
-
require "mechanize"
|
5
|
-
require "relaton_w3c/data_parser"
|
6
|
-
|
7
|
-
module RelatonW3c
|
8
|
-
class DataFetcher
|
9
|
-
USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
|
10
|
-
|
11
|
-
attr_reader :data, :group_names
|
12
|
-
|
13
|
-
#
|
14
|
-
# Data fetcher initializer
|
15
|
-
#
|
16
|
-
# @param [String] output directory to save files
|
17
|
-
# @param [String] format format of output files (xml, yaml, bibxml)
|
18
|
-
#
|
19
|
-
def initialize(output, format)
|
20
|
-
@output = output
|
21
|
-
@format = format
|
22
|
-
@ext = format.sub(/^bib/, "")
|
23
|
-
dir = File.dirname(File.expand_path(__FILE__))
|
24
|
-
@group_names = YAML.load_file(File.join(dir , "workgroups.yaml"))
|
25
|
-
@data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
|
26
|
-
@files = []
|
27
|
-
end
|
28
|
-
|
29
|
-
#
|
30
|
-
# Initialize fetcher and run fetch
|
31
|
-
#
|
32
|
-
# @param [Strin] output directory to save files, default: "data"
|
33
|
-
# @param [Strin] format format of output files (xml, yaml, bibxml), default: yaml
|
34
|
-
#
|
35
|
-
def self.fetch(output: "data", format: "yaml")
|
36
|
-
t1 = Time.now
|
37
|
-
puts "Started at: #{t1}"
|
38
|
-
FileUtils.mkdir_p output unless Dir.exist? output
|
39
|
-
new(output, format).fetch
|
40
|
-
t2 = Time.now
|
41
|
-
puts "Stopped at: #{t2}"
|
42
|
-
puts "Done in: #{(t2 - t1).round} sec."
|
43
|
-
end
|
44
|
-
|
45
|
-
#
|
46
|
-
# Parse documents
|
47
|
-
#
|
48
|
-
def fetch
|
49
|
-
query.each { |sl| save_doc DataParser.parse(sl, self) }
|
50
|
-
Dir[File.expand_path("../../data/*", __dir__)].each do |file|
|
51
|
-
xml = File.read file, encoding: "UTF-8"
|
52
|
-
save_doc BibXMLParser.parse(xml)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
#
|
57
|
-
# Query RDF source for documents
|
58
|
-
#
|
59
|
-
# @return [RDF::Query::Solutions] query results
|
60
|
-
#
|
61
|
-
def query # rubocop:disable Metrics/MethodLength
|
62
|
-
sse = SPARQL.parse(%(
|
63
|
-
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
64
|
-
PREFIX dc: <http://purl.org/dc/elements/1.1/>
|
65
|
-
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
66
|
-
# PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
67
|
-
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
68
|
-
SELECT ?link ?title ?date
|
69
|
-
WHERE {
|
70
|
-
?link dc:title ?title ; dc:date ?date . # ; doc:versionOf ?version_of .
|
71
|
-
}
|
72
|
-
))
|
73
|
-
data.query sse
|
74
|
-
end
|
75
|
-
|
76
|
-
#
|
77
|
-
# Save document to file
|
78
|
-
#
|
79
|
-
# @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
|
80
|
-
#
|
81
|
-
def save_doc(bib) # rubocop:disable Metrics/MethodLength
|
82
|
-
return unless bib
|
83
|
-
|
84
|
-
c = case @format
|
85
|
-
when "xml" then bib.to_xml(bibdata: true)
|
86
|
-
when "yaml" then bib.to_hash.to_yaml
|
87
|
-
else bib.send("to_#{@format}")
|
88
|
-
end
|
89
|
-
file = file_name(bib)
|
90
|
-
if @files.include? file
|
91
|
-
warn "File #{file} already exists. Document: #{bib.docnumber}"
|
92
|
-
else
|
93
|
-
@files << file
|
94
|
-
end
|
95
|
-
File.write file, c, encoding: "UTF-8"
|
96
|
-
end
|
97
|
-
|
98
|
-
#
|
99
|
-
# Generate file name
|
100
|
-
#
|
101
|
-
# @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
|
102
|
-
#
|
103
|
-
# @return [String] file name
|
104
|
-
#
|
105
|
-
def file_name(bib)
|
106
|
-
name = bib.docnumber.gsub(/[\s,:\/]/, "_").squeeze("_").upcase
|
107
|
-
File.join @output, "#{name}.#{@ext}"
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|