relaton-w3c 1.10.1 → 1.11.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/data/reference.W3C.DSig-label.xml +32 -32
- data/data/reference.W3C.P3P-rdfschema.xml +26 -26
- data/data/reference.W3C.P3P.xml +38 -38
- data/data/reference.W3C.PICS-labels.xml +43 -43
- data/data/reference.W3C.PICS-rules.xml +38 -38
- data/data/reference.W3C.PICS-services.xml +37 -37
- data/data/reference.W3C.daml-oil-reference.xml +39 -39
- data/data/reference.W3C.soap11.xml +56 -56
- data/data/reference.W3C.soap12-part1.xml +38 -38
- data/data/reference.W3C.soap12-part2.xml +38 -38
- data/data/reference.W3C.xkms.xml +50 -50
- data/data/reference.W3C.xml-c14n.xml +15 -15
- data/data/reference.W3C.xmldsig-core.xml +26 -26
- data/data/reference.W3C.xmlenc-core.xml +20 -20
- data/data/reference.W3C.xpath.xml +22 -22
- data/grammars/biblio.rng +24 -1
- data/grammars/isodoc.rng +73 -3
- data/lib/relaton_w3c/bibxml_parser.rb +7 -0
- data/lib/relaton_w3c/data_fetcher.rb +188 -0
- data/lib/relaton_w3c/data_index.rb +143 -0
- data/lib/relaton_w3c/data_parser.rb +171 -26
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliography.rb +9 -7
- data/lib/relaton_w3c/workgroups.yaml +7 -0
- data/lib/relaton_w3c.rb +5 -4
- data/relaton_w3c.gemspec +4 -1
- metadata +35 -6
- data/lib/relaton_w3c/data_fethcer.rb +0 -110
@@ -1,5 +1,24 @@
|
|
1
1
|
module RelatonW3c
|
2
2
|
class DataParser
|
3
|
+
USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
|
4
|
+
|
5
|
+
DOCTYPES = {
|
6
|
+
"TR" => "technicalReport",
|
7
|
+
"NOTE" => "groupNote",
|
8
|
+
}.freeze
|
9
|
+
|
10
|
+
STAGES = {
|
11
|
+
"RET" => "retired",
|
12
|
+
"SPSD" => "supersededRecommendation",
|
13
|
+
"OBSL" => "obsoletedRecommendation",
|
14
|
+
"WD" => "workingDraft",
|
15
|
+
"CRD" => "candidateRecommendationDraft",
|
16
|
+
"CR" => "candidateRecommendation",
|
17
|
+
"PR" => "proposedRecommendation",
|
18
|
+
"PER" => "proposedEditedRecommendation",
|
19
|
+
"REC" => "recommendation",
|
20
|
+
}.freeze
|
21
|
+
|
3
22
|
#
|
4
23
|
# Document parser initialization
|
5
24
|
#
|
@@ -29,7 +48,7 @@ module RelatonW3c
|
|
29
48
|
# @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
|
30
49
|
#
|
31
50
|
def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
32
|
-
return
|
51
|
+
return if @sol.respond_to?(:link) && !types_stages.detect { |ts| USED_TYPES.include?(ts) }
|
33
52
|
|
34
53
|
RelatonW3c::W3cBibliographicItem.new(
|
35
54
|
type: "standard",
|
@@ -37,10 +56,12 @@ module RelatonW3c
|
|
37
56
|
fetched: Date.today.to_s,
|
38
57
|
language: ["en"],
|
39
58
|
script: ["Latn"],
|
59
|
+
docstatus: parse_docstatus,
|
40
60
|
title: parse_title,
|
41
61
|
link: parse_link,
|
42
62
|
docid: parse_docid,
|
43
|
-
|
63
|
+
formattedref: parse_formattedref,
|
64
|
+
docnumber: identifier,
|
44
65
|
series: parse_series,
|
45
66
|
date: parse_date,
|
46
67
|
relation: parse_relation,
|
@@ -49,12 +70,24 @@ module RelatonW3c
|
|
49
70
|
)
|
50
71
|
end
|
51
72
|
|
73
|
+
#
|
74
|
+
# Extract document status
|
75
|
+
#
|
76
|
+
# @return [RelatonBib::DocumentStatus, nil] document status
|
77
|
+
#
|
78
|
+
def parse_docstatus
|
79
|
+
stage = types_stages&.detect { |st| STAGES.include?(st) }
|
80
|
+
RelatonBib::DocumentStatus.new stage: STAGES[stage] if stage
|
81
|
+
end
|
82
|
+
|
52
83
|
#
|
53
84
|
# Parse title
|
54
85
|
#
|
55
86
|
# @return [RelatonBib::TypedTitleStringCollection] title
|
56
87
|
#
|
57
88
|
def parse_title
|
89
|
+
return [] unless @sol.respond_to?(:title)
|
90
|
+
|
58
91
|
t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
|
59
92
|
RelatonBib::TypedTitleStringCollection.new [t]
|
60
93
|
end
|
@@ -65,7 +98,9 @@ module RelatonW3c
|
|
65
98
|
# @return [Array<RelatonBib::TypedUri>] link
|
66
99
|
#
|
67
100
|
def parse_link
|
68
|
-
|
101
|
+
link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
|
102
|
+
|
103
|
+
[RelatonBib::TypedUri.new(type: "src", content: link.to_s)]
|
69
104
|
end
|
70
105
|
|
71
106
|
#
|
@@ -74,23 +109,45 @@ module RelatonW3c
|
|
74
109
|
# @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
|
75
110
|
#
|
76
111
|
def parse_docid
|
77
|
-
|
112
|
+
return [] unless @sol.respond_to?(:link)
|
113
|
+
|
114
|
+
id = pub_id(@sol.link)
|
78
115
|
[RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)]
|
79
116
|
end
|
80
117
|
|
81
118
|
#
|
82
119
|
# Generate PubID
|
83
120
|
#
|
84
|
-
# @
|
85
|
-
#
|
86
|
-
# @return [String] PubID
|
121
|
+
# @return [String] PubID
|
87
122
|
#
|
88
123
|
def pub_id(url)
|
89
124
|
"W3C #{identifier(url)}"
|
90
125
|
end
|
91
126
|
|
92
|
-
|
93
|
-
|
127
|
+
#
|
128
|
+
# Generate identifier from URL
|
129
|
+
#
|
130
|
+
# @param [RDF::URI, nil] link
|
131
|
+
#
|
132
|
+
# @return [String] identifier
|
133
|
+
#
|
134
|
+
def identifier(link = nil)
|
135
|
+
url = link || (@sol.respond_to?(:link) ? @sol.link : @sol.version_of)
|
136
|
+
self.class.parse_identifier(url.to_s)
|
137
|
+
end
|
138
|
+
|
139
|
+
#
|
140
|
+
# Parse identifier from URL
|
141
|
+
#
|
142
|
+
# @param [String] url URL
|
143
|
+
#
|
144
|
+
# @return [String] identifier
|
145
|
+
#
|
146
|
+
def self.parse_identifier(url)
|
147
|
+
if /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/ =~ url.to_s
|
148
|
+
$1.to_s
|
149
|
+
else url.to_s.split("/").last
|
150
|
+
end
|
94
151
|
end
|
95
152
|
|
96
153
|
#
|
@@ -99,12 +156,31 @@ module RelatonW3c
|
|
99
156
|
# @return [Array<RelatonBib::Series>] series
|
100
157
|
#
|
101
158
|
def parse_series
|
159
|
+
return [] unless type
|
160
|
+
|
102
161
|
title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
|
103
|
-
[RelatonBib::Series.new(title: title, number: identifier
|
162
|
+
[RelatonBib::Series.new(title: title, number: identifier)]
|
104
163
|
end
|
105
164
|
|
106
|
-
|
107
|
-
|
165
|
+
#
|
166
|
+
# Extract type
|
167
|
+
#
|
168
|
+
# @return [String, nil] type
|
169
|
+
#
|
170
|
+
def type
|
171
|
+
# there are many types, we need to find the right one
|
172
|
+
@type ||= types_stages&.detect { |t| USED_TYPES.include?(t) }
|
173
|
+
end
|
174
|
+
|
175
|
+
#
|
176
|
+
# Fetches types and stages
|
177
|
+
#
|
178
|
+
# @return [Array<String>, nil] types and stages
|
179
|
+
#
|
180
|
+
def types_stages # rubocop:disable Metrics/MethodLength
|
181
|
+
return unless @sol.respond_to?(:link)
|
182
|
+
|
183
|
+
@types_stages ||= begin
|
108
184
|
sse = SPARQL.parse(%(
|
109
185
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
110
186
|
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
@@ -113,8 +189,7 @@ module RelatonW3c
|
|
113
189
|
{ <#{@sol.link}> rdf:type ?type }
|
114
190
|
}
|
115
191
|
))
|
116
|
-
|
117
|
-
tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
|
192
|
+
@fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
|
118
193
|
end
|
119
194
|
end
|
120
195
|
|
@@ -124,10 +199,17 @@ module RelatonW3c
|
|
124
199
|
# @return [String] doctype
|
125
200
|
#
|
126
201
|
def parse_doctype
|
127
|
-
|
202
|
+
DOCTYPES[type] || "recommendation"
|
128
203
|
end
|
129
204
|
|
205
|
+
#
|
206
|
+
# Parse date
|
207
|
+
#
|
208
|
+
# @return [Array<RelatonBib::BibliographicDate>] date
|
209
|
+
#
|
130
210
|
def parse_date
|
211
|
+
return [] unless @sol.respond_to?(:date)
|
212
|
+
|
131
213
|
[RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
|
132
214
|
end
|
133
215
|
|
@@ -136,29 +218,90 @@ module RelatonW3c
|
|
136
218
|
#
|
137
219
|
# @return [Array<RelatonBib::DocumentRelation>] relation
|
138
220
|
#
|
139
|
-
def parse_relation
|
221
|
+
def parse_relation
|
222
|
+
if @sol.respond_to?(:link)
|
223
|
+
relations + editor_drafts
|
224
|
+
else document_versions
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
229
|
+
{
|
230
|
+
"doc:obsoletes" => { type: "obsoletes" },
|
231
|
+
"mat:hasErrata" => { type: "updatedBy", description: "errata" },
|
232
|
+
# "mat:hasTranslations" => "hasTranslation",
|
233
|
+
# "mat:hasImplReport" => "hasImpReport",
|
234
|
+
":previousEdition" => { type: "editionOf" },
|
235
|
+
}.reduce([]) do |acc, (predicate, tp)|
|
236
|
+
acc + relation_query(predicate).map do |r|
|
237
|
+
fr = RelatonBib::LocalizedString.new pub_id(r.rel.to_s)
|
238
|
+
bib = W3cBibliographicItem.new formattedref: fr
|
239
|
+
tp[:description] = RelatonBib::FormattedString.new content: tp[:description] if tp[:description]
|
240
|
+
RelatonBib::DocumentRelation.new(**tp, bibitem: bib)
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
def editor_drafts # rubocop:disable Metrics/MethodLength
|
140
246
|
sse = SPARQL.parse(%(
|
141
|
-
PREFIX
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
{ <#{@sol.link}> ?p ?obsoletes }
|
146
|
-
}
|
247
|
+
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
248
|
+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
249
|
+
SELECT ?rel
|
250
|
+
WHERE { <#{@sol.link}> :ED ?rel . }
|
147
251
|
))
|
148
|
-
@fetcher.data.query(sse).
|
149
|
-
|
150
|
-
fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
|
252
|
+
@fetcher.data.query(sse).map do |s|
|
253
|
+
fr = RelatonBib::LocalizedString.new pub_id(s.rel.to_s)
|
151
254
|
bib = W3cBibliographicItem.new formattedref: fr
|
152
|
-
RelatonBib::
|
255
|
+
desc = RelatonBib::FormattedString.new content: "Editor's draft"
|
256
|
+
RelatonBib::DocumentRelation.new(
|
257
|
+
type: "hasDraft", description: desc, bibitem: bib,
|
258
|
+
)
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
def relation_query(predicate)
|
263
|
+
sse = SPARQL.parse(%(
|
264
|
+
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
265
|
+
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
266
|
+
PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
267
|
+
SELECT ?rel
|
268
|
+
WHERE { <#{@sol.link}> #{predicate} ?rel . }
|
269
|
+
))
|
270
|
+
@fetcher.data.query(sse).order_by(:rel)
|
271
|
+
end
|
272
|
+
|
273
|
+
def document_versions
|
274
|
+
sse = SPARQL.parse(%(
|
275
|
+
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
276
|
+
SELECT ?link
|
277
|
+
WHERE { ?link doc:versionOf <#{@sol.version_of}> }
|
278
|
+
))
|
279
|
+
@fetcher.data.query(sse).map do |r|
|
280
|
+
fref = RelatonBib::FormattedRef.new content: pub_id(r.link)
|
281
|
+
bib = W3cBibliographicItem.new formattedref: fref
|
282
|
+
RelatonBib::DocumentRelation.new(type: "hasEdition", bibitem: bib)
|
153
283
|
end
|
154
284
|
end
|
155
285
|
|
286
|
+
#
|
287
|
+
# Parse formattedref
|
288
|
+
#
|
289
|
+
# @return [RelatonBib::FormattedRef, nil] formattedref
|
290
|
+
#
|
291
|
+
def parse_formattedref
|
292
|
+
return if @sol.respond_to?(:link)
|
293
|
+
|
294
|
+
RelatonBib::FormattedRef.new(content: pub_id(@sol.version_of))
|
295
|
+
end
|
296
|
+
|
156
297
|
#
|
157
298
|
# Parse contributor
|
158
299
|
#
|
159
300
|
# @return [Array<RelatonBib::ContributionInfo>] contributor
|
160
301
|
#
|
161
302
|
def parse_contrib # rubocop:disable Metrics/MethodLength
|
303
|
+
return [] unless @sol.respond_to?(:link)
|
304
|
+
|
162
305
|
sse = SPARQL.parse(%(
|
163
306
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
164
307
|
PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
|
@@ -181,6 +324,8 @@ module RelatonW3c
|
|
181
324
|
# @return [RelatonBib::EditorialGroup] editorialgroup
|
182
325
|
#
|
183
326
|
def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
327
|
+
return unless @sol.respond_to?(:link)
|
328
|
+
|
184
329
|
sse = SPARQL.parse(%(
|
185
330
|
PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
|
186
331
|
PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
|
data/lib/relaton_w3c/version.rb
CHANGED
@@ -5,15 +5,17 @@ require "net/http"
|
|
5
5
|
module RelatonW3c
|
6
6
|
# Class methods for search W3C standards.
|
7
7
|
class W3cBibliography
|
8
|
-
SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/
|
8
|
+
SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/"
|
9
9
|
|
10
10
|
class << self
|
11
11
|
# @param text [String]
|
12
12
|
# @return [RelatonW3c::HitCollection]
|
13
13
|
def search(text) # rubocop:disable Metrics/MethodLength
|
14
|
-
|
15
|
-
file =
|
16
|
-
|
14
|
+
ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
|
15
|
+
file = DataIndex.create_from_repo.search(ref)
|
16
|
+
return unless file
|
17
|
+
|
18
|
+
url = "#{SOURCE}#{file}"
|
17
19
|
resp = Net::HTTP.get_response(URI.parse(url))
|
18
20
|
return unless resp.code == "200"
|
19
21
|
|
@@ -24,7 +26,7 @@ module RelatonW3c
|
|
24
26
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
25
27
|
Net::ProtocolError, Errno::ETIMEDOUT
|
26
28
|
raise RelatonBib::RequestError,
|
27
|
-
"Could not access #{
|
29
|
+
"Could not access #{url}"
|
28
30
|
end
|
29
31
|
|
30
32
|
# @param ref [String] the W3C standard Code to look up
|
@@ -39,8 +41,8 @@ module RelatonW3c
|
|
39
41
|
return
|
40
42
|
end
|
41
43
|
|
42
|
-
|
43
|
-
warn "[relaton-w3c] (\"#{ref}\") found #{
|
44
|
+
found = result.docnumber
|
45
|
+
warn "[relaton-w3c] (\"#{ref}\") found #{found}"
|
44
46
|
result
|
45
47
|
end
|
46
48
|
end
|
@@ -32,6 +32,9 @@
|
|
32
32
|
'https://www.w3.org/WAI/EO':
|
33
33
|
name: Education and Outreach Working Group
|
34
34
|
abbrev: EOWG
|
35
|
+
'https://www.w3.org/WAI/about/groups/eowg':
|
36
|
+
name: Education and Outreach Working Group
|
37
|
+
abbrev: EOWG
|
35
38
|
'https://www.w3.org/2001/sw/WebOnt':
|
36
39
|
name: Web-Ontology Working Group
|
37
40
|
'http://www.w3.org/MarkUp/Forms':
|
@@ -54,6 +57,8 @@
|
|
54
57
|
name: Web Applications Working Group
|
55
58
|
'https://www.w3.org/2008/webapps':
|
56
59
|
name: Web Applications Working Group
|
60
|
+
'https://www.w3.org/groups/wg/webapps':
|
61
|
+
name: Web Applications Working Group
|
57
62
|
'https://www.w3.org/das':
|
58
63
|
name: Devices and Sensors Working Group
|
59
64
|
abbrev: DAS WG
|
@@ -226,6 +231,8 @@
|
|
226
231
|
abbrev: ARIA WG
|
227
232
|
'https://www.w3.org/wasm':
|
228
233
|
name: WebAssembly Working Group
|
234
|
+
'https://www.w3.org/groups/wg/wasm':
|
235
|
+
name: WebAssembly Working Group
|
229
236
|
'https://www.w3.org/groups/wg/webediting':
|
230
237
|
name: Web Editing Working Group
|
231
238
|
'https://www.w3.org/2014/data-shapes':
|
data/lib/relaton_w3c.rb
CHANGED
@@ -2,13 +2,14 @@ require "relaton_bib"
|
|
2
2
|
require "relaton_w3c/version"
|
3
3
|
require "relaton_w3c/w3c_bibliography"
|
4
4
|
require "relaton_w3c/w3c_bibliographic_item"
|
5
|
-
require "relaton_w3c/hit_collection"
|
6
|
-
require "relaton_w3c/hit"
|
7
|
-
require "relaton_w3c/scrapper"
|
5
|
+
# require "relaton_w3c/hit_collection"
|
6
|
+
# require "relaton_w3c/hit"
|
7
|
+
# require "relaton_w3c/scrapper"
|
8
8
|
require "relaton_w3c/xml_parser"
|
9
9
|
require "relaton_w3c/bibxml_parser"
|
10
10
|
require "relaton_w3c/hash_converter"
|
11
|
-
require "relaton_w3c/
|
11
|
+
require "relaton_w3c/data_fetcher"
|
12
|
+
require "relaton_w3c/data_index"
|
12
13
|
|
13
14
|
module RelatonW3c
|
14
15
|
class Error < StandardError; end
|
data/relaton_w3c.gemspec
CHANGED
@@ -39,7 +39,10 @@ Gem::Specification.new do |spec|
|
|
39
39
|
|
40
40
|
spec.add_dependency "linkeddata", "~> 3.1.0"
|
41
41
|
spec.add_dependency "mechanize", "~> 2.8.0"
|
42
|
+
# spec.add_dependency "picky"
|
42
43
|
spec.add_dependency "rdf", "~> 3.1.0"
|
43
|
-
spec.add_dependency "
|
44
|
+
spec.add_dependency "rdf-normalize", "~> 0.4.0"
|
45
|
+
spec.add_dependency "relaton-bib", "~> 1.11.0"
|
46
|
+
spec.add_dependency "shex", "~> 0.6.0"
|
44
47
|
spec.add_dependency "sparql", "~> 3.1.0"
|
45
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-w3c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.11.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-04-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -122,20 +122,48 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: 3.1.0
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rdf-normalize
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.4.0
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.4.0
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: relaton-bib
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
128
142
|
requirements:
|
129
143
|
- - "~>"
|
130
144
|
- !ruby/object:Gem::Version
|
131
|
-
version: 1.
|
145
|
+
version: 1.11.0
|
146
|
+
type: :runtime
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 1.11.0
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: shex
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 0.6.0
|
132
160
|
type: :runtime
|
133
161
|
prerelease: false
|
134
162
|
version_requirements: !ruby/object:Gem::Requirement
|
135
163
|
requirements:
|
136
164
|
- - "~>"
|
137
165
|
- !ruby/object:Gem::Version
|
138
|
-
version:
|
166
|
+
version: 0.6.0
|
139
167
|
- !ruby/object:Gem::Dependency
|
140
168
|
name: sparql
|
141
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -201,7 +229,8 @@ files:
|
|
201
229
|
- grammars/reqt.rng
|
202
230
|
- lib/relaton_w3c.rb
|
203
231
|
- lib/relaton_w3c/bibxml_parser.rb
|
204
|
-
- lib/relaton_w3c/
|
232
|
+
- lib/relaton_w3c/data_fetcher.rb
|
233
|
+
- lib/relaton_w3c/data_index.rb
|
205
234
|
- lib/relaton_w3c/data_parser.rb
|
206
235
|
- lib/relaton_w3c/hash_converter.rb
|
207
236
|
- lib/relaton_w3c/hit.rb
|
@@ -234,7 +263,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
234
263
|
- !ruby/object:Gem::Version
|
235
264
|
version: '0'
|
236
265
|
requirements: []
|
237
|
-
rubygems_version: 3.
|
266
|
+
rubygems_version: 3.3.7
|
238
267
|
signing_key:
|
239
268
|
specification_version: 4
|
240
269
|
summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
|
@@ -1,110 +0,0 @@
|
|
1
|
-
require "rdf"
|
2
|
-
require "linkeddata"
|
3
|
-
require "sparql"
|
4
|
-
require "mechanize"
|
5
|
-
require "relaton_w3c/data_parser"
|
6
|
-
|
7
|
-
module RelatonW3c
|
8
|
-
class DataFetcher
|
9
|
-
USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
|
10
|
-
|
11
|
-
attr_reader :data, :group_names
|
12
|
-
|
13
|
-
#
|
14
|
-
# Data fetcher initializer
|
15
|
-
#
|
16
|
-
# @param [String] output directory to save files
|
17
|
-
# @param [String] format format of output files (xml, yaml, bibxml)
|
18
|
-
#
|
19
|
-
def initialize(output, format)
|
20
|
-
@output = output
|
21
|
-
@format = format
|
22
|
-
@ext = format.sub(/^bib/, "")
|
23
|
-
dir = File.dirname(File.expand_path(__FILE__))
|
24
|
-
@group_names = YAML.load_file(File.join(dir , "workgroups.yaml"))
|
25
|
-
@data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
|
26
|
-
@files = []
|
27
|
-
end
|
28
|
-
|
29
|
-
#
|
30
|
-
# Initialize fetcher and run fetch
|
31
|
-
#
|
32
|
-
# @param [Strin] output directory to save files, default: "data"
|
33
|
-
# @param [Strin] format format of output files (xml, yaml, bibxml), default: yaml
|
34
|
-
#
|
35
|
-
def self.fetch(output: "data", format: "yaml")
|
36
|
-
t1 = Time.now
|
37
|
-
puts "Started at: #{t1}"
|
38
|
-
FileUtils.mkdir_p output unless Dir.exist? output
|
39
|
-
new(output, format).fetch
|
40
|
-
t2 = Time.now
|
41
|
-
puts "Stopped at: #{t2}"
|
42
|
-
puts "Done in: #{(t2 - t1).round} sec."
|
43
|
-
end
|
44
|
-
|
45
|
-
#
|
46
|
-
# Parse documents
|
47
|
-
#
|
48
|
-
def fetch
|
49
|
-
query.each { |sl| save_doc DataParser.parse(sl, self) }
|
50
|
-
Dir[File.expand_path("../../data/*", __dir__)].each do |file|
|
51
|
-
xml = File.read file, encoding: "UTF-8"
|
52
|
-
save_doc BibXMLParser.parse(xml)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
#
|
57
|
-
# Query RDF source for documents
|
58
|
-
#
|
59
|
-
# @return [RDF::Query::Solutions] query results
|
60
|
-
#
|
61
|
-
def query # rubocop:disable Metrics/MethodLength
|
62
|
-
sse = SPARQL.parse(%(
|
63
|
-
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
64
|
-
PREFIX dc: <http://purl.org/dc/elements/1.1/>
|
65
|
-
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
66
|
-
# PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
67
|
-
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
68
|
-
SELECT ?link ?title ?date
|
69
|
-
WHERE {
|
70
|
-
?link dc:title ?title ; dc:date ?date . # ; doc:versionOf ?version_of .
|
71
|
-
}
|
72
|
-
))
|
73
|
-
data.query sse
|
74
|
-
end
|
75
|
-
|
76
|
-
#
|
77
|
-
# Save document to file
|
78
|
-
#
|
79
|
-
# @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
|
80
|
-
#
|
81
|
-
def save_doc(bib) # rubocop:disable Metrics/MethodLength
|
82
|
-
return unless bib
|
83
|
-
|
84
|
-
c = case @format
|
85
|
-
when "xml" then bib.to_xml(bibdata: true)
|
86
|
-
when "yaml" then bib.to_hash.to_yaml
|
87
|
-
else bib.send("to_#{@format}")
|
88
|
-
end
|
89
|
-
file = file_name(bib)
|
90
|
-
if @files.include? file
|
91
|
-
warn "File #{file} already exists. Document: #{bib.docnumber}"
|
92
|
-
else
|
93
|
-
@files << file
|
94
|
-
end
|
95
|
-
File.write file, c, encoding: "UTF-8"
|
96
|
-
end
|
97
|
-
|
98
|
-
#
|
99
|
-
# Generate file name
|
100
|
-
#
|
101
|
-
# @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
|
102
|
-
#
|
103
|
-
# @return [String] file name
|
104
|
-
#
|
105
|
-
def file_name(bib)
|
106
|
-
name = bib.docnumber.gsub(/[\s,:\/]/, "_").squeeze("_").upcase
|
107
|
-
File.join @output, "#{name}.#{@ext}"
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|