relaton-w3c 1.19.0 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/relaton_w3c/data_fetcher.rb +30 -58
- data/lib/relaton_w3c/data_parser.rb +8 -4
- data/lib/relaton_w3c/processor.rb +3 -3
- data/lib/relaton_w3c/rdf_archive.rb +67 -0
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/workgroups.yaml +13 -0
- data/relaton_w3c.gemspec +1 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9c777f3bbcbff5de8311b7103f7f64dee70971f5745be10f0415200ec8a3fe9
|
4
|
+
data.tar.gz: c13ba956f36f1d4e18a846aeb5d550aad5dbd0128a63457418032958d9d503ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: efbc142c4590df61f6b625bc1e00a29e95edad3ca97d940ea05f57caec3cf8db9fc5ed2a64ec6d5a4079ef3228fd682bae801b6c659a5afb83279d49c4d0b2e8
|
7
|
+
data.tar.gz: ef68de56840568a3e7d063a5c72876d23ae8d7f02d8cd35169e5a5b619c240edad1622738d38cd5d80642301f892e17a6f40d9fe17e33520bd523440064c9dc0
|
@@ -2,11 +2,12 @@ require "rdf"
|
|
2
2
|
require "linkeddata"
|
3
3
|
require "sparql"
|
4
4
|
require "mechanize"
|
5
|
-
|
5
|
+
require_relative "rdf_archive"
|
6
|
+
require_relative "data_parser"
|
6
7
|
|
7
8
|
module RelatonW3c
|
8
9
|
class DataFetcher
|
9
|
-
attr_reader :data, :group_names
|
10
|
+
attr_reader :data, :group_names, :rdf_archive
|
10
11
|
|
11
12
|
#
|
12
13
|
# Data fetcher initializer
|
@@ -20,6 +21,7 @@ module RelatonW3c
|
|
20
21
|
@ext = format.sub(/^bib/, "")
|
21
22
|
dir = File.dirname(File.expand_path(__FILE__))
|
22
23
|
@group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
|
24
|
+
@files = Set.new
|
23
25
|
@index = DataIndex.create_from_file
|
24
26
|
@index1 = Relaton::Index.find_or_create :W3C, file: "index1.yaml"
|
25
27
|
end
|
@@ -31,32 +33,35 @@ module RelatonW3c
|
|
31
33
|
# @param [String] output directory to save files, default: "data"
|
32
34
|
# @param [String] format format of output files (xml, yaml, bibxml), default: yaml
|
33
35
|
#
|
34
|
-
def self.fetch(
|
36
|
+
def self.fetch(output: "data", format: "yaml")
|
35
37
|
t1 = Time.now
|
36
38
|
puts "Started at: #{t1}"
|
37
39
|
FileUtils.mkdir_p output
|
38
|
-
new(output, format).fetch
|
40
|
+
new(output, format).fetch
|
39
41
|
t2 = Time.now
|
40
42
|
puts "Stopped at: #{t2}"
|
41
43
|
puts "Done in: #{(t2 - t1).round} sec."
|
42
44
|
end
|
43
45
|
|
46
|
+
def rdf_archive
|
47
|
+
@rdf_archive ||= RDFArchive.new
|
48
|
+
end
|
49
|
+
|
44
50
|
#
|
45
51
|
# Parse documents
|
46
52
|
#
|
47
53
|
# @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
|
48
54
|
#
|
49
|
-
def fetch(source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
end
|
55
|
+
def fetch # (source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
56
|
+
rdf = rdf_archive.get_data
|
57
|
+
%i[versioned unversioned].each do |type|
|
58
|
+
send("query_#{type}_docs", rdf).each do |sl|
|
59
|
+
bib = DataParser.parse(rdf, sl, self)
|
60
|
+
add_has_edition_relation(bib) if type == :unversioned
|
61
|
+
save_doc bib
|
62
|
+
rescue StandardError => e
|
63
|
+
link = sl.respond_to?(:link) ? sl.link : sl.version_of
|
64
|
+
Util.error "Error: document #{link} #{e.message}\n#{e.backtrace.join("\n")}"
|
60
65
|
end
|
61
66
|
end
|
62
67
|
@index.sort!.save
|
@@ -132,41 +137,6 @@ module RelatonW3c
|
|
132
137
|
(ids1 & ids2).any?
|
133
138
|
end
|
134
139
|
|
135
|
-
#
|
136
|
-
# Yield fetching for each dataset
|
137
|
-
#
|
138
|
-
# @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
|
139
|
-
#
|
140
|
-
# @yield [RDF::Repository] RDF repository
|
141
|
-
#
|
142
|
-
def each_dataset(source, &_block) # rubocop:disable Metrics/MethodLength
|
143
|
-
case source
|
144
|
-
when "w3c-tr-archive"
|
145
|
-
Dir["w3c-tr-archive/*.rdf"].map do |f|
|
146
|
-
@files = []
|
147
|
-
yield RDF::Repository.load(f)
|
148
|
-
end
|
149
|
-
when "w3c-rdf"
|
150
|
-
@files = []
|
151
|
-
rdf = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
|
152
|
-
yield rdf
|
153
|
-
# parse_static_dataset
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
#
|
158
|
-
# Parse static dataset
|
159
|
-
#
|
160
|
-
# def parse_static_dataset
|
161
|
-
# Dir[File.expand_path("../../data/*", __dir__)].each do |file|
|
162
|
-
# xml = File.read file, encoding: "UTF-8"
|
163
|
-
# save_doc BibXMLParser.parse(xml), warn_duplicate: false
|
164
|
-
# rescue StandardError => e
|
165
|
-
# warn "Error: document #{file} #{e.message}"
|
166
|
-
# warn e.backtrace.join("\n")
|
167
|
-
# end
|
168
|
-
# end
|
169
|
-
|
170
140
|
#
|
171
141
|
# Query RDF source for versioned documents
|
172
142
|
#
|
@@ -206,15 +176,9 @@ module RelatonW3c
|
|
206
176
|
#
|
207
177
|
# @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
|
208
178
|
#
|
209
|
-
def save_doc(bib, warn_duplicate: true)
|
179
|
+
def save_doc(bib, warn_duplicate: true)
|
210
180
|
return unless bib
|
211
181
|
|
212
|
-
c = case @format
|
213
|
-
when "xml" then bib.to_xml(bibdata: true)
|
214
|
-
when "yaml" then bib.to_hash.to_yaml
|
215
|
-
else bib.send("to_#{@format}")
|
216
|
-
end
|
217
|
-
# id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
|
218
182
|
file = file_name(bib.docnumber)
|
219
183
|
if @files.include?(file)
|
220
184
|
Util.warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
|
@@ -223,7 +187,15 @@ module RelatonW3c
|
|
223
187
|
@index.add pubid, file
|
224
188
|
@index1.add_or_update pubid.to_hash, file
|
225
189
|
@files << file
|
226
|
-
|
190
|
+
end
|
191
|
+
File.write file, serialize(bib), encoding: "UTF-8"
|
192
|
+
end
|
193
|
+
|
194
|
+
def serialize(bib)
|
195
|
+
case @format
|
196
|
+
when "xml" then bib.to_xml(bibdata: true)
|
197
|
+
when "yaml" then bib.to_hash.to_yaml
|
198
|
+
else bib.send("to_#{@format}")
|
227
199
|
end
|
228
200
|
end
|
229
201
|
|
@@ -280,9 +280,9 @@ module RelatonW3c
|
|
280
280
|
end
|
281
281
|
|
282
282
|
#
|
283
|
-
# Parse editor drafts
|
283
|
+
# Parse editor drafts links
|
284
284
|
#
|
285
|
-
# @return [Array<RelatonBib::
|
285
|
+
# @return [Array<RelatonBib::TypedUri>] links
|
286
286
|
#
|
287
287
|
def editor_drafts # rubocop:disable Metrics/MethodLength
|
288
288
|
return [] unless @sol.respond_to?(:link)
|
@@ -311,7 +311,10 @@ module RelatonW3c
|
|
311
311
|
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
312
312
|
PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
313
313
|
SELECT ?rel
|
314
|
-
WHERE {
|
314
|
+
WHERE {
|
315
|
+
<#{@sol.link.to_s.strip}> #{predicate} ?rel .
|
316
|
+
FILTER ( isURI(?rel) )
|
317
|
+
}
|
315
318
|
))
|
316
319
|
@rdf.query(sse).order_by(:rel)
|
317
320
|
end
|
@@ -371,7 +374,8 @@ module RelatonW3c
|
|
371
374
|
id = pub_id(url)
|
372
375
|
fref = RelatonBib::FormattedRef.new content: id
|
373
376
|
docid = RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)
|
374
|
-
|
377
|
+
link = [RelatonBib::TypedUri.new(type: "src", content: url)]
|
378
|
+
bib = W3cBibliographicItem.new formattedref: fref, docid: [docid], link: link
|
375
379
|
dsc = RelatonBib::FormattedString.new content: desc if desc
|
376
380
|
RelatonBib::DocumentRelation.new(type: type, bibitem: bib, description: dsc)
|
377
381
|
end
|
@@ -9,7 +9,7 @@ module RelatonW3c
|
|
9
9
|
@prefix = "W3C"
|
10
10
|
@defaultprefix = %r{^W3C\s}
|
11
11
|
@idtype = "W3C"
|
12
|
-
@datasets = %w[w3c-rdf
|
12
|
+
@datasets = %w[w3c-rdf]
|
13
13
|
end
|
14
14
|
|
15
15
|
# @param code [String]
|
@@ -28,8 +28,8 @@ module RelatonW3c
|
|
28
28
|
# @option opts [String] :output directory to output documents
|
29
29
|
# @option opts [String] :format
|
30
30
|
#
|
31
|
-
def fetch_data(
|
32
|
-
DataFetcher.fetch(
|
31
|
+
def fetch_data(_source, opts)
|
32
|
+
DataFetcher.fetch(**opts)
|
33
33
|
end
|
34
34
|
|
35
35
|
# @param xml [String]
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require "open-uri"

module RelatonW3c
  #
  # Local RDF/XML archive of W3C documents. The archive file is merged with
  # the current W3C tr.rdf feed whenever it is missing or older than MAX_AGE.
  #
  class RDFArchive
    # Seed archive downloaded when the local file does not exist yet.
    ARCHIVE_URL = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/refs/heads/main/archive.rdf".freeze
    # Current W3C feed whose elements replace or extend the archived ones.
    TR_RDF_URL = "http://www.w3.org/2002/01/tr-automation/tr.rdf".freeze
    # Age in seconds (24 hours) after which the local file is refreshed.
    MAX_AGE = 86_400

    #
    # @param [String] file path to the local archive file
    #
    def initialize(file = "archive.rdf")
      @file = file
    end

    #
    # Get RDF data from the updated archive file.
    # The file is fetched and merged with the newer feed first when it does
    # not exist or was last modified more than MAX_AGE seconds ago.
    #
    # @return [RDF::Repository]
    #
    def get_data
      if !File.exist?(@file) || File.mtime(@file) < Time.now - MAX_AGE
        get_archive
        update_archive
      end
      RDF::Repository.load(@file)
    end

    private

    #
    # Merge the newer feed into the local archive and write the result back
    # to the archive file.
    #
    # Top-level elements are matched by their "about" attribute with the
    # http/https scheme removed, so the same resource is recognised no matter
    # which scheme either document uses.
    #
    def update_archive
      older = Nokogiri::XML File.read(@file, encoding: "UTF-8")
      newer = Nokogiri::XML OpenURI.open_uri(TR_RDF_URL).read

      newer_elements = index_by_about(newer)

      # Replace archived elements that have a newer counterpart. Each matched
      # element is removed from the index so that only new ones remain.
      older.root.element_children.each do |element|
        key = about_key(element)
        replacement = key && newer_elements.delete(key)
        element.replace(replacement) if replacement
      end

      # Whatever is left was not in the archive yet; append it.
      newer_elements.each_value { |element| older.root.add_child(element) }

      merge_namespaces(older.root, newer.root)
      File.write @file, older.to_xml, encoding: "UTF-8"
    end

    #
    # Download the seed archive unless the local file already exists.
    #
    def get_archive
      return if File.exist? @file

      File.write @file, OpenURI.open_uri(ARCHIVE_URL).read, encoding: "UTF-8"
    end

    #
    # Index the root's child elements by their normalized "about" attribute.
    # Elements without the attribute are skipped; a later element with the
    # same key overrides an earlier one.
    #
    # @param [Nokogiri::XML::Document] doc
    #
    # @return [Hash] normalized "about" value => element
    #
    def index_by_about(doc)
      doc.root.element_children.each_with_object({}) do |element, index|
        key = about_key(element)
        index[key] = element if key
      end
    end

    #
    # @param [Nokogiri::XML::Element] element
    #
    # @return [String, nil] normalized "about" value, nil if there is none
    #
    def about_key(element)
      about = element.attribute("about")&.value
      normalize_about(about) if about
    end

    #
    # Strip a leading "http:" or "https:" scheme from a URI.
    #
    # @param [String] uri
    #
    # @return [String] URI without the scheme, e.g. "//www.w3.org/TR/x/"
    #
    def normalize_about(uri)
      uri.sub(/\Ahttps?:/, "")
    end

    #
    # Add namespace definitions of source to target unless target already
    # defines a namespace with the same href.
    #
    # @param [Nokogiri::XML::Element] target root element to extend
    # @param [Nokogiri::XML::Element] source root element to copy from
    #
    def merge_namespaces(target, source)
      source.namespace_definitions.each do |ns|
        next if target.namespace_definitions.any? { |known| known.href == ns.href }

        target.add_namespace_definition(ns.prefix, ns.href)
      end
    end
  end
end
|
data/lib/relaton_w3c/version.rb
CHANGED
@@ -506,3 +506,16 @@
|
|
506
506
|
'www.w3.org/groups/wg/rch':
|
507
507
|
name: RDF Dataset Canonicalization and Hash Working Group
|
508
508
|
abbrev: RCH WG
|
509
|
+
'www.w3.org/groups/wg/browser-tools-testing':
|
510
|
+
name: Browser Testing and Tools Working Group
|
511
|
+
'www.w3.org/groups/wg/did':
|
512
|
+
name: Decentralized Identifier Working Group
|
513
|
+
abbrev: DID WG
|
514
|
+
'www.w3.org/Member/Board':
|
515
|
+
name: Board of Directors
|
516
|
+
'www.w3.org/groups/wg/webtransport':
|
517
|
+
name: WebTransport Working Group
|
518
|
+
'www.w3.org/groups/wg/fedid':
|
519
|
+
name: Federated Identity Working Group
|
520
|
+
'www.w3.org/groups/wg/gpu':
|
521
|
+
name: GPU for the Web Working Group
|
data/relaton_w3c.gemspec
CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.add_dependency "mechanize", "~> 2.10"
|
36
36
|
spec.add_dependency "rdf", "~> 3.2"
|
37
37
|
spec.add_dependency "rdf-normalize", "~> 0.6"
|
38
|
-
spec.add_dependency "relaton-bib", "~> 1.
|
38
|
+
spec.add_dependency "relaton-bib", "~> 1.20.0"
|
39
39
|
spec.add_dependency "relaton-index", "~> 0.2.8"
|
40
40
|
spec.add_dependency "rubyzip", "~> 2.3"
|
41
41
|
spec.add_dependency "shex", "~> 0.7"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-w3c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.19.0
|
4
|
+
version: 1.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: linkeddata
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 1.
|
75
|
+
version: 1.20.0
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 1.
|
82
|
+
version: 1.20.0
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: relaton-index
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -187,6 +187,7 @@ files:
|
|
187
187
|
- lib/relaton_w3c/hit_collection.rb
|
188
188
|
- lib/relaton_w3c/processor.rb
|
189
189
|
- lib/relaton_w3c/pubid.rb
|
190
|
+
- lib/relaton_w3c/rdf_archive.rb
|
190
191
|
- lib/relaton_w3c/util.rb
|
191
192
|
- lib/relaton_w3c/version.rb
|
192
193
|
- lib/relaton_w3c/w3c_bibliographic_item.rb
|
@@ -199,7 +200,7 @@ licenses:
|
|
199
200
|
- BSD-2-Clause
|
200
201
|
metadata:
|
201
202
|
homepage_uri: https://github.com/relaton/relaton-wc3
|
202
|
-
post_install_message:
|
203
|
+
post_install_message:
|
203
204
|
rdoc_options: []
|
204
205
|
require_paths:
|
205
206
|
- lib
|
@@ -215,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
215
216
|
version: '0'
|
216
217
|
requirements: []
|
217
218
|
rubygems_version: 3.3.27
|
218
|
-
signing_key:
|
219
|
+
signing_key:
|
219
220
|
specification_version: 4
|
220
221
|
summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
|
221
222
|
model'
|