relaton-w3c 1.19.0 → 1.20.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/relaton_w3c/data_fetcher.rb +30 -58
- data/lib/relaton_w3c/data_parser.rb +8 -4
- data/lib/relaton_w3c/processor.rb +3 -3
- data/lib/relaton_w3c/rdf_archive.rb +67 -0
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/workgroups.yaml +13 -0
- data/relaton_w3c.gemspec +1 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9c777f3bbcbff5de8311b7103f7f64dee70971f5745be10f0415200ec8a3fe9
|
4
|
+
data.tar.gz: c13ba956f36f1d4e18a846aeb5d550aad5dbd0128a63457418032958d9d503ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: efbc142c4590df61f6b625bc1e00a29e95edad3ca97d940ea05f57caec3cf8db9fc5ed2a64ec6d5a4079ef3228fd682bae801b6c659a5afb83279d49c4d0b2e8
|
7
|
+
data.tar.gz: ef68de56840568a3e7d063a5c72876d23ae8d7f02d8cd35169e5a5b619c240edad1622738d38cd5d80642301f892e17a6f40d9fe17e33520bd523440064c9dc0
|
@@ -2,11 +2,12 @@ require "rdf"
|
|
2
2
|
require "linkeddata"
|
3
3
|
require "sparql"
|
4
4
|
require "mechanize"
|
5
|
-
|
5
|
+
require_relative "rdf_archive"
|
6
|
+
require_relative "data_parser"
|
6
7
|
|
7
8
|
module RelatonW3c
|
8
9
|
class DataFetcher
|
9
|
-
attr_reader :data, :group_names
|
10
|
+
attr_reader :data, :group_names, :rdf_archive
|
10
11
|
|
11
12
|
#
|
12
13
|
# Data fetcher initializer
|
@@ -20,6 +21,7 @@ module RelatonW3c
|
|
20
21
|
@ext = format.sub(/^bib/, "")
|
21
22
|
dir = File.dirname(File.expand_path(__FILE__))
|
22
23
|
@group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
|
24
|
+
@files = Set.new
|
23
25
|
@index = DataIndex.create_from_file
|
24
26
|
@index1 = Relaton::Index.find_or_create :W3C, file: "index1.yaml"
|
25
27
|
end
|
@@ -31,32 +33,35 @@ module RelatonW3c
|
|
31
33
|
# @param [String] output directory to save files, default: "data"
|
32
34
|
# @param [String] format format of output files (xml, yaml, bibxml), default: yaml
|
33
35
|
#
|
34
|
-
def self.fetch(
|
36
|
+
def self.fetch(output: "data", format: "yaml")
  # Report when the run begins.
  started_at = Time.now
  puts "Started at: #{started_at}"

  # Make sure the output directory exists, then run a fetcher instance.
  FileUtils.mkdir_p output
  new(output, format).fetch

  # Report when the run ends and how long it took, in whole seconds.
  finished_at = Time.now
  puts "Stopped at: #{finished_at}"
  elapsed = (finished_at - started_at).round
  puts "Done in: #{elapsed} sec."
end
|
43
45
|
|
46
|
+
#
# RDF archive used as the data source; created on first access and reused
# on later calls
#
# @return [RelatonW3c::RDFArchive] archive instance
#
def rdf_archive
  return @rdf_archive if @rdf_archive

  @rdf_archive = RDFArchive.new
end
|
49
|
+
|
44
50
|
#
|
45
51
|
# Parse documents
|
46
52
|
#
|
47
53
|
# @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
|
48
54
|
#
|
49
|
-
def fetch(source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
end
|
55
|
+
def fetch # (source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
56
|
+
rdf = rdf_archive.get_data
|
57
|
+
%i[versioned unversioned].each do |type|
|
58
|
+
send("query_#{type}_docs", rdf).each do |sl|
|
59
|
+
bib = DataParser.parse(rdf, sl, self)
|
60
|
+
add_has_edition_relation(bib) if type == :unversioned
|
61
|
+
save_doc bib
|
62
|
+
rescue StandardError => e
|
63
|
+
link = sl.respond_to?(:link) ? sl.link : sl.version_of
|
64
|
+
Util.error "Error: document #{link} #{e.message}\n#{e.backtrace.join("\n")}"
|
60
65
|
end
|
61
66
|
end
|
62
67
|
@index.sort!.save
|
@@ -132,41 +137,6 @@ module RelatonW3c
|
|
132
137
|
(ids1 & ids2).any?
|
133
138
|
end
|
134
139
|
|
135
|
-
#
|
136
|
-
# Yield fetching for each dataset
|
137
|
-
#
|
138
|
-
# @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
|
139
|
-
#
|
140
|
-
# @yield [RDF::Repository] RDF repository
|
141
|
-
#
|
142
|
-
def each_dataset(source, &_block) # rubocop:disable Metrics/MethodLength
|
143
|
-
case source
|
144
|
-
when "w3c-tr-archive"
|
145
|
-
Dir["w3c-tr-archive/*.rdf"].map do |f|
|
146
|
-
@files = []
|
147
|
-
yield RDF::Repository.load(f)
|
148
|
-
end
|
149
|
-
when "w3c-rdf"
|
150
|
-
@files = []
|
151
|
-
rdf = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
|
152
|
-
yield rdf
|
153
|
-
# parse_static_dataset
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
#
|
158
|
-
# Parse static dataset
|
159
|
-
#
|
160
|
-
# def parse_static_dataset
|
161
|
-
# Dir[File.expand_path("../../data/*", __dir__)].each do |file|
|
162
|
-
# xml = File.read file, encoding: "UTF-8"
|
163
|
-
# save_doc BibXMLParser.parse(xml), warn_duplicate: false
|
164
|
-
# rescue StandardError => e
|
165
|
-
# warn "Error: document #{file} #{e.message}"
|
166
|
-
# warn e.backtrace.join("\n")
|
167
|
-
# end
|
168
|
-
# end
|
169
|
-
|
170
140
|
#
|
171
141
|
# Query RDF source for versioned documents
|
172
142
|
#
|
@@ -206,15 +176,9 @@ module RelatonW3c
|
|
206
176
|
#
|
207
177
|
# @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
|
208
178
|
#
|
209
|
-
def save_doc(bib, warn_duplicate: true)
|
179
|
+
def save_doc(bib, warn_duplicate: true)
|
210
180
|
return unless bib
|
211
181
|
|
212
|
-
c = case @format
|
213
|
-
when "xml" then bib.to_xml(bibdata: true)
|
214
|
-
when "yaml" then bib.to_hash.to_yaml
|
215
|
-
else bib.send("to_#{@format}")
|
216
|
-
end
|
217
|
-
# id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
|
218
182
|
file = file_name(bib.docnumber)
|
219
183
|
if @files.include?(file)
|
220
184
|
Util.warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
|
@@ -223,7 +187,15 @@ module RelatonW3c
|
|
223
187
|
@index.add pubid, file
|
224
188
|
@index1.add_or_update pubid.to_hash, file
|
225
189
|
@files << file
|
226
|
-
|
190
|
+
end
|
191
|
+
File.write file, serialize(bib), encoding: "UTF-8"
|
192
|
+
end
|
193
|
+
|
194
|
+
#
# Serialize a bibliographic item in the configured output format
#
# "xml" and "yaml" are handled explicitly; any other format name is
# dispatched to the item's public `to_<format>` method (e.g. `to_bibxml`).
#
# @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
#
# @return [Object] result of the item's serializer for the format
#
# @raise [NoMethodError] if the item has no public `to_<format>` method
#
def serialize(bib)
  case @format
  when "xml" then bib.to_xml(bibdata: true)
  when "yaml" then bib.to_hash.to_yaml
  # public_send keeps the dynamic dispatch from reaching private methods
  else bib.public_send("to_#{@format}")
  end
end
|
229
201
|
|
@@ -280,9 +280,9 @@ module RelatonW3c
|
|
280
280
|
end
|
281
281
|
|
282
282
|
#
|
283
|
-
# Parse editor drafts
|
283
|
+
# Parse editor drafts links
|
284
284
|
#
|
285
|
-
# @return [Array<RelatonBib::
|
285
|
+
# @return [Array<RelatonBib::TypedUri>] links
|
286
286
|
#
|
287
287
|
def editor_drafts # rubocop:disable Metrics/MethodLength
|
288
288
|
return [] unless @sol.respond_to?(:link)
|
@@ -311,7 +311,10 @@ module RelatonW3c
|
|
311
311
|
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
312
312
|
PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
313
313
|
SELECT ?rel
|
314
|
-
WHERE {
|
314
|
+
WHERE {
|
315
|
+
<#{@sol.link.to_s.strip}> #{predicate} ?rel .
|
316
|
+
FILTER ( isURI(?rel) )
|
317
|
+
}
|
315
318
|
))
|
316
319
|
@rdf.query(sse).order_by(:rel)
|
317
320
|
end
|
@@ -371,7 +374,8 @@ module RelatonW3c
|
|
371
374
|
id = pub_id(url)
|
372
375
|
fref = RelatonBib::FormattedRef.new content: id
|
373
376
|
docid = RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)
|
374
|
-
|
377
|
+
link = [RelatonBib::TypedUri.new(type: "src", content: url)]
|
378
|
+
bib = W3cBibliographicItem.new formattedref: fref, docid: [docid], link: link
|
375
379
|
dsc = RelatonBib::FormattedString.new content: desc if desc
|
376
380
|
RelatonBib::DocumentRelation.new(type: type, bibitem: bib, description: dsc)
|
377
381
|
end
|
@@ -9,7 +9,7 @@ module RelatonW3c
|
|
9
9
|
@prefix = "W3C"
|
10
10
|
@defaultprefix = %r{^W3C\s}
|
11
11
|
@idtype = "W3C"
|
12
|
-
@datasets = %w[w3c-rdf
|
12
|
+
@datasets = %w[w3c-rdf]
|
13
13
|
end
|
14
14
|
|
15
15
|
# @param code [String]
|
@@ -28,8 +28,8 @@ module RelatonW3c
|
|
28
28
|
# @option opts [String] :output directory to output documents
|
29
29
|
# @option opts [String] :format
|
30
30
|
#
|
31
|
-
def fetch_data(
|
32
|
-
DataFetcher.fetch(
|
31
|
+
def fetch_data(_source, opts)
  # The dataset name is not used here; the options hash is forwarded to
  # the fetcher as keyword arguments.
  DataFetcher.fetch(**opts)
end
|
34
34
|
|
35
35
|
# @param xml [String]
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require "open-uri"

module RelatonW3c
  #
  # Local RDF/XML archive of W3C documents.
  #
  # The archive file is seeded from the relaton-data-w3c repository when it
  # does not exist, and is merged with the current W3C tr.rdf dataset when it
  # is missing or was last modified more than MAX_AGE seconds ago.
  #
  class RDFArchive
    # Current W3C dataset merged into the archive.
    TR_RDF_URL = "http://www.w3.org/2002/01/tr-automation/tr.rdf".freeze
    # Initial archive downloaded when no local file exists.
    SEED_URL = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/refs/heads/main/archive.rdf".freeze
    # Maximum age of the archive file, in seconds, before it is refreshed.
    MAX_AGE = 86_400

    #
    # @param [String] file path to the local archive file
    #
    def initialize(file = "archive.rdf")
      @file = file
    end

    #
    # Get RDF data from the updated archive file.
    #
    # @return [RDF::Repository]
    #
    def get_data
      if stale?
        get_archive
        update_archive
      end
      RDF::Repository.load(@file)
    end

    private

    #
    # @return [Boolean] true when the archive file is missing or older than MAX_AGE
    #
    def stale?
      !File.exist?(@file) || File.mtime(@file) < Time.now - MAX_AGE
    end

    #
    # Drop the leading "http:" / "https:" so that both variants of the same
    # URI produce the same lookup key.
    #
    # @param [String] uri value of an rdf:about attribute
    #
    # @return [String] URI without its scheme prefix
    #
    def scheme_less(uri)
      uri.sub(/\Ahttps?:/, "")
    end

    #
    # @param [Nokogiri::XML::Element] element top-level element of an RDF/XML document
    #
    # @return [String, nil] scheme-less "about" attribute value, nil when absent
    #
    def about_key(element)
      about = element.attribute("about")&.value
      scheme_less(about) if about
    end

    #
    # Merge the current W3C dataset into the archive file: elements with a
    # matching "about" key are replaced, unmatched new elements are appended,
    # and namespace definitions missing from the archive root are added.
    #
    def update_archive
      # Load the older RDF/XML file
      older = Nokogiri::XML File.read(@file, encoding: "UTF-8")

      # Load the newer RDF/XML file; the block form closes the stream
      newer = Nokogiri::XML OpenURI.open_uri(TR_RDF_URL, &:read)

      # Index the newer elements by their scheme-less "about" value
      newer_elements = {}
      newer.root.element_children.each do |element|
        key = about_key(element)
        newer_elements[key] = element if key
      end

      # Replace elements in the older document; each newer element is used once
      older.root.element_children.each do |element|
        replacement = newer_elements.delete(about_key(element))
        element.replace(replacement) if replacement
      end

      # Add remaining new elements to the older document
      newer_elements.each_value { |element| older.root.add_child(element) }

      # Add new namespaces from the newer document to the older document
      newer.root.namespace_definitions.each do |ns|
        next if older.root.namespace_definitions.any? { |old_ns| old_ns.href == ns.href }

        older.root.add_namespace_definition(ns.prefix, ns.href)
      end

      File.write @file, older.to_xml, encoding: "UTF-8"
    end

    #
    # Download the seed archive when the local file does not exist.
    #
    def get_archive
      return if File.exist? @file

      File.write @file, OpenURI.open_uri(SEED_URL, &:read), encoding: "UTF-8"
    end
  end
end
|
data/lib/relaton_w3c/version.rb
CHANGED
@@ -506,3 +506,16 @@
|
|
506
506
|
'www.w3.org/groups/wg/rch':
|
507
507
|
name: RDF Dataset Canonicalization and Hash Working Group
|
508
508
|
abbrev: RCH WG
|
509
|
+
'www.w3.org/groups/wg/browser-tools-testing':
|
510
|
+
name: Browser Testing and Tools Working Group
|
511
|
+
'www.w3.org/groups/wg/did':
|
512
|
+
name: Decentralized Identifier Working Group
|
513
|
+
abbrev: DID WG
|
514
|
+
'www.w3.org/Member/Board':
|
515
|
+
name: Board of Directors
|
516
|
+
'www.w3.org/groups/wg/webtransport':
|
517
|
+
name: WebTransport Working Group
|
518
|
+
'www.w3.org/groups/wg/fedid':
|
519
|
+
name: Federated Identity Working Group
|
520
|
+
'www.w3.org/groups/wg/gpu':
|
521
|
+
name: GPU for the Web Working Group
|
data/relaton_w3c.gemspec
CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.add_dependency "mechanize", "~> 2.10"
|
36
36
|
spec.add_dependency "rdf", "~> 3.2"
|
37
37
|
spec.add_dependency "rdf-normalize", "~> 0.6"
|
38
|
-
spec.add_dependency "relaton-bib", "~> 1.
|
38
|
+
spec.add_dependency "relaton-bib", "~> 1.20.0"
|
39
39
|
spec.add_dependency "relaton-index", "~> 0.2.8"
|
40
40
|
spec.add_dependency "rubyzip", "~> 2.3"
|
41
41
|
spec.add_dependency "shex", "~> 0.7"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-w3c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: linkeddata
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 1.
|
75
|
+
version: 1.20.0
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 1.
|
82
|
+
version: 1.20.0
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: relaton-index
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -187,6 +187,7 @@ files:
|
|
187
187
|
- lib/relaton_w3c/hit_collection.rb
|
188
188
|
- lib/relaton_w3c/processor.rb
|
189
189
|
- lib/relaton_w3c/pubid.rb
|
190
|
+
- lib/relaton_w3c/rdf_archive.rb
|
190
191
|
- lib/relaton_w3c/util.rb
|
191
192
|
- lib/relaton_w3c/version.rb
|
192
193
|
- lib/relaton_w3c/w3c_bibliographic_item.rb
|
@@ -199,7 +200,7 @@ licenses:
|
|
199
200
|
- BSD-2-Clause
|
200
201
|
metadata:
|
201
202
|
homepage_uri: https://github.com/relaton/relaton-wc3
|
202
|
-
post_install_message:
|
203
|
+
post_install_message:
|
203
204
|
rdoc_options: []
|
204
205
|
require_paths:
|
205
206
|
- lib
|
@@ -215,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
215
216
|
version: '0'
|
216
217
|
requirements: []
|
217
218
|
rubygems_version: 3.3.27
|
218
|
-
signing_key:
|
219
|
+
signing_key:
|
219
220
|
specification_version: 4
|
220
221
|
summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
|
221
222
|
model'
|