relaton-w3c 1.18.1 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +5 -12
- data/lib/relaton_w3c/data_fetcher.rb +31 -60
- data/lib/relaton_w3c/data_parser.rb +9 -5
- data/lib/relaton_w3c/document_type.rb +1 -1
- data/lib/relaton_w3c/hash_converter.rb +4 -0
- data/lib/relaton_w3c/processor.rb +3 -3
- data/lib/relaton_w3c/rdf_archive.rb +67 -0
- data/lib/relaton_w3c/util.rb +1 -4
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliography.rb +3 -3
- data/lib/relaton_w3c/workgroups.yaml +13 -0
- data/lib/relaton_w3c/xml_parser.rb +4 -0
- data/lib/relaton_w3c.rb +0 -1
- data/relaton_w3c.gemspec +1 -1
- metadata +8 -8
- data/lib/relaton_w3c/config.rb +0 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9c777f3bbcbff5de8311b7103f7f64dee70971f5745be10f0415200ec8a3fe9
|
4
|
+
data.tar.gz: c13ba956f36f1d4e18a846aeb5d550aad5dbd0128a63457418032958d9d503ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: efbc142c4590df61f6b625bc1e00a29e95edad3ca97d940ea05f57caec3cf8db9fc5ed2a64ec6d5a4079ef3228fd682bae801b6c659a5afb83279d49c4d0b2e8
|
7
|
+
data.tar.gz: ef68de56840568a3e7d063a5c72876d23ae8d7f02d8cd35169e5a5b619c240edad1622738d38cd5d80642301f892e17a6f40d9fe17e33520bd523440064c9dc0
|
data/README.adoc
CHANGED
@@ -23,24 +23,13 @@ Or install it yourself as:
|
|
23
23
|
|
24
24
|
== Usage
|
25
25
|
|
26
|
-
===
|
27
|
-
|
28
|
-
Configuration is optional. The available option is `logger` which is a `Logger` instance. By default, the logger is `Logger.new($stderr)` with `Logger::WARN` level. To change the logger level, use `RelatonW3c.configure` block.
|
26
|
+
=== Search for a standard using keywords
|
29
27
|
|
30
28
|
[source,ruby]
|
31
29
|
----
|
32
30
|
require 'relaton_w3c'
|
33
31
|
=> true
|
34
32
|
|
35
|
-
RelatonW3c.configure do |config|
|
36
|
-
config.logger.level = Logger::DEBUG
|
37
|
-
end
|
38
|
-
----
|
39
|
-
|
40
|
-
=== Search for a standard using keywords
|
41
|
-
|
42
|
-
[source,ruby]
|
43
|
-
----
|
44
33
|
item = RelatonW3c::W3cBibliography.get "W3C REC-json-ld11-20200716"
|
45
34
|
[relaton-w3c] (W3C REC-json-ld11-20200716) Fetching from Relaton repository ...
|
46
35
|
[relaton-w3c] (W3C REC-json-ld11-20200716) Found: `REC-json-ld11-20200716`
|
@@ -139,6 +128,10 @@ Done in: 155 sec.
|
|
139
128
|
=> nil
|
140
129
|
----
|
141
130
|
|
131
|
+
=== Logging
|
132
|
+
|
133
|
+
RelatonW3c uses the relaton-logger gem for logging. By default, it logs to STDOUT. To change the log levels and add other loggers, read the https://github.com/relaton/relaton-logger#usage[relaton-logger] documentation.
|
134
|
+
|
142
135
|
== Development
|
143
136
|
|
144
137
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -2,11 +2,12 @@ require "rdf"
|
|
2
2
|
require "linkeddata"
|
3
3
|
require "sparql"
|
4
4
|
require "mechanize"
|
5
|
-
|
5
|
+
require_relative "rdf_archive"
|
6
|
+
require_relative "data_parser"
|
6
7
|
|
7
8
|
module RelatonW3c
|
8
9
|
class DataFetcher
|
9
|
-
attr_reader :data, :group_names
|
10
|
+
attr_reader :data, :group_names, :rdf_archive
|
10
11
|
|
11
12
|
#
|
12
13
|
# Data fetcher initializer
|
@@ -20,6 +21,7 @@ module RelatonW3c
|
|
20
21
|
@ext = format.sub(/^bib/, "")
|
21
22
|
dir = File.dirname(File.expand_path(__FILE__))
|
22
23
|
@group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
|
24
|
+
@files = Set.new
|
23
25
|
@index = DataIndex.create_from_file
|
24
26
|
@index1 = Relaton::Index.find_or_create :W3C, file: "index1.yaml"
|
25
27
|
end
|
@@ -31,33 +33,35 @@ module RelatonW3c
|
|
31
33
|
# @param [String] output directory to save files, default: "data"
|
32
34
|
# @param [String] format format of output files (xml, yaml, bibxml), default: yaml
|
33
35
|
#
|
34
|
-
def self.fetch(
|
36
|
+
def self.fetch(output: "data", format: "yaml")
|
35
37
|
t1 = Time.now
|
36
38
|
puts "Started at: #{t1}"
|
37
39
|
FileUtils.mkdir_p output
|
38
|
-
new(output, format).fetch
|
40
|
+
new(output, format).fetch
|
39
41
|
t2 = Time.now
|
40
42
|
puts "Stopped at: #{t2}"
|
41
43
|
puts "Done in: #{(t2 - t1).round} sec."
|
42
44
|
end
|
43
45
|
|
46
|
+
def rdf_archive
|
47
|
+
@rdf_archive ||= RDFArchive.new
|
48
|
+
end
|
49
|
+
|
44
50
|
#
|
45
51
|
# Parse documents
|
46
52
|
#
|
47
53
|
# @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
|
48
54
|
#
|
49
|
-
def fetch(source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
warn e.backtrace.join("\n")
|
60
|
-
end
|
55
|
+
def fetch # (source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
56
|
+
rdf = rdf_archive.get_data
|
57
|
+
%i[versioned unversioned].each do |type|
|
58
|
+
send("query_#{type}_docs", rdf).each do |sl|
|
59
|
+
bib = DataParser.parse(rdf, sl, self)
|
60
|
+
add_has_edition_relation(bib) if type == :unversioned
|
61
|
+
save_doc bib
|
62
|
+
rescue StandardError => e
|
63
|
+
link = sl.respond_to?(:link) ? sl.link : sl.version_of
|
64
|
+
Util.error "Error: document #{link} #{e.message}\n#{e.backtrace.join("\n")}"
|
61
65
|
end
|
62
66
|
end
|
63
67
|
@index.sort!.save
|
@@ -133,41 +137,6 @@ module RelatonW3c
|
|
133
137
|
(ids1 & ids2).any?
|
134
138
|
end
|
135
139
|
|
136
|
-
#
|
137
|
-
# Yield fetching for each dataset
|
138
|
-
#
|
139
|
-
# @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
|
140
|
-
#
|
141
|
-
# @yield [RDF::Repository] RDF repository
|
142
|
-
#
|
143
|
-
def each_dataset(source, &_block) # rubocop:disable Metrics/MethodLength
|
144
|
-
case source
|
145
|
-
when "w3c-tr-archive"
|
146
|
-
Dir["w3c-tr-archive/*.rdf"].map do |f|
|
147
|
-
@files = []
|
148
|
-
yield RDF::Repository.load(f)
|
149
|
-
end
|
150
|
-
when "w3c-rdf"
|
151
|
-
@files = []
|
152
|
-
rdf = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
|
153
|
-
yield rdf
|
154
|
-
# parse_static_dataset
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
#
|
159
|
-
# Parse static dataset
|
160
|
-
#
|
161
|
-
# def parse_static_dataset
|
162
|
-
# Dir[File.expand_path("../../data/*", __dir__)].each do |file|
|
163
|
-
# xml = File.read file, encoding: "UTF-8"
|
164
|
-
# save_doc BibXMLParser.parse(xml), warn_duplicate: false
|
165
|
-
# rescue StandardError => e
|
166
|
-
# warn "Error: document #{file} #{e.message}"
|
167
|
-
# warn e.backtrace.join("\n")
|
168
|
-
# end
|
169
|
-
# end
|
170
|
-
|
171
140
|
#
|
172
141
|
# Query RDF source for versioned documents
|
173
142
|
#
|
@@ -207,24 +176,26 @@ module RelatonW3c
|
|
207
176
|
#
|
208
177
|
# @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
|
209
178
|
#
|
210
|
-
def save_doc(bib, warn_duplicate: true)
|
179
|
+
def save_doc(bib, warn_duplicate: true)
|
211
180
|
return unless bib
|
212
181
|
|
213
|
-
c = case @format
|
214
|
-
when "xml" then bib.to_xml(bibdata: true)
|
215
|
-
when "yaml" then bib.to_hash.to_yaml
|
216
|
-
else bib.send("to_#{@format}")
|
217
|
-
end
|
218
|
-
# id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
|
219
182
|
file = file_name(bib.docnumber)
|
220
183
|
if @files.include?(file)
|
221
|
-
warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
|
184
|
+
Util.warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
|
222
185
|
else
|
223
186
|
pubid = PubId.parse bib.docnumber
|
224
187
|
@index.add pubid, file
|
225
188
|
@index1.add_or_update pubid.to_hash, file
|
226
189
|
@files << file
|
227
|
-
|
190
|
+
end
|
191
|
+
File.write file, serialize(bib), encoding: "UTF-8"
|
192
|
+
end
|
193
|
+
|
194
|
+
def serialize(bib)
|
195
|
+
case @format
|
196
|
+
when "xml" then bib.to_xml(bibdata: true)
|
197
|
+
when "yaml" then bib.to_hash.to_yaml
|
198
|
+
else bib.send("to_#{@format}")
|
228
199
|
end
|
229
200
|
end
|
230
201
|
|
@@ -280,9 +280,9 @@ module RelatonW3c
|
|
280
280
|
end
|
281
281
|
|
282
282
|
#
|
283
|
-
# Parse editor drafts
|
283
|
+
# Parse editor draft links
|
284
284
|
#
|
285
|
-
# @return [Array<RelatonBib::
|
285
|
+
# @return [Array<RelatonBib::TypedUri>] links
|
286
286
|
#
|
287
287
|
def editor_drafts # rubocop:disable Metrics/MethodLength
|
288
288
|
return [] unless @sol.respond_to?(:link)
|
@@ -311,7 +311,10 @@ module RelatonW3c
|
|
311
311
|
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
312
312
|
PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
313
313
|
SELECT ?rel
|
314
|
-
WHERE {
|
314
|
+
WHERE {
|
315
|
+
<#{@sol.link.to_s.strip}> #{predicate} ?rel .
|
316
|
+
FILTER ( isURI(?rel) )
|
317
|
+
}
|
315
318
|
))
|
316
319
|
@rdf.query(sse).order_by(:rel)
|
317
320
|
end
|
@@ -371,7 +374,8 @@ module RelatonW3c
|
|
371
374
|
id = pub_id(url)
|
372
375
|
fref = RelatonBib::FormattedRef.new content: id
|
373
376
|
docid = RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)
|
374
|
-
|
377
|
+
link = [RelatonBib::TypedUri.new(type: "src", content: url)]
|
378
|
+
bib = W3cBibliographicItem.new formattedref: fref, docid: [docid], link: link
|
375
379
|
dsc = RelatonBib::FormattedString.new content: desc if desc
|
376
380
|
RelatonBib::DocumentRelation.new(type: type, bibitem: bib, description: dsc)
|
377
381
|
end
|
@@ -443,7 +447,7 @@ module RelatonW3c
|
|
443
447
|
rwg = RelatonBib::WorkGroup.new name: wg["name"]
|
444
448
|
obj << RelatonBib::TechnicalCommittee.new(rwg)
|
445
449
|
else
|
446
|
-
Util.warn "
|
450
|
+
Util.warn "Working group name not found for: `#{edg.home_page}`"
|
447
451
|
end
|
448
452
|
end
|
449
453
|
RelatonBib::EditorialGroup.new tc
|
@@ -9,7 +9,7 @@ module RelatonW3c
|
|
9
9
|
@prefix = "W3C"
|
10
10
|
@defaultprefix = %r{^W3C\s}
|
11
11
|
@idtype = "W3C"
|
12
|
-
@datasets = %w[w3c-rdf
|
12
|
+
@datasets = %w[w3c-rdf]
|
13
13
|
end
|
14
14
|
|
15
15
|
# @param code [String]
|
@@ -28,8 +28,8 @@ module RelatonW3c
|
|
28
28
|
# @option opts [String] :output directory to output documents
|
29
29
|
# @option opts [String] :format
|
30
30
|
#
|
31
|
-
def fetch_data(
|
32
|
-
DataFetcher.fetch(
|
31
|
+
def fetch_data(_source, opts)
|
32
|
+
DataFetcher.fetch(**opts)
|
33
33
|
end
|
34
34
|
|
35
35
|
# @param xml [String]
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require "open-uri"

module RelatonW3c
  #
  # Local copy of the W3C TR RDF archive kept in a single RDF/XML file.
  #
  # The file is seeded from the relaton-data-w3c repository when it does not
  # exist and is merged with the current W3C `tr.rdf` whenever it is missing
  # or older than MAX_AGE seconds.
  #
  class RDFArchive
    # Leading "http:" / "https:" scheme. Stripping it makes rdf:about values
    # that differ only by scheme map to the same lookup key.
    SCHEME = /\Ahttps?:/.freeze

    # Current W3C TR dataset merged into the archive on refresh.
    TR_RDF_URL = "http://www.w3.org/2002/01/tr-automation/tr.rdf".freeze

    # Seed archive downloaded when the local file does not exist.
    ARCHIVE_URL = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/refs/heads/main/archive.rdf".freeze

    # Age in seconds (24 hours) after which the local file is refreshed.
    MAX_AGE = 86_400

    #
    # @param [String] file path of the local archive file
    #
    def initialize(file = "archive.rdf")
      @file = file
    end

    #
    # Get RDF data from the updated archive file.
    #
    # The archive is refreshed first when the file is missing or its
    # modification time is more than MAX_AGE seconds in the past.
    #
    # @return [RDF::Repository]
    #
    def get_data
      if !File.exist?(@file) || File.mtime(@file) < Time.now - MAX_AGE
        get_archive
        update_archive
      end
      RDF::Repository.load(@file)
    end

    private

    #
    # Merge the current W3C dataset into the local archive file.
    #
    # Top-level elements of the archive whose rdf:about matches an element of
    # the downloaded dataset (ignoring the http/https scheme) are replaced by
    # the downloaded element. Downloaded elements without a match are
    # appended, and namespace declarations missing from the archive root are
    # copied over. The result is written back to the archive file.
    #
    def update_archive
      older = Nokogiri::XML File.read(@file, encoding: "UTF-8")
      newer = Nokogiri::XML download(TR_RDF_URL)

      # Elements of the newer document that have not been merged yet.
      pending = index_by_about(newer)

      older.root.element_children.each do |element|
        about = element.attribute("about")&.value
        next unless about

        # delete returns the element (or nil), so each newer element is
        # used at most once.
        replacement = pending.delete(about_key(about))
        element.replace(replacement) if replacement
      end

      pending.each_value { |element| older.root.add_child(element) }
      copy_missing_namespaces(newer, older)
      File.write @file, older.to_xml, encoding: "UTF-8"
    end

    #
    # Download the seed archive unless the local file already exists.
    #
    def get_archive
      return if File.exist? @file

      File.write @file, download(ARCHIVE_URL), encoding: "UTF-8"
    end

    #
    # Index top-level elements of a document by their scheme-less rdf:about.
    # Elements without an "about" attribute are left out; when several
    # elements share a key the last one wins.
    #
    # @param [Nokogiri::XML::Document] doc
    #
    # @return [Hash] scheme-less rdf:about value => element
    #
    def index_by_about(doc)
      doc.root.element_children.each_with_object({}) do |element, index|
        about = element.attribute("about")&.value
        index[about_key(about)] = element if about
      end
    end

    #
    # Add to the target root every namespace declaration of the source root
    # whose href is not declared on the target root yet.
    #
    # @param [Nokogiri::XML::Document] source
    # @param [Nokogiri::XML::Document] target
    #
    def copy_missing_namespaces(source, target)
      source.root.namespace_definitions.each do |ns|
        next if target.root.namespace_definitions.any? { |known| known.href == ns.href }

        target.root.add_namespace_definition(ns.prefix, ns.href)
      end
    end

    #
    # @param [String] about rdf:about value
    #
    # @return [String] the value without a leading "http:" or "https:"
    #
    def about_key(about)
      about.sub(SCHEME, "")
    end

    #
    # Read the whole body of a URL; the block form closes the handle.
    #
    # @param [String] url
    #
    # @return [String]
    #
    def download(url)
      OpenURI.open_uri(url, &:read)
    end
  end
end
|
data/lib/relaton_w3c/util.rb
CHANGED
data/lib/relaton_w3c/version.rb
CHANGED
@@ -39,15 +39,15 @@ module RelatonW3c
|
|
39
39
|
# @param opts [Hash] options
|
40
40
|
# @return [RelatonW3c::W3cBibliographicItem]
|
41
41
|
def get(ref, _year = nil, _opts = {})
|
42
|
-
Util.
|
42
|
+
Util.info "Fetching from Relaton repository ...", key: ref
|
43
43
|
result = search(ref)
|
44
44
|
unless result
|
45
|
-
Util.
|
45
|
+
Util.info "Not found.", key: ref
|
46
46
|
return
|
47
47
|
end
|
48
48
|
|
49
49
|
found = result.docidentifier.first.id
|
50
|
-
Util.
|
50
|
+
Util.info "Found: `#{found}`", key: ref
|
51
51
|
result
|
52
52
|
end
|
53
53
|
end
|
@@ -506,3 +506,16 @@
|
|
506
506
|
'www.w3.org/groups/wg/rch':
|
507
507
|
name: RDF Dataset Canonicalization and Hash Working Group
|
508
508
|
abbrev: RCH WG
|
509
|
+
'www.w3.org/groups/wg/browser-tools-testing':
|
510
|
+
name: Browser Testing and Tools Working Group
|
511
|
+
'www.w3.org/groups/wg/did':
|
512
|
+
name: Decentralized Identifier Working Group
|
513
|
+
abbrev: DID WG
|
514
|
+
'www.w3.org/Member/Board':
|
515
|
+
name: Board of Directors
|
516
|
+
'www.w3.org/groups/wg/webtransport':
|
517
|
+
name: WebTransport Working Group
|
518
|
+
'www.w3.org/groups/wg/fedid':
|
519
|
+
name: Federated Identity Working Group
|
520
|
+
'www.w3.org/groups/wg/gpu':
|
521
|
+
name: GPU for the Web Working Group
|
data/lib/relaton_w3c.rb
CHANGED
data/relaton_w3c.gemspec
CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
|
|
35
35
|
spec.add_dependency "mechanize", "~> 2.10"
|
36
36
|
spec.add_dependency "rdf", "~> 3.2"
|
37
37
|
spec.add_dependency "rdf-normalize", "~> 0.6"
|
38
|
-
spec.add_dependency "relaton-bib", "~> 1.
|
38
|
+
spec.add_dependency "relaton-bib", "~> 1.20.0"
|
39
39
|
spec.add_dependency "relaton-index", "~> 0.2.8"
|
40
40
|
spec.add_dependency "rubyzip", "~> 2.3"
|
41
41
|
spec.add_dependency "shex", "~> 0.7"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-w3c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: linkeddata
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 1.
|
75
|
+
version: 1.20.0
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 1.
|
82
|
+
version: 1.20.0
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: relaton-index
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -178,7 +178,6 @@ files:
|
|
178
178
|
- grammars/relaton-w3c.rng
|
179
179
|
- lib/relaton_w3c.rb
|
180
180
|
- lib/relaton_w3c/bibxml_parser.rb
|
181
|
-
- lib/relaton_w3c/config.rb
|
182
181
|
- lib/relaton_w3c/data_fetcher.rb
|
183
182
|
- lib/relaton_w3c/data_index.rb
|
184
183
|
- lib/relaton_w3c/data_parser.rb
|
@@ -188,6 +187,7 @@ files:
|
|
188
187
|
- lib/relaton_w3c/hit_collection.rb
|
189
188
|
- lib/relaton_w3c/processor.rb
|
190
189
|
- lib/relaton_w3c/pubid.rb
|
190
|
+
- lib/relaton_w3c/rdf_archive.rb
|
191
191
|
- lib/relaton_w3c/util.rb
|
192
192
|
- lib/relaton_w3c/version.rb
|
193
193
|
- lib/relaton_w3c/w3c_bibliographic_item.rb
|
@@ -200,7 +200,7 @@ licenses:
|
|
200
200
|
- BSD-2-Clause
|
201
201
|
metadata:
|
202
202
|
homepage_uri: https://github.com/relaton/relaton-wc3
|
203
|
-
post_install_message:
|
203
|
+
post_install_message:
|
204
204
|
rdoc_options: []
|
205
205
|
require_paths:
|
206
206
|
- lib
|
@@ -216,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
216
216
|
version: '0'
|
217
217
|
requirements: []
|
218
218
|
rubygems_version: 3.3.27
|
219
|
-
signing_key:
|
219
|
+
signing_key:
|
220
220
|
specification_version: 4
|
221
221
|
summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
|
222
222
|
model'
|