relaton-w3c 1.19.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dde70fdc615a88c4343add641e193635a5377af68180d96122b02af747aa50e5
4
- data.tar.gz: 363d74d0a170b307006310bbbe2f6e645da40aabf12dbc2a94ebac4887f2452f
3
+ metadata.gz: c9c777f3bbcbff5de8311b7103f7f64dee70971f5745be10f0415200ec8a3fe9
4
+ data.tar.gz: c13ba956f36f1d4e18a846aeb5d550aad5dbd0128a63457418032958d9d503ff
5
5
  SHA512:
6
- metadata.gz: fec1491eaa3108fd8726dfa0af50f54b98d7c28286c936d827088eed8ee7badbe6be430dd4588ad6324936f6cce8824379750a71ac4da9aa8d88788a47fceec4
7
- data.tar.gz: e9d4074458f1a01e12ab84619861dda851ce668573d3b3e843a2c7ba8effed42e6c94545b49157626ba922ebfc31b57ff9d5877461626d2578a9ebdf128d314c
6
+ metadata.gz: efbc142c4590df61f6b625bc1e00a29e95edad3ca97d940ea05f57caec3cf8db9fc5ed2a64ec6d5a4079ef3228fd682bae801b6c659a5afb83279d49c4d0b2e8
7
+ data.tar.gz: ef68de56840568a3e7d063a5c72876d23ae8d7f02d8cd35169e5a5b619c240edad1622738d38cd5d80642301f892e17a6f40d9fe17e33520bd523440064c9dc0
@@ -2,11 +2,12 @@ require "rdf"
2
2
  require "linkeddata"
3
3
  require "sparql"
4
4
  require "mechanize"
5
- require "relaton_w3c/data_parser"
5
+ require_relative "rdf_archive"
6
+ require_relative "data_parser"
6
7
 
7
8
  module RelatonW3c
8
9
  class DataFetcher
9
- attr_reader :data, :group_names
10
+ attr_reader :data, :group_names, :rdf_archive
10
11
 
11
12
  #
12
13
  # Data fetcher initializer
@@ -20,6 +21,7 @@ module RelatonW3c
20
21
  @ext = format.sub(/^bib/, "")
21
22
  dir = File.dirname(File.expand_path(__FILE__))
22
23
  @group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
24
+ @files = Set.new
23
25
  @index = DataIndex.create_from_file
24
26
  @index1 = Relaton::Index.find_or_create :W3C, file: "index1.yaml"
25
27
  end
@@ -31,32 +33,35 @@ module RelatonW3c
31
33
  # @param [String] output directory to save files, default: "data"
32
34
  # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
33
35
  #
34
- def self.fetch(source, output: "data", format: "yaml")
36
+ def self.fetch(output: "data", format: "yaml")
35
37
  t1 = Time.now
36
38
  puts "Started at: #{t1}"
37
39
  FileUtils.mkdir_p output
38
- new(output, format).fetch source
40
+ new(output, format).fetch
39
41
  t2 = Time.now
40
42
  puts "Stopped at: #{t2}"
41
43
  puts "Done in: #{(t2 - t1).round} sec."
42
44
  end
43
45
 
46
+ def rdf_archive
47
+ @rdf_archive ||= RDFArchive.new
48
+ end
49
+
44
50
  #
45
51
  # Parse documents
46
52
  #
47
53
  # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
48
54
  #
49
- def fetch(source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
50
- each_dataset(source) do |rdf|
51
- %i[versioned unversioned].each do |type|
52
- send("query_#{type}_docs", rdf).each do |sl|
53
- bib = DataParser.parse(rdf, sl, self)
54
- add_has_edition_relation(bib) if type == :unversioned
55
- save_doc bib
56
- rescue StandardError => e
57
- link = sl.respond_to?(:link) ? sl.link : sl.version_of
58
- Util.error "Error: document #{link} #{e.message}\n#{e.backtrace.join("\n")}"
59
- end
55
+ def fetch # (source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
56
+ rdf = rdf_archive.get_data
57
+ %i[versioned unversioned].each do |type|
58
+ send("query_#{type}_docs", rdf).each do |sl|
59
+ bib = DataParser.parse(rdf, sl, self)
60
+ add_has_edition_relation(bib) if type == :unversioned
61
+ save_doc bib
62
+ rescue StandardError => e
63
+ link = sl.respond_to?(:link) ? sl.link : sl.version_of
64
+ Util.error "Error: document #{link} #{e.message}\n#{e.backtrace.join("\n")}"
60
65
  end
61
66
  end
62
67
  @index.sort!.save
@@ -132,41 +137,6 @@ module RelatonW3c
132
137
  (ids1 & ids2).any?
133
138
  end
134
139
 
135
- #
136
- # Yield fetching for each dataset
137
- #
138
- # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
139
- #
140
- # @yield [RDF::Repository] RDF repository
141
- #
142
- def each_dataset(source, &_block) # rubocop:disable Metrics/MethodLength
143
- case source
144
- when "w3c-tr-archive"
145
- Dir["w3c-tr-archive/*.rdf"].map do |f|
146
- @files = []
147
- yield RDF::Repository.load(f)
148
- end
149
- when "w3c-rdf"
150
- @files = []
151
- rdf = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
152
- yield rdf
153
- # parse_static_dataset
154
- end
155
- end
156
-
157
- #
158
- # Parse static dataset
159
- #
160
- # def parse_static_dataset
161
- # Dir[File.expand_path("../../data/*", __dir__)].each do |file|
162
- # xml = File.read file, encoding: "UTF-8"
163
- # save_doc BibXMLParser.parse(xml), warn_duplicate: false
164
- # rescue StandardError => e
165
- # warn "Error: document #{file} #{e.message}"
166
- # warn e.backtrace.join("\n")
167
- # end
168
- # end
169
-
170
140
  #
171
141
  # Query RDF source for versioned documents
172
142
  #
@@ -206,15 +176,9 @@ module RelatonW3c
206
176
  #
207
177
  # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
208
178
  #
209
- def save_doc(bib, warn_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
179
+ def save_doc(bib, warn_duplicate: true)
210
180
  return unless bib
211
181
 
212
- c = case @format
213
- when "xml" then bib.to_xml(bibdata: true)
214
- when "yaml" then bib.to_hash.to_yaml
215
- else bib.send("to_#{@format}")
216
- end
217
- # id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
218
182
  file = file_name(bib.docnumber)
219
183
  if @files.include?(file)
220
184
  Util.warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
@@ -223,7 +187,15 @@ module RelatonW3c
223
187
  @index.add pubid, file
224
188
  @index1.add_or_update pubid.to_hash, file
225
189
  @files << file
226
- File.write file, c, encoding: "UTF-8"
190
+ end
191
+ File.write file, serialize(bib), encoding: "UTF-8"
192
+ end
193
+
194
+ def serialize(bib)
195
+ case @format
196
+ when "xml" then bib.to_xml(bibdata: true)
197
+ when "yaml" then bib.to_hash.to_yaml
198
+ else bib.send("to_#{@format}")
227
199
  end
228
200
  end
229
201
 
@@ -280,9 +280,9 @@ module RelatonW3c
280
280
  end
281
281
 
282
282
  #
283
- # Parse editor drafts relation
283
+ # Parse editor drafts links
284
284
  #
285
- # @return [Array<RelatonBib::DocumentRelation>] relation
285
+ # @return [Array<RelatonBib::TypedUri>] links
286
286
  #
287
287
  def editor_drafts # rubocop:disable Metrics/MethodLength
288
288
  return [] unless @sol.respond_to?(:link)
@@ -311,7 +311,10 @@ module RelatonW3c
311
311
  PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
312
312
  PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
313
313
  SELECT ?rel
314
- WHERE { <#{@sol.link.to_s.strip}> #{predicate} ?rel . }
314
+ WHERE {
315
+ <#{@sol.link.to_s.strip}> #{predicate} ?rel .
316
+ FILTER ( isURI(?rel) )
317
+ }
315
318
  ))
316
319
  @rdf.query(sse).order_by(:rel)
317
320
  end
@@ -371,7 +374,8 @@ module RelatonW3c
371
374
  id = pub_id(url)
372
375
  fref = RelatonBib::FormattedRef.new content: id
373
376
  docid = RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)
374
- bib = W3cBibliographicItem.new formattedref: fref, docid: [docid]
377
+ link = [RelatonBib::TypedUri.new(type: "src", content: url)]
378
+ bib = W3cBibliographicItem.new formattedref: fref, docid: [docid], link: link
375
379
  dsc = RelatonBib::FormattedString.new content: desc if desc
376
380
  RelatonBib::DocumentRelation.new(type: type, bibitem: bib, description: dsc)
377
381
  end
@@ -9,7 +9,7 @@ module RelatonW3c
9
9
  @prefix = "W3C"
10
10
  @defaultprefix = %r{^W3C\s}
11
11
  @idtype = "W3C"
12
- @datasets = %w[w3c-rdf w3c-tr-archive]
12
+ @datasets = %w[w3c-rdf]
13
13
  end
14
14
 
15
15
  # @param code [String]
@@ -28,8 +28,8 @@ module RelatonW3c
28
28
  # @option opts [String] :output directory to output documents
29
29
  # @option opts [String] :format
30
30
  #
31
- def fetch_data(source, opts)
32
- DataFetcher.fetch(source, **opts)
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
33
  end
34
34
 
35
35
  # @param xml [String]
@@ -0,0 +1,67 @@
1
module RelatonW3c
  #
  # Local RDF/XML archive of W3C documents that is kept up to date by merging
  # in the elements published in the live tr.rdf feed.
  #
  class RDFArchive
    # Live RDF/XML feed whose elements are merged into the local archive.
    TR_RDF_URL = "http://www.w3.org/2002/01/tr-automation/tr.rdf".freeze

    # Seed archive downloaded when the local archive file does not exist yet.
    ARCHIVE_URL = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/refs/heads/main/archive.rdf".freeze

    # Age in seconds (one day) after which the local archive is refreshed.
    MAX_AGE = 86_400

    #
    # @param [String] file path of the local archive file
    #
    def initialize(file = "archive.rdf")
      @file = file
    end

    #
    # Get RDF data from the updated archive file.
    #
    # The archive is downloaded (if missing) and merged with the live feed
    # when the local file does not exist or was modified more than MAX_AGE
    # seconds ago.
    #
    # @return [RDF::Repository]
    #
    def get_data
      if !File.exist?(@file) || File.mtime(@file) < Time.now - MAX_AGE
        get_archive
        update_archive
      end
      RDF::Repository.load(@file)
    end

    private

    #
    # Merge the live feed into the local archive file and write it back.
    #
    # Elements are matched by their "about" attribute with the URI scheme
    # removed, so the http and https forms of the same URI are treated as
    # the same document.
    #
    # @return [void]
    #
    def update_archive
      older = Nokogiri::XML File.read(@file, encoding: "UTF-8")
      # Block form closes the connection handle once the body has been read.
      newer = Nokogiri::XML OpenURI.open_uri(TR_RDF_URL, &:read)

      # Index the elements of the newer document by scheme-less "about" URI.
      fresh = newer.root.element_children.each_with_object({}) do |element, index|
        about = element.attribute("about")&.value
        index[about_key(about)] = element if about
      end

      # Replace elements of the older document that have a newer counterpart.
      older.root.element_children.each do |element|
        about = element.attribute("about")&.value
        next unless about

        replacement = fresh.delete(about_key(about))
        element.replace(replacement) if replacement
      end

      # Whatever is left in the index is new: append it to the older document.
      fresh.each_value { |element| older.root.add_child(element) }

      # Add namespaces that only the newer document declares.
      known = older.root.namespace_definitions.map(&:href)
      newer.root.namespace_definitions.each do |ns|
        next if known.include?(ns.href)

        older.root.add_namespace_definition(ns.prefix, ns.href)
        known << ns.href
      end

      File.write @file, older.to_xml, encoding: "UTF-8"
    end

    #
    # Download the seed archive unless the local archive file already exists.
    #
    # @return [void]
    #
    def get_archive
      return if File.exist? @file

      File.write @file, OpenURI.open_uri(ARCHIVE_URL, &:read), encoding: "UTF-8"
    end

    #
    # Build the lookup key for an "about" URI by dropping a leading
    # "http:" or "https:" scheme.
    #
    # @param [String] uri value of the "about" attribute
    #
    # @return [String] URI without the http/https scheme
    #
    def about_key(uri)
      uri.sub(/\Ahttps?:/, "")
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.19.0".freeze
2
+ VERSION = "1.20.0".freeze
3
3
  end
@@ -506,3 +506,16 @@
506
506
  'www.w3.org/groups/wg/rch':
507
507
  name: RDF Dataset Canonicalization and Hash Working Group
508
508
  abbrev: RCH WG
509
+ 'www.w3.org/groups/wg/browser-tools-testing':
510
+ name: Browser Testing and Tools Working Group
511
+ 'www.w3.org/groups/wg/did':
512
+ name: Decentralized Identifier Working Group
513
+ abbrev: DID WG
514
+ 'www.w3.org/Member/Board':
515
+ name: Board of Directors
516
+ 'www.w3.org/groups/wg/webtransport':
517
+ name: WebTransport Working Group
518
+ 'www.w3.org/groups/wg/fedid':
519
+ name: Federated Identity Working Group
520
+ 'www.w3.org/groups/wg/gpu':
521
+ name: GPU for the Web Working Group
data/relaton_w3c.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.add_dependency "mechanize", "~> 2.10"
36
36
  spec.add_dependency "rdf", "~> 3.2"
37
37
  spec.add_dependency "rdf-normalize", "~> 0.6"
38
- spec.add_dependency "relaton-bib", "~> 1.19.0"
38
+ spec.add_dependency "relaton-bib", "~> 1.20.0"
39
39
  spec.add_dependency "relaton-index", "~> 0.2.8"
40
40
  spec.add_dependency "rubyzip", "~> 2.3"
41
41
  spec.add_dependency "shex", "~> 0.7"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.19.0
4
+ version: 1.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-07-04 00:00:00.000000000 Z
11
+ date: 2024-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: linkeddata
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 1.19.0
75
+ version: 1.20.0
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 1.19.0
82
+ version: 1.20.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: relaton-index
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -187,6 +187,7 @@ files:
187
187
  - lib/relaton_w3c/hit_collection.rb
188
188
  - lib/relaton_w3c/processor.rb
189
189
  - lib/relaton_w3c/pubid.rb
190
+ - lib/relaton_w3c/rdf_archive.rb
190
191
  - lib/relaton_w3c/util.rb
191
192
  - lib/relaton_w3c/version.rb
192
193
  - lib/relaton_w3c/w3c_bibliographic_item.rb
@@ -199,7 +200,7 @@ licenses:
199
200
  - BSD-2-Clause
200
201
  metadata:
201
202
  homepage_uri: https://github.com/relaton/relaton-wc3
202
- post_install_message:
203
+ post_install_message:
203
204
  rdoc_options: []
204
205
  require_paths:
205
206
  - lib
@@ -215,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
215
216
  version: '0'
216
217
  requirements: []
217
218
  rubygems_version: 3.3.27
218
- signing_key:
219
+ signing_key:
219
220
  specification_version: 4
220
221
  summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
221
222
  model'