relaton-w3c 1.19.0 → 1.20.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dde70fdc615a88c4343add641e193635a5377af68180d96122b02af747aa50e5
4
- data.tar.gz: 363d74d0a170b307006310bbbe2f6e645da40aabf12dbc2a94ebac4887f2452f
3
+ metadata.gz: c9c777f3bbcbff5de8311b7103f7f64dee70971f5745be10f0415200ec8a3fe9
4
+ data.tar.gz: c13ba956f36f1d4e18a846aeb5d550aad5dbd0128a63457418032958d9d503ff
5
5
  SHA512:
6
- metadata.gz: fec1491eaa3108fd8726dfa0af50f54b98d7c28286c936d827088eed8ee7badbe6be430dd4588ad6324936f6cce8824379750a71ac4da9aa8d88788a47fceec4
7
- data.tar.gz: e9d4074458f1a01e12ab84619861dda851ce668573d3b3e843a2c7ba8effed42e6c94545b49157626ba922ebfc31b57ff9d5877461626d2578a9ebdf128d314c
6
+ metadata.gz: efbc142c4590df61f6b625bc1e00a29e95edad3ca97d940ea05f57caec3cf8db9fc5ed2a64ec6d5a4079ef3228fd682bae801b6c659a5afb83279d49c4d0b2e8
7
+ data.tar.gz: ef68de56840568a3e7d063a5c72876d23ae8d7f02d8cd35169e5a5b619c240edad1622738d38cd5d80642301f892e17a6f40d9fe17e33520bd523440064c9dc0
@@ -2,11 +2,12 @@ require "rdf"
2
2
  require "linkeddata"
3
3
  require "sparql"
4
4
  require "mechanize"
5
- require "relaton_w3c/data_parser"
5
+ require_relative "rdf_archive"
6
+ require_relative "data_parser"
6
7
 
7
8
  module RelatonW3c
8
9
  class DataFetcher
9
- attr_reader :data, :group_names
10
+ attr_reader :data, :group_names, :rdf_archive
10
11
 
11
12
  #
12
13
  # Data fetcher initializer
@@ -20,6 +21,7 @@ module RelatonW3c
20
21
  @ext = format.sub(/^bib/, "")
21
22
  dir = File.dirname(File.expand_path(__FILE__))
22
23
  @group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
24
+ @files = Set.new
23
25
  @index = DataIndex.create_from_file
24
26
  @index1 = Relaton::Index.find_or_create :W3C, file: "index1.yaml"
25
27
  end
@@ -31,32 +33,35 @@ module RelatonW3c
31
33
  # @param [String] output directory to save files, default: "data"
32
34
  # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
33
35
  #
34
- def self.fetch(source, output: "data", format: "yaml")
36
+ def self.fetch(output: "data", format: "yaml")
35
37
  t1 = Time.now
36
38
  puts "Started at: #{t1}"
37
39
  FileUtils.mkdir_p output
38
- new(output, format).fetch source
40
+ new(output, format).fetch
39
41
  t2 = Time.now
40
42
  puts "Stopped at: #{t2}"
41
43
  puts "Done in: #{(t2 - t1).round} sec."
42
44
  end
43
45
 
46
+ def rdf_archive
47
+ @rdf_archive ||= RDFArchive.new
48
+ end
49
+
44
50
  #
45
51
  # Parse documents
46
52
  #
47
53
  # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
48
54
  #
49
- def fetch(source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
50
- each_dataset(source) do |rdf|
51
- %i[versioned unversioned].each do |type|
52
- send("query_#{type}_docs", rdf).each do |sl|
53
- bib = DataParser.parse(rdf, sl, self)
54
- add_has_edition_relation(bib) if type == :unversioned
55
- save_doc bib
56
- rescue StandardError => e
57
- link = sl.respond_to?(:link) ? sl.link : sl.version_of
58
- Util.error "Error: document #{link} #{e.message}\n#{e.backtrace.join("\n")}"
59
- end
55
+ def fetch # (source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
56
+ rdf = rdf_archive.get_data
57
+ %i[versioned unversioned].each do |type|
58
+ send("query_#{type}_docs", rdf).each do |sl|
59
+ bib = DataParser.parse(rdf, sl, self)
60
+ add_has_edition_relation(bib) if type == :unversioned
61
+ save_doc bib
62
+ rescue StandardError => e
63
+ link = sl.respond_to?(:link) ? sl.link : sl.version_of
64
+ Util.error "Error: document #{link} #{e.message}\n#{e.backtrace.join("\n")}"
60
65
  end
61
66
  end
62
67
  @index.sort!.save
@@ -132,41 +137,6 @@ module RelatonW3c
132
137
  (ids1 & ids2).any?
133
138
  end
134
139
 
135
- #
136
- # Yield fetching for each dataset
137
- #
138
- # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
139
- #
140
- # @yield [RDF::Repository] RDF repository
141
- #
142
- def each_dataset(source, &_block) # rubocop:disable Metrics/MethodLength
143
- case source
144
- when "w3c-tr-archive"
145
- Dir["w3c-tr-archive/*.rdf"].map do |f|
146
- @files = []
147
- yield RDF::Repository.load(f)
148
- end
149
- when "w3c-rdf"
150
- @files = []
151
- rdf = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
152
- yield rdf
153
- # parse_static_dataset
154
- end
155
- end
156
-
157
- #
158
- # Parse static dataset
159
- #
160
- # def parse_static_dataset
161
- # Dir[File.expand_path("../../data/*", __dir__)].each do |file|
162
- # xml = File.read file, encoding: "UTF-8"
163
- # save_doc BibXMLParser.parse(xml), warn_duplicate: false
164
- # rescue StandardError => e
165
- # warn "Error: document #{file} #{e.message}"
166
- # warn e.backtrace.join("\n")
167
- # end
168
- # end
169
-
170
140
  #
171
141
  # Query RDF source for versioned documents
172
142
  #
@@ -206,15 +176,9 @@ module RelatonW3c
206
176
  #
207
177
  # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
208
178
  #
209
- def save_doc(bib, warn_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
179
+ def save_doc(bib, warn_duplicate: true)
210
180
  return unless bib
211
181
 
212
- c = case @format
213
- when "xml" then bib.to_xml(bibdata: true)
214
- when "yaml" then bib.to_hash.to_yaml
215
- else bib.send("to_#{@format}")
216
- end
217
- # id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
218
182
  file = file_name(bib.docnumber)
219
183
  if @files.include?(file)
220
184
  Util.warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
@@ -223,7 +187,15 @@ module RelatonW3c
223
187
  @index.add pubid, file
224
188
  @index1.add_or_update pubid.to_hash, file
225
189
  @files << file
226
- File.write file, c, encoding: "UTF-8"
190
+ end
191
+ File.write file, serialize(bib), encoding: "UTF-8"
192
+ end
193
+
194
+ def serialize(bib)
195
+ case @format
196
+ when "xml" then bib.to_xml(bibdata: true)
197
+ when "yaml" then bib.to_hash.to_yaml
198
+ else bib.send("to_#{@format}")
227
199
  end
228
200
  end
229
201
 
@@ -280,9 +280,9 @@ module RelatonW3c
280
280
  end
281
281
 
282
282
  #
283
- # Parse editor drafts relation
283
+ # Parse editor drafts links
284
284
  #
285
- # @return [Array<RelatonBib::DocumentRelation>] relation
285
+ # @return [Array<RelatonBib::TypedUri>] links
286
286
  #
287
287
  def editor_drafts # rubocop:disable Metrics/MethodLength
288
288
  return [] unless @sol.respond_to?(:link)
@@ -311,7 +311,10 @@ module RelatonW3c
311
311
  PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
312
312
  PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
313
313
  SELECT ?rel
314
- WHERE { <#{@sol.link.to_s.strip}> #{predicate} ?rel . }
314
+ WHERE {
315
+ <#{@sol.link.to_s.strip}> #{predicate} ?rel .
316
+ FILTER ( isURI(?rel) )
317
+ }
315
318
  ))
316
319
  @rdf.query(sse).order_by(:rel)
317
320
  end
@@ -371,7 +374,8 @@ module RelatonW3c
371
374
  id = pub_id(url)
372
375
  fref = RelatonBib::FormattedRef.new content: id
373
376
  docid = RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)
374
- bib = W3cBibliographicItem.new formattedref: fref, docid: [docid]
377
+ link = [RelatonBib::TypedUri.new(type: "src", content: url)]
378
+ bib = W3cBibliographicItem.new formattedref: fref, docid: [docid], link: link
375
379
  dsc = RelatonBib::FormattedString.new content: desc if desc
376
380
  RelatonBib::DocumentRelation.new(type: type, bibitem: bib, description: dsc)
377
381
  end
@@ -9,7 +9,7 @@ module RelatonW3c
9
9
  @prefix = "W3C"
10
10
  @defaultprefix = %r{^W3C\s}
11
11
  @idtype = "W3C"
12
- @datasets = %w[w3c-rdf w3c-tr-archive]
12
+ @datasets = %w[w3c-rdf]
13
13
  end
14
14
 
15
15
  # @param code [String]
@@ -28,8 +28,8 @@ module RelatonW3c
28
28
  # @option opts [String] :output directory to output documents
29
29
  # @option opts [String] :format
30
30
  #
31
- def fetch_data(source, opts)
32
- DataFetcher.fetch(source, **opts)
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
33
  end
34
34
 
35
35
  # @param xml [String]
@@ -0,0 +1,67 @@
module RelatonW3c
  #
  # Keeps a local RDF/XML archive file and merges the live W3C "tr.rdf"
  # feed into it before the data is loaded.
  #
  class RDFArchive
    # Live feed that is merged into the local archive.
    TR_RDF_URL = "http://www.w3.org/2002/01/tr-automation/tr.rdf".freeze

    # Seed archive downloaded when no local archive file exists yet.
    ARCHIVE_URL = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/refs/heads/main/archive.rdf".freeze

    # Age in seconds (one day) after which the local file is refreshed.
    MAX_AGE = 86_400

    #
    # @param [String] file path of the local archive file
    #
    def initialize(file = "archive.rdf")
      @file = file
    end

    #
    # Get RDF data from the updated archive file.
    #
    # The local file is (re)built first when it is missing or was last
    # modified more than MAX_AGE seconds ago.
    #
    # @return [RDF::Repository]
    #
    def get_data
      if !File.exist?(@file) || File.mtime(@file) < Time.now - MAX_AGE
        get_archive
        update_archive
      end
      RDF::Repository.load(@file)
    end

    private

    #
    # Merge the live feed into the local archive and write the result back.
    #
    # Top-level elements are matched by their `about` attribute with the
    # URL scheme removed. A matching archive element is replaced by the feed
    # element; feed elements without a match are appended. Namespace
    # definitions whose href is unknown to the archive root are copied over.
    #
    def update_archive
      older = Nokogiri::XML File.read(@file, encoding: "UTF-8")
      # Block form closes the underlying IO/tempfile once the body is read.
      newer = Nokogiri::XML OpenURI.open_uri(TR_RDF_URL, &:read)

      # Index the feed elements by scheme-less `about` value.
      newer_elements = newer.root.element_children.each_with_object({}) do |element, index|
        key = about_key(element)
        index[key] = element if key
      end

      # Replace archive elements that also exist in the feed. Deleting the
      # key leaves only the feed elements that still have to be appended.
      older.root.element_children.each do |element|
        key = about_key(element)
        replacement = key && newer_elements.delete(key)
        element.replace(replacement) if replacement
      end

      # Add remaining new elements to the older document.
      newer_elements.each_value { |element| older.root.add_child(element) }

      # Add new namespaces from the newer document to the older document.
      newer.root.namespace_definitions.each do |ns|
        next if older.root.namespace_definitions.any? { |old_ns| old_ns.href == ns.href }

        older.root.add_namespace_definition(ns.prefix, ns.href)
      end

      File.write @file, older.to_xml, encoding: "UTF-8"
    end

    #
    # Download the seed archive unless the local file already exists.
    #
    def get_archive
      return if File.exist? @file

      File.write @file, OpenURI.open_uri(ARCHIVE_URL, &:read), encoding: "UTF-8"
    end

    #
    # Lookup key of an element: its `about` attribute without the scheme.
    #
    # @param [Nokogiri::XML::Element] element
    #
    # @return [String, nil] key, or nil when the element has no `about` attribute
    #
    def about_key(element)
      about = element.attribute("about")&.value
      about && strip_scheme(about)
    end

    #
    # Remove a leading "http:" or "https:" so that both variants of the same
    # address produce the same key.
    #
    # @param [String] uri
    #
    # @return [String] uri without the HTTP(S) scheme; other strings unchanged
    #
    def strip_scheme(uri)
      uri.sub(/\Ahttps?:/, "")
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.19.0".freeze
2
+ VERSION = "1.20.0".freeze
3
3
  end
@@ -506,3 +506,16 @@
506
506
  'www.w3.org/groups/wg/rch':
507
507
  name: RDF Dataset Canonicalization and Hash Working Group
508
508
  abbrev: RCH WG
509
+ 'www.w3.org/groups/wg/browser-tools-testing':
510
+ name: Browser Testing and Tools Working Group
511
+ 'www.w3.org/groups/wg/did':
512
+ name: Decentralized Identifier Working Group
513
+ abbrev: DID WG
514
+ 'www.w3.org/Member/Board':
515
+ name: Board of Directors
516
+ 'www.w3.org/groups/wg/webtransport':
517
+ name: WebTransport Working Group
518
+ 'www.w3.org/groups/wg/fedid':
519
+ name: Federated Identity Working Group
520
+ 'www.w3.org/groups/wg/gpu':
521
+ name: GPU for the Web Working Group
data/relaton_w3c.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.add_dependency "mechanize", "~> 2.10"
36
36
  spec.add_dependency "rdf", "~> 3.2"
37
37
  spec.add_dependency "rdf-normalize", "~> 0.6"
38
- spec.add_dependency "relaton-bib", "~> 1.19.0"
38
+ spec.add_dependency "relaton-bib", "~> 1.20.0"
39
39
  spec.add_dependency "relaton-index", "~> 0.2.8"
40
40
  spec.add_dependency "rubyzip", "~> 2.3"
41
41
  spec.add_dependency "shex", "~> 0.7"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.19.0
4
+ version: 1.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-07-04 00:00:00.000000000 Z
11
+ date: 2024-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: linkeddata
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 1.19.0
75
+ version: 1.20.0
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 1.19.0
82
+ version: 1.20.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: relaton-index
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -187,6 +187,7 @@ files:
187
187
  - lib/relaton_w3c/hit_collection.rb
188
188
  - lib/relaton_w3c/processor.rb
189
189
  - lib/relaton_w3c/pubid.rb
190
+ - lib/relaton_w3c/rdf_archive.rb
190
191
  - lib/relaton_w3c/util.rb
191
192
  - lib/relaton_w3c/version.rb
192
193
  - lib/relaton_w3c/w3c_bibliographic_item.rb
@@ -199,7 +200,7 @@ licenses:
199
200
  - BSD-2-Clause
200
201
  metadata:
201
202
  homepage_uri: https://github.com/relaton/relaton-wc3
202
- post_install_message:
203
+ post_install_message:
203
204
  rdoc_options: []
204
205
  require_paths:
205
206
  - lib
@@ -215,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
215
216
  version: '0'
216
217
  requirements: []
217
218
  rubygems_version: 3.3.27
218
- signing_key:
219
+ signing_key:
219
220
  specification_version: 4
220
221
  summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
221
222
  model'