relaton-w3c 1.18.1 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5ad3c2a2f3f43bd1b2f49932f6d380c9c27883287f7d70d7cbc9ab687ca24edd
4
- data.tar.gz: 35a4773f512ec8964b000cd1c13e29c3d0037007ad7a794c2a85972a300dc898
3
+ metadata.gz: c9c777f3bbcbff5de8311b7103f7f64dee70971f5745be10f0415200ec8a3fe9
4
+ data.tar.gz: c13ba956f36f1d4e18a846aeb5d550aad5dbd0128a63457418032958d9d503ff
5
5
  SHA512:
6
- metadata.gz: 19bcd85f66199e6615d582a00ecffe168c9528d9d21e10ad1ce3d7c216123e0bee38dc204565d44d0442f37f4acf1c1979f5804c1dcf37875b9a84c05806f910
7
- data.tar.gz: 30fb3565ec766e6103e16c9b1b29fb64ca46826b43c8367a5fb109141b0228118dd972a887393671dab4c9a3074d559e4de720574134f8949ae0bae97d3bec4d
6
+ metadata.gz: efbc142c4590df61f6b625bc1e00a29e95edad3ca97d940ea05f57caec3cf8db9fc5ed2a64ec6d5a4079ef3228fd682bae801b6c659a5afb83279d49c4d0b2e8
7
+ data.tar.gz: ef68de56840568a3e7d063a5c72876d23ae8d7f02d8cd35169e5a5b619c240edad1622738d38cd5d80642301f892e17a6f40d9fe17e33520bd523440064c9dc0
data/README.adoc CHANGED
@@ -23,24 +23,13 @@ Or install it yourself as:
23
23
 
24
24
  == Usage
25
25
 
26
- === Configuration
27
-
28
- Configuration is optional. The available option is `logger` which is a `Logger` instance. By default, the logger is `Logger.new($stderr)` with `Logger::WARN` level. To change the logger level, use `RelatonW3c.configure` block.
26
+ === Search for a standard using keywords
29
27
 
30
28
  [source,ruby]
31
29
  ----
32
30
  require 'relaton_w3c'
33
31
  => true
34
32
 
35
- RelatonW3c.configure do |config|
36
- config.logger.level = Logger::DEBUG
37
- end
38
- ----
39
-
40
- === Search for a standard using keywords
41
-
42
- [source,ruby]
43
- ----
44
33
  item = RelatonW3c::W3cBibliography.get "W3C REC-json-ld11-20200716"
45
34
  [relaton-w3c] (W3C REC-json-ld11-20200716) Fetching from Relaton repository ...
46
35
  [relaton-w3c] (W3C REC-json-ld11-20200716) Found: `REC-json-ld11-20200716`
@@ -139,6 +128,10 @@ Done in: 155 sec.
139
128
  => nil
140
129
  ----
141
130
 
131
+ === Logging
132
+
133
+ RelatonW3c uses the relaton-logger gem for logging. By default, it logs to STDOUT. To change the log levels and add other loggers, read the https://github.com/relaton/relaton-logger#usage[relaton-logger] documentation.
134
+
142
135
  == Development
143
136
 
144
137
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -2,11 +2,12 @@ require "rdf"
2
2
  require "linkeddata"
3
3
  require "sparql"
4
4
  require "mechanize"
5
- require "relaton_w3c/data_parser"
5
+ require_relative "rdf_archive"
6
+ require_relative "data_parser"
6
7
 
7
8
  module RelatonW3c
8
9
  class DataFetcher
9
- attr_reader :data, :group_names
10
+ attr_reader :data, :group_names, :rdf_archive
10
11
 
11
12
  #
12
13
  # Data fetcher initializer
@@ -20,6 +21,7 @@ module RelatonW3c
20
21
  @ext = format.sub(/^bib/, "")
21
22
  dir = File.dirname(File.expand_path(__FILE__))
22
23
  @group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
24
+ @files = Set.new
23
25
  @index = DataIndex.create_from_file
24
26
  @index1 = Relaton::Index.find_or_create :W3C, file: "index1.yaml"
25
27
  end
@@ -31,33 +33,35 @@ module RelatonW3c
31
33
  # @param [String] output directory to save files, default: "data"
32
34
  # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
33
35
  #
34
- def self.fetch(source, output: "data", format: "yaml")
36
+ def self.fetch(output: "data", format: "yaml")
35
37
  t1 = Time.now
36
38
  puts "Started at: #{t1}"
37
39
  FileUtils.mkdir_p output
38
- new(output, format).fetch source
40
+ new(output, format).fetch
39
41
  t2 = Time.now
40
42
  puts "Stopped at: #{t2}"
41
43
  puts "Done in: #{(t2 - t1).round} sec."
42
44
  end
43
45
 
46
+ def rdf_archive
47
+ @rdf_archive ||= RDFArchive.new
48
+ end
49
+
44
50
  #
45
51
  # Parse documents
46
52
  #
47
53
  # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
48
54
  #
49
- def fetch(source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
50
- each_dataset(source) do |rdf|
51
- %i[versioned unversioned].each do |type|
52
- send("query_#{type}_docs", rdf).each do |sl|
53
- bib = DataParser.parse(rdf, sl, self)
54
- add_has_edition_relation(bib) if type == :unversioned
55
- save_doc bib
56
- rescue StandardError => e
57
- link = sl.respond_to?(:link) ? sl.link : sl.version_of
58
- warn "Error: document #{link} #{e.message}"
59
- warn e.backtrace.join("\n")
60
- end
55
+ def fetch # (source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
56
+ rdf = rdf_archive.get_data
57
+ %i[versioned unversioned].each do |type|
58
+ send("query_#{type}_docs", rdf).each do |sl|
59
+ bib = DataParser.parse(rdf, sl, self)
60
+ add_has_edition_relation(bib) if type == :unversioned
61
+ save_doc bib
62
+ rescue StandardError => e
63
+ link = sl.respond_to?(:link) ? sl.link : sl.version_of
64
+ Util.error "Error: document #{link} #{e.message}\n#{e.backtrace.join("\n")}"
61
65
  end
62
66
  end
63
67
  @index.sort!.save
@@ -133,41 +137,6 @@ module RelatonW3c
133
137
  (ids1 & ids2).any?
134
138
  end
135
139
 
136
- #
137
- # Yield fetching for each dataset
138
- #
139
- # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
140
- #
141
- # @yield [RDF::Repository] RDF repository
142
- #
143
- def each_dataset(source, &_block) # rubocop:disable Metrics/MethodLength
144
- case source
145
- when "w3c-tr-archive"
146
- Dir["w3c-tr-archive/*.rdf"].map do |f|
147
- @files = []
148
- yield RDF::Repository.load(f)
149
- end
150
- when "w3c-rdf"
151
- @files = []
152
- rdf = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
153
- yield rdf
154
- # parse_static_dataset
155
- end
156
- end
157
-
158
- #
159
- # Parse static dataset
160
- #
161
- # def parse_static_dataset
162
- # Dir[File.expand_path("../../data/*", __dir__)].each do |file|
163
- # xml = File.read file, encoding: "UTF-8"
164
- # save_doc BibXMLParser.parse(xml), warn_duplicate: false
165
- # rescue StandardError => e
166
- # warn "Error: document #{file} #{e.message}"
167
- # warn e.backtrace.join("\n")
168
- # end
169
- # end
170
-
171
140
  #
172
141
  # Query RDF source for versioned documents
173
142
  #
@@ -207,24 +176,26 @@ module RelatonW3c
207
176
  #
208
177
  # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
209
178
  #
210
- def save_doc(bib, warn_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
179
+ def save_doc(bib, warn_duplicate: true)
211
180
  return unless bib
212
181
 
213
- c = case @format
214
- when "xml" then bib.to_xml(bibdata: true)
215
- when "yaml" then bib.to_hash.to_yaml
216
- else bib.send("to_#{@format}")
217
- end
218
- # id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
219
182
  file = file_name(bib.docnumber)
220
183
  if @files.include?(file)
221
- warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
184
+ Util.warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
222
185
  else
223
186
  pubid = PubId.parse bib.docnumber
224
187
  @index.add pubid, file
225
188
  @index1.add_or_update pubid.to_hash, file
226
189
  @files << file
227
- File.write file, c, encoding: "UTF-8"
190
+ end
191
+ File.write file, serialize(bib), encoding: "UTF-8"
192
+ end
193
+
194
+ def serialize(bib)
195
+ case @format
196
+ when "xml" then bib.to_xml(bibdata: true)
197
+ when "yaml" then bib.to_hash.to_yaml
198
+ else bib.send("to_#{@format}")
228
199
  end
229
200
  end
230
201
 
@@ -280,9 +280,9 @@ module RelatonW3c
280
280
  end
281
281
 
282
282
  #
283
- # Parse editor drafts relation
283
+ # Parse editor drafts links
284
284
  #
285
- # @return [Array<RelatonBib::DocumentRelation>] relation
285
+ # @return [Array<RelatonBib::TypedUri>] links
286
286
  #
287
287
  def editor_drafts # rubocop:disable Metrics/MethodLength
288
288
  return [] unless @sol.respond_to?(:link)
@@ -311,7 +311,10 @@ module RelatonW3c
311
311
  PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
312
312
  PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
313
313
  SELECT ?rel
314
- WHERE { <#{@sol.link.to_s.strip}> #{predicate} ?rel . }
314
+ WHERE {
315
+ <#{@sol.link.to_s.strip}> #{predicate} ?rel .
316
+ FILTER ( isURI(?rel) )
317
+ }
315
318
  ))
316
319
  @rdf.query(sse).order_by(:rel)
317
320
  end
@@ -371,7 +374,8 @@ module RelatonW3c
371
374
  id = pub_id(url)
372
375
  fref = RelatonBib::FormattedRef.new content: id
373
376
  docid = RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)
374
- bib = W3cBibliographicItem.new formattedref: fref, docid: [docid]
377
+ link = [RelatonBib::TypedUri.new(type: "src", content: url)]
378
+ bib = W3cBibliographicItem.new formattedref: fref, docid: [docid], link: link
375
379
  dsc = RelatonBib::FormattedString.new content: desc if desc
376
380
  RelatonBib::DocumentRelation.new(type: type, bibitem: bib, description: dsc)
377
381
  end
@@ -443,7 +447,7 @@ module RelatonW3c
443
447
  rwg = RelatonBib::WorkGroup.new name: wg["name"]
444
448
  obj << RelatonBib::TechnicalCommittee.new(rwg)
445
449
  else
446
- Util.warn "WARNING: Working group name not found for: `#{edg.home_page}`"
450
+ Util.warn "Working group name not found for: `#{edg.home_page}`"
447
451
  end
448
452
  end
449
453
  RelatonBib::EditorialGroup.new tc
@@ -9,7 +9,7 @@ module RelatonW3c
9
9
 
10
10
  def check_type(type)
11
11
  unless DOCTYPES.include? type
12
- Util.warn "WARNING: invalid doctype: `#{type}`"
12
+ Util.warn "invalid doctype: `#{type}`"
13
13
  end
14
14
  end
15
15
  end
@@ -8,5 +8,9 @@ module RelatonW3c
8
8
  def bib_item(item_hash)
9
9
  W3cBibliographicItem.new(**item_hash)
10
10
  end
11
+
12
+ def create_doctype(**args)
13
+ DocumentType.new(**args)
14
+ end
11
15
  end
12
16
  end
@@ -9,7 +9,7 @@ module RelatonW3c
9
9
  @prefix = "W3C"
10
10
  @defaultprefix = %r{^W3C\s}
11
11
  @idtype = "W3C"
12
- @datasets = %w[w3c-rdf w3c-tr-archive]
12
+ @datasets = %w[w3c-rdf]
13
13
  end
14
14
 
15
15
  # @param code [String]
@@ -28,8 +28,8 @@ module RelatonW3c
28
28
  # @option opts [String] :output directory to output documents
29
29
  # @option opts [String] :format
30
30
  #
31
- def fetch_data(source, opts)
32
- DataFetcher.fetch(source, **opts)
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
33
  end
34
34
 
35
35
  # @param xml [String]
@@ -0,0 +1,67 @@
1
require "open-uri"

module RelatonW3c
  #
  # RDF/XML archive file that is merged with the current W3C TR dataset.
  #
  # The archive is stored in a local file. When the file is missing it is
  # seeded from ARCHIVE_URL; when it is missing or older than MAX_AGE it is
  # merged with the dataset at TR_RDF_URL and written back to disk.
  #
  class RDFArchive
    # Dataset whose elements are merged into the local archive on refresh.
    TR_RDF_URL = "http://www.w3.org/2002/01/tr-automation/tr.rdf".freeze

    # Source the local archive file is downloaded from when it does not exist.
    ARCHIVE_URL = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/refs/heads/main/archive.rdf".freeze

    # Age in seconds (one day) after which the local archive file is refreshed.
    MAX_AGE = 86_400

    #
    # @param [String] file path of the local archive file
    #
    def initialize(file = "archive.rdf")
      @file = file
    end

    #
    # Get RDF data from the updated archive file.
    #
    # The file is refreshed first when it does not exist or when its
    # modification time is more than MAX_AGE seconds in the past.
    #
    # @return [RDF::Repository]
    #
    def get_data
      if !File.exist?(@file) || File.mtime(@file) < Time.now - MAX_AGE
        get_archive
        update_archive
      end
      RDF::Repository.load(@file)
    end

    private

    #
    # Merge the dataset at TR_RDF_URL into the local archive file.
    #
    # Top-level elements are matched by their `about` attribute (see
    # #about_key). Matching elements of the archive are replaced, unmatched
    # elements of the newer dataset are appended, and namespaces missing from
    # the archive root are added. The result overwrites the archive file.
    #
    def update_archive
      older = Nokogiri::XML File.read(@file, encoding: "UTF-8")
      newer = Nokogiri::XML OpenURI.open_uri(TR_RDF_URL).read

      # Index the newer elements by their scheme-independent `about` key.
      newer_elements = newer.root.element_children.each_with_object({}) do |element, index|
        about = element.attribute("about")&.value
        index[about_key(about)] = element if about
      end

      # Replace archive elements that have a newer counterpart. Deleting the
      # key leaves only the elements that are new to the archive in the hash.
      older.root.element_children.each do |element|
        about = element.attribute("about")&.value
        next unless about

        replacement = newer_elements.delete(about_key(about))
        element.replace(replacement) if replacement
      end

      # Append the elements that were not present in the archive.
      newer_elements.each_value { |element| older.root.add_child(element) }

      # Add namespaces the archive root does not declare yet (compared by href).
      newer.root.namespace_definitions.each do |ns|
        next if older.root.namespace_definitions.any? { |old_ns| old_ns.href == ns.href }

        older.root.add_namespace_definition(ns.prefix, ns.href)
      end
      File.write @file, older.to_xml, encoding: "UTF-8"
    end

    #
    # Download the archive file from ARCHIVE_URL unless it already exists.
    #
    def get_archive
      return if File.exist? @file

      File.write @file, OpenURI.open_uri(ARCHIVE_URL).read, encoding: "UTF-8"
    end

    #
    # Build the lookup key for an `about` attribute value by dropping a leading
    # `http:` or `https:` scheme, so that the same resource referenced with
    # either scheme maps to one key.
    #
    # @param [String] rdf_about value of the `about` attribute
    #
    # @return [String] value without the leading http/https scheme
    #
    def about_key(rdf_about)
      rdf_about.sub(/\Ahttps?:/, "")
    end
  end
end
@@ -1,9 +1,6 @@
1
1
  module RelatonW3c
2
2
  module Util
3
3
  extend RelatonBib::Util
4
-
5
- def self.logger
6
- RelatonW3c.configuration.logger
7
- end
4
+ PROGNAME = "relaton-w3c".freeze
8
5
  end
9
6
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.18.1".freeze
2
+ VERSION = "1.20.0".freeze
3
3
  end
@@ -39,15 +39,15 @@ module RelatonW3c
39
39
  # @param opts [Hash] options
40
40
  # @return [RelatonW3c::W3cBibliographicItem]
41
41
  def get(ref, _year = nil, _opts = {})
42
- Util.warn "(#{ref}) Fetching from Relaton repository ..."
42
+ Util.info "Fetching from Relaton repository ...", key: ref
43
43
  result = search(ref)
44
44
  unless result
45
- Util.warn "(#{ref}) Not found."
45
+ Util.info "Not found.", key: ref
46
46
  return
47
47
  end
48
48
 
49
49
  found = result.docidentifier.first.id
50
- Util.warn "(#{ref}) Found: `#{found}`"
50
+ Util.info "Found: `#{found}`", key: ref
51
51
  result
52
52
  end
53
53
  end
@@ -506,3 +506,16 @@
506
506
  'www.w3.org/groups/wg/rch':
507
507
  name: RDF Dataset Canonicalization and Hash Working Group
508
508
  abbrev: RCH WG
509
+ 'www.w3.org/groups/wg/browser-tools-testing':
510
+ name: Browser Testing and Tools Working Group
511
+ 'www.w3.org/groups/wg/did':
512
+ name: Decentralized Identifier Working Group
513
+ abbrev: DID WG
514
+ 'www.w3.org/Member/Board':
515
+ name: Board of Directors
516
+ 'www.w3.org/groups/wg/webtransport':
517
+ name: WebTransport Working Group
518
+ 'www.w3.org/groups/wg/fedid':
519
+ name: Federated Identity Working Group
520
+ 'www.w3.org/groups/wg/gpu':
521
+ name: GPU for the Web Working Group
@@ -20,6 +20,10 @@ module RelatonW3c
20
20
  def bib_item(item_hash)
21
21
  W3cBibliographicItem.new(**item_hash)
22
22
  end
23
+
24
+ def create_doctype(type)
25
+ DocumentType.new type: type.text, abbreviation: type[:abbreviation]
26
+ end
23
27
  end
24
28
  end
25
29
  end
data/lib/relaton_w3c.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require "relaton_bib"
2
2
  require "relaton/index"
3
3
  require "relaton_w3c/version"
4
- require "relaton_w3c/config"
5
4
  require "relaton_w3c/util"
6
5
  require "relaton_w3c/document_type"
7
6
  require "relaton_w3c/w3c_bibliography"
data/relaton_w3c.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.add_dependency "mechanize", "~> 2.10"
36
36
  spec.add_dependency "rdf", "~> 3.2"
37
37
  spec.add_dependency "rdf-normalize", "~> 0.6"
38
- spec.add_dependency "relaton-bib", "~> 1.18.0"
38
+ spec.add_dependency "relaton-bib", "~> 1.20.0"
39
39
  spec.add_dependency "relaton-index", "~> 0.2.8"
40
40
  spec.add_dependency "rubyzip", "~> 2.3"
41
41
  spec.add_dependency "shex", "~> 0.7"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.18.1
4
+ version: 1.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-19 00:00:00.000000000 Z
11
+ date: 2024-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: linkeddata
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 1.18.0
75
+ version: 1.20.0
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 1.18.0
82
+ version: 1.20.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: relaton-index
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -178,7 +178,6 @@ files:
178
178
  - grammars/relaton-w3c.rng
179
179
  - lib/relaton_w3c.rb
180
180
  - lib/relaton_w3c/bibxml_parser.rb
181
- - lib/relaton_w3c/config.rb
182
181
  - lib/relaton_w3c/data_fetcher.rb
183
182
  - lib/relaton_w3c/data_index.rb
184
183
  - lib/relaton_w3c/data_parser.rb
@@ -188,6 +187,7 @@ files:
188
187
  - lib/relaton_w3c/hit_collection.rb
189
188
  - lib/relaton_w3c/processor.rb
190
189
  - lib/relaton_w3c/pubid.rb
190
+ - lib/relaton_w3c/rdf_archive.rb
191
191
  - lib/relaton_w3c/util.rb
192
192
  - lib/relaton_w3c/version.rb
193
193
  - lib/relaton_w3c/w3c_bibliographic_item.rb
@@ -200,7 +200,7 @@ licenses:
200
200
  - BSD-2-Clause
201
201
  metadata:
202
202
  homepage_uri: https://github.com/relaton/relaton-wc3
203
- post_install_message:
203
+ post_install_message:
204
204
  rdoc_options: []
205
205
  require_paths:
206
206
  - lib
@@ -216,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
216
216
  version: '0'
217
217
  requirements: []
218
218
  rubygems_version: 3.3.27
219
- signing_key:
219
+ signing_key:
220
220
  specification_version: 4
221
221
  summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
222
222
  model'
@@ -1,10 +0,0 @@
1
- module RelatonW3c
2
- module Config
3
- include RelatonBib::Config
4
- end
5
- extend Config
6
-
7
- class Configuration < RelatonBib::Configuration
8
- PROGNAME = "relaton-w3c".freeze
9
- end
10
- end