relaton-w3c 1.18.1 → 1.20.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5ad3c2a2f3f43bd1b2f49932f6d380c9c27883287f7d70d7cbc9ab687ca24edd
4
- data.tar.gz: 35a4773f512ec8964b000cd1c13e29c3d0037007ad7a794c2a85972a300dc898
3
+ metadata.gz: c9c777f3bbcbff5de8311b7103f7f64dee70971f5745be10f0415200ec8a3fe9
4
+ data.tar.gz: c13ba956f36f1d4e18a846aeb5d550aad5dbd0128a63457418032958d9d503ff
5
5
  SHA512:
6
- metadata.gz: 19bcd85f66199e6615d582a00ecffe168c9528d9d21e10ad1ce3d7c216123e0bee38dc204565d44d0442f37f4acf1c1979f5804c1dcf37875b9a84c05806f910
7
- data.tar.gz: 30fb3565ec766e6103e16c9b1b29fb64ca46826b43c8367a5fb109141b0228118dd972a887393671dab4c9a3074d559e4de720574134f8949ae0bae97d3bec4d
6
+ metadata.gz: efbc142c4590df61f6b625bc1e00a29e95edad3ca97d940ea05f57caec3cf8db9fc5ed2a64ec6d5a4079ef3228fd682bae801b6c659a5afb83279d49c4d0b2e8
7
+ data.tar.gz: ef68de56840568a3e7d063a5c72876d23ae8d7f02d8cd35169e5a5b619c240edad1622738d38cd5d80642301f892e17a6f40d9fe17e33520bd523440064c9dc0
data/README.adoc CHANGED
@@ -23,24 +23,13 @@ Or install it yourself as:
23
23
 
24
24
  == Usage
25
25
 
26
- === Configuration
27
-
28
- Configuration is optional. The available option is `logger` which is a `Logger` instance. By default, the logger is `Logger.new($stderr)` with `Logger::WARN` level. To change the logger level, use `RelatonW3c.configure` block.
26
+ === Search for a standard using keywords
29
27
 
30
28
  [source,ruby]
31
29
  ----
32
30
  require 'relaton_w3c'
33
31
  => true
34
32
 
35
- RelatonW3c.configure do |config|
36
- config.logger.level = Logger::DEBUG
37
- end
38
- ----
39
-
40
- === Search for a standard using keywords
41
-
42
- [source,ruby]
43
- ----
44
33
  item = RelatonW3c::W3cBibliography.get "W3C REC-json-ld11-20200716"
45
34
  [relaton-w3c] (W3C REC-json-ld11-20200716) Fetching from Relaton repository ...
46
35
  [relaton-w3c] (W3C REC-json-ld11-20200716) Found: `REC-json-ld11-20200716`
@@ -139,6 +128,10 @@ Done in: 155 sec.
139
128
  => nil
140
129
  ----
141
130
 
131
+ === Logging
132
+
133
+ RelatonW3c uses the relaton-logger gem for logging. By default, it logs to STDOUT. To change the log levels and add other loggers, read the https://github.com/relaton/relaton-logger#usage[relaton-logger] documentation.
134
+
142
135
  == Development
143
136
 
144
137
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -2,11 +2,12 @@ require "rdf"
2
2
  require "linkeddata"
3
3
  require "sparql"
4
4
  require "mechanize"
5
- require "relaton_w3c/data_parser"
5
+ require_relative "rdf_archive"
6
+ require_relative "data_parser"
6
7
 
7
8
  module RelatonW3c
8
9
  class DataFetcher
9
- attr_reader :data, :group_names
10
+ attr_reader :data, :group_names, :rdf_archive
10
11
 
11
12
  #
12
13
  # Data fetcher initializer
@@ -20,6 +21,7 @@ module RelatonW3c
20
21
  @ext = format.sub(/^bib/, "")
21
22
  dir = File.dirname(File.expand_path(__FILE__))
22
23
  @group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
24
+ @files = Set.new
23
25
  @index = DataIndex.create_from_file
24
26
  @index1 = Relaton::Index.find_or_create :W3C, file: "index1.yaml"
25
27
  end
@@ -31,33 +33,35 @@ module RelatonW3c
31
33
  # @param [String] output directory to save files, default: "data"
32
34
  # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
33
35
  #
34
- def self.fetch(source, output: "data", format: "yaml")
36
+ def self.fetch(output: "data", format: "yaml")
35
37
  t1 = Time.now
36
38
  puts "Started at: #{t1}"
37
39
  FileUtils.mkdir_p output
38
- new(output, format).fetch source
40
+ new(output, format).fetch
39
41
  t2 = Time.now
40
42
  puts "Stopped at: #{t2}"
41
43
  puts "Done in: #{(t2 - t1).round} sec."
42
44
  end
43
45
 
46
+ def rdf_archive
47
+ @rdf_archive ||= RDFArchive.new
48
+ end
49
+
44
50
  #
45
51
  # Parse documents
46
52
  #
47
53
  # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
48
54
  #
49
- def fetch(source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
50
- each_dataset(source) do |rdf|
51
- %i[versioned unversioned].each do |type|
52
- send("query_#{type}_docs", rdf).each do |sl|
53
- bib = DataParser.parse(rdf, sl, self)
54
- add_has_edition_relation(bib) if type == :unversioned
55
- save_doc bib
56
- rescue StandardError => e
57
- link = sl.respond_to?(:link) ? sl.link : sl.version_of
58
- warn "Error: document #{link} #{e.message}"
59
- warn e.backtrace.join("\n")
60
- end
55
+ def fetch # (source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
56
+ rdf = rdf_archive.get_data
57
+ %i[versioned unversioned].each do |type|
58
+ send("query_#{type}_docs", rdf).each do |sl|
59
+ bib = DataParser.parse(rdf, sl, self)
60
+ add_has_edition_relation(bib) if type == :unversioned
61
+ save_doc bib
62
+ rescue StandardError => e
63
+ link = sl.respond_to?(:link) ? sl.link : sl.version_of
64
+ Util.error "Error: document #{link} #{e.message}\n#{e.backtrace.join("\n")}"
61
65
  end
62
66
  end
63
67
  @index.sort!.save
@@ -133,41 +137,6 @@ module RelatonW3c
133
137
  (ids1 & ids2).any?
134
138
  end
135
139
 
136
- #
137
- # Yield fetching for each dataset
138
- #
139
- # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
140
- #
141
- # @yield [RDF::Repository] RDF repository
142
- #
143
- def each_dataset(source, &_block) # rubocop:disable Metrics/MethodLength
144
- case source
145
- when "w3c-tr-archive"
146
- Dir["w3c-tr-archive/*.rdf"].map do |f|
147
- @files = []
148
- yield RDF::Repository.load(f)
149
- end
150
- when "w3c-rdf"
151
- @files = []
152
- rdf = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
153
- yield rdf
154
- # parse_static_dataset
155
- end
156
- end
157
-
158
- #
159
- # Parse static dataset
160
- #
161
- # def parse_static_dataset
162
- # Dir[File.expand_path("../../data/*", __dir__)].each do |file|
163
- # xml = File.read file, encoding: "UTF-8"
164
- # save_doc BibXMLParser.parse(xml), warn_duplicate: false
165
- # rescue StandardError => e
166
- # warn "Error: document #{file} #{e.message}"
167
- # warn e.backtrace.join("\n")
168
- # end
169
- # end
170
-
171
140
  #
172
141
  # Query RDF source for versioned documents
173
142
  #
@@ -207,24 +176,26 @@ module RelatonW3c
207
176
  #
208
177
  # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
209
178
  #
210
- def save_doc(bib, warn_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
179
+ def save_doc(bib, warn_duplicate: true)
211
180
  return unless bib
212
181
 
213
- c = case @format
214
- when "xml" then bib.to_xml(bibdata: true)
215
- when "yaml" then bib.to_hash.to_yaml
216
- else bib.send("to_#{@format}")
217
- end
218
- # id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
219
182
  file = file_name(bib.docnumber)
220
183
  if @files.include?(file)
221
- warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
184
+ Util.warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
222
185
  else
223
186
  pubid = PubId.parse bib.docnumber
224
187
  @index.add pubid, file
225
188
  @index1.add_or_update pubid.to_hash, file
226
189
  @files << file
227
- File.write file, c, encoding: "UTF-8"
190
+ end
191
+ File.write file, serialize(bib), encoding: "UTF-8"
192
+ end
193
+
194
+ def serialize(bib)
195
+ case @format
196
+ when "xml" then bib.to_xml(bibdata: true)
197
+ when "yaml" then bib.to_hash.to_yaml
198
+ else bib.send("to_#{@format}")
228
199
  end
229
200
  end
230
201
 
@@ -280,9 +280,9 @@ module RelatonW3c
280
280
  end
281
281
 
282
282
  #
283
- # Parse editor drafts relation
283
+ # Parse editor drafts links
284
284
  #
285
- # @return [Array<RelatonBib::DocumentRelation>] relation
285
+ # @return [Array<RelatonBib::TypedUri>] links
286
286
  #
287
287
  def editor_drafts # rubocop:disable Metrics/MethodLength
288
288
  return [] unless @sol.respond_to?(:link)
@@ -311,7 +311,10 @@ module RelatonW3c
311
311
  PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
312
312
  PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
313
313
  SELECT ?rel
314
- WHERE { <#{@sol.link.to_s.strip}> #{predicate} ?rel . }
314
+ WHERE {
315
+ <#{@sol.link.to_s.strip}> #{predicate} ?rel .
316
+ FILTER ( isURI(?rel) )
317
+ }
315
318
  ))
316
319
  @rdf.query(sse).order_by(:rel)
317
320
  end
@@ -371,7 +374,8 @@ module RelatonW3c
371
374
  id = pub_id(url)
372
375
  fref = RelatonBib::FormattedRef.new content: id
373
376
  docid = RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)
374
- bib = W3cBibliographicItem.new formattedref: fref, docid: [docid]
377
+ link = [RelatonBib::TypedUri.new(type: "src", content: url)]
378
+ bib = W3cBibliographicItem.new formattedref: fref, docid: [docid], link: link
375
379
  dsc = RelatonBib::FormattedString.new content: desc if desc
376
380
  RelatonBib::DocumentRelation.new(type: type, bibitem: bib, description: dsc)
377
381
  end
@@ -443,7 +447,7 @@ module RelatonW3c
443
447
  rwg = RelatonBib::WorkGroup.new name: wg["name"]
444
448
  obj << RelatonBib::TechnicalCommittee.new(rwg)
445
449
  else
446
- Util.warn "WARNING: Working group name not found for: `#{edg.home_page}`"
450
+ Util.warn "Working group name not found for: `#{edg.home_page}`"
447
451
  end
448
452
  end
449
453
  RelatonBib::EditorialGroup.new tc
@@ -9,7 +9,7 @@ module RelatonW3c
9
9
 
10
10
  def check_type(type)
11
11
  unless DOCTYPES.include? type
12
- Util.warn "WARNING: invalid doctype: `#{type}`"
12
+ Util.warn "invalid doctype: `#{type}`"
13
13
  end
14
14
  end
15
15
  end
@@ -8,5 +8,9 @@ module RelatonW3c
8
8
  def bib_item(item_hash)
9
9
  W3cBibliographicItem.new(**item_hash)
10
10
  end
11
+
12
+ def create_doctype(**args)
13
+ DocumentType.new(**args)
14
+ end
11
15
  end
12
16
  end
@@ -9,7 +9,7 @@ module RelatonW3c
9
9
  @prefix = "W3C"
10
10
  @defaultprefix = %r{^W3C\s}
11
11
  @idtype = "W3C"
12
- @datasets = %w[w3c-rdf w3c-tr-archive]
12
+ @datasets = %w[w3c-rdf]
13
13
  end
14
14
 
15
15
  # @param code [String]
@@ -28,8 +28,8 @@ module RelatonW3c
28
28
  # @option opts [String] :output directory to output documents
29
29
  # @option opts [String] :format
30
30
  #
31
- def fetch_data(source, opts)
32
- DataFetcher.fetch(source, **opts)
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
33
  end
34
34
 
35
35
  # @param xml [String]
@@ -0,0 +1,67 @@
1
+ module RelatonW3c
2
+ class RDFArchive
3
+ def initialize(file = "archive.rdf")
4
+ @file = file
5
+ end
6
+
7
+ #
8
+ # Get RDF data from the updated archive file.
9
+ #
10
+ # @return [RDF::Repository]
11
+ #
12
+ def get_data
13
+ if !File.exist?(@file) || File.mtime(@file) < Time.now - 86_400
14
+ get_archive
15
+ update_archive
16
+ end
17
+ RDF::Repository.load(@file)
18
+ end
19
+
20
+ private
21
+
22
+ def update_archive
23
+ # Load the older RDF/XML file
24
+ older = Nokogiri::XML File.read(@file, encoding: "UTF-8")
25
+
26
+ # Load the newer RDF/XML file
27
+ url = "http://www.w3.org/2002/01/tr-automation/tr.rdf"
28
+ newer = Nokogiri::XML OpenURI.open_uri(url).read
29
+
30
+ # Create a hash to store rdf:about attributes from the newer file
31
+ newer_elements = {}
32
+ newer.root.element_children.each do |element|
33
+ rdf_about = element.attribute('about')&.value
34
+ newer_elements[rdf_about.sub(/^http\s:/, "")] = element if rdf_about
35
+ end
36
+
37
+ # Replace elements in the older document
38
+ older.root.element_children.each do |element|
39
+ rdf_about = element.attribute('about')&.value
40
+ if rdf_about && newer_elements[url = rdf_about.sub(/^http\s:/, "")]
41
+ element.replace(newer_elements[url])
42
+ newer_elements.delete(url)
43
+ end
44
+ end
45
+
46
+ # Add remaining new elements to the older document
47
+ newer_elements.each_value do |element|
48
+ older.root.add_child(element)
49
+ end
50
+
51
+ # Add new namespaces from the newer document to the older document
52
+ newer.root.namespace_definitions.each do |ns|
53
+ unless older.root.namespace_definitions.any? { |old_ns| old_ns.href == ns.href }
54
+ older.root.add_namespace_definition(ns.prefix, ns.href)
55
+ end
56
+ end
57
+ File.write @file, older.to_xml, encoding: "UTF-8"
58
+ end
59
+
60
+ def get_archive
61
+ unless File.exist? @file
62
+ url = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/refs/heads/main/archive.rdf"
63
+ File.write @file, OpenURI.open_uri(url).read, encoding: "UTF-8"
64
+ end
65
+ end
66
+ end
67
+ end
@@ -1,9 +1,6 @@
1
1
  module RelatonW3c
2
2
  module Util
3
3
  extend RelatonBib::Util
4
-
5
- def self.logger
6
- RelatonW3c.configuration.logger
7
- end
4
+ PROGNAME = "relaton-w3c".freeze
8
5
  end
9
6
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.18.1".freeze
2
+ VERSION = "1.20.0".freeze
3
3
  end
@@ -39,15 +39,15 @@ module RelatonW3c
39
39
  # @param opts [Hash] options
40
40
  # @return [RelatonW3c::W3cBibliographicItem]
41
41
  def get(ref, _year = nil, _opts = {})
42
- Util.warn "(#{ref}) Fetching from Relaton repository ..."
42
+ Util.info "Fetching from Relaton repository ...", key: ref
43
43
  result = search(ref)
44
44
  unless result
45
- Util.warn "(#{ref}) Not found."
45
+ Util.info "Not found.", key: ref
46
46
  return
47
47
  end
48
48
 
49
49
  found = result.docidentifier.first.id
50
- Util.warn "(#{ref}) Found: `#{found}`"
50
+ Util.info "Found: `#{found}`", key: ref
51
51
  result
52
52
  end
53
53
  end
@@ -506,3 +506,16 @@
506
506
  'www.w3.org/groups/wg/rch':
507
507
  name: RDF Dataset Canonicalization and Hash Working Group
508
508
  abbrev: RCH WG
509
+ 'www.w3.org/groups/wg/browser-tools-testing':
510
+ name: Browser Testing and Tools Working Group
511
+ 'www.w3.org/groups/wg/did':
512
+ name: Decentralized Identifier Working Group
513
+ abbrev: DID WG
514
+ 'www.w3.org/Member/Board':
515
+ name: Board of Directors
516
+ 'www.w3.org/groups/wg/webtransport':
517
+ name: WebTransport Working Group
518
+ 'www.w3.org/groups/wg/fedid':
519
+ name: Federated Identity Working Group
520
+ 'www.w3.org/groups/wg/gpu':
521
+ name: GPU for the Web Working Group
@@ -20,6 +20,10 @@ module RelatonW3c
20
20
  def bib_item(item_hash)
21
21
  W3cBibliographicItem.new(**item_hash)
22
22
  end
23
+
24
+ def create_doctype(type)
25
+ DocumentType.new type: type.text, abbreviation: type[:abbreviation]
26
+ end
23
27
  end
24
28
  end
25
29
  end
data/lib/relaton_w3c.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require "relaton_bib"
2
2
  require "relaton/index"
3
3
  require "relaton_w3c/version"
4
- require "relaton_w3c/config"
5
4
  require "relaton_w3c/util"
6
5
  require "relaton_w3c/document_type"
7
6
  require "relaton_w3c/w3c_bibliography"
data/relaton_w3c.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.add_dependency "mechanize", "~> 2.10"
36
36
  spec.add_dependency "rdf", "~> 3.2"
37
37
  spec.add_dependency "rdf-normalize", "~> 0.6"
38
- spec.add_dependency "relaton-bib", "~> 1.18.0"
38
+ spec.add_dependency "relaton-bib", "~> 1.20.0"
39
39
  spec.add_dependency "relaton-index", "~> 0.2.8"
40
40
  spec.add_dependency "rubyzip", "~> 2.3"
41
41
  spec.add_dependency "shex", "~> 0.7"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.18.1
4
+ version: 1.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-19 00:00:00.000000000 Z
11
+ date: 2024-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: linkeddata
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 1.18.0
75
+ version: 1.20.0
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 1.18.0
82
+ version: 1.20.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: relaton-index
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -178,7 +178,6 @@ files:
178
178
  - grammars/relaton-w3c.rng
179
179
  - lib/relaton_w3c.rb
180
180
  - lib/relaton_w3c/bibxml_parser.rb
181
- - lib/relaton_w3c/config.rb
182
181
  - lib/relaton_w3c/data_fetcher.rb
183
182
  - lib/relaton_w3c/data_index.rb
184
183
  - lib/relaton_w3c/data_parser.rb
@@ -188,6 +187,7 @@ files:
188
187
  - lib/relaton_w3c/hit_collection.rb
189
188
  - lib/relaton_w3c/processor.rb
190
189
  - lib/relaton_w3c/pubid.rb
190
+ - lib/relaton_w3c/rdf_archive.rb
191
191
  - lib/relaton_w3c/util.rb
192
192
  - lib/relaton_w3c/version.rb
193
193
  - lib/relaton_w3c/w3c_bibliographic_item.rb
@@ -200,7 +200,7 @@ licenses:
200
200
  - BSD-2-Clause
201
201
  metadata:
202
202
  homepage_uri: https://github.com/relaton/relaton-wc3
203
- post_install_message:
203
+ post_install_message:
204
204
  rdoc_options: []
205
205
  require_paths:
206
206
  - lib
@@ -216,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
216
216
  version: '0'
217
217
  requirements: []
218
218
  rubygems_version: 3.3.27
219
- signing_key:
219
+ signing_key:
220
220
  specification_version: 4
221
221
  summary: 'RelatonIso: retrieve W3C Standards for bibliographic using the IsoBibliographicItem
222
222
  model'
@@ -1,10 +0,0 @@
1
- module RelatonW3c
2
- module Config
3
- include RelatonBib::Config
4
- end
5
- extend Config
6
-
7
- class Configuration < RelatonBib::Configuration
8
- PROGNAME = "relaton-w3c".freeze
9
- end
10
- end