relaton-w3c 1.19.0 → 1.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  module RelatonW3c
2
2
  class DataParser
3
+ include RelatonW3c::RateLimitHandler
4
+
3
5
  USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
4
6
 
5
7
  DOCTYPES = {
@@ -22,25 +24,22 @@ module RelatonW3c
22
24
  #
23
25
  # Document parser initialization
24
26
  #
25
- # @param [RDF::Query::Solution] sol entry from the SPARQL query
27
+ # @param [W3cApi::Models::SpecVersion] spec specification version from the W3C API
26
28
  # @param [RelatonW3c::DataFetcher] fetcher data fetcher
27
29
  #
28
- def initialize(rdf, sol, fetcher)
29
- @rdf = rdf
30
- @sol = sol
31
- @fetcher = fetcher
30
+ def initialize(spec)
31
+ @spec = spec
32
32
  end
33
33
 
34
34
  #
35
35
  # Initialize document parser and run it
36
36
  #
37
- # @param [RDF::Query::Solution] sol entry from the SPARQL query
38
- # @param [RelatonW3c::DataFetcher] fetcher data fetcher
37
+ # @param [W3cApi::Models::SpecVersion] spec specification version from the W3C API
39
38
  #
40
39
  # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
41
40
  #
42
- def self.parse(rdf, sol, fetcher)
43
- new(rdf, sol, fetcher).parse
41
+ def self.parse(spec)
42
+ new(spec).parse
44
43
  end
45
44
 
46
45
  #
@@ -49,7 +48,7 @@ module RelatonW3c
49
48
  # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
50
49
  #
51
50
  def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
52
- return if @sol.respond_to?(:link) && !types_stages.detect { |ts| USED_TYPES.include?(ts) }
51
+ # return if @sol.respond_to?(:link) && !types_stages.detect { |ts| USED_TYPES.include?(ts) }
53
52
 
54
53
  RelatonW3c::W3cBibliographicItem.new(
55
54
  type: "standard",
@@ -76,8 +75,10 @@ module RelatonW3c
76
75
  # @return [RelatonBib::DocumentStatus, nil] document status
77
76
  #
78
77
  def parse_docstatus
79
- stage = types_stages&.detect { |st| STAGES.include?(st) }
80
- RelatonBib::DocumentStatus.new stage: STAGES[stage] if stage
78
+ # stage = types_stages&.detect { |st| STAGES.include?(st) }
79
+ return unless @spec.respond_to?(:status) && @spec.status
80
+
81
+ RelatonBib::DocumentStatus.new stage: @spec.status
81
82
  end
82
83
 
83
84
  #
@@ -85,22 +86,22 @@ module RelatonW3c
85
86
  #
86
87
  # @return [RelatonBib::TypedTitleStringCollection] title
87
88
  #
88
- def parse_title
89
- content = if @sol.respond_to?(:title) then @sol.title.to_s
90
- else document_versions.max_by { |dv| dv.date.to_s }.title.to_s
91
- end
92
- t = RelatonBib::TypedTitleString.new content: content
89
+ def parse_title(spec = @spec)
90
+ t = RelatonBib::TypedTitleString.new content: spec.title
93
91
  RelatonBib::TypedTitleStringCollection.new [t]
94
92
  end
95
93
 
94
+ def doc_uri(spec = @spec)
95
+ spec.respond_to?(:uri) ? spec.uri : spec.shortlink
96
+ end
97
+
96
98
  #
97
99
  # Parse link
98
100
  #
99
101
  # @return [Array<RelatonBib::TypedUri>] link
100
102
  #
101
103
  def parse_link
102
- link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
103
- [RelatonBib::TypedUri.new(type: "src", content: link.to_s.strip)] + editor_drafts
104
+ [RelatonBib::TypedUri.new(type: "src", content: doc_uri)] # + editor_drafts
104
105
  end
105
106
 
106
107
  #
@@ -109,14 +110,14 @@ module RelatonW3c
109
110
  # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
110
111
  #
111
112
  def parse_docid
112
- id = @sol.respond_to?(:link) ? pub_id(@sol.link) : pub_id(@sol.version_of)
113
+ id = pub_id(doc_uri)
113
114
  [RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)]
114
115
  end
115
116
 
116
117
  #
117
118
  # Generate PubID
118
119
  #
119
- # @return [RDF::URI] PubID
120
+ # @return [String] PubID
120
121
  #
121
122
  def pub_id(url)
122
123
  "W3C #{identifier(url)}"
@@ -125,13 +126,12 @@ module RelatonW3c
125
126
  #
126
127
  # Generate identifier from URL
127
128
  #
128
- # @param [RDF::URI, nil] link
129
+ # @param [String] link
129
130
  #
130
131
  # @return [String] identifier
131
132
  #
132
- def identifier(link = nil)
133
- url = link || (@sol.respond_to?(:link) ? @sol.link : @sol.version_of)
134
- self.class.parse_identifier(url.to_s.strip)
133
+ def identifier(link = doc_uri)
134
+ self.class.parse_identifier(link)
135
135
  end
136
136
 
137
137
  #
@@ -166,54 +166,9 @@ module RelatonW3c
166
166
  # @return [String] type
167
167
  #
168
168
  def type
169
- # thre are many types, we need to find the right one
170
- @type ||= types_stages&.detect { |t| USED_TYPES.include?(t) } || "technicalReport"
171
- end
172
-
173
- #
174
- # Fetches types and stages
175
- #
176
- # @return [Array<String>] types and stages
177
- #
178
- def types_stages
179
- @types_stages ||= begin
180
- sse = @sol.respond_to?(:link) ? versioned_types_stages : unversioned_types_stages
181
- @rdf.query(sse).map { |s| s.type.to_s.split("#").last }
182
- end
183
- end
184
-
185
- #
186
- # Create SPARQL query for versioned types and stages
187
- #
188
- # @return [SPARQL::Algebra::Operator::Prefix] SPARQL query
189
- #
190
- def versioned_types_stages
191
- SPARQL.parse(%(
192
- PREFIX : <http://www.w3.org/2001/02pd/rec54#>
193
- PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
194
- SELECT ?type
195
- WHERE {
196
- { <#{@sol.link.to_s.strip}> rdf:type ?type }
197
- }
198
- ))
199
- end
200
-
201
- #
202
- # Create SPARQL query for unversioned types and stages
203
- #
204
- # @return [SPARQL::Algebra::Operator::Prefix] SPARQL query
205
- #
206
- def unversioned_types_stages
207
- SPARQL.parse(%(
208
- PREFIX : <http://www.w3.org/2001/02pd/rec54#>
209
- PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
210
- PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
211
- SELECT ?type
212
- WHERE {
213
- ?link doc:versionOf <#{@sol.version_of}>; rdf:type ?type .
214
- FILTER ( isURI(?link) && STR(?link) != <#{@sol.version_of}> )
215
- }
216
- ))
169
+ # there are many types, we need to find the right one
170
+ # @type ||= types_stages&.detect { |t| USED_TYPES.include?(t) } || "technicalReport"
171
+ @type ||= @spec.respond_to?(:status) ? @spec.status : "technicalReport"
217
172
  end
218
173
 
219
174
  #
@@ -222,8 +177,8 @@ module RelatonW3c
222
177
  # @return [String, nil] doctype
223
178
  #
224
179
  def parse_doctype
225
- type = DOCTYPES[type] || DOCTYPES[type_from_link]
226
- DocumentType.new(type: type) if type
180
+ t = DOCTYPES[type] || DOCTYPES[type_from_link]
181
+ DocumentType.new(type: t) if t
227
182
  end
228
183
 
229
184
  #
@@ -232,8 +187,8 @@ module RelatonW3c
232
187
  # @return [String, nil] type
233
188
  #
234
189
  def type_from_link
235
- link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
236
- link.to_s.strip.match(/www\.w3\.org\/(TR)/)&.to_a&.fetch 1
190
+ # link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
191
+ @spec.shortlink.strip.match(/www\.w3\.org\/(TR)/)&.to_a&.fetch 1
237
192
  end
238
193
 
239
194
  #
@@ -242,9 +197,9 @@ module RelatonW3c
242
197
  # @return [Array<RelatonBib::BibliographicDate>] date
243
198
  #
244
199
  def parse_date
245
- return [] unless @sol.respond_to?(:date)
200
+ return [] unless @spec.respond_to?(:date)
246
201
 
247
- [RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
202
+ [RelatonBib::BibliographicDate.new(type: "published", on: @spec.date.to_date.to_s)]
248
203
  end
249
204
 
250
205
  #
@@ -253,10 +208,11 @@ module RelatonW3c
253
208
  # @return [Array<RelatonBib::DocumentRelation>] relation
254
209
  #
255
210
  def parse_relation
256
- if @sol.respond_to?(:link)
257
- relations
211
+ if @spec.links.respond_to?(:version_history)
212
+ version_history = realize @spec.links.version_history
213
+ version_history.links.spec_versions.map { |version| create_relation(version, "hasEdition") }
258
214
  else
259
- document_versions.map { |r| create_relation(r.link.to_s.strip, "hasEdition") }
215
+ relations
260
216
  end
261
217
  end
262
218
 
@@ -266,96 +222,21 @@ module RelatonW3c
266
222
  # @return [Array<RelatonBib::DocumentRelation>] relations
267
223
  #
268
224
  def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
269
- {
270
- "doc:obsoletes" => { type: "obsoletes" },
271
- "mat:hasErrata" => { type: "updatedBy", description: "errata" },
272
- # "mat:hasTranslations" => "hasTranslation",
273
- # "mat:hasImplReport" => "hasImpReport",
274
- ":previousEdition" => { type: "editionOf" },
275
- }.reduce([]) do |acc, (predicate, tp)|
276
- acc + relation_query(predicate).map do |r|
277
- create_relation(r.rel.to_s, tp[:type], tp[:description])
225
+ rels = []
226
+ rels << create_relation(@spec.links.specification, "editionOf") if @spec.links.respond_to?(:specification)
227
+ if @spec.links.respond_to?(:predecessor_versions) && @spec.links.predecessor_versions
228
+ predecessor_versions = realize @spec.links.predecessor_versions
229
+ predecessor_versions.links.predecessor_versions.each do |version|
230
+ rels << create_relation(version, "obsoletes")
278
231
  end
279
232
  end
280
- end
281
-
282
- #
283
- # Parse editor drafts relation
284
- #
285
- # @return [Array<RelatonBib::DocumentRelation>] relation
286
- #
287
- def editor_drafts # rubocop:disable Metrics/MethodLength
288
- return [] unless @sol.respond_to?(:link)
289
-
290
- sse = SPARQL.parse(%(
291
- PREFIX : <http://www.w3.org/2001/02pd/rec54#>
292
- PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
293
- SELECT ?latest
294
- WHERE { <#{@sol.link.to_s.strip}> :ED ?latest . }
295
- ))
296
- @rdf.query(sse).map do |s|
297
- RelatonBib::TypedUri.new(type: "current", content: s.latest.to_s.strip)
298
- end
299
- end
300
-
301
- #
302
- # Query for relations
303
- #
304
- # @param [String] predicate relation type
305
- #
306
- # @return [RDF::Query::Solutions] query result
307
- #
308
- def relation_query(predicate)
309
- sse = SPARQL.parse(%(
310
- PREFIX : <http://www.w3.org/2001/02pd/rec54#>
311
- PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
312
- PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
313
- SELECT ?rel
314
- WHERE { <#{@sol.link.to_s.strip}> #{predicate} ?rel . }
315
- ))
316
- @rdf.query(sse).order_by(:rel)
317
- end
318
-
319
- #
320
- # Query document versions relations
321
- #
322
- # @return [Array<RDF::Query::Solution>] query results
323
- #
324
- def document_versions # rubocop:disable Metrics/MethodLength
325
- @document_versions ||= version_of.each_with_object([]) do |s, acc|
326
- sse = SPARQL.parse(%(
327
- PREFIX : <http://www.w3.org/2001/02pd/rec54#>
328
- PREFIX dc: <http://purl.org/dc/elements/1.1/>
329
- PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
330
- PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
331
- SELECT ?link ?title ?date
332
- WHERE {
333
- ?link doc:versionOf <#{s.version_of}> ;
334
- dc:title ?title ;
335
- dc:date ?date .
336
- }
337
- ))
338
- @rdf.query(sse).each { |r| acc << r }
233
+ if @spec.links.respond_to?(:successor_versions) && @spec.links.successor_versions
234
+ successor_versions = realize @spec.links.successor_versions
235
+ successor_versions.links.successor_versions.each do |version|
236
+ rels << create_relation(version, "updatedBy", "errata")
237
+ end
339
238
  end
340
- end
341
-
342
- #
343
- # Query for document versions
344
- #
345
- # @return [RDF::Query::Solutions] query results
346
- #
347
- def version_of
348
- return [@sol] unless @sol.respond_to?(:link)
349
-
350
- sse = SPARQL.parse(%(
351
- PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
352
- SELECT ?version_of
353
- WHERE {
354
- <#{@sol.link.to_s.strip}> doc:versionOf ?version_of .
355
- FILTER ( isURI(?version_of) && <#{@sol.link.to_s.strip}> != str(?version_of) )
356
- }
357
- ))
358
- @rdf.query(sse)
239
+ rels
359
240
  end
360
241
 
361
242
  #
@@ -367,11 +248,15 @@ module RelatonW3c
367
248
  #
368
249
  # @return [RelatonBib::DocumentRelation] document relation
369
250
  #
370
- def create_relation(url, type, desc = nil)
251
+ def create_relation(version, type, desc = nil)
252
+ version_spec = realize version
253
+ url = doc_uri(version_spec)
371
254
  id = pub_id(url)
372
- fref = RelatonBib::FormattedRef.new content: id
255
+ # fref = RelatonBib::FormattedRef.new content: id
256
+ title = parse_title(version_spec)
373
257
  docid = RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)
374
- bib = W3cBibliographicItem.new formattedref: fref, docid: [docid]
258
+ link = [RelatonBib::TypedUri.new(type: "src", content: url)]
259
+ bib = W3cBibliographicItem.new title: title, docid: [docid], link: link
375
260
  dsc = RelatonBib::FormattedString.new content: desc if desc
376
261
  RelatonBib::DocumentRelation.new(type: type, bibitem: bib, description: dsc)
377
262
  end
@@ -382,9 +267,9 @@ module RelatonW3c
382
267
  # @return [RelatonBib::FormattedRef] formattedref
383
268
  #
384
269
  def parse_formattedref
385
- return if @sol.respond_to?(:link)
270
+ return unless @spec.respond_to?(:uri)
386
271
 
387
- RelatonBib::FormattedRef.new(content: pub_id(@sol.version_of))
272
+ RelatonBib::FormattedRef.new(content: pub_id(@spec.uri))
388
273
  end
389
274
 
390
275
  #
@@ -397,26 +282,27 @@ module RelatonW3c
397
282
  name: "World Wide Web Consortium", abbreviation: "W3C", url: "https://www.w3.org/"
398
283
  )
399
284
  contribs = [RelatonBib::ContributionInfo.new(entity: publisher, role: [type: "publisher"])]
400
- return contribs unless @sol.respond_to?(:link)
401
-
402
- sse = SPARQL.parse(%(
403
- PREFIX : <http://www.w3.org/2001/02pd/rec54#>
404
- PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
405
- SELECT ?full_name
406
- WHERE {
407
- <#{@sol.link.to_s.strip}> :editor/contact:fullName ?full_name
408
- }
409
- ))
410
- @rdf.query(sse).order_by(:full_name).each_with_object(contribs) do |ed, obj|
411
- obj << create_editor(ed.full_name.to_s)
285
+
286
+ if @spec.links.respond_to?(:editors)
287
+ editors = realize @spec.links.editors
288
+ editors.links.editors&.each do |ed|
289
+ editor = create_editor(ed)
290
+ contribs << editor if editor
291
+ end
412
292
  end
293
+
294
+ contribs
413
295
  end
414
296
 
415
- def create_editor(name)
416
- cn = RelatonBib::LocalizedString.new(name, "en", "Latn")
417
- n = RelatonBib::FullName.new completename: cn
418
- p = RelatonBib::Person.new name: n
419
- RelatonBib::ContributionInfo.new(entity: p, role: [type: "editor"])
297
+ def create_editor(unrealized_editor)
298
+ editor = realize unrealized_editor
299
+ return unless editor
300
+
301
+ surname = RelatonBib::LocalizedString.new(editor.family, "en", "Latn")
302
+ forename = RelatonBib::Forename.new(content: editor.given, language: "en", script: "Latn")
303
+ name = RelatonBib::FullName.new surname: surname, forename: [forename]
304
+ person = RelatonBib::Person.new name: name
305
+ RelatonBib::ContributionInfo.new(entity: person, role: [type: "editor"])
420
306
  end
421
307
 
422
308
  #
@@ -425,27 +311,16 @@ module RelatonW3c
425
311
  # @return [RelatonBib::EditorialGroup] editorialgroup
426
312
  #
427
313
  def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
428
- return unless @sol.respond_to?(:link)
429
-
430
- sse = SPARQL.parse(%(
431
- PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
432
- PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
433
- SELECT ?home_page
434
- WHERE {
435
- <#{@sol.link.to_s.strip}> org:deliveredBy/contact:homePage ?home_page
436
- }
437
- ))
438
- res = @rdf.query(sse).order_by(:home_page)
439
- tc = res.each_with_object([]) do |edg, obj|
440
- group_path = edg.home_page.to_s.sub(/^https?:\/\//, "").sub(/\/$/, "")
441
- wg = @fetcher.group_names[group_path]
442
- if wg
443
- rwg = RelatonBib::WorkGroup.new name: wg["name"]
444
- obj << RelatonBib::TechnicalCommittee.new(rwg)
445
- else
446
- Util.warn "Working group name not found for: `#{edg.home_page}`"
447
- end
314
+ return unless @spec.links.respond_to?(:deliverers)
315
+
316
+ deliverers = realize @spec.links.deliverers
317
+ return unless deliverers.links.deliverers
318
+
319
+ tc = deliverers.links.deliverers.map do |edg|
320
+ wg = RelatonBib::WorkGroup.new(name: edg.title)
321
+ RelatonBib::TechnicalCommittee.new(wg)
448
322
  end
323
+
449
324
  RelatonBib::EditorialGroup.new tc
450
325
  end
451
326
  end
@@ -9,7 +9,7 @@ module RelatonW3c
9
9
  @prefix = "W3C"
10
10
  @defaultprefix = %r{^W3C\s}
11
11
  @idtype = "W3C"
12
- @datasets = %w[w3c-rdf w3c-tr-archive]
12
+ @datasets = %w[w3c-api]
13
13
  end
14
14
 
15
15
  # @param code [String]
@@ -28,8 +28,8 @@ module RelatonW3c
28
28
  # @option opts [String] :output directory to output documents
29
29
  # @option opts [String] :format
30
30
  #
31
- def fetch_data(source, opts)
32
- DataFetcher.fetch(source, **opts)
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
33
  end
34
34
 
35
35
  # @param xml [String]
@@ -0,0 +1,32 @@
1
module RelatonW3c
  #
  # Mixin that realizes link objects through a shared cache and retries a
  # bounded number of times on connection errors.
  #
  module RateLimitHandler
    # Total number of attempts made for one object before giving up.
    MAX_ATTEMPTS = 5

    #
    # Cache of realized objects shared by every includer of this module
    #
    # @return [Hash] realized object (or nil when not found) by href
    #
    def self.fetched_objects
      @fetched_objects ||= {}
    end

    #
    # Realize a link object, reusing the cached result for the same href
    #
    # @param [#href, #links, #realize] obj link object to realize
    #
    # @return [Object, nil] realized object, nil when it is reported as not found
    #
    # @raise [NameError, Faraday::ConnectionFailed, Net::OpenTimeout] the last
    #   error once MAX_ATTEMPTS attempts have failed
    #
    def realize(obj) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      href = obj.href || obj.links.self.href
      cache = RateLimitHandler.fetched_objects
      return cache[href] if cache.key?(href)

      attempt = 1
      begin
        cache[href] = obj.realize
      rescue NameError, # NameError caused by lutaml-hal-0.1.7/lib/lutaml/hal/client.rb:51:in `rescue in get': uninitialized constant Lutaml::Hal::Client::ConnectionError
             Faraday::ConnectionFailed, Net::OpenTimeout => e
        # NoMethodError is a subclass of NameError. It signals a programming
        # error rather than a connection problem, so retrying it would only
        # delay the failure by the accumulated sleep time.
        raise if e.is_a?(NoMethodError)

        if attempt >= MAX_ATTEMPTS
          Util.warn "Failed to realize object: #{href}"
          raise
        end

        # Quadratic backoff: 1, 4, 9, 16 seconds.
        delay = attempt * attempt
        attempt += 1
        Util.warn "Rate limit exceeded for #{href}, retrying in #{delay} seconds..."
        sleep delay
        retry
      rescue Lutaml::Hal::NotFoundError
        Util.warn "Object not found: #{href}"
        # Remember the miss so the same href is not requested again.
        cache[href] = nil
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.19.0".freeze
2
+ VERSION = "1.20.1".freeze
3
3
  end
data/relaton_w3c.gemspec CHANGED
@@ -31,14 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ["lib"]
33
33
 
34
- spec.add_dependency "linkeddata", "~> 3.2"
35
- spec.add_dependency "mechanize", "~> 2.10"
36
- spec.add_dependency "rdf", "~> 3.2"
37
- spec.add_dependency "rdf-normalize", "~> 0.6"
38
- spec.add_dependency "relaton-bib", "~> 1.19.0"
34
+ spec.add_dependency "relaton-bib", "~> 1.20.0"
39
35
  spec.add_dependency "relaton-index", "~> 0.2.8"
40
- spec.add_dependency "rubyzip", "~> 2.3"
41
- spec.add_dependency "shex", "~> 0.7"
42
- spec.add_dependency "csv", "~> 3.0"
43
- spec.add_dependency "sparql", "~> 3.2"
36
+ spec.add_dependency "w3c_api", "~> 0.1.3"
44
37
  end