relaton-w3c 1.11.0 → 1.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/data/reference.W3C.P3P.xml +1 -1
- data/lib/relaton_w3c/bibxml_parser.rb +7 -0
- data/lib/relaton_w3c/data_fethcer.rb +95 -17
- data/lib/relaton_w3c/data_index.rb +135 -0
- data/lib/relaton_w3c/data_parser.rb +171 -26
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliography.rb +9 -7
- data/lib/relaton_w3c/workgroups.yaml +7 -0
- data/lib/relaton_w3c.rb +4 -3
- data/relaton_w3c.gemspec +3 -0
- metadata +31 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b9169cf7e9b747f47505f8ff279b33104ab64128d053013afa9307872ffa498
|
4
|
+
data.tar.gz: '09e56a18aa8db35ed2747f508ce9b786cc540bd28d4802f4cdbd41ac490a98ab'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 137b95e319714c074fe712e28d1b58123fb1c82d4a2ce155533c5c3d125e01c29b35496954eb7055bbe3badb0bdce3143e6b5735c3cacab8d08ab019971504a0
|
7
|
+
data.tar.gz: 477eaeba6d35b38835275cc765c88ab24366cadae358421cb396ab2a2a28e2000d0a67ff742ac4bbb1947fa569129fdd9b961221c71803cab7c0cd826042dadf
|
data/data/reference.W3C.P3P.xml
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
<?xml version="1.0" encoding='UTF-8'?>
|
2
|
-
<reference anchor="W3C.P3P"
|
2
|
+
<reference anchor="W3C.P3P" target="http://www.w3.org/TR/P3P/">
|
3
3
|
<front>
|
4
4
|
<title>The Platform for Privacy Preferences 1.0 (P3P1.0) Specification</title>
|
5
5
|
<author initials="M." surname="Marchiori" fullname="Massimo Marchiori">
|
@@ -13,5 +13,12 @@ module RelatonW3c
|
|
13
13
|
def pubid_type(_)
|
14
14
|
"W3C"
|
15
15
|
end
|
16
|
+
|
17
|
+
#
# Build document identifiers for a BibXML reference, replacing any primary
# identifier produced by the parent implementation with a W3C identifier
# derived from the last path segment of the reference's `target` attribute.
#
# @param [Object] reference BibXML reference element; only `reference[:target]` is read here
# @param [Object] ver passed through unchanged to the parent implementation
#
# @return [Array<RelatonBib::DocumentIdentifier>] identifiers, W3C primary first
#
def docids(reference, ver)
  ids = super
  # A reference without a usable target gives nothing to derive the W3C id
  # from; keep the inherited identifiers instead of raising on nil.
  code = reference[:target].to_s.split("/").last
  return ids unless code

  ids.reject!(&:primary)
  ids.unshift RelatonBib::DocumentIdentifier.new(id: "W3C #{code}", type: "W3C", primary: true)
end
|
16
23
|
end
|
17
24
|
end
|
@@ -6,8 +6,6 @@ require "relaton_w3c/data_parser"
|
|
6
6
|
|
7
7
|
module RelatonW3c
|
8
8
|
class DataFetcher
|
9
|
-
USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
|
10
|
-
|
11
9
|
attr_reader :data, :group_names
|
12
10
|
|
13
11
|
#
|
@@ -21,9 +19,10 @@ module RelatonW3c
|
|
21
19
|
@format = format
|
22
20
|
@ext = format.sub(/^bib/, "")
|
23
21
|
dir = File.dirname(File.expand_path(__FILE__))
|
24
|
-
@group_names = YAML.load_file(File.join(dir
|
22
|
+
@group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
|
25
23
|
@data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
|
26
24
|
@files = []
|
25
|
+
@index = DataIndex.new
|
27
26
|
end
|
28
27
|
|
29
28
|
#
|
@@ -45,40 +44,117 @@ module RelatonW3c
|
|
45
44
|
#
|
46
45
|
# Parse documents
|
47
46
|
#
|
48
|
-
def fetch
|
49
|
-
|
47
|
+
def fetch # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
48
|
+
query_versioned_docs.each do |sl|
|
49
|
+
save_doc DataParser.parse(sl, self)
|
50
|
+
rescue StandardError => e
|
51
|
+
warn "Error: document #{sl.link} #{e.message}"
|
52
|
+
warn e.backtrace.join("\n")
|
53
|
+
end
|
54
|
+
query_unversioned_docs.each do |sl|
|
55
|
+
save_doc DataParser.parse(sl, self)
|
56
|
+
rescue StandardError => e
|
57
|
+
warn "Error: document #{sl.version_of} #{e.message}"
|
58
|
+
warn e.backtrace.join("\n")
|
59
|
+
end
|
50
60
|
Dir[File.expand_path("../../data/*", __dir__)].each do |file|
|
51
61
|
xml = File.read file, encoding: "UTF-8"
|
52
|
-
save_doc BibXMLParser.parse(xml)
|
62
|
+
save_doc BibXMLParser.parse(xml), warn_duplicate: false
|
63
|
+
rescue StandardError => e
|
64
|
+
warn "Error: document #{file} #{e.message}"
|
65
|
+
warn e.backtrace.join("\n")
|
53
66
|
end
|
67
|
+
@index.sort!.save
|
54
68
|
end
|
55
69
|
|
70
|
+
#
|
71
|
+
# Create index file
|
72
|
+
#
|
73
|
+
# def create_index
|
74
|
+
# index_file = "index-w3c.yaml"
|
75
|
+
# index_yaml = @index.sort do |a, b|
|
76
|
+
# compare_index_items a, b
|
77
|
+
# end.to_yaml
|
78
|
+
# File.write index_file, index_yaml, encoding: "UTF-8"
|
79
|
+
# end
|
80
|
+
|
81
|
+
#
|
82
|
+
# Compare index items
|
83
|
+
#
|
84
|
+
# @param [Hash] aid first item
|
85
|
+
# @param [Hash] bid second item
|
86
|
+
#
|
87
|
+
# @return [Integer] comparison result
|
88
|
+
#
|
89
|
+
# def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
|
90
|
+
# ret = aid[:code] <=> bid[:code]
|
91
|
+
# ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
|
92
|
+
# ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
|
93
|
+
# # ret = aid[:type] <=> bid[:type] if ret.zero?
|
94
|
+
# ret
|
95
|
+
# end
|
96
|
+
|
97
|
+
#
|
98
|
+
# Weight of stage
|
99
|
+
#
|
100
|
+
# @param [String, nil] stage stage
|
101
|
+
#
|
102
|
+
# @return [Integer] weight
|
103
|
+
#
|
104
|
+
# def stage_weight(stage)
|
105
|
+
# return DataParser::STAGES.size if stage.nil?
|
106
|
+
|
107
|
+
# DataParser::STAGES.keys.index(stage)
|
108
|
+
# end
|
109
|
+
|
110
|
+
#
|
111
|
+
# Weight of date
|
112
|
+
#
|
113
|
+
# @param [String] date date
|
114
|
+
#
|
115
|
+
# @return [String] weight
|
116
|
+
#
|
117
|
+
# def date_weight(date)
|
118
|
+
# return "99999999" if date.nil?
|
119
|
+
|
120
|
+
# date
|
121
|
+
# end
|
122
|
+
|
56
123
|
#
|
57
124
|
# Query RDF source for documents
|
58
125
|
#
|
59
126
|
# @return [RDF::Query::Solutions] query results
|
60
127
|
#
|
61
|
-
def
|
128
|
+
def query_versioned_docs # rubocop:disable Metrics/MethodLength
|
62
129
|
sse = SPARQL.parse(%(
|
63
130
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
64
131
|
PREFIX dc: <http://purl.org/dc/elements/1.1/>
|
65
132
|
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
66
133
|
# PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
67
134
|
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
68
|
-
SELECT ?link ?title ?date
|
135
|
+
SELECT ?link ?title ?date ?version_of
|
69
136
|
WHERE {
|
70
|
-
?link dc:title ?title ; dc:date ?date
|
137
|
+
?link dc:title ?title ; dc:date ?date ; doc:versionOf ?version_of .
|
71
138
|
}
|
72
139
|
))
|
73
140
|
data.query sse
|
74
141
|
end
|
75
142
|
|
143
|
+
def query_unversioned_docs
|
144
|
+
sse = SPARQL.parse(%(
|
145
|
+
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
146
|
+
SELECT ?version_of
|
147
|
+
WHERE { ?x doc:versionOf ?version_of . }
|
148
|
+
))
|
149
|
+
data.query(sse).uniq &:version_of
|
150
|
+
end
|
151
|
+
|
76
152
|
#
|
77
153
|
# Save document to file
|
78
154
|
#
|
79
155
|
# @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
|
80
156
|
#
|
81
|
-
def save_doc(bib) # rubocop:disable Metrics/MethodLength
|
157
|
+
def save_doc(bib, warn_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
82
158
|
return unless bib
|
83
159
|
|
84
160
|
c = case @format
|
@@ -86,24 +162,26 @@ module RelatonW3c
|
|
86
162
|
when "yaml" then bib.to_hash.to_yaml
|
87
163
|
else bib.send("to_#{@format}")
|
88
164
|
end
|
89
|
-
|
90
|
-
|
91
|
-
|
165
|
+
# id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
|
166
|
+
file = file_name(bib.docnumber)
|
167
|
+
if @files.include?(file)
|
168
|
+
warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
|
92
169
|
else
|
170
|
+
@index.add bib.docnumber, file
|
93
171
|
@files << file
|
172
|
+
File.write file, c, encoding: "UTF-8"
|
94
173
|
end
|
95
|
-
File.write file, c, encoding: "UTF-8"
|
96
174
|
end
|
97
175
|
|
98
176
|
#
|
99
177
|
# Generate file name
|
100
178
|
#
|
101
|
-
# @param [
|
179
|
+
# @param [String] id document id
|
102
180
|
#
|
103
181
|
# @return [String] file name
|
104
182
|
#
|
105
|
-
def file_name(
|
106
|
-
name =
|
183
|
+
def file_name(id)
  # Drop the "W3C " prefix, then turn separators into single underscores.
  stem = id.sub(/^W3C\s/, "")
  stem = stem.gsub(/[\s,:\/+]/, "_").squeeze("_").downcase
  File.join(@output, "#{stem}.#{@ext}")
end
|
109
187
|
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
module RelatonW3c
  #
  # Index of W3C documents. Each entry is a Hash with a :code and, when
  # present, :file, :stage, :type, :date and :suff parsed from the document
  # number.
  #
  class DataIndex
    # Splits a document number into optional stage or type prefix, code,
    # optional date (8 or 6 digits) and optional "/suffix".
    DOCNUMBER_PATTERN = %r{
      ^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE))-)?
      (?<code>\w+(?:[+-][\w.]+)*?)
      (?:-(?<date>\d{8}|\d{6}))?
      (?:/(?<suff>\w+))?$
    }xi.freeze

    #
    # Initialize data index.
    #
    # @param [String] index_file path to index file
    # @param [Array<Hash>] index index data
    #
    def initialize(index_file: "index-w3c.yaml", index: [])
      @index_file = index_file
      @index = index
    end

    #
    # Create index from a GitHub repository
    #
    # @return [RelatonW3c::DataIndex] data index
    #
    def self.create_from_repo
      resp_index = Net::HTTP.get(URI("#{W3cBibliography::SOURCE}index-w3c.yaml"))
      DataIndex.new index: load_index(resp_index)
    end

    #
    # Parse the index YAML, allowing Symbol keys.
    #
    # Psych 4 no longer accepts the permitted classes as a positional
    # argument, while older versions do not know the keyword, so the call
    # form is chosen from the method signature.
    #
    # @param [String] yaml index file content
    #
    # @return [Array<Hash>] index entries; empty when the content is not a YAML list
    #
    def self.load_index(yaml)
      keyword = YAML.method(:safe_load).parameters.any? { |_, name| name == :permitted_classes }
      index = if keyword
                YAML.safe_load(yaml, permitted_classes: [Symbol])
              else
                YAML.safe_load(yaml, [Symbol])
              end
      index.is_a?(Array) ? index : []
    end
    private_class_method :load_index

    #
    # Add document to index
    #
    # @param [String] docnumber document number
    # @param [String] file path to document file
    #
    def add(docnumber, file)
      @index << docnumber_to_parts(docnumber, file)
    end

    #
    # Save index to file.
    #
    def save
      File.write @index_file, @index.to_yaml, encoding: "UTF-8"
    end

    #
    # Sort index in place by code, then stage weight and date, both descending.
    #
    # @return [RelatonW3c::DataIndex] self, so calls can be chained
    #
    def sort!
      @index.sort! { |a, b| compare_index_items a, b }
      self
    end

    #
    # Search filename in index. The first entry whose code starts with the
    # reference's code (case-insensitive) and whose stage, type, date and
    # suffix agree with every part given in the reference wins.
    #
    # @param [String] ref reference
    #
    # @return [String, nil] document's filename or nil when nothing matches
    #
    def search(ref) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
      dparts = docnumber_to_parts(ref)
      # An empty code would prefix-match every entry.
      return if dparts[:code].empty?

      # Built once; it does not depend on the entry being inspected.
      code_prefix = /^#{Regexp.escape dparts[:code]}/i
      @index.detect do |parts|
        parts[:code].to_s.match?(code_prefix) &&
          (dparts[:stage].nil? || dparts[:stage].casecmp?(parts[:stage].to_s)) &&
          (dparts[:type].nil? || dparts[:type].casecmp?(parts[:type].to_s)) &&
          (dparts[:date].nil? || dparts[:date] == parts[:date]) &&
          (dparts[:suff].nil? || dparts[:suff].casecmp?(parts[:suff].to_s))
      end&.fetch(:file)
    end

    #
    # Compare index items
    #
    # @param [Hash] aid first item
    # @param [Hash] bid second item
    #
    # @return [Integer] comparison result
    #
    def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
      ret = aid[:code].to_s <=> bid[:code].to_s
      ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
      ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
      # ret = aid[:type] <=> bid[:type] if ret.zero?
      ret
    end

    #
    # Weight of stage: its position in DataParser::STAGES, looked up
    # case-insensitively. A missing or unknown stage gets the largest weight
    # so the comparison never sees nil.
    #
    # @param [String, nil] stage stage
    #
    # @return [Integer] weight
    #
    def stage_weight(stage)
      DataParser::STAGES.keys.index(stage.to_s.upcase) || DataParser::STAGES.size
    end

    #
    # Weight of date
    #
    # @param [String, nil] date date
    #
    # @return [String] the date itself, or "99999999" when there is no date
    #
    def date_weight(date)
      return "99999999" if date.nil?

      date
    end

    #
    # Parse document number to parts
    #
    # @param [String] docnumber document number
    # @param [String, nil] file path to document file
    #
    # @return [Hash{Symbol=>String}] document parts; when the number does not
    #   fit the expected pattern the whole number is used as the code
    #
    def docnumber_to_parts(docnumber, file = nil) # rubocop:disable Metrics/MethodLength
      number = docnumber.to_s
      m = DOCNUMBER_PATTERN.match(number)
      entry = { code: m ? m[:code] : number }
      entry[:file] = file if file
      return entry unless m

      %i[stage type date suff].each do |part|
        entry[part] = m[part] if m[part]
      end
      entry
    end
  end
end
|
@@ -1,5 +1,24 @@
|
|
1
1
|
module RelatonW3c
|
2
2
|
class DataParser
|
3
|
+
USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
|
4
|
+
|
5
|
+
DOCTYPES = {
|
6
|
+
"TR" => "technicalReport",
|
7
|
+
"NOTE" => "groupNote",
|
8
|
+
}.freeze
|
9
|
+
|
10
|
+
STAGES = {
|
11
|
+
"RET" => "retired",
|
12
|
+
"SPSD" => "supersededRecommendation",
|
13
|
+
"OBSL" => "obsoletedRecommendation",
|
14
|
+
"WD" => "workingDraft",
|
15
|
+
"CRD" => "candidateRecommendationDraft",
|
16
|
+
"CR" => "candidateRecommendation",
|
17
|
+
"PR" => "proposedRecommendation",
|
18
|
+
"PER" => "proposedEditedRecommendation",
|
19
|
+
"REC" => "recommendation",
|
20
|
+
}.freeze
|
21
|
+
|
3
22
|
#
|
4
23
|
# Document parser initialization
|
5
24
|
#
|
@@ -29,7 +48,7 @@ module RelatonW3c
|
|
29
48
|
# @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
|
30
49
|
#
|
31
50
|
def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
32
|
-
return
|
51
|
+
return if @sol.respond_to?(:link) && !types_stages.detect { |ts| USED_TYPES.include?(ts) }
|
33
52
|
|
34
53
|
RelatonW3c::W3cBibliographicItem.new(
|
35
54
|
type: "standard",
|
@@ -37,10 +56,12 @@ module RelatonW3c
|
|
37
56
|
fetched: Date.today.to_s,
|
38
57
|
language: ["en"],
|
39
58
|
script: ["Latn"],
|
59
|
+
docstatus: parse_docstatus,
|
40
60
|
title: parse_title,
|
41
61
|
link: parse_link,
|
42
62
|
docid: parse_docid,
|
43
|
-
|
63
|
+
formattedref: parse_formattedref,
|
64
|
+
docnumber: identifier,
|
44
65
|
series: parse_series,
|
45
66
|
date: parse_date,
|
46
67
|
relation: parse_relation,
|
@@ -49,12 +70,24 @@ module RelatonW3c
|
|
49
70
|
)
|
50
71
|
end
|
51
72
|
|
73
|
+
#
|
74
|
+
# Extract document status
|
75
|
+
#
|
76
|
+
# @return [RelatonBib::DocumentStatus, nil] document status
|
77
|
+
#
|
78
|
+
def parse_docstatus
  # The first type/stage name that is a known stage code decides the status.
  code = types_stages&.find { |name| STAGES.key?(name) }
  return unless code

  RelatonBib::DocumentStatus.new(stage: STAGES[code])
end
|
82
|
+
|
52
83
|
#
|
53
84
|
# Parse title
|
54
85
|
#
|
55
86
|
# @return [RelatonBib::TypedTitleStringCollection] title
|
56
87
|
#
|
57
88
|
def parse_title
|
89
|
+
return [] unless @sol.respond_to?(:title)
|
90
|
+
|
58
91
|
t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
|
59
92
|
RelatonBib::TypedTitleStringCollection.new [t]
|
60
93
|
end
|
@@ -65,7 +98,9 @@ module RelatonW3c
|
|
65
98
|
# @return [Array<RelatonBib::TypedUri>] link
|
66
99
|
#
|
67
100
|
def parse_link
|
68
|
-
|
101
|
+
link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
|
102
|
+
|
103
|
+
[RelatonBib::TypedUri.new(type: "src", content: link.to_s)]
|
69
104
|
end
|
70
105
|
|
71
106
|
#
|
@@ -74,23 +109,45 @@ module RelatonW3c
|
|
74
109
|
# @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
|
75
110
|
#
|
76
111
|
def parse_docid
|
77
|
-
|
112
|
+
return [] unless @sol.respond_to?(:link)
|
113
|
+
|
114
|
+
id = pub_id(@sol.link)
|
78
115
|
[RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)]
|
79
116
|
end
|
80
117
|
|
81
118
|
#
|
82
119
|
# Generate PubID
|
83
120
|
#
|
84
|
-
# @
|
85
|
-
#
|
86
|
-
# @return [String] PubID
|
121
|
+
# @return [String] PubID
|
87
122
|
#
|
88
123
|
def pub_id(url)
|
89
124
|
"W3C #{identifier(url)}"
|
90
125
|
end
|
91
126
|
|
92
|
-
|
93
|
-
|
127
|
+
#
|
128
|
+
# Generate identifier from URL
|
129
|
+
#
|
130
|
+
# @param [RDF::URI, nil] link
|
131
|
+
#
|
132
|
+
# @return [String] identifier
|
133
|
+
#
|
134
|
+
def identifier(link = nil)
  # Without an explicit link, use the solution's own link and fall back to
  # the URL it is a version of.
  source = link
  source ||= @sol.respond_to?(:link) ? @sol.link : @sol.version_of
  self.class.parse_identifier(source.to_s)
end
|
138
|
+
|
139
|
+
#
|
140
|
+
# Parse identifier from URL
|
141
|
+
#
|
142
|
+
# @param [String] url URL
|
143
|
+
#
|
144
|
+
# @return [String] identifier
|
145
|
+
#
|
146
|
+
#
# Extract the identifier from a document URL: the last path part shaped
# like "name-part[-part...]" together with an optional "/suffix" segment.
# When no such part exists, the last "/"-separated segment is returned.
#
# @param [String, #to_s] url URL or bare reference
#
# @return [String, nil] identifier; nil only when the input has no segments
#
def self.parse_identifier(url)
  source = url.to_s
  # String#[] with a capture index returns nil on no match, which avoids
  # reading the global $1.
  source[/.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/, 1] || source.split("/").last
end
|
95
152
|
|
96
153
|
#
|
@@ -99,12 +156,31 @@ module RelatonW3c
|
|
99
156
|
# @return [Array<RelatonBib::Series>] series
|
100
157
|
#
|
101
158
|
def parse_series
|
159
|
+
return [] unless type
|
160
|
+
|
102
161
|
title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
|
103
|
-
[RelatonBib::Series.new(title: title, number: identifier
|
162
|
+
[RelatonBib::Series.new(title: title, number: identifier)]
|
104
163
|
end
|
105
164
|
|
106
|
-
|
107
|
-
|
165
|
+
#
|
166
|
+
# Extract type
|
167
|
+
#
|
168
|
+
# @return [String] type
|
169
|
+
#
|
170
|
+
def type
  # There are many types; pick the first one that is in USED_TYPES and
  # remember it.
  @type || (@type = types_stages&.find { |name| USED_TYPES.include?(name) })
end
|
174
|
+
|
175
|
+
#
|
176
|
+
# Fetches types and stages
|
177
|
+
#
|
178
|
+
# @return [Array<String>] types and stages
|
179
|
+
#
|
180
|
+
def types_stages # rubocop:disable Metrics/MethodLength
|
181
|
+
return unless @sol.respond_to?(:link)
|
182
|
+
|
183
|
+
@types_stages ||= begin
|
108
184
|
sse = SPARQL.parse(%(
|
109
185
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
110
186
|
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
@@ -113,8 +189,7 @@ module RelatonW3c
|
|
113
189
|
{ <#{@sol.link}> rdf:type ?type }
|
114
190
|
}
|
115
191
|
))
|
116
|
-
|
117
|
-
tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
|
192
|
+
@fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
|
118
193
|
end
|
119
194
|
end
|
120
195
|
|
@@ -124,10 +199,17 @@ module RelatonW3c
|
|
124
199
|
# @return [String] doctype
|
125
200
|
#
|
126
201
|
def parse_doctype
|
127
|
-
|
202
|
+
DOCTYPES[type] || "recommendation"
|
128
203
|
end
|
129
204
|
|
205
|
+
#
|
206
|
+
# Parse date
|
207
|
+
#
|
208
|
+
# @return [Array<RelatonBib::BibliographicDate>] date
|
209
|
+
#
|
130
210
|
def parse_date
|
211
|
+
return [] unless @sol.respond_to?(:date)
|
212
|
+
|
131
213
|
[RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
|
132
214
|
end
|
133
215
|
|
@@ -136,29 +218,90 @@ module RelatonW3c
|
|
136
218
|
#
|
137
219
|
# @return [Array<RelatonBib::DocumentRelation>] relation
|
138
220
|
#
|
139
|
-
def parse_relation
|
221
|
+
def parse_relation
  # A solution without a link is handled through the versions query.
  return document_versions unless @sol.respond_to?(:link)

  relations + editor_drafts
end
|
227
|
+
|
228
|
+
#
# Collect the relations of the current document, one query per predicate.
#
# @return [Array<RelatonBib::DocumentRelation>] relations
#
def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  {
    "doc:obsoletes" => { type: "obsoletes" },
    "mat:hasErrata" => { type: "updatedBy", description: "errata" },
    # "mat:hasTranslations" => "hasTranslation",
    # "mat:hasImplReport" => "hasImpReport",
    ":previousEdition" => { type: "editionOf" },
  }.flat_map do |predicate, tp|
    relation_query(predicate).map do |r|
      fr = RelatonBib::LocalizedString.new pub_id(r.rel.to_s)
      bib = W3cBibliographicItem.new formattedref: fr
      # Build the attributes fresh for every relation. Writing the wrapped
      # description back into the shared template would wrap it again on
      # each further result of the same predicate.
      attrs = { type: tp[:type] }
      attrs[:description] = RelatonBib::FormattedString.new(content: tp[:description]) if tp[:description]
      RelatonBib::DocumentRelation.new(**attrs, bibitem: bib)
    end
  end
end
|
244
|
+
|
245
|
+
def editor_drafts # rubocop:disable Metrics/MethodLength
|
140
246
|
sse = SPARQL.parse(%(
|
141
|
-
PREFIX
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
{ <#{@sol.link}> ?p ?obsoletes }
|
146
|
-
}
|
247
|
+
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
248
|
+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
249
|
+
SELECT ?rel
|
250
|
+
WHERE { <#{@sol.link}> :ED ?rel . }
|
147
251
|
))
|
148
|
-
@fetcher.data.query(sse).
|
149
|
-
|
150
|
-
fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
|
252
|
+
@fetcher.data.query(sse).map do |s|
|
253
|
+
fr = RelatonBib::LocalizedString.new pub_id(s.rel.to_s)
|
151
254
|
bib = W3cBibliographicItem.new formattedref: fr
|
152
|
-
RelatonBib::
|
255
|
+
desc = RelatonBib::FormattedString.new content: "Editor's draft"
|
256
|
+
RelatonBib::DocumentRelation.new(
|
257
|
+
type: "hasDraft", description: desc, bibitem: bib,
|
258
|
+
)
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
def relation_query(predicate)
|
263
|
+
sse = SPARQL.parse(%(
|
264
|
+
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
265
|
+
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
266
|
+
PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
267
|
+
SELECT ?rel
|
268
|
+
WHERE { <#{@sol.link}> #{predicate} ?rel . }
|
269
|
+
))
|
270
|
+
@fetcher.data.query(sse).order_by(:rel)
|
271
|
+
end
|
272
|
+
|
273
|
+
def document_versions
|
274
|
+
sse = SPARQL.parse(%(
|
275
|
+
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
276
|
+
SELECT ?link
|
277
|
+
WHERE { ?link doc:versionOf <#{@sol.version_of}> }
|
278
|
+
))
|
279
|
+
@fetcher.data.query(sse).map do |r|
|
280
|
+
fref = RelatonBib::FormattedRef.new content: pub_id(r.link)
|
281
|
+
bib = W3cBibliographicItem.new formattedref: fref
|
282
|
+
RelatonBib::DocumentRelation.new(type: "hasEdition", bibitem: bib)
|
153
283
|
end
|
154
284
|
end
|
155
285
|
|
286
|
+
#
|
287
|
+
# Parse formattedref
|
288
|
+
#
|
289
|
+
# @return [RelatonBib::FormattedRef] formattedref
|
290
|
+
#
|
291
|
+
def parse_formattedref
  # Only a solution without a link gets a formatted reference, built from
  # the URL it is a version of.
  unless @sol.respond_to?(:link)
    RelatonBib::FormattedRef.new(content: pub_id(@sol.version_of))
  end
end
|
296
|
+
|
156
297
|
#
|
157
298
|
# Parse contributor
|
158
299
|
#
|
159
300
|
# @return [Array<RelatonBib::ContributionInfo>] contributor
|
160
301
|
#
|
161
302
|
def parse_contrib # rubocop:disable Metrics/MethodLength
|
303
|
+
return [] unless @sol.respond_to?(:link)
|
304
|
+
|
162
305
|
sse = SPARQL.parse(%(
|
163
306
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
164
307
|
PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
|
@@ -181,6 +324,8 @@ module RelatonW3c
|
|
181
324
|
# @return [RelatonBib::EditorialGroup] editorialgroup
|
182
325
|
#
|
183
326
|
def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
327
|
+
return unless @sol.respond_to?(:link)
|
328
|
+
|
184
329
|
sse = SPARQL.parse(%(
|
185
330
|
PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
|
186
331
|
PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
|
data/lib/relaton_w3c/version.rb
CHANGED
@@ -5,15 +5,17 @@ require "net/http"
|
|
5
5
|
module RelatonW3c
|
6
6
|
# Class methods for search W3C standards.
|
7
7
|
class W3cBibliography
|
8
|
-
SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/
|
8
|
+
SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/"
|
9
9
|
|
10
10
|
class << self
|
11
11
|
# @param text [String]
|
12
12
|
# @return [RelatonW3c::HitCollection]
|
13
13
|
def search(text) # rubocop:disable Metrics/MethodLength
|
14
|
-
|
15
|
-
file =
|
16
|
-
|
14
|
+
ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
|
15
|
+
file = DataIndex.create_from_repo.search(ref)
|
16
|
+
return unless file
|
17
|
+
|
18
|
+
url = "#{SOURCE}#{file}"
|
17
19
|
resp = Net::HTTP.get_response(URI.parse(url))
|
18
20
|
return unless resp.code == "200"
|
19
21
|
|
@@ -24,7 +26,7 @@ module RelatonW3c
|
|
24
26
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
25
27
|
Net::ProtocolError, Errno::ETIMEDOUT
|
26
28
|
raise RelatonBib::RequestError,
|
27
|
-
"Could not access #{
|
29
|
+
"Could not access #{url}"
|
28
30
|
end
|
29
31
|
|
30
32
|
# @param ref [String] the W3C standard Code to look up
|
@@ -39,8 +41,8 @@ module RelatonW3c
|
|
39
41
|
return
|
40
42
|
end
|
41
43
|
|
42
|
-
|
43
|
-
warn "[relaton-w3c] (\"#{ref}\") found #{
|
44
|
+
found = result.docnumber
|
45
|
+
warn "[relaton-w3c] (\"#{ref}\") found #{found}"
|
44
46
|
result
|
45
47
|
end
|
46
48
|
end
|
@@ -32,6 +32,9 @@
|
|
32
32
|
'https://www.w3.org/WAI/EO':
|
33
33
|
name: Education and Outreach Working Group
|
34
34
|
abbrev: EOWG
|
35
|
+
'https://www.w3.org/WAI/about/groups/eowg':
|
36
|
+
name: Education and Outreach Working Group
|
37
|
+
abbrev: EOWG
|
35
38
|
'https://www.w3.org/2001/sw/WebOnt':
|
36
39
|
name: Web-Ontology Working Group
|
37
40
|
'http://www.w3.org/MarkUp/Forms':
|
@@ -54,6 +57,8 @@
|
|
54
57
|
name: Web Applications Working Group
|
55
58
|
'https://www.w3.org/2008/webapps':
|
56
59
|
name: Web Applications Working Group
|
60
|
+
'https://www.w3.org/groups/wg/webapps':
|
61
|
+
name: Web Applications Working Group
|
57
62
|
'https://www.w3.org/das':
|
58
63
|
name: Devices and Sensors Working Group
|
59
64
|
abbrev: DAS WG
|
@@ -226,6 +231,8 @@
|
|
226
231
|
abbrev: ARIA WG
|
227
232
|
'https://www.w3.org/wasm':
|
228
233
|
name: WebAssembly Working Group
|
234
|
+
'https://www.w3.org/groups/wg/wasm':
|
235
|
+
name: WebAssembly Working Group
|
229
236
|
'https://www.w3.org/groups/wg/webediting':
|
230
237
|
name: Web Editing Working Group
|
231
238
|
'https://www.w3.org/2014/data-shapes':
|
data/lib/relaton_w3c.rb
CHANGED
@@ -2,13 +2,14 @@ require "relaton_bib"
|
|
2
2
|
require "relaton_w3c/version"
|
3
3
|
require "relaton_w3c/w3c_bibliography"
|
4
4
|
require "relaton_w3c/w3c_bibliographic_item"
|
5
|
-
require "relaton_w3c/hit_collection"
|
6
|
-
require "relaton_w3c/hit"
|
7
|
-
require "relaton_w3c/scrapper"
|
5
|
+
# require "relaton_w3c/hit_collection"
|
6
|
+
# require "relaton_w3c/hit"
|
7
|
+
# require "relaton_w3c/scrapper"
|
8
8
|
require "relaton_w3c/xml_parser"
|
9
9
|
require "relaton_w3c/bibxml_parser"
|
10
10
|
require "relaton_w3c/hash_converter"
|
11
11
|
require "relaton_w3c/data_fethcer"
|
12
|
+
require "relaton_w3c/data_index"
|
12
13
|
|
13
14
|
module RelatonW3c
|
14
15
|
class Error < StandardError; end
|
data/relaton_w3c.gemspec
CHANGED
@@ -39,7 +39,10 @@ Gem::Specification.new do |spec|
|
|
39
39
|
|
40
40
|
spec.add_dependency "linkeddata", "~> 3.1.0"
|
41
41
|
spec.add_dependency "mechanize", "~> 2.8.0"
|
42
|
+
# spec.add_dependency "picky"
|
42
43
|
spec.add_dependency "rdf", "~> 3.1.0"
|
44
|
+
spec.add_dependency "rdf-normalize", "~> 0.4.0"
|
43
45
|
spec.add_dependency "relaton-bib", "~> 1.11.0"
|
46
|
+
spec.add_dependency "shex", "~> 0.6.0"
|
44
47
|
spec.add_dependency "sparql", "~> 3.1.0"
|
45
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-w3c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.11.
|
4
|
+
version: 1.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: 3.1.0
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rdf-normalize
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.4.0
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.4.0
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: relaton-bib
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,6 +150,20 @@ dependencies:
|
|
136
150
|
- - "~>"
|
137
151
|
- !ruby/object:Gem::Version
|
138
152
|
version: 1.11.0
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: shex
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 0.6.0
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 0.6.0
|
139
167
|
- !ruby/object:Gem::Dependency
|
140
168
|
name: sparql
|
141
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -202,6 +230,7 @@ files:
|
|
202
230
|
- lib/relaton_w3c.rb
|
203
231
|
- lib/relaton_w3c/bibxml_parser.rb
|
204
232
|
- lib/relaton_w3c/data_fethcer.rb
|
233
|
+
- lib/relaton_w3c/data_index.rb
|
205
234
|
- lib/relaton_w3c/data_parser.rb
|
206
235
|
- lib/relaton_w3c/hash_converter.rb
|
207
236
|
- lib/relaton_w3c/hit.rb
|