relaton-w3c 1.11.0 → 1.11.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/data/reference.W3C.P3P.xml +1 -1
- data/lib/relaton_w3c/bibxml_parser.rb +7 -0
- data/lib/relaton_w3c/data_fethcer.rb +95 -17
- data/lib/relaton_w3c/data_index.rb +135 -0
- data/lib/relaton_w3c/data_parser.rb +171 -26
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliography.rb +9 -7
- data/lib/relaton_w3c/workgroups.yaml +7 -0
- data/lib/relaton_w3c.rb +4 -3
- data/relaton_w3c.gemspec +3 -0
- metadata +31 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b9169cf7e9b747f47505f8ff279b33104ab64128d053013afa9307872ffa498
|
4
|
+
data.tar.gz: '09e56a18aa8db35ed2747f508ce9b786cc540bd28d4802f4cdbd41ac490a98ab'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 137b95e319714c074fe712e28d1b58123fb1c82d4a2ce155533c5c3d125e01c29b35496954eb7055bbe3badb0bdce3143e6b5735c3cacab8d08ab019971504a0
|
7
|
+
data.tar.gz: 477eaeba6d35b38835275cc765c88ab24366cadae358421cb396ab2a2a28e2000d0a67ff742ac4bbb1947fa569129fdd9b961221c71803cab7c0cd826042dadf
|
data/data/reference.W3C.P3P.xml
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
<?xml version="1.0" encoding='UTF-8'?>
|
2
|
-
<reference anchor="W3C.P3P"
|
2
|
+
<reference anchor="W3C.P3P" target="http://www.w3.org/TR/P3P/">
|
3
3
|
<front>
|
4
4
|
<title>The Platform for Privacy Preferences 1.0 (P3P1.0) Specification</title>
|
5
5
|
<author initials="M." surname="Marchiori" fullname="Massimo Marchiori">
|
@@ -13,5 +13,12 @@ module RelatonW3c
|
|
13
13
|
def pubid_type(_)
|
14
14
|
"W3C"
|
15
15
|
end
|
16
|
+
|
17
|
+
def docids(reference, ver)
|
18
|
+
ids = super
|
19
|
+
ids.reject! &:primary
|
20
|
+
id = "W3C #{reference[:target].split('/').last}"
|
21
|
+
ids.unshift RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)
|
22
|
+
end
|
16
23
|
end
|
17
24
|
end
|
@@ -6,8 +6,6 @@ require "relaton_w3c/data_parser"
|
|
6
6
|
|
7
7
|
module RelatonW3c
|
8
8
|
class DataFetcher
|
9
|
-
USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
|
10
|
-
|
11
9
|
attr_reader :data, :group_names
|
12
10
|
|
13
11
|
#
|
@@ -21,9 +19,10 @@ module RelatonW3c
|
|
21
19
|
@format = format
|
22
20
|
@ext = format.sub(/^bib/, "")
|
23
21
|
dir = File.dirname(File.expand_path(__FILE__))
|
24
|
-
@group_names = YAML.load_file(File.join(dir
|
22
|
+
@group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
|
25
23
|
@data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
|
26
24
|
@files = []
|
25
|
+
@index = DataIndex.new
|
27
26
|
end
|
28
27
|
|
29
28
|
#
|
@@ -45,40 +44,117 @@ module RelatonW3c
|
|
45
44
|
#
|
46
45
|
# Parse documents
|
47
46
|
#
|
48
|
-
def fetch
|
49
|
-
|
47
|
+
def fetch # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
48
|
+
query_versioned_docs.each do |sl|
|
49
|
+
save_doc DataParser.parse(sl, self)
|
50
|
+
rescue StandardError => e
|
51
|
+
warn "Error: document #{sl.link} #{e.message}"
|
52
|
+
warn e.backtrace.join("\n")
|
53
|
+
end
|
54
|
+
query_unversioned_docs.each do |sl|
|
55
|
+
save_doc DataParser.parse(sl, self)
|
56
|
+
rescue StandardError => e
|
57
|
+
warn "Error: document #{sl.version_of} #{e.message}"
|
58
|
+
warn e.backtrace.join("\n")
|
59
|
+
end
|
50
60
|
Dir[File.expand_path("../../data/*", __dir__)].each do |file|
|
51
61
|
xml = File.read file, encoding: "UTF-8"
|
52
|
-
save_doc BibXMLParser.parse(xml)
|
62
|
+
save_doc BibXMLParser.parse(xml), warn_duplicate: false
|
63
|
+
rescue StandardError => e
|
64
|
+
warn "Error: document #{file} #{e.message}"
|
65
|
+
warn e.backtrace.join("\n")
|
53
66
|
end
|
67
|
+
@index.sort!.save
|
54
68
|
end
|
55
69
|
|
70
|
+
#
|
71
|
+
# Create index file
|
72
|
+
#
|
73
|
+
# def create_index
|
74
|
+
# index_file = "index-w3c.yaml"
|
75
|
+
# index_yaml = @index.sort do |a, b|
|
76
|
+
# compare_index_items a, b
|
77
|
+
# end.to_yaml
|
78
|
+
# File.write index_file, index_yaml, encoding: "UTF-8"
|
79
|
+
# end
|
80
|
+
|
81
|
+
#
|
82
|
+
# Compare index items
|
83
|
+
#
|
84
|
+
# @param [Hash] aid first item
|
85
|
+
# @param [Hash] bid second item
|
86
|
+
#
|
87
|
+
# @return [Integer] comparison result
|
88
|
+
#
|
89
|
+
# def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
|
90
|
+
# ret = aid[:code] <=> bid[:code]
|
91
|
+
# ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
|
92
|
+
# ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
|
93
|
+
# # ret = aid[:type] <=> bid[:type] if ret.zero?
|
94
|
+
# ret
|
95
|
+
# end
|
96
|
+
|
97
|
+
#
|
98
|
+
# Weight of stage
|
99
|
+
#
|
100
|
+
# @param [String, nil] stage stage
|
101
|
+
#
|
102
|
+
# @return [Integer] weight
|
103
|
+
#
|
104
|
+
# def stage_weight(stage)
|
105
|
+
# return DataParser::STAGES.size if stage.nil?
|
106
|
+
|
107
|
+
# DataParser::STAGES.keys.index(stage)
|
108
|
+
# end
|
109
|
+
|
110
|
+
#
|
111
|
+
# Weight of date
|
112
|
+
#
|
113
|
+
# @param [String] date date
|
114
|
+
#
|
115
|
+
# @return [String] weight
|
116
|
+
#
|
117
|
+
# def date_weight(date)
|
118
|
+
# return "99999999" if date.nil?
|
119
|
+
|
120
|
+
# date
|
121
|
+
# end
|
122
|
+
|
56
123
|
#
|
57
124
|
# Query RDF source for documents
|
58
125
|
#
|
59
126
|
# @return [RDF::Query::Solutions] query results
|
60
127
|
#
|
61
|
-
def
|
128
|
+
def query_versioned_docs # rubocop:disable Metrics/MethodLength
|
62
129
|
sse = SPARQL.parse(%(
|
63
130
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
64
131
|
PREFIX dc: <http://purl.org/dc/elements/1.1/>
|
65
132
|
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
66
133
|
# PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
67
134
|
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
68
|
-
SELECT ?link ?title ?date
|
135
|
+
SELECT ?link ?title ?date ?version_of
|
69
136
|
WHERE {
|
70
|
-
?link dc:title ?title ; dc:date ?date
|
137
|
+
?link dc:title ?title ; dc:date ?date ; doc:versionOf ?version_of .
|
71
138
|
}
|
72
139
|
))
|
73
140
|
data.query sse
|
74
141
|
end
|
75
142
|
|
143
|
+
def query_unversioned_docs
|
144
|
+
sse = SPARQL.parse(%(
|
145
|
+
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
146
|
+
SELECT ?version_of
|
147
|
+
WHERE { ?x doc:versionOf ?version_of . }
|
148
|
+
))
|
149
|
+
data.query(sse).uniq &:version_of
|
150
|
+
end
|
151
|
+
|
76
152
|
#
|
77
153
|
# Save document to file
|
78
154
|
#
|
79
155
|
# @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
|
80
156
|
#
|
81
|
-
def save_doc(bib) # rubocop:disable Metrics/MethodLength
|
157
|
+
def save_doc(bib, warn_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
82
158
|
return unless bib
|
83
159
|
|
84
160
|
c = case @format
|
@@ -86,24 +162,26 @@ module RelatonW3c
|
|
86
162
|
when "yaml" then bib.to_hash.to_yaml
|
87
163
|
else bib.send("to_#{@format}")
|
88
164
|
end
|
89
|
-
|
90
|
-
|
91
|
-
|
165
|
+
# id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
|
166
|
+
file = file_name(bib.docnumber)
|
167
|
+
if @files.include?(file)
|
168
|
+
warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
|
92
169
|
else
|
170
|
+
@index.add bib.docnumber, file
|
93
171
|
@files << file
|
172
|
+
File.write file, c, encoding: "UTF-8"
|
94
173
|
end
|
95
|
-
File.write file, c, encoding: "UTF-8"
|
96
174
|
end
|
97
175
|
|
98
176
|
#
|
99
177
|
# Generate file name
|
100
178
|
#
|
101
|
-
# @param [
|
179
|
+
# @param [String] id document id
|
102
180
|
#
|
103
181
|
# @return [String] file name
|
104
182
|
#
|
105
|
-
def file_name(
|
106
|
-
name =
|
183
|
+
def file_name(id)
|
184
|
+
name = id.sub(/^W3C\s/, "").gsub(/[\s,:\/+]/, "_").squeeze("_").downcase
|
107
185
|
File.join @output, "#{name}.#{@ext}"
|
108
186
|
end
|
109
187
|
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
module RelatonW3c
|
2
|
+
class DataIndex
|
3
|
+
#
|
4
|
+
# Initialize data index.
|
5
|
+
#
|
6
|
+
# @param [String] index_file path to index file
|
7
|
+
# @param [Array<Hash>] index index data
|
8
|
+
#
|
9
|
+
def initialize(index_file: "index-w3c.yaml", index: [])
|
10
|
+
@index_file = index_file
|
11
|
+
@index = index
|
12
|
+
end
|
13
|
+
|
14
|
+
#
|
15
|
+
# Create index from a GitHub repository
|
16
|
+
#
|
17
|
+
# @return [RelatonW3c::DataIndex] data index
|
18
|
+
#
|
19
|
+
def self.create_from_repo
|
20
|
+
resp_index = Net::HTTP.get(URI("#{W3cBibliography::SOURCE}index-w3c.yaml"))
|
21
|
+
DataIndex.new index: YAML.safe_load(resp_index, [Symbol])
|
22
|
+
end
|
23
|
+
|
24
|
+
#
|
25
|
+
# Add document to index
|
26
|
+
#
|
27
|
+
# @param [String] docnumber document number
|
28
|
+
# @param [String] file path to document file
|
29
|
+
#
|
30
|
+
def add(docnumber, file)
|
31
|
+
@index << docnumber_to_parts(docnumber, file)
|
32
|
+
end
|
33
|
+
|
34
|
+
#
|
35
|
+
# Save index to file.
|
36
|
+
#
|
37
|
+
def save
|
38
|
+
File.write @index_file, @index.to_yaml, encoding: "UTF-8"
|
39
|
+
end
|
40
|
+
|
41
|
+
#
|
42
|
+
# Sort index
|
43
|
+
#
|
44
|
+
# @return [RelatonW3c::DataIndex] self, with the index sorted in place
|
45
|
+
#
|
46
|
+
def sort!
|
47
|
+
@index.sort! { |a, b| compare_index_items a, b }
|
48
|
+
self
|
49
|
+
end
|
50
|
+
|
51
|
+
#
|
52
|
+
# Search filename in index
|
53
|
+
#
|
54
|
+
# @param [String] ref reference
|
55
|
+
#
|
56
|
+
# @return [String] document's filename
|
57
|
+
#
|
58
|
+
def search(ref) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
59
|
+
dparts = docnumber_to_parts(ref)
|
60
|
+
@index.detect do |parts|
|
61
|
+
parts[:code].match?(/^#{Regexp.escape dparts[:code]}/i) &&
|
62
|
+
(dparts[:stage].nil? || dparts[:stage].casecmp?(parts[:stage])) &&
|
63
|
+
(dparts[:type].nil? || dparts[:type].casecmp?(parts[:type])) &&
|
64
|
+
(dparts[:date].nil? || dparts[:date] == parts[:date]) &&
|
65
|
+
(dparts[:suff].nil? || dparts[:suff].casecmp?(parts[:suff]))
|
66
|
+
end&.fetch(:file)
|
67
|
+
end
|
68
|
+
|
69
|
+
#
|
70
|
+
# Compare index items
|
71
|
+
#
|
72
|
+
# @param [Hash] aid first item
|
73
|
+
# @param [Hash] bid second item
|
74
|
+
#
|
75
|
+
# @return [Integer] comparison result
|
76
|
+
#
|
77
|
+
def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
|
78
|
+
ret = aid[:code] <=> bid[:code]
|
79
|
+
ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
|
80
|
+
ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
|
81
|
+
# ret = aid[:type] <=> bid[:type] if ret.zero?
|
82
|
+
ret
|
83
|
+
end
|
84
|
+
|
85
|
+
#
|
86
|
+
# Weight of stage
|
87
|
+
#
|
88
|
+
# @param [String, nil] stage stage
|
89
|
+
#
|
90
|
+
# @return [Integer] weight
|
91
|
+
#
|
92
|
+
def stage_weight(stage)
|
93
|
+
return DataParser::STAGES.size if stage.nil?
|
94
|
+
|
95
|
+
DataParser::STAGES.keys.index(stage)
|
96
|
+
end
|
97
|
+
|
98
|
+
#
|
99
|
+
# Weight of date
|
100
|
+
#
|
101
|
+
# @param [String] date date
|
102
|
+
#
|
103
|
+
# @return [String] weight
|
104
|
+
#
|
105
|
+
def date_weight(date)
|
106
|
+
return "99999999" if date.nil?
|
107
|
+
|
108
|
+
date
|
109
|
+
end
|
110
|
+
|
111
|
+
#
|
112
|
+
# Parse document number to parts
|
113
|
+
#
|
114
|
+
# @param [String] docnumber document number
|
115
|
+
# @param [String, nil] file path to document file
|
116
|
+
#
|
117
|
+
# @return [Hash{Symbol=>String}] document parts
|
118
|
+
#
|
119
|
+
def docnumber_to_parts(docnumber, file = nil) # rubocop:disable Metrics/MethodLength
|
120
|
+
%r{
|
121
|
+
^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE))-)?
|
122
|
+
(?<code>\w+(?:[+-][\w.]+)*?)
|
123
|
+
(?:-(?<date>\d{8}|\d{6}))?
|
124
|
+
(?:/(?<suff>\w+))?$
|
125
|
+
}xi =~ docnumber
|
126
|
+
entry = { code: code }
|
127
|
+
entry[:file] = file if file
|
128
|
+
entry[:stage] = stage if stage
|
129
|
+
entry[:type] = type if type
|
130
|
+
entry[:date] = date if date
|
131
|
+
entry[:suff] = suff if suff
|
132
|
+
entry
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
@@ -1,5 +1,24 @@
|
|
1
1
|
module RelatonW3c
|
2
2
|
class DataParser
|
3
|
+
USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
|
4
|
+
|
5
|
+
DOCTYPES = {
|
6
|
+
"TR" => "technicalReport",
|
7
|
+
"NOTE" => "groupNote",
|
8
|
+
}.freeze
|
9
|
+
|
10
|
+
STAGES = {
|
11
|
+
"RET" => "retired",
|
12
|
+
"SPSD" => "supersededRecommendation",
|
13
|
+
"OBSL" => "obsoletedRecommendation",
|
14
|
+
"WD" => "workingDraft",
|
15
|
+
"CRD" => "candidateRecommendationDraft",
|
16
|
+
"CR" => "candidateRecommendation",
|
17
|
+
"PR" => "proposedRecommendation",
|
18
|
+
"PER" => "proposedEditedRecommendation",
|
19
|
+
"REC" => "recommendation",
|
20
|
+
}.freeze
|
21
|
+
|
3
22
|
#
|
4
23
|
# Document parser initialization
|
5
24
|
#
|
@@ -29,7 +48,7 @@ module RelatonW3c
|
|
29
48
|
# @return [RelatonW3c:W3cBibliographicItem, nil] bibliographic item
|
30
49
|
#
|
31
50
|
def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
32
|
-
return
|
51
|
+
return if @sol.respond_to?(:link) && !types_stages.detect { |ts| USED_TYPES.include?(ts) }
|
33
52
|
|
34
53
|
RelatonW3c::W3cBibliographicItem.new(
|
35
54
|
type: "standard",
|
@@ -37,10 +56,12 @@ module RelatonW3c
|
|
37
56
|
fetched: Date.today.to_s,
|
38
57
|
language: ["en"],
|
39
58
|
script: ["Latn"],
|
59
|
+
docstatus: parse_docstatus,
|
40
60
|
title: parse_title,
|
41
61
|
link: parse_link,
|
42
62
|
docid: parse_docid,
|
43
|
-
|
63
|
+
formattedref: parse_formattedref,
|
64
|
+
docnumber: identifier,
|
44
65
|
series: parse_series,
|
45
66
|
date: parse_date,
|
46
67
|
relation: parse_relation,
|
@@ -49,12 +70,24 @@ module RelatonW3c
|
|
49
70
|
)
|
50
71
|
end
|
51
72
|
|
73
|
+
#
|
74
|
+
# Extract document status
|
75
|
+
#
|
76
|
+
# @return [RelatonBib::DocumentStatus, nil] document status
|
77
|
+
#
|
78
|
+
def parse_docstatus
|
79
|
+
stage = types_stages&.detect { |st| STAGES.include?(st) }
|
80
|
+
RelatonBib::DocumentStatus.new stage: STAGES[stage] if stage
|
81
|
+
end
|
82
|
+
|
52
83
|
#
|
53
84
|
# Parse title
|
54
85
|
#
|
55
86
|
# @return [RelatonBib::TypedTitleStringCollection] title
|
56
87
|
#
|
57
88
|
def parse_title
|
89
|
+
return [] unless @sol.respond_to?(:title)
|
90
|
+
|
58
91
|
t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
|
59
92
|
RelatonBib::TypedTitleStringCollection.new [t]
|
60
93
|
end
|
@@ -65,7 +98,9 @@ module RelatonW3c
|
|
65
98
|
# @return [Array<RelatonBib::TypedUri>] link
|
66
99
|
#
|
67
100
|
def parse_link
|
68
|
-
|
101
|
+
link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
|
102
|
+
|
103
|
+
[RelatonBib::TypedUri.new(type: "src", content: link.to_s)]
|
69
104
|
end
|
70
105
|
|
71
106
|
#
|
@@ -74,23 +109,45 @@ module RelatonW3c
|
|
74
109
|
# @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
|
75
110
|
#
|
76
111
|
def parse_docid
|
77
|
-
|
112
|
+
return [] unless @sol.respond_to?(:link)
|
113
|
+
|
114
|
+
id = pub_id(@sol.link)
|
78
115
|
[RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)]
|
79
116
|
end
|
80
117
|
|
81
118
|
#
|
82
119
|
# Generate PubID
|
83
120
|
#
|
84
|
-
# @
|
85
|
-
#
|
86
|
-
# @return [String] PubID
|
121
|
+
# @return [String] PubID
|
87
122
|
#
|
88
123
|
def pub_id(url)
|
89
124
|
"W3C #{identifier(url)}"
|
90
125
|
end
|
91
126
|
|
92
|
-
|
93
|
-
|
127
|
+
#
|
128
|
+
# Generate identifier from URL
|
129
|
+
#
|
130
|
+
# @param [RDF::URI, nil] link
|
131
|
+
#
|
132
|
+
# @return [String] identifier
|
133
|
+
#
|
134
|
+
def identifier(link = nil)
|
135
|
+
url = link || (@sol.respond_to?(:link) ? @sol.link : @sol.version_of)
|
136
|
+
self.class.parse_identifier(url.to_s)
|
137
|
+
end
|
138
|
+
|
139
|
+
#
|
140
|
+
# Parse identifier from URL
|
141
|
+
#
|
142
|
+
# @param [String] url URL
|
143
|
+
#
|
144
|
+
# @return [String] identifier
|
145
|
+
#
|
146
|
+
def self.parse_identifier(url)
|
147
|
+
if /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/ =~ url.to_s
|
148
|
+
$1.to_s
|
149
|
+
else url.to_s.split("/").last
|
150
|
+
end
|
94
151
|
end
|
95
152
|
|
96
153
|
#
|
@@ -99,12 +156,31 @@ module RelatonW3c
|
|
99
156
|
# @return [Array<RelatonBib::Series>] series
|
100
157
|
#
|
101
158
|
def parse_series
|
159
|
+
return [] unless type
|
160
|
+
|
102
161
|
title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
|
103
|
-
[RelatonBib::Series.new(title: title, number: identifier
|
162
|
+
[RelatonBib::Series.new(title: title, number: identifier)]
|
104
163
|
end
|
105
164
|
|
106
|
-
|
107
|
-
|
165
|
+
#
|
166
|
+
# Extract type
|
167
|
+
#
|
168
|
+
# @return [String] type
|
169
|
+
#
|
170
|
+
def type
|
171
|
+
# there are many types; we need to find the right one
|
172
|
+
@type ||= types_stages&.detect { |t| USED_TYPES.include?(t) }
|
173
|
+
end
|
174
|
+
|
175
|
+
#
|
176
|
+
# Fetches types and stages
|
177
|
+
#
|
178
|
+
# @return [Array<String>] types and stages
|
179
|
+
#
|
180
|
+
def types_stages # rubocop:disable Metrics/MethodLength
|
181
|
+
return unless @sol.respond_to?(:link)
|
182
|
+
|
183
|
+
@types_stages ||= begin
|
108
184
|
sse = SPARQL.parse(%(
|
109
185
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
110
186
|
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
@@ -113,8 +189,7 @@ module RelatonW3c
|
|
113
189
|
{ <#{@sol.link}> rdf:type ?type }
|
114
190
|
}
|
115
191
|
))
|
116
|
-
|
117
|
-
tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
|
192
|
+
@fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
|
118
193
|
end
|
119
194
|
end
|
120
195
|
|
@@ -124,10 +199,17 @@ module RelatonW3c
|
|
124
199
|
# @return [String] doctype
|
125
200
|
#
|
126
201
|
def parse_doctype
|
127
|
-
|
202
|
+
DOCTYPES[type] || "recommendation"
|
128
203
|
end
|
129
204
|
|
205
|
+
#
|
206
|
+
# Parse date
|
207
|
+
#
|
208
|
+
# @return [Array<RelatonBib::BibliographicDate>] date
|
209
|
+
#
|
130
210
|
def parse_date
|
211
|
+
return [] unless @sol.respond_to?(:date)
|
212
|
+
|
131
213
|
[RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
|
132
214
|
end
|
133
215
|
|
@@ -136,29 +218,90 @@ module RelatonW3c
|
|
136
218
|
#
|
137
219
|
# @return [Array<RelatonBib::DocumentRelation>] relation
|
138
220
|
#
|
139
|
-
def parse_relation
|
221
|
+
def parse_relation
|
222
|
+
if @sol.respond_to?(:link)
|
223
|
+
relations + editor_drafts
|
224
|
+
else document_versions
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
229
|
+
{
|
230
|
+
"doc:obsoletes" => { type: "obsoletes" },
|
231
|
+
"mat:hasErrata" => { type: "updatedBy", description: "errata" },
|
232
|
+
# "mat:hasTranslations" => "hasTranslation",
|
233
|
+
# "mat:hasImplReport" => "hasImpReport",
|
234
|
+
":previousEdition" => { type: "editionOf" },
|
235
|
+
}.reduce([]) do |acc, (predicate, tp)|
|
236
|
+
acc + relation_query(predicate).map do |r|
|
237
|
+
fr = RelatonBib::LocalizedString.new pub_id(r.rel.to_s)
|
238
|
+
bib = W3cBibliographicItem.new formattedref: fr
|
239
|
+
tp[:description] = RelatonBib::FormattedString.new content: tp[:description] if tp[:description]
|
240
|
+
RelatonBib::DocumentRelation.new(**tp, bibitem: bib)
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
def editor_drafts # rubocop:disable Metrics/MethodLength
|
140
246
|
sse = SPARQL.parse(%(
|
141
|
-
PREFIX
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
{ <#{@sol.link}> ?p ?obsoletes }
|
146
|
-
}
|
247
|
+
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
248
|
+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
249
|
+
SELECT ?rel
|
250
|
+
WHERE { <#{@sol.link}> :ED ?rel . }
|
147
251
|
))
|
148
|
-
@fetcher.data.query(sse).
|
149
|
-
|
150
|
-
fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
|
252
|
+
@fetcher.data.query(sse).map do |s|
|
253
|
+
fr = RelatonBib::LocalizedString.new pub_id(s.rel.to_s)
|
151
254
|
bib = W3cBibliographicItem.new formattedref: fr
|
152
|
-
RelatonBib::
|
255
|
+
desc = RelatonBib::FormattedString.new content: "Editor's draft"
|
256
|
+
RelatonBib::DocumentRelation.new(
|
257
|
+
type: "hasDraft", description: desc, bibitem: bib,
|
258
|
+
)
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
def relation_query(predicate)
|
263
|
+
sse = SPARQL.parse(%(
|
264
|
+
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
265
|
+
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
266
|
+
PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
267
|
+
SELECT ?rel
|
268
|
+
WHERE { <#{@sol.link}> #{predicate} ?rel . }
|
269
|
+
))
|
270
|
+
@fetcher.data.query(sse).order_by(:rel)
|
271
|
+
end
|
272
|
+
|
273
|
+
def document_versions
|
274
|
+
sse = SPARQL.parse(%(
|
275
|
+
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
276
|
+
SELECT ?link
|
277
|
+
WHERE { ?link doc:versionOf <#{@sol.version_of}> }
|
278
|
+
))
|
279
|
+
@fetcher.data.query(sse).map do |r|
|
280
|
+
fref = RelatonBib::FormattedRef.new content: pub_id(r.link)
|
281
|
+
bib = W3cBibliographicItem.new formattedref: fref
|
282
|
+
RelatonBib::DocumentRelation.new(type: "hasEdition", bibitem: bib)
|
153
283
|
end
|
154
284
|
end
|
155
285
|
|
286
|
+
#
|
287
|
+
# Parse formattedref
|
288
|
+
#
|
289
|
+
# @return [RelatonBib::FormattedRef] formattedref
|
290
|
+
#
|
291
|
+
def parse_formattedref
|
292
|
+
return if @sol.respond_to?(:link)
|
293
|
+
|
294
|
+
RelatonBib::FormattedRef.new(content: pub_id(@sol.version_of))
|
295
|
+
end
|
296
|
+
|
156
297
|
#
|
157
298
|
# Parse contributor
|
158
299
|
#
|
159
300
|
# @return [Array<RelatonBib::ContributionInfo>] contributor
|
160
301
|
#
|
161
302
|
def parse_contrib # rubocop:disable Metrics/MethodLength
|
303
|
+
return [] unless @sol.respond_to?(:link)
|
304
|
+
|
162
305
|
sse = SPARQL.parse(%(
|
163
306
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
164
307
|
PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
|
@@ -181,6 +324,8 @@ module RelatonW3c
|
|
181
324
|
# @return [RelatonBib::EditorialGroup] editorialgroup
|
182
325
|
#
|
183
326
|
def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
327
|
+
return unless @sol.respond_to?(:link)
|
328
|
+
|
184
329
|
sse = SPARQL.parse(%(
|
185
330
|
PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
|
186
331
|
PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
|
data/lib/relaton_w3c/version.rb
CHANGED
@@ -5,15 +5,17 @@ require "net/http"
|
|
5
5
|
module RelatonW3c
|
6
6
|
# Class methods for searching W3C standards.
|
7
7
|
class W3cBibliography
|
8
|
-
SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/
|
8
|
+
SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/"
|
9
9
|
|
10
10
|
class << self
|
11
11
|
# @param text [String]
|
12
12
|
# @return [RelatonW3c::HitCollection]
|
13
13
|
def search(text) # rubocop:disable Metrics/MethodLength
|
14
|
-
|
15
|
-
file =
|
16
|
-
|
14
|
+
ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
|
15
|
+
file = DataIndex.create_from_repo.search(ref)
|
16
|
+
return unless file
|
17
|
+
|
18
|
+
url = "#{SOURCE}#{file}"
|
17
19
|
resp = Net::HTTP.get_response(URI.parse(url))
|
18
20
|
return unless resp.code == "200"
|
19
21
|
|
@@ -24,7 +26,7 @@ module RelatonW3c
|
|
24
26
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
25
27
|
Net::ProtocolError, Errno::ETIMEDOUT
|
26
28
|
raise RelatonBib::RequestError,
|
27
|
-
"Could not access #{
|
29
|
+
"Could not access #{url}"
|
28
30
|
end
|
29
31
|
|
30
32
|
# @param ref [String] the W3C standard Code to look up
|
@@ -39,8 +41,8 @@ module RelatonW3c
|
|
39
41
|
return
|
40
42
|
end
|
41
43
|
|
42
|
-
|
43
|
-
warn "[relaton-w3c] (\"#{ref}\") found #{
|
44
|
+
found = result.docnumber
|
45
|
+
warn "[relaton-w3c] (\"#{ref}\") found #{found}"
|
44
46
|
result
|
45
47
|
end
|
46
48
|
end
|
@@ -32,6 +32,9 @@
|
|
32
32
|
'https://www.w3.org/WAI/EO':
|
33
33
|
name: Education and Outreach Working Group
|
34
34
|
abbrev: EOWG
|
35
|
+
'https://www.w3.org/WAI/about/groups/eowg':
|
36
|
+
name: Education and Outreach Working Group
|
37
|
+
abbrev: EOWG
|
35
38
|
'https://www.w3.org/2001/sw/WebOnt':
|
36
39
|
name: Web-Ontology Working Group
|
37
40
|
'http://www.w3.org/MarkUp/Forms':
|
@@ -54,6 +57,8 @@
|
|
54
57
|
name: Web Applications Working Group
|
55
58
|
'https://www.w3.org/2008/webapps':
|
56
59
|
name: Web Applications Working Group
|
60
|
+
'https://www.w3.org/groups/wg/webapps':
|
61
|
+
name: Web Applications Working Group
|
57
62
|
'https://www.w3.org/das':
|
58
63
|
name: Devices and Sensors Working Group
|
59
64
|
abbrev: DAS WG
|
@@ -226,6 +231,8 @@
|
|
226
231
|
abbrev: ARIA WG
|
227
232
|
'https://www.w3.org/wasm':
|
228
233
|
name: WebAssembly Working Group
|
234
|
+
'https://www.w3.org/groups/wg/wasm':
|
235
|
+
name: WebAssembly Working Group
|
229
236
|
'https://www.w3.org/groups/wg/webediting':
|
230
237
|
name: Web Editing Working Group
|
231
238
|
'https://www.w3.org/2014/data-shapes':
|
data/lib/relaton_w3c.rb
CHANGED
@@ -2,13 +2,14 @@ require "relaton_bib"
|
|
2
2
|
require "relaton_w3c/version"
|
3
3
|
require "relaton_w3c/w3c_bibliography"
|
4
4
|
require "relaton_w3c/w3c_bibliographic_item"
|
5
|
-
require "relaton_w3c/hit_collection"
|
6
|
-
require "relaton_w3c/hit"
|
7
|
-
require "relaton_w3c/scrapper"
|
5
|
+
# require "relaton_w3c/hit_collection"
|
6
|
+
# require "relaton_w3c/hit"
|
7
|
+
# require "relaton_w3c/scrapper"
|
8
8
|
require "relaton_w3c/xml_parser"
|
9
9
|
require "relaton_w3c/bibxml_parser"
|
10
10
|
require "relaton_w3c/hash_converter"
|
11
11
|
require "relaton_w3c/data_fethcer"
|
12
|
+
require "relaton_w3c/data_index"
|
12
13
|
|
13
14
|
module RelatonW3c
|
14
15
|
class Error < StandardError; end
|
data/relaton_w3c.gemspec
CHANGED
@@ -39,7 +39,10 @@ Gem::Specification.new do |spec|
|
|
39
39
|
|
40
40
|
spec.add_dependency "linkeddata", "~> 3.1.0"
|
41
41
|
spec.add_dependency "mechanize", "~> 2.8.0"
|
42
|
+
# spec.add_dependency "picky"
|
42
43
|
spec.add_dependency "rdf", "~> 3.1.0"
|
44
|
+
spec.add_dependency "rdf-normalize", "~> 0.4.0"
|
43
45
|
spec.add_dependency "relaton-bib", "~> 1.11.0"
|
46
|
+
spec.add_dependency "shex", "~> 0.6.0"
|
44
47
|
spec.add_dependency "sparql", "~> 3.1.0"
|
45
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-w3c
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.11.
|
4
|
+
version: 1.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: equivalent-xml
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: 3.1.0
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rdf-normalize
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.4.0
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.4.0
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: relaton-bib
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,6 +150,20 @@ dependencies:
|
|
136
150
|
- - "~>"
|
137
151
|
- !ruby/object:Gem::Version
|
138
152
|
version: 1.11.0
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: shex
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 0.6.0
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 0.6.0
|
139
167
|
- !ruby/object:Gem::Dependency
|
140
168
|
name: sparql
|
141
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -202,6 +230,7 @@ files:
|
|
202
230
|
- lib/relaton_w3c.rb
|
203
231
|
- lib/relaton_w3c/bibxml_parser.rb
|
204
232
|
- lib/relaton_w3c/data_fethcer.rb
|
233
|
+
- lib/relaton_w3c/data_index.rb
|
205
234
|
- lib/relaton_w3c/data_parser.rb
|
206
235
|
- lib/relaton_w3c/hash_converter.rb
|
207
236
|
- lib/relaton_w3c/hit.rb
|