relaton-w3c 1.10.1 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/data/reference.W3C.DSig-label.xml +32 -32
- data/data/reference.W3C.P3P-rdfschema.xml +26 -26
- data/data/reference.W3C.P3P.xml +38 -38
- data/data/reference.W3C.PICS-labels.xml +43 -43
- data/data/reference.W3C.PICS-rules.xml +38 -38
- data/data/reference.W3C.PICS-services.xml +37 -37
- data/data/reference.W3C.daml-oil-reference.xml +39 -39
- data/data/reference.W3C.soap11.xml +56 -56
- data/data/reference.W3C.soap12-part1.xml +38 -38
- data/data/reference.W3C.soap12-part2.xml +38 -38
- data/data/reference.W3C.xkms.xml +50 -50
- data/data/reference.W3C.xml-c14n.xml +15 -15
- data/data/reference.W3C.xmldsig-core.xml +26 -26
- data/data/reference.W3C.xmlenc-core.xml +20 -20
- data/data/reference.W3C.xpath.xml +22 -22
- data/grammars/biblio.rng +24 -1
- data/grammars/isodoc.rng +73 -3
- data/lib/relaton_w3c/bibxml_parser.rb +7 -0
- data/lib/relaton_w3c/data_fetcher.rb +188 -0
- data/lib/relaton_w3c/data_index.rb +143 -0
- data/lib/relaton_w3c/data_parser.rb +171 -26
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliography.rb +9 -7
- data/lib/relaton_w3c/workgroups.yaml +7 -0
- data/lib/relaton_w3c.rb +5 -4
- data/relaton_w3c.gemspec +4 -1
- metadata +35 -6
- data/lib/relaton_w3c/data_fethcer.rb +0 -110
data/grammars/isodoc.rng
CHANGED
@@ -152,9 +152,7 @@
|
|
152
152
|
<data type="boolean"/>
|
153
153
|
</attribute>
|
154
154
|
</optional>
|
155
|
-
<oneOrMore>
|
156
|
-
<ref name="PureTextElement"/>
|
157
|
-
</oneOrMore>
|
155
|
+
<ref name="XrefBody"/>
|
158
156
|
</element>
|
159
157
|
</define>
|
160
158
|
<define name="erefType">
|
@@ -188,6 +186,42 @@
|
|
188
186
|
<ref name="PureTextElement"/>
|
189
187
|
</oneOrMore>
|
190
188
|
</define>
|
189
|
+
<define name="localityStack">
|
190
|
+
<element name="localityStack">
|
191
|
+
<optional>
|
192
|
+
<attribute name="connective">
|
193
|
+
<choice>
|
194
|
+
<value>and</value>
|
195
|
+
<value>or</value>
|
196
|
+
<value>from</value>
|
197
|
+
<value>to</value>
|
198
|
+
<value/>
|
199
|
+
</choice>
|
200
|
+
</attribute>
|
201
|
+
</optional>
|
202
|
+
<zeroOrMore>
|
203
|
+
<ref name="locality"/>
|
204
|
+
</zeroOrMore>
|
205
|
+
</element>
|
206
|
+
</define>
|
207
|
+
<define name="sourceLocalityStack">
|
208
|
+
<element name="sourceLocalityStack">
|
209
|
+
<optional>
|
210
|
+
<attribute name="connective">
|
211
|
+
<choice>
|
212
|
+
<value>and</value>
|
213
|
+
<value>or</value>
|
214
|
+
<value>from</value>
|
215
|
+
<value>to</value>
|
216
|
+
<value/>
|
217
|
+
</choice>
|
218
|
+
</attribute>
|
219
|
+
</optional>
|
220
|
+
<zeroOrMore>
|
221
|
+
<ref name="sourceLocality"/>
|
222
|
+
</zeroOrMore>
|
223
|
+
</element>
|
224
|
+
</define>
|
191
225
|
<define name="ul">
|
192
226
|
<element name="ul">
|
193
227
|
<attribute name="id">
|
@@ -1098,6 +1132,16 @@
|
|
1098
1132
|
</define>
|
1099
1133
|
</include>
|
1100
1134
|
<!-- end overrides -->
|
1135
|
+
<define name="image" combine="choice">
|
1136
|
+
<element name="svg">
|
1137
|
+
<oneOrMore>
|
1138
|
+
<choice>
|
1139
|
+
<text/>
|
1140
|
+
<ref name="AnyElement"/>
|
1141
|
+
</choice>
|
1142
|
+
</oneOrMore>
|
1143
|
+
</element>
|
1144
|
+
</define>
|
1101
1145
|
<define name="MultilingualRenderingType">
|
1102
1146
|
<choice>
|
1103
1147
|
<value>common</value>
|
@@ -2631,4 +2675,30 @@
|
|
2631
2675
|
</zeroOrMore>
|
2632
2676
|
</element>
|
2633
2677
|
</define>
|
2678
|
+
<define name="XrefBody">
|
2679
|
+
<zeroOrMore>
|
2680
|
+
<ref name="XrefTarget"/>
|
2681
|
+
</zeroOrMore>
|
2682
|
+
<oneOrMore>
|
2683
|
+
<ref name="PureTextElement"/>
|
2684
|
+
</oneOrMore>
|
2685
|
+
</define>
|
2686
|
+
<define name="XrefTarget">
|
2687
|
+
<element name="location">
|
2688
|
+
<attribute name="target">
|
2689
|
+
<data type="string">
|
2690
|
+
<param name="pattern">\i\c*|\c+#\c+</param>
|
2691
|
+
</data>
|
2692
|
+
</attribute>
|
2693
|
+
<attribute name="connective">
|
2694
|
+
<choice>
|
2695
|
+
<value>and</value>
|
2696
|
+
<value>or</value>
|
2697
|
+
<value>from</value>
|
2698
|
+
<value>to</value>
|
2699
|
+
<value/>
|
2700
|
+
</choice>
|
2701
|
+
</attribute>
|
2702
|
+
</element>
|
2703
|
+
</define>
|
2634
2704
|
</grammar>
|
@@ -13,5 +13,12 @@ module RelatonW3c
|
|
13
13
|
def pubid_type(_)
|
14
14
|
"W3C"
|
15
15
|
end
|
16
|
+
|
17
|
+
#
# Build the document identifiers for a BibXML reference.
#
# Takes the identifiers produced by the parent implementation (both arguments
# are forwarded implicitly by the bare `super`), drops every identifier flagged
# as primary and puts a primary "W3C" identifier at the front. That identifier
# is built from the last path segment of `reference[:target]`.
#
# @param [Object] reference reference node; must respond to `[:target]` with a String
# @param [Object] ver version, only used by the parent implementation
#
# @return [Array<RelatonBib::DocumentIdentifier>] document identifiers
#
def docids(reference, ver)
  ids = super
  # Explicit parentheses avoid Ruby's "`&' interpreted as argument prefix" warning.
  ids.reject!(&:primary)
  id = "W3C #{reference[:target].split('/').last}"
  ids.unshift RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)
end
|
16
23
|
end
|
17
24
|
end
|
@@ -0,0 +1,188 @@
|
|
1
|
+
require "rdf"
|
2
|
+
require "linkeddata"
|
3
|
+
require "sparql"
|
4
|
+
require "mechanize"
|
5
|
+
require "relaton_w3c/data_parser"
|
6
|
+
|
7
|
+
module RelatonW3c
|
8
|
+
class DataFetcher
|
9
|
+
attr_reader :data, :group_names
|
10
|
+
|
11
|
+
#
|
12
|
+
# Data fetcher initializer
|
13
|
+
#
|
14
|
+
# @param [String] output directory to save files
|
15
|
+
# @param [String] format format of output files (xml, yaml, bibxml)
|
16
|
+
#
|
17
|
+
def initialize(output, format)
  @output = output
  @format = format
  # File extension is the format without a leading "bib" ("bibxml" -> "xml").
  # \A anchors at the start of the string rather than at the start of any line.
  @ext = format.sub(/\Abib/, "")
  # workgroups.yaml is looked up in the directory of this source file.
  @group_names = YAML.load_file(File.join(__dir__, "workgroups.yaml"))
  @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
  # Paths written so far; used by save_doc to detect duplicates.
  @files = []
  @index = DataIndex.new
end
|
27
|
+
|
28
|
+
#
|
29
|
+
# Initialize fetcher and run fetch
|
30
|
+
#
|
31
|
+
# @param [String] output directory to save files, default: "data"
|
32
|
+
# @param [String] format format of output files (xml, yaml, bibxml), default: yaml
|
33
|
+
#
|
34
|
+
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  # Elapsed time is measured on the monotonic clock so wall-clock adjustments
  # during a long run cannot skew the reported duration.
  clock_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts "Started at: #{started_at}"
  # mkdir_p is a no-op for an existing directory, so no existence check is needed.
  FileUtils.mkdir_p output
  new(output, format).fetch
  puts "Stopped at: #{Time.now}"
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - clock_start
  puts "Done in: #{elapsed.round} sec."
end
|
43
|
+
|
44
|
+
#
|
45
|
+
# Parse documents
|
46
|
+
#
|
47
|
+
def fetch # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  # Pass 1: documents returned by the versioned query.
  # A failure on one document is reported and does not stop the run.
  query_versioned_docs.each do |sl|
    save_doc DataParser.parse(sl, self)
  rescue StandardError => e
    warn "Error: document #{sl.link} #{e.message}"
    warn e.backtrace.join("\n")
  end
  # Pass 2: one entry per distinct `version_of` value.
  query_unversioned_docs.each do |sl|
    save_doc DataParser.parse(sl, self)
  rescue StandardError => e
    warn "Error: document #{sl.version_of} #{e.message}"
    warn e.backtrace.join("\n")
  end
  # Pass 3: BibXML files found in the data directory two levels above this file.
  # Sorted so the processing order (and therefore which duplicate is kept by
  # save_doc) does not depend on the file system's listing order.
  Dir[File.expand_path("../../data/*", __dir__)].sort.each do |file|
    xml = File.read file, encoding: "UTF-8"
    save_doc BibXMLParser.parse(xml), warn_duplicate: false
  rescue StandardError => e
    warn "Error: document #{file} #{e.message}"
    warn e.backtrace.join("\n")
  end
  @index.sort!.save
end
|
69
|
+
|
70
|
+
#
|
71
|
+
# Create index file
|
72
|
+
#
|
73
|
+
# def create_index
|
74
|
+
# index_file = "index-w3c.yaml"
|
75
|
+
# index_yaml = @index.sort do |a, b|
|
76
|
+
# compare_index_items a, b
|
77
|
+
# end.to_yaml
|
78
|
+
# File.write index_file, index_yaml, encoding: "UTF-8"
|
79
|
+
# end
|
80
|
+
|
81
|
+
#
|
82
|
+
# Compare index items
|
83
|
+
#
|
84
|
+
# @param [Hash] aid first item
|
85
|
+
# @param [Hash] bid second item
|
86
|
+
#
|
87
|
+
# @return [Integer] comparison result
|
88
|
+
#
|
89
|
+
# def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
|
90
|
+
# ret = aid[:code] <=> bid[:code]
|
91
|
+
# ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
|
92
|
+
# ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
|
93
|
+
# # ret = aid[:type] <=> bid[:type] if ret.zero?
|
94
|
+
# ret
|
95
|
+
# end
|
96
|
+
|
97
|
+
#
|
98
|
+
# Weight of stage
|
99
|
+
#
|
100
|
+
# @param [String, nil] stage stage
|
101
|
+
#
|
102
|
+
# @return [Integer] weight
|
103
|
+
#
|
104
|
+
# def stage_weight(stage)
|
105
|
+
# return DataParser::STAGES.size if stage.nil?
|
106
|
+
|
107
|
+
# DataParser::STAGES.keys.index(stage)
|
108
|
+
# end
|
109
|
+
|
110
|
+
#
|
111
|
+
# Weight of date
|
112
|
+
#
|
113
|
+
# @param [String] date date
|
114
|
+
#
|
115
|
+
# @return [String] weight
|
116
|
+
#
|
117
|
+
# def date_weight(date)
|
118
|
+
# return "99999999" if date.nil?
|
119
|
+
|
120
|
+
# date
|
121
|
+
# end
|
122
|
+
|
123
|
+
#
|
124
|
+
# Query RDF source for documents
|
125
|
+
#
|
126
|
+
# @return [RDF::Query::Solutions] query results
|
127
|
+
#
|
128
|
+
def query_versioned_docs # rubocop:disable Metrics/MethodLength
  # Selects every subject that has a title, a date and a doc:versionOf value.
  query = SPARQL.parse(%(
    PREFIX : <http://www.w3.org/2001/02pd/rec54#>
    PREFIX dc: <http://purl.org/dc/elements/1.1/>
    PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
    # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    SELECT ?link ?title ?date ?version_of
    WHERE {
      ?link dc:title ?title ; dc:date ?date ; doc:versionOf ?version_of .
    }
  ))
  data.query(query)
end
|
142
|
+
|
143
|
+
#
# Query RDF source for the targets of doc:versionOf statements
#
# @return [Array] query solutions, de-duplicated by their `version_of` value
#
def query_unversioned_docs
  sse = SPARQL.parse(%(
    PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
    SELECT ?version_of
    WHERE { ?x doc:versionOf ?version_of . }
  ))
  # Explicit parentheses avoid Ruby's "`&' interpreted as argument prefix" warning.
  data.query(sse).uniq(&:version_of)
end
|
151
|
+
|
152
|
+
#
|
153
|
+
# Save document to file
|
154
|
+
#
|
155
|
+
# @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
|
156
|
+
#
|
157
|
+
def save_doc(bib, warn_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  return unless bib

  file = file_name(bib.docnumber)
  # Each target file is written once per run: the first document mapped to a
  # given file name wins, later ones are skipped (optionally with a warning).
  if @files.include?(file)
    warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
    return
  end

  # Serialize only documents that are actually written.
  content = case @format
            when "xml" then bib.to_xml(bibdata: true)
            when "yaml" then bib.to_hash.to_yaml
            # Other formats map to a public `to_<format>` serializer on the item.
            else bib.public_send("to_#{@format}")
            end
  @index.add bib.docnumber, file
  @files << file
  File.write file, content, encoding: "UTF-8"
end
|
175
|
+
|
176
|
+
#
|
177
|
+
# Generate file name
|
178
|
+
#
|
179
|
+
# @param [String] id document id
|
180
|
+
#
|
181
|
+
# @return [String] file name
|
182
|
+
#
|
183
|
+
def file_name(id)
  # Drop a leading "W3C " (only at the very start of the id, hence \A), replace
  # whitespace and , : / + with "_", collapse runs of "_" and lowercase.
  name = id.sub(/\AW3C\s/, "").gsub(%r{[\s,:/+]}, "_").squeeze("_").downcase
  File.join @output, "#{name}.#{@ext}"
end
|
187
|
+
end
|
188
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
module RelatonW3c
|
2
|
+
class DataIndex
|
3
|
+
#
|
4
|
+
# Initialize data index.
|
5
|
+
#
|
6
|
+
# @param [String] index_file path to index file
|
7
|
+
# @param [Array<Hash>] index index data
|
8
|
+
#
|
9
|
+
def initialize(index_file: "index-w3c.yaml", index: [])
  # Entries are kept in the array that was passed in (no copy is made).
  @index = index
  # Path used by #save when the index is written out.
  @index_file = index_file
end
|
13
|
+
|
14
|
+
#
|
15
|
+
# Create index from a GitHub repository
|
16
|
+
#
|
17
|
+
# @return [RelatonW3c::DataIndex] data index
|
18
|
+
#
|
19
|
+
def self.create_from_repo
  resp_index = Net::HTTP.get(URI("#{W3cBibliography::SOURCE}index-w3c.yaml"))

  # Newer versions of Psych take the permitted classes as the
  # `permitted_classes:` keyword, older ones as the second positional argument.
  index = if YAML.method(:safe_load).parameters.map(&:last).include?(:permitted_classes)
            YAML.safe_load(resp_index, permitted_classes: [Symbol])
          else
            YAML.safe_load(resp_index, [Symbol])
          end

  # An empty document parses to nil/false; fall back to an empty index so that
  # later lookups operate on an Array.
  DataIndex.new index: index || []
end
|
31
|
+
|
32
|
+
#
|
33
|
+
# Add document to index
|
34
|
+
#
|
35
|
+
# @param [String] docnumber document number
|
36
|
+
# @param [String] file path to document file
|
37
|
+
#
|
38
|
+
def add(docnumber, file)
  # The stored entry is the parsed docnumber together with the file path.
  entry = docnumber_to_parts(docnumber, file)
  @index << entry
end
|
41
|
+
|
42
|
+
#
|
43
|
+
# Save index to file.
|
44
|
+
#
|
45
|
+
def save
  # The whole index is serialized to YAML and written in one go.
  yaml = @index.to_yaml
  File.write(@index_file, yaml, encoding: "UTF-8")
end
|
48
|
+
|
49
|
+
#
|
50
|
+
# Sort index
|
51
|
+
#
|
52
|
+
# @return [Array<Hash>] sorted index
|
53
|
+
#
|
54
|
+
def sort!
  # Sorts the entries in place and returns the index object itself so calls can be chained.
  tap { @index.sort! { |left, right| compare_index_items(left, right) } }
end
|
58
|
+
|
59
|
+
#
|
60
|
+
# Search filename in index
|
61
|
+
#
|
62
|
+
# @param [String] ref reference
|
63
|
+
#
|
64
|
+
# @return [String] document's filename
|
65
|
+
#
|
66
|
+
def search(ref) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  dparts = docnumber_to_parts(ref)
  # A reference that could not be parsed has no code and cannot match any entry.
  return unless dparts[:code]

  # Built once instead of once per index entry; matches entry codes that
  # start with the requested code, ignoring case.
  code_prefix = /^#{Regexp.escape dparts[:code]}/i
  # Stage, type, date and suffix only constrain the match when present in `ref`.
  @index.detect do |parts|
    parts[:code]&.match?(code_prefix) &&
      (dparts[:stage].nil? || dparts[:stage].casecmp?(parts[:stage])) &&
      (dparts[:type].nil? || dparts[:type].casecmp?(parts[:type])) &&
      (dparts[:date].nil? || dparts[:date] == parts[:date]) &&
      (dparts[:suff].nil? || dparts[:suff].casecmp?(parts[:suff]))
  end&.fetch(:file)
end
|
76
|
+
|
77
|
+
#
|
78
|
+
# Compare index items
|
79
|
+
#
|
80
|
+
# @param [Hash] aid first item
|
81
|
+
# @param [Hash] bid second item
|
82
|
+
#
|
83
|
+
# @return [Integer] comparison result
|
84
|
+
#
|
85
|
+
def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
  # Primary key: code, ascending.
  by_code = aid[:code] <=> bid[:code]
  return by_code unless by_code.zero?

  # Secondary key: stage weight, descending (operands are swapped).
  by_stage = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage])
  return by_stage unless by_stage.zero?

  # Last key: date weight, descending (operands are swapped).
  date_weight(bid[:date]) <=> date_weight(aid[:date])
end
|
92
|
+
|
93
|
+
#
|
94
|
+
# Weight of stage
|
95
|
+
#
|
96
|
+
# @param [String, nil] stage stage
|
97
|
+
#
|
98
|
+
# @return [Integer] weight
|
99
|
+
#
|
100
|
+
def stage_weight(stage)
  stages = DataParser::STAGES
  # The weight is the position of the stage among the STAGES keys. A missing
  # stage, or one that is not a STAGES key, gets the weight just past the last
  # position so the result is always an Integer and stays comparable with <=>.
  stages.keys.index(stage) || stages.size
end
|
105
|
+
|
106
|
+
#
|
107
|
+
# Weight of date
|
108
|
+
#
|
109
|
+
# @param [String] date date
|
110
|
+
#
|
111
|
+
# @return [String] weight
|
112
|
+
#
|
113
|
+
def date_weight(date)
  # A missing date is represented by the placeholder "99999999".
  date.nil? ? "99999999" : date
end
|
118
|
+
|
119
|
+
#
|
120
|
+
# Parse document number to parts
|
121
|
+
#
|
122
|
+
# @param [String] docnumber document number
|
123
|
+
# @param [String, nil] file path to document file
|
124
|
+
#
|
125
|
+
# @return [Hash{Symbol=>String}] document parts
|
126
|
+
#
|
127
|
+
def docnumber_to_parts(docnumber, file = nil) # rubocop:disable Metrics/MethodLength
  # Layout: [<stage>- | <type>-]<code>[-<date>][/<suff>], matched case-insensitively.
  matched = %r{
    ^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE))-)?
    (?<code>\w+(?:[+-][\w.]+)*?)
    (?:-(?<date>\d{8}|\d{6}))?
    (?:/(?<suff>\w+))?$
  }xi.match(docnumber)
  captures = matched ? matched.named_captures : {}

  # :code is always present (nil when the docnumber does not match);
  # every other key is only added when it has a value.
  parts = { code: captures["code"] }
  parts[:file] = file if file
  %w[stage type date suff].each do |name|
    value = captures[name]
    parts[name.to_sym] = value if value
  end
  parts
end
|
142
|
+
end
|
143
|
+
end
|