relaton-w3c 1.9.0 → 1.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/README.adoc +89 -62
- data/grammars/isodoc.rng +2106 -0
- data/grammars/reqt.rng +194 -0
- data/lib/relaton_w3c/data_fethcer.rb +106 -0
- data/lib/relaton_w3c/data_parser.rb +205 -0
- data/lib/relaton_w3c/hit_collection.rb +1 -1
- data/lib/relaton_w3c/processor.rb +14 -1
- data/lib/relaton_w3c/scrapper.rb +10 -10
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliography.rb +19 -6
- data/lib/relaton_w3c/workgroups.yaml +339 -0
- data/lib/relaton_w3c.rb +1 -0
- data/relaton_w3c.gemspec +4 -0
- metadata +63 -2
data/grammars/reqt.rng
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
3
|
+
<!--
|
4
|
+
Presupposes isodoc.rnc, is included in it
|
5
|
+
include "isodoc.rnc" { }
|
6
|
+
-->
|
7
|
+
<define name="requirement">
|
8
|
+
<element name="requirement">
|
9
|
+
<ref name="RequirementType"/>
|
10
|
+
</element>
|
11
|
+
</define>
|
12
|
+
<define name="recommendation">
|
13
|
+
<element name="recommendation">
|
14
|
+
<ref name="RequirementType"/>
|
15
|
+
</element>
|
16
|
+
</define>
|
17
|
+
<define name="permission">
|
18
|
+
<element name="permission">
|
19
|
+
<ref name="RequirementType"/>
|
20
|
+
</element>
|
21
|
+
</define>
|
22
|
+
<define name="RequirementType">
|
23
|
+
<optional>
|
24
|
+
<attribute name="obligation">
|
25
|
+
<ref name="ObligationType"/>
|
26
|
+
</attribute>
|
27
|
+
</optional>
|
28
|
+
<optional>
|
29
|
+
<attribute name="unnumbered">
|
30
|
+
<data type="boolean"/>
|
31
|
+
</attribute>
|
32
|
+
</optional>
|
33
|
+
<optional>
|
34
|
+
<attribute name="number"/>
|
35
|
+
</optional>
|
36
|
+
<optional>
|
37
|
+
<attribute name="subsequence"/>
|
38
|
+
</optional>
|
39
|
+
<optional>
|
40
|
+
<attribute name="keep-with-next">
|
41
|
+
<data type="boolean"/>
|
42
|
+
</attribute>
|
43
|
+
</optional>
|
44
|
+
<optional>
|
45
|
+
<attribute name="keep-lines-together">
|
46
|
+
<data type="boolean"/>
|
47
|
+
</attribute>
|
48
|
+
</optional>
|
49
|
+
<attribute name="id">
|
50
|
+
<data type="ID"/>
|
51
|
+
</attribute>
|
52
|
+
<optional>
|
53
|
+
<attribute name="filename"/>
|
54
|
+
</optional>
|
55
|
+
<optional>
|
56
|
+
<attribute name="model"/>
|
57
|
+
</optional>
|
58
|
+
<optional>
|
59
|
+
<attribute name="type"/>
|
60
|
+
</optional>
|
61
|
+
<optional>
|
62
|
+
<ref name="reqtitle"/>
|
63
|
+
</optional>
|
64
|
+
<optional>
|
65
|
+
<ref name="label"/>
|
66
|
+
</optional>
|
67
|
+
<optional>
|
68
|
+
<ref name="subject"/>
|
69
|
+
</optional>
|
70
|
+
<zeroOrMore>
|
71
|
+
<ref name="reqinherit"/>
|
72
|
+
</zeroOrMore>
|
73
|
+
<zeroOrMore>
|
74
|
+
<ref name="classification"/>
|
75
|
+
</zeroOrMore>
|
76
|
+
<zeroOrMore>
|
77
|
+
<choice>
|
78
|
+
<ref name="measurementtarget"/>
|
79
|
+
<ref name="specification"/>
|
80
|
+
<ref name="verification"/>
|
81
|
+
<ref name="import"/>
|
82
|
+
<ref name="description"/>
|
83
|
+
</choice>
|
84
|
+
</zeroOrMore>
|
85
|
+
<optional>
|
86
|
+
<ref name="reqt_references"/>
|
87
|
+
</optional>
|
88
|
+
<zeroOrMore>
|
89
|
+
<choice>
|
90
|
+
<ref name="requirement"/>
|
91
|
+
<ref name="recommendation"/>
|
92
|
+
<ref name="permission"/>
|
93
|
+
</choice>
|
94
|
+
</zeroOrMore>
|
95
|
+
</define>
|
96
|
+
<define name="reqtitle">
|
97
|
+
<element name="title">
|
98
|
+
<ref name="FormattedString"/>
|
99
|
+
</element>
|
100
|
+
</define>
|
101
|
+
<define name="label">
|
102
|
+
<element name="label">
|
103
|
+
<text/>
|
104
|
+
</element>
|
105
|
+
</define>
|
106
|
+
<define name="subject">
|
107
|
+
<element name="subject">
|
108
|
+
<text/>
|
109
|
+
</element>
|
110
|
+
</define>
|
111
|
+
<define name="reqinherit">
|
112
|
+
<element name="inherit">
|
113
|
+
<text/>
|
114
|
+
</element>
|
115
|
+
</define>
|
116
|
+
<define name="measurementtarget">
|
117
|
+
<element name="measurement-target">
|
118
|
+
<ref name="RequirementSubpart"/>
|
119
|
+
</element>
|
120
|
+
</define>
|
121
|
+
<define name="specification">
|
122
|
+
<element name="specification">
|
123
|
+
<ref name="RequirementSubpart"/>
|
124
|
+
</element>
|
125
|
+
</define>
|
126
|
+
<define name="verification">
|
127
|
+
<element name="verification">
|
128
|
+
<ref name="RequirementSubpart"/>
|
129
|
+
</element>
|
130
|
+
</define>
|
131
|
+
<define name="import">
|
132
|
+
<element name="import">
|
133
|
+
<ref name="RequirementSubpart"/>
|
134
|
+
</element>
|
135
|
+
</define>
|
136
|
+
<define name="description">
|
137
|
+
<element name="description">
|
138
|
+
<ref name="RequirementSubpart"/>
|
139
|
+
</element>
|
140
|
+
</define>
|
141
|
+
<define name="reqt_references">
|
142
|
+
<element name="references">
|
143
|
+
<oneOrMore>
|
144
|
+
<ref name="bibitem"/>
|
145
|
+
</oneOrMore>
|
146
|
+
</element>
|
147
|
+
</define>
|
148
|
+
<define name="RequirementSubpart">
|
149
|
+
<optional>
|
150
|
+
<attribute name="type"/>
|
151
|
+
</optional>
|
152
|
+
<optional>
|
153
|
+
<attribute name="exclude">
|
154
|
+
<data type="boolean"/>
|
155
|
+
</attribute>
|
156
|
+
</optional>
|
157
|
+
<optional>
|
158
|
+
<attribute name="keep-with-next">
|
159
|
+
<data type="boolean"/>
|
160
|
+
</attribute>
|
161
|
+
</optional>
|
162
|
+
<optional>
|
163
|
+
<attribute name="keep-lines-together">
|
164
|
+
<data type="boolean"/>
|
165
|
+
</attribute>
|
166
|
+
</optional>
|
167
|
+
<oneOrMore>
|
168
|
+
<ref name="BasicBlock"/>
|
169
|
+
</oneOrMore>
|
170
|
+
</define>
|
171
|
+
<define name="ObligationType">
|
172
|
+
<choice>
|
173
|
+
<value>requirement</value>
|
174
|
+
<value>recommendation</value>
|
175
|
+
<value>permission</value>
|
176
|
+
</choice>
|
177
|
+
</define>
|
178
|
+
<define name="classification">
|
179
|
+
<element name="classification">
|
180
|
+
<ref name="classification_tag"/>
|
181
|
+
<ref name="classification_value"/>
|
182
|
+
</element>
|
183
|
+
</define>
|
184
|
+
<define name="classification_tag">
|
185
|
+
<element name="tag">
|
186
|
+
<text/>
|
187
|
+
</element>
|
188
|
+
</define>
|
189
|
+
<define name="classification_value">
|
190
|
+
<element name="value">
|
191
|
+
<text/>
|
192
|
+
</element>
|
193
|
+
</define>
|
194
|
+
</grammar>
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require "rdf"
|
2
|
+
require "linkeddata"
|
3
|
+
require "sparql"
|
4
|
+
require "mechanize"
|
5
|
+
require "relaton_w3c/data_parser"
|
6
|
+
|
7
|
+
module RelatonW3c
  #
  # Loads the W3C technical-report RDF index, converts every supported entry
  # with DataParser and writes each resulting document to its own file.
  #
  class DataFetcher
    # Document type codes that DataParser#parse accepts; entries of any other
    # type are skipped.
    USED_TYPES = %w[WD NOTE PER PR REC CR].freeze

    # @return [RDF::Repository] data loaded from the RDF index
    # @return [Hash] content of workgroups.yaml, looked up by group home page
    attr_reader :data, :group_names

    #
    # Data fetcher initializer
    #
    # @param [String] output directory to save files
    # @param [String] format format of output files (xml, yaml, bibxml)
    #
    def initialize(output, format)
      @output = output
      @format = format
      # a leading "bib" is dropped, so "bibxml" files get the "xml" extension
      @ext = format.sub(/^bib/, "")
      @group_names = YAML.load_file(File.join(__dir__, "workgroups.yaml"))
      @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
      # names of the files written so far, used to report duplicates
      @files = []
    end

    #
    # Initialize fetcher and run fetch
    #
    # @param [String] output directory to save files, default: "data"
    # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
    #
    def self.fetch(output: "data", format: "yaml")
      t1 = Time.now
      puts "Started at: #{t1}"
      # mkdir_p is a no-op when the directory already exists
      FileUtils.mkdir_p output
      new(output, format).fetch
      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    end

    #
    # Parse documents
    #
    def fetch
      query.each { |sl| save_doc DataParser.parse(sl, self) }
    end

    #
    # Query RDF source for documents
    #
    # @return [RDF::Query::Solutions] query results
    #
    def query # rubocop:disable Metrics/MethodLength
      sse = SPARQL.parse(%(
        PREFIX : <http://www.w3.org/2001/02pd/rec54#>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
        # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        SELECT ?link ?title ?date
        WHERE {
          ?link dc:title ?title ; dc:date ?date . # ; doc:versionOf ?version_of .
        }
      ))
      data.query sse
    end

    #
    # Save document to file
    #
    # Nothing is written when `bib` is nil. When a file with the same name was
    # already written during this run a warning is printed and the file is
    # overwritten.
    #
    # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
    #
    def save_doc(bib) # rubocop:disable Metrics/MethodLength
      return unless bib

      content = case @format
                when "xml" then bib.to_xml(bibdata: true)
                when "yaml" then bib.to_hash.to_yaml
                # any other format maps to a public `to_<format>` serializer
                else bib.public_send("to_#{@format}")
                end
      file = file_name(bib)
      if @files.include? file
        warn "File #{file} already exists. Document: #{bib.docnumber}"
      else
        @files << file
      end
      File.write file, content, encoding: "UTF-8"
    end

    #
    # Generate file name
    #
    # Whitespace, commas, colons and slashes in the document number become
    # underscores, runs of underscores are collapsed and the result is
    # upper-cased.
    #
    # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
    #
    # @return [String] file name
    #
    def file_name(bib)
      name = bib.docnumber.gsub(%r{[\s,:/]}, "_").squeeze("_").upcase
      File.join @output, "#{name}.#{@ext}"
    end
  end
end
|
@@ -0,0 +1,205 @@
|
|
1
|
+
module RelatonW3c
  #
  # Converts one entry of the W3C technical-report RDF index into a
  # bibliographic item.
  #
  class DataParser
    #
    # Document parser initialization
    #
    # @param [RDF::Query::Solution] sol entry from the SPARQL query
    # @param [RelatonW3c::DataFetcher] fetcher data fetcher
    #
    def initialize(sol, fetcher)
      @sol = sol
      @fetcher = fetcher
    end

    #
    # Initialize document parser and run it
    #
    # @param [RDF::Query::Solution] sol entry from the SPARQL query
    # @param [RelatonW3c::DataFetcher] fetcher data fetcher
    #
    # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
    #
    def self.parse(sol, fetcher)
      new(sol, fetcher).parse
    end

    #
    # Parse document
    #
    # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item, or
    #   nil when the document type is not listed in the fetcher's USED_TYPES
    #
    def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      return unless @fetcher.class::USED_TYPES.include? type

      RelatonW3c::W3cBibliographicItem.new(
        type: "standard",
        doctype: parse_doctype,
        fetched: Date.today.to_s,
        language: ["en"],
        script: ["Latn"],
        title: parse_title,
        link: parse_link,
        docid: parse_docid,
        docnumber: identifier(@sol.link.to_s),
        series: parse_series,
        date: parse_date,
        relation: parse_relation,
        contributor: parse_contrib,
        editorialgroup: parse_editorialgroup,
      )
    end

    #
    # Parse title
    #
    # @return [RelatonBib::TypedTitleStringCollection] title
    #
    def parse_title
      t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
      RelatonBib::TypedTitleStringCollection.new [t]
    end

    #
    # Parse link
    #
    # @return [Array<RelatonBib::TypedUri>] link
    #
    def parse_link
      [RelatonBib::TypedUri.new(type: "src", content: @sol.link.to_s)]
    end

    #
    # Parse docidentifier
    #
    # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
    #
    def parse_docid
      id = pub_id(@sol.link.to_s)
      [RelatonBib::DocumentIdentifier.new(type: "W3C", id: id)]
    end

    #
    # Generate PubID
    #
    # @param [String] url url
    #
    # @return [String] PubID
    #
    def pub_id(url)
      "W3C #{identifier(url)}"
    end

    #
    # Extract the document identifier from a URL
    #
    # The identifier is a hyphenated path segment, optionally followed by one
    # more path segment. When the URL contains no such segment the last path
    # segment is returned instead of raising, and an empty string is returned
    # for an empty URL.
    #
    # @param [String] url url
    #
    # @return [String] identifier
    #
    def identifier(url)
      link = url.to_s
      link[%r{.+/(\w+(?:-[\w.]+)+(?:/\w+)?)}, 1] || link.split("/").last.to_s
    end

    #
    # Parse series
    #
    # @return [Array<RelatonBib::Series>] series
    #
    def parse_series
      title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
      [RelatonBib::Series.new(title: title, number: identifier(@sol.link.to_s))]
    end

    #
    # Look up the document type code
    #
    # Queries the rdf:type values of the document and keeps the first one whose
    # fragment (the part after "#") is a key of Scrapper::DOCTYPES. A found
    # type is memoized.
    #
    # @return [String, nil] type code, nil when no known type is found
    #
    def type # rubocop:disable Metrics/MethodLength
      @type ||= begin
        sse = SPARQL.parse(%(
          PREFIX : <http://www.w3.org/2001/02pd/rec54#>
          PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
          SELECT ?type
          WHERE {
            { <#{@sol.link}> rdf:type ?type }
          }
        ))
        tps = @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
        tps.find { |t| Scrapper::DOCTYPES.key?(t) }
      end
    end

    #
    # Parse doctype
    #
    # @return [String] doctype
    #
    def parse_doctype
      Scrapper::DOCTYPES[type]
    end

    #
    # Parse date
    #
    # @return [Array<RelatonBib::BibliographicDate>] publication date
    #
    def parse_date
      [RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
    end

    #
    # Parse relation
    #
    # @return [Array<RelatonBib::DocumentRelation>] relation
    #
    def parse_relation # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      sse = SPARQL.parse(%(
        PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
        SELECT ?obsoletes
        WHERE {
          VALUES ?p { doc:obsoletes }
          { <#{@sol.link}> ?p ?obsoletes }
        }
      ))
      @fetcher.data.query(sse).order_by(:obsoletes).map do |r|
        # the first binding supplies both the relation type (its variable
        # name) and the URL of the related document
        tp, url = r.to_h.first
        fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
        bib = W3cBibliographicItem.new formattedref: fr
        RelatonBib::DocumentRelation.new(type: tp.to_s, bibitem: bib)
      end
    end

    #
    # Parse contributor
    #
    # @return [Array<RelatonBib::ContributionInfo>] contributor
    #
    def parse_contrib # rubocop:disable Metrics/MethodLength
      sse = SPARQL.parse(%(
        PREFIX : <http://www.w3.org/2001/02pd/rec54#>
        PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
        SELECT ?full_name
        WHERE {
          <#{@sol.link}> :editor/contact:fullName ?full_name
        }
      ))
      @fetcher.data.query(sse).order_by(:full_name).map do |ed|
        cn = RelatonBib::LocalizedString.new(ed.full_name.to_s, "en", "Latn")
        name = RelatonBib::FullName.new completename: cn
        person = RelatonBib::Person.new name: name
        RelatonBib::ContributionInfo.new entity: person, role: [type: "editor"]
      end
    end

    #
    # Parse editorialgroup
    #
    # Group home pages without an entry in the fetcher's group names are
    # reported with a warning and left out.
    #
    # @return [RelatonBib::EditorialGroup] editorialgroup
    #
    def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      sse = SPARQL.parse(%(
        PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
        PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
        SELECT ?home_page
        WHERE {
          <#{@sol.link}> org:deliveredBy/contact:homePage ?home_page
        }
      ))
      res = @fetcher.data.query(sse).order_by(:home_page)
      tc = res.each_with_object([]) do |edg, obj|
        # group names are looked up by home page without the trailing slash
        wg = @fetcher.group_names[edg.home_page.to_s.sub(%r{/$}, "")]
        if wg
          rwg = RelatonBib::WorkGroup.new name: wg["name"]
          obj << RelatonBib::TechnicalCommittee.new(rwg)
        else
          warn "Working group name not found for #{edg.home_page}"
        end
      end
      RelatonBib::EditorialGroup.new tc
    end
  end
end
|
@@ -60,7 +60,7 @@ module RelatonW3c
|
|
60
60
|
# @param date [String]
|
61
61
|
# @return [TrueClass, FalseClass]
|
62
62
|
def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
|
63
|
-
if type && hit["type"] != short_type(type) || date && hit["date"] != date
|
63
|
+
if (type && hit["type"] != short_type(type)) || (date && hit["date"] != date)
|
64
64
|
history = get_history hit, type, date
|
65
65
|
return false unless history.any?
|
66
66
|
|
@@ -4,11 +4,12 @@ module RelatonW3c
|
|
4
4
|
class Processor < Relaton::Processor
|
5
5
|
attr_reader :idtype
|
6
6
|
|
7
|
-
def initialize
|
7
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
8
8
|
@short = :relaton_w3c
|
9
9
|
@prefix = "W3C"
|
10
10
|
@defaultprefix = %r{^W3C\s}
|
11
11
|
@idtype = "W3C"
|
12
|
+
@datasets = %w[w3c-rdf]
|
12
13
|
end
|
13
14
|
|
14
15
|
# @param code [String]
|
@@ -19,6 +20,18 @@ module RelatonW3c
|
|
19
20
|
::RelatonW3c::W3cBibliography.get(code, date, opts)
|
20
21
|
end
|
21
22
|
|
23
|
+
#
|
24
|
+
# Fetch all the documents from http://www.w3.org/2002/01/tr-automation/tr.rdf
|
25
|
+
#
|
26
|
+
# @param [String] _source source name
|
27
|
+
# @param [Hash] opts
|
28
|
+
# @option opts [String] :output directory to output documents
|
29
|
+
# @option opts [String] :format
|
30
|
+
#
|
31
|
+
def fetch_data(_source, opts)
|
32
|
+
DataFetcher.fetch(**opts)
|
33
|
+
end
|
34
|
+
|
22
35
|
# @param xml [String]
|
23
36
|
# @return [RelatonCalconnect::CcBibliographicItem]
|
24
37
|
def from_xml(xml)
|
data/lib/relaton_w3c/scrapper.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
module RelatonW3c
|
2
2
|
class Scrapper
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
}.freeze
|
3
|
+
DOCTYPES = {
|
4
|
+
"CR" => "candidateRecommendation",
|
5
|
+
"NOTE" => "groupNote",
|
6
|
+
"PER" => "proposedEditedRecommendation",
|
7
|
+
"PR" => "proposedRecommendation",
|
8
|
+
"REC" => "recommendation",
|
9
|
+
"RET" => "retired",
|
10
|
+
"WD" => "workingDraft",
|
11
|
+
}.freeze
|
13
12
|
|
13
|
+
class << self
|
14
14
|
# @param hit [Hash]
|
15
15
|
# @return [RelatonW3c::W3cBibliographicItem]
|
16
16
|
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
data/lib/relaton_w3c/version.rb
CHANGED
@@ -5,11 +5,21 @@ require "net/http"
|
|
5
5
|
module RelatonW3c
|
6
6
|
# Class methods for search W3C standards.
|
7
7
|
class W3cBibliography
|
8
|
+
SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/data/"
|
9
|
+
|
8
10
|
class << self
|
9
11
|
# @param text [String]
|
10
12
|
# @return [RelatonW3c::HitCollection]
|
11
|
-
def search(text)
|
12
|
-
HitCollection.new text
|
13
|
+
def search(text) # rubocop:disable Metrics/MethodLength
|
14
|
+
# HitCollection.new text
|
15
|
+
file = text.sub(/^W3C\s/, "").gsub(/[\s,:\/]/, "_").squeeze("_").upcase
|
16
|
+
url = "#{SOURCE}#{file}.yaml"
|
17
|
+
resp = Net::HTTP.get_response(URI.parse(url))
|
18
|
+
return unless resp.code == "200"
|
19
|
+
|
20
|
+
hash = YAML.safe_load resp.body
|
21
|
+
item_hash = ::RelatonW3c::HashConverter.hash_to_bib(hash)
|
22
|
+
::RelatonW3c::W3cBibliographicItem.new(**item_hash)
|
13
23
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
14
24
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
15
25
|
Net::ProtocolError, Errno::ETIMEDOUT
|
@@ -24,11 +34,14 @@ module RelatonW3c
|
|
24
34
|
def get(ref, _year = nil, _opts = {})
|
25
35
|
warn "[relaton-w3c] (\"#{ref}\") fetching..."
|
26
36
|
result = search(ref)
|
27
|
-
|
37
|
+
unless result
|
38
|
+
warn "[relaton-w3c] (\"#{ref}\") not found."
|
39
|
+
return
|
40
|
+
end
|
28
41
|
|
29
|
-
ret = result.first.fetch
|
30
|
-
warn "[relaton-w3c] (\"#{ref}\") found #{
|
31
|
-
|
42
|
+
# ret = result.first.fetch
|
43
|
+
warn "[relaton-w3c] (\"#{ref}\") found #{result.title.first.title.content}"
|
44
|
+
result
|
32
45
|
end
|
33
46
|
end
|
34
47
|
end
|