relaton-w3c 1.8.0 → 1.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.rubocop.yml +1 -1
- data/Gemfile +1 -1
- data/README.adoc +89 -62
- data/grammars/biblio.rng +1 -0
- data/grammars/isodoc.rng +2106 -0
- data/grammars/reqt.rng +194 -0
- data/lib/relaton_w3c/data_fethcer.rb +106 -0
- data/lib/relaton_w3c/data_parser.rb +205 -0
- data/lib/relaton_w3c/hash_converter.rb +1 -1
- data/lib/relaton_w3c/hit_collection.rb +8 -8
- data/lib/relaton_w3c/processor.rb +15 -2
- data/lib/relaton_w3c/scrapper.rb +17 -17
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliographic_item.rb +1 -1
- data/lib/relaton_w3c/w3c_bibliography.rb +14 -6
- data/lib/relaton_w3c/workgroups.yaml +339 -0
- data/lib/relaton_w3c/xml_parser.rb +1 -1
- data/lib/relaton_w3c.rb +1 -0
- data/relaton_w3c.gemspec +6 -4
- metadata +66 -5
data/grammars/reqt.rng
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
  <!--
    Presupposes isodoc.rnc, is included in it
    include "isodoc.rnc" { }
  -->

  <!-- The three obligation-level elements; all share the RequirementType content model. -->
  <define name="requirement">
    <element name="requirement">
      <ref name="RequirementType"/>
    </element>
  </define>
  <define name="recommendation">
    <element name="recommendation">
      <ref name="RequirementType"/>
    </element>
  </define>
  <define name="permission">
    <element name="permission">
      <ref name="RequirementType"/>
    </element>
  </define>

  <!--
    Shared content model: attributes first (only "id" is mandatory), then the
    optional descriptive children, then the sub-parts, and finally any nested
    requirement / recommendation / permission elements.
  -->
  <define name="RequirementType">
    <optional>
      <attribute name="obligation">
        <ref name="ObligationType"/>
      </attribute>
    </optional>
    <optional>
      <attribute name="unnumbered">
        <data type="boolean"/>
      </attribute>
    </optional>
    <optional>
      <attribute name="number"/>
    </optional>
    <optional>
      <attribute name="subsequence"/>
    </optional>
    <optional>
      <attribute name="keep-with-next">
        <data type="boolean"/>
      </attribute>
    </optional>
    <optional>
      <attribute name="keep-lines-together">
        <data type="boolean"/>
      </attribute>
    </optional>
    <attribute name="id">
      <data type="ID"/>
    </attribute>
    <optional>
      <attribute name="filename"/>
    </optional>
    <optional>
      <attribute name="model"/>
    </optional>
    <optional>
      <attribute name="type"/>
    </optional>
    <optional>
      <ref name="reqtitle"/>
    </optional>
    <optional>
      <ref name="label"/>
    </optional>
    <optional>
      <ref name="subject"/>
    </optional>
    <zeroOrMore>
      <ref name="reqinherit"/>
    </zeroOrMore>
    <zeroOrMore>
      <ref name="classification"/>
    </zeroOrMore>
    <!-- Sub-parts may appear any number of times and in any order. -->
    <zeroOrMore>
      <choice>
        <ref name="measurementtarget"/>
        <ref name="specification"/>
        <ref name="verification"/>
        <ref name="import"/>
        <ref name="description"/>
      </choice>
    </zeroOrMore>
    <optional>
      <ref name="reqt_references"/>
    </optional>
    <!-- Requirements nest recursively. -->
    <zeroOrMore>
      <choice>
        <ref name="requirement"/>
        <ref name="recommendation"/>
        <ref name="permission"/>
      </choice>
    </zeroOrMore>
  </define>

  <!-- Descriptive children of a requirement. -->
  <define name="reqtitle">
    <element name="title">
      <ref name="FormattedString"/>
    </element>
  </define>
  <define name="label">
    <element name="label">
      <text/>
    </element>
  </define>
  <define name="subject">
    <element name="subject">
      <text/>
    </element>
  </define>
  <define name="reqinherit">
    <element name="inherit">
      <text/>
    </element>
  </define>

  <!-- Sub-part elements; all share the RequirementSubpart content model. -->
  <define name="measurementtarget">
    <element name="measurement-target">
      <ref name="RequirementSubpart"/>
    </element>
  </define>
  <define name="specification">
    <element name="specification">
      <ref name="RequirementSubpart"/>
    </element>
  </define>
  <define name="verification">
    <element name="verification">
      <ref name="RequirementSubpart"/>
    </element>
  </define>
  <define name="import">
    <element name="import">
      <ref name="RequirementSubpart"/>
    </element>
  </define>
  <define name="description">
    <element name="description">
      <ref name="RequirementSubpart"/>
    </element>
  </define>

  <!-- Bibliographic references attached to a requirement; "bibitem" is defined outside this file. -->
  <define name="reqt_references">
    <element name="references">
      <oneOrMore>
        <ref name="bibitem"/>
      </oneOrMore>
    </element>
  </define>

  <!-- Content model of a sub-part: optional attributes followed by at least one block. -->
  <define name="RequirementSubpart">
    <optional>
      <attribute name="type"/>
    </optional>
    <optional>
      <attribute name="exclude">
        <data type="boolean"/>
      </attribute>
    </optional>
    <optional>
      <attribute name="keep-with-next">
        <data type="boolean"/>
      </attribute>
    </optional>
    <optional>
      <attribute name="keep-lines-together">
        <data type="boolean"/>
      </attribute>
    </optional>
    <oneOrMore>
      <ref name="BasicBlock"/>
    </oneOrMore>
  </define>

  <!-- Allowed values of the "obligation" attribute. -->
  <define name="ObligationType">
    <choice>
      <value>requirement</value>
      <value>recommendation</value>
      <value>permission</value>
    </choice>
  </define>

  <!-- A classification is a tag/value pair. -->
  <define name="classification">
    <element name="classification">
      <ref name="classification_tag"/>
      <ref name="classification_value"/>
    </element>
  </define>
  <define name="classification_tag">
    <element name="tag">
      <text/>
    </element>
  </define>
  <define name="classification_value">
    <element name="value">
      <text/>
    </element>
  </define>
</grammar>
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require "fileutils"
require "yaml"
require "rdf"
require "linkeddata"
require "sparql"
require "mechanize"
require "relaton_w3c/data_parser"
|
6
|
+
|
7
|
+
module RelatonW3c
  #
  # Loads the W3C technical reports RDF index, converts every document of a
  # supported type into a bibliographic item and writes each item to its own
  # file in the output directory.
  #
  class DataFetcher
    # Short document type codes that are converted; DataParser#parse returns
    # nil for any document whose type is not in this list.
    USED_TYPES = %w[WD NOTE PER PR REC CR].freeze

    attr_reader :data, :group_names

    #
    # Data fetcher initializer
    #
    # Reads the bundled workgroups.yaml and loads the remote RDF index, so
    # constructing an instance performs network I/O.
    #
    # @param [String] output directory to save files
    # @param [String] format format of output files (xml, yaml, bibxml)
    #
    def initialize(output, format)
      @output = output
      @format = format
      # "bibxml" documents are stored with the plain "xml" extension
      @ext = format.sub(/^bib/, "")
      @group_names = YAML.load_file(File.join(__dir__, "workgroups.yaml"))
      @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
      # names of the files written so far, used to report collisions
      @files = []
    end

    #
    # Initialize fetcher and run fetch
    #
    # Prints the start time, the stop time and the elapsed seconds to stdout.
    #
    # @param [String] output directory to save files, default: "data"
    # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
    #
    def self.fetch(output: "data", format: "yaml")
      t1 = Time.now
      puts "Started at: #{t1}"
      # mkdir_p is a no-op when the directory already exists
      FileUtils.mkdir_p output
      new(output, format).fetch
      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    end

    #
    # Parse documents
    #
    # Runs the index query and saves every solution the parser accepts.
    #
    def fetch
      query.each { |sl| save_doc DataParser.parse(sl, self) }
    end

    #
    # Query RDF source for documents
    #
    # Selects the link, title and date of every resource that has both a
    # dc:title and a dc:date.
    #
    # @return [RDF::Query::Solutions] query results
    #
    def query # rubocop:disable Metrics/MethodLength
      sse = SPARQL.parse(%(
        PREFIX : <http://www.w3.org/2001/02pd/rec54#>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
        # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        SELECT ?link ?title ?date
        WHERE {
          ?link dc:title ?title ; dc:date ?date . # ; doc:versionOf ?version_of .
        }
      ))
      data.query sse
    end

    #
    # Save document to file
    #
    # Does nothing when bib is nil. When two documents map to the same file
    # name a warning is printed and the later document overwrites the earlier.
    #
    # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
    #
    def save_doc(bib) # rubocop:disable Metrics/MethodLength
      return unless bib

      content = case @format
                when "xml" then bib.to_xml(bibdata: true)
                when "yaml" then bib.to_hash.to_yaml
                # public_send: the format string comes from the caller and
                # must not be able to reach private methods of the item
                else bib.public_send("to_#{@format}")
                end
      file = file_name(bib)
      if @files.include? file
        warn "File #{file} already exists. Document: #{bib.docnumber}"
      else
        @files << file
      end
      File.write file, content, encoding: "UTF-8"
    end

    #
    # Generate file name
    #
    # Whitespace, commas, colons and slashes in the document number become
    # underscores, runs of underscores are collapsed and the name is upcased.
    #
    # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
    #
    # @return [String] file name
    #
    def file_name(bib)
      name = bib.docnumber.gsub(/[\s,:\/]/, "_").squeeze("_").upcase
      File.join @output, "#{name}.#{@ext}"
    end
  end
end
|
@@ -0,0 +1,205 @@
|
|
1
|
+
module RelatonW3c
  #
  # Converts one solution of the fetcher's index query into a bibliographic
  # item, running further SPARQL queries against the fetcher's repository for
  # the type, relations, editors and working groups of the document.
  #
  class DataParser
    # Captures the document identifier from a document URL: a word followed
    # by one or more "-" separated parts, plus an optional single sub-path.
    IDENTIFIER_PATTERN = %r{.+/(\w+(?:-[\w.]+)+(?:/\w+)?)}.freeze

    #
    # Document parser initialization
    #
    # @param [RDF::Query::Solution] sol entry from the SPARQL query
    # @param [RelatonW3c::DataFetcher] fetcher data fetcher
    #
    def initialize(sol, fetcher)
      @sol = sol
      @fetcher = fetcher
    end

    #
    # Initialize document parser and run it
    #
    # @param [RDF::Query::Solution] sol entry from the SPARQL query
    # @param [RelatonW3c::DataFetcher] fetcher data fetcher
    #
    # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
    #
    def self.parse(sol, fetcher)
      new(sol, fetcher).parse
    end

    #
    # Parse document
    #
    # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item, or
    #   nil when the document type is not one of the fetcher's USED_TYPES
    #
    def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      return unless @fetcher.class::USED_TYPES.include? type

      RelatonW3c::W3cBibliographicItem.new(
        type: "standard",
        doctype: parse_doctype,
        fetched: Date.today.to_s,
        language: ["en"],
        script: ["Latn"],
        title: parse_title,
        link: parse_link,
        docid: parse_docid,
        docnumber: identifier(@sol.link.to_s),
        series: parse_series,
        date: parse_date,
        relation: parse_relation,
        contributor: parse_contrib,
        editorialgroup: parse_editorialgroup,
      )
    end

    #
    # Parse title
    #
    # @return [RelatonBib::TypedTitleStringCollection] title
    #
    def parse_title
      t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
      RelatonBib::TypedTitleStringCollection.new [t]
    end

    #
    # Parse link
    #
    # @return [Array<RelatonBib::TypedUri>] link
    #
    def parse_link
      [RelatonBib::TypedUri.new(type: "src", content: @sol.link.to_s)]
    end

    #
    # Parse docidentifier
    #
    # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
    #
    def parse_docid
      id = pub_id(@sol.link.to_s)
      [RelatonBib::DocumentIdentifier.new(type: "W3C", id: id)]
    end

    #
    # Generate PubID
    #
    # @param [String] url url
    #
    # @return [String] PubID
    #
    def pub_id(url)
      "W3C #{identifier(url)}"
    end

    #
    # Extract the document identifier from a URL
    #
    # URLs that do not fit IDENTIFIER_PATTERN fall back to their last path
    # segment instead of raising, so one unusual link (for example the target
    # of a relation) cannot abort the whole run.
    #
    # @param [String] url url
    #
    # @return [String] identifier, empty when the URL has no path segment
    #
    def identifier(url)
      url[IDENTIFIER_PATTERN, 1] || url.split("/").last.to_s
    end

    #
    # Parse series
    #
    # @return [Array<RelatonBib::Series>] series
    #
    def parse_series
      title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
      [RelatonBib::Series.new(title: title, number: identifier(@sol.link.to_s))]
    end

    #
    # Document type
    #
    # Queries the rdf:type values of the document, keeps the fragment after
    # "#" of each and picks the first one that is a key of Scrapper::DOCTYPES.
    # A found type is memoized.
    #
    # @return [String, nil] type, nil when none of the types is known
    #
    def type # rubocop:disable Metrics/MethodLength
      @type ||= begin
        sse = SPARQL.parse(%(
          PREFIX : <http://www.w3.org/2001/02pd/rec54#>
          PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
          SELECT ?type
          WHERE {
            { <#{@sol.link}> rdf:type ?type }
          }
        ))
        tps = @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
        tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
      end
    end

    #
    # Parse doctype
    #
    # @return [String] doctype
    #
    def parse_doctype
      Scrapper::DOCTYPES[type]
    end

    #
    # Parse date
    #
    # @return [Array<RelatonBib::BibliographicDate>] publication date
    #
    def parse_date
      [RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
    end

    #
    # Parse relation
    #
    # Builds one relation per doc:obsoletes statement of the document. The
    # relation type is taken from the name of the query variable.
    #
    # @return [Array<RelatonBib::DocumentRelation>] relation
    #
    def parse_relation # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      sse = SPARQL.parse(%(
        PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
        SELECT ?obsoletes
        WHERE {
          VALUES ?p { doc:obsoletes }
          { <#{@sol.link}> ?p ?obsoletes }
        }
      ))
      @fetcher.data.query(sse).order_by(:obsoletes).map do |r|
        tp, url = r.to_h.first
        fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
        bib = W3cBibliographicItem.new formattedref: fr
        RelatonBib::DocumentRelation.new(type: tp.to_s, bibitem: bib)
      end
    end

    #
    # Parse contributor
    #
    # Every editor's full name becomes a person with the "editor" role.
    #
    # @return [Array<RelatonBib::ContributionInfo>] contributor
    #
    def parse_contrib # rubocop:disable Metrics/MethodLength
      sse = SPARQL.parse(%(
        PREFIX : <http://www.w3.org/2001/02pd/rec54#>
        PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
        SELECT ?full_name
        WHERE {
          <#{@sol.link}> :editor/contact:fullName ?full_name
        }
      ))
      @fetcher.data.query(sse).order_by(:full_name).map do |ed|
        cn = RelatonBib::LocalizedString.new(ed.full_name.to_s, "en", "Latn")
        name = RelatonBib::FullName.new completename: cn
        person = RelatonBib::Person.new name: name
        RelatonBib::ContributionInfo.new entity: person, role: [type: "editor"]
      end
    end

    #
    # Parse editorialgroup
    #
    # Looks up each delivering group's home page (without a trailing slash)
    # in the fetcher's group names; unknown home pages are reported with a
    # warning and left out.
    #
    # @return [RelatonBib::EditorialGroup] editorialgroup
    #
    def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      sse = SPARQL.parse(%(
        PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
        PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
        SELECT ?home_page
        WHERE {
          <#{@sol.link}> org:deliveredBy/contact:homePage ?home_page
        }
      ))
      res = @fetcher.data.query(sse).order_by(:home_page)
      tc = res.each_with_object([]) do |edg, obj|
        wg = @fetcher.group_names[edg.home_page.to_s.sub(/\/$/, "")]
        if wg
          rwg = RelatonBib::WorkGroup.new name: wg["name"]
          obj << RelatonBib::TechnicalCommittee.new(rwg)
        else
          warn "Working group name not found for #{edg.home_page}"
        end
      end
      RelatonBib::EditorialGroup.new tc
    end
  end
end
|
@@ -22,8 +22,8 @@ module RelatonW3c
|
|
22
22
|
# @param ref [String] reference to search
|
23
23
|
def initialize(ref)
|
24
24
|
%r{
|
25
|
-
^(W3C\s)?
|
26
|
-
(?<type>(CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
|
25
|
+
^(?:W3C\s)?
|
26
|
+
(?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
|
27
27
|
Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
|
28
28
|
Recommendation|Retired|Working\sDraft))? # type
|
29
29
|
\s?
|
@@ -41,7 +41,7 @@ module RelatonW3c
|
|
41
41
|
# @param title_date [String]
|
42
42
|
# @param type [String]
|
43
43
|
# @return [Array<Hash>]
|
44
|
-
def from_yaml(title_date, type)
|
44
|
+
def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
45
45
|
/(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
|
46
46
|
title ||= title_date
|
47
47
|
result = data.select do |hit|
|
@@ -50,7 +50,7 @@ module RelatonW3c
|
|
50
50
|
type_date_filter(hit, type, date)
|
51
51
|
end
|
52
52
|
if result.empty?
|
53
|
-
result = data.select { |h| h["link"].split("/").last.match?
|
53
|
+
result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
|
54
54
|
end
|
55
55
|
result.map { |h| Hit.new(h, self) }
|
56
56
|
end
|
@@ -60,7 +60,7 @@ module RelatonW3c
|
|
60
60
|
# @param date [String]
|
61
61
|
# @return [TrueClass, FalseClass]
|
62
62
|
def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
|
63
|
-
if type && hit["type"] != short_type(type) || date && hit["date"] != date
|
63
|
+
if (type && hit["type"] != short_type(type)) || (date && hit["date"] != date)
|
64
64
|
history = get_history hit, type, date
|
65
65
|
return false unless history.any?
|
66
66
|
|
@@ -109,7 +109,7 @@ module RelatonW3c
|
|
109
109
|
# @param type [String]
|
110
110
|
# @return [String]
|
111
111
|
def short_type(type)
|
112
|
-
tp = TYPES.select { |
|
112
|
+
tp = TYPES.select { |_, v| v == type }.keys
|
113
113
|
tp.first || type
|
114
114
|
end
|
115
115
|
|
@@ -137,7 +137,7 @@ module RelatonW3c
|
|
137
137
|
# fetch data from server and save it to file.
|
138
138
|
#
|
139
139
|
def fetch_data
|
140
|
-
resp = Net::HTTP.get_response URI.parse(DOMAIN
|
140
|
+
resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
|
141
141
|
# return if there aren't any changes since last fetching
|
142
142
|
return unless resp.code == "200"
|
143
143
|
|
@@ -153,7 +153,7 @@ module RelatonW3c
|
|
153
153
|
# @param h_el [Nokogiri::XML::Element]
|
154
154
|
# @param link [Nokogiri::XML::Element]
|
155
155
|
# @param pubdetails [Nokogiri::XML::Element]
|
156
|
-
def fetch_hit(h_el, link, pubdetails)
|
156
|
+
def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
157
157
|
datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
|
158
158
|
editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
|
159
159
|
keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
|
@@ -4,11 +4,12 @@ module RelatonW3c
|
|
4
4
|
class Processor < Relaton::Processor
|
5
5
|
attr_reader :idtype
|
6
6
|
|
7
|
-
def initialize
|
7
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
8
8
|
@short = :relaton_w3c
|
9
9
|
@prefix = "W3C"
|
10
10
|
@defaultprefix = %r{^W3C\s}
|
11
11
|
@idtype = "W3C"
|
12
|
+
@datasets = %w[w3c-rdf]
|
12
13
|
end
|
13
14
|
|
14
15
|
# @param code [String]
|
@@ -19,6 +20,18 @@ module RelatonW3c
|
|
19
20
|
::RelatonW3c::W3cBibliography.get(code, date, opts)
|
20
21
|
end
|
21
22
|
|
23
|
+
#
|
24
|
+
# Fetch all the documents from http://www.w3.org/2002/01/tr-automation/tr.rdf
|
25
|
+
#
|
26
|
+
# @param [String] _source source name
|
27
|
+
# @param [Hash] opts
|
28
|
+
# @option opts [String] :output directory to output documents
|
29
|
+
# @option opts [String] :format
|
30
|
+
#
|
31
|
+
def fetch_data(_source, opts)
|
32
|
+
DataFetcher.fetch(**opts)
|
33
|
+
end
|
34
|
+
|
22
35
|
# @param xml [String]
|
23
36
|
# @return [RelatonCalconnect::CcBibliographicItem]
|
24
37
|
def from_xml(xml)
|
@@ -29,7 +42,7 @@ module RelatonW3c
|
|
29
42
|
# @return [RelatonW3c::W3cBibliographicItem]
|
30
43
|
def hash_to_bib(hash)
|
31
44
|
item_hash = ::RelatonW3c::HashConverter.hash_to_bib(hash)
|
32
|
-
::RelatonW3c::W3cBibliographicItem.new
|
45
|
+
::RelatonW3c::W3cBibliographicItem.new(**item_hash)
|
33
46
|
end
|
34
47
|
|
35
48
|
# Returns hash of XML grammar
|