relaton-w3c 1.8.0 → 1.9.3

Sign up to get free protection for your applications and to get access to all the features.
data/grammars/reqt.rng ADDED
@@ -0,0 +1,194 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
3
+ <!--
4
+ Presupposes isodoc.rnc, is included in it
5
+ include "isodoc.rnc" { }
6
+ -->
7
+ <define name="requirement">
8
+ <element name="requirement">
9
+ <ref name="RequirementType"/>
10
+ </element>
11
+ </define>
12
+ <define name="recommendation">
13
+ <element name="recommendation">
14
+ <ref name="RequirementType"/>
15
+ </element>
16
+ </define>
17
+ <define name="permission">
18
+ <element name="permission">
19
+ <ref name="RequirementType"/>
20
+ </element>
21
+ </define>
22
+ <define name="RequirementType">
23
+ <optional>
24
+ <attribute name="obligation">
25
+ <ref name="ObligationType"/>
26
+ </attribute>
27
+ </optional>
28
+ <optional>
29
+ <attribute name="unnumbered">
30
+ <data type="boolean"/>
31
+ </attribute>
32
+ </optional>
33
+ <optional>
34
+ <attribute name="number"/>
35
+ </optional>
36
+ <optional>
37
+ <attribute name="subsequence"/>
38
+ </optional>
39
+ <optional>
40
+ <attribute name="keep-with-next">
41
+ <data type="boolean"/>
42
+ </attribute>
43
+ </optional>
44
+ <optional>
45
+ <attribute name="keep-lines-together">
46
+ <data type="boolean"/>
47
+ </attribute>
48
+ </optional>
49
+ <attribute name="id">
50
+ <data type="ID"/>
51
+ </attribute>
52
+ <optional>
53
+ <attribute name="filename"/>
54
+ </optional>
55
+ <optional>
56
+ <attribute name="model"/>
57
+ </optional>
58
+ <optional>
59
+ <attribute name="type"/>
60
+ </optional>
61
+ <optional>
62
+ <ref name="reqtitle"/>
63
+ </optional>
64
+ <optional>
65
+ <ref name="label"/>
66
+ </optional>
67
+ <optional>
68
+ <ref name="subject"/>
69
+ </optional>
70
+ <zeroOrMore>
71
+ <ref name="reqinherit"/>
72
+ </zeroOrMore>
73
+ <zeroOrMore>
74
+ <ref name="classification"/>
75
+ </zeroOrMore>
76
+ <zeroOrMore>
77
+ <choice>
78
+ <ref name="measurementtarget"/>
79
+ <ref name="specification"/>
80
+ <ref name="verification"/>
81
+ <ref name="import"/>
82
+ <ref name="description"/>
83
+ </choice>
84
+ </zeroOrMore>
85
+ <optional>
86
+ <ref name="reqt_references"/>
87
+ </optional>
88
+ <zeroOrMore>
89
+ <choice>
90
+ <ref name="requirement"/>
91
+ <ref name="recommendation"/>
92
+ <ref name="permission"/>
93
+ </choice>
94
+ </zeroOrMore>
95
+ </define>
96
+ <define name="reqtitle">
97
+ <element name="title">
98
+ <ref name="FormattedString"/>
99
+ </element>
100
+ </define>
101
+ <define name="label">
102
+ <element name="label">
103
+ <text/>
104
+ </element>
105
+ </define>
106
+ <define name="subject">
107
+ <element name="subject">
108
+ <text/>
109
+ </element>
110
+ </define>
111
+ <define name="reqinherit">
112
+ <element name="inherit">
113
+ <text/>
114
+ </element>
115
+ </define>
116
+ <define name="measurementtarget">
117
+ <element name="measurement-target">
118
+ <ref name="RequirementSubpart"/>
119
+ </element>
120
+ </define>
121
+ <define name="specification">
122
+ <element name="specification">
123
+ <ref name="RequirementSubpart"/>
124
+ </element>
125
+ </define>
126
+ <define name="verification">
127
+ <element name="verification">
128
+ <ref name="RequirementSubpart"/>
129
+ </element>
130
+ </define>
131
+ <define name="import">
132
+ <element name="import">
133
+ <ref name="RequirementSubpart"/>
134
+ </element>
135
+ </define>
136
+ <define name="description">
137
+ <element name="description">
138
+ <ref name="RequirementSubpart"/>
139
+ </element>
140
+ </define>
141
+ <define name="reqt_references">
142
+ <element name="references">
143
+ <oneOrMore>
144
+ <ref name="bibitem"/>
145
+ </oneOrMore>
146
+ </element>
147
+ </define>
148
+ <define name="RequirementSubpart">
149
+ <optional>
150
+ <attribute name="type"/>
151
+ </optional>
152
+ <optional>
153
+ <attribute name="exclude">
154
+ <data type="boolean"/>
155
+ </attribute>
156
+ </optional>
157
+ <optional>
158
+ <attribute name="keep-with-next">
159
+ <data type="boolean"/>
160
+ </attribute>
161
+ </optional>
162
+ <optional>
163
+ <attribute name="keep-lines-together">
164
+ <data type="boolean"/>
165
+ </attribute>
166
+ </optional>
167
+ <oneOrMore>
168
+ <ref name="BasicBlock"/>
169
+ </oneOrMore>
170
+ </define>
171
+ <define name="ObligationType">
172
+ <choice>
173
+ <value>requirement</value>
174
+ <value>recommendation</value>
175
+ <value>permission</value>
176
+ </choice>
177
+ </define>
178
+ <define name="classification">
179
+ <element name="classification">
180
+ <ref name="classification_tag"/>
181
+ <ref name="classification_value"/>
182
+ </element>
183
+ </define>
184
+ <define name="classification_tag">
185
+ <element name="tag">
186
+ <text/>
187
+ </element>
188
+ </define>
189
+ <define name="classification_value">
190
+ <element name="value">
191
+ <text/>
192
+ </element>
193
+ </define>
194
+ </grammar>
@@ -0,0 +1,106 @@
1
+ require "rdf"
2
+ require "linkeddata"
3
+ require "sparql"
4
+ require "mechanize"
5
+ require "relaton_w3c/data_parser"
6
+
7
+ module RelatonW3c
8
+ class DataFetcher
9
+ USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
10
+
11
+ attr_reader :data, :group_names
12
+
13
+ #
14
+ # Data fetcher initializer
15
+ #
16
+ # @param [String] output directory to save files
17
+ # @param [String] format format of output files (xml, yaml, bibxml)
18
+ #
19
+ def initialize(output, format)
20
+ @output = output
21
+ @format = format
22
+ @ext = format.sub(/^bib/, "")
23
+ dir = File.dirname(File.expand_path(__FILE__))
24
+ @group_names = YAML.load_file(File.join(dir , "workgroups.yaml"))
25
+ @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
26
+ @files = []
27
+ end
28
+
29
+ #
30
+ # Initialize fetcher and run fetch
31
+ #
32
+ # @param [String] output directory to save files, default: "data"
33
+ # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
34
+ #
35
+ def self.fetch(output: "data", format: "yaml")
36
+ t1 = Time.now
37
+ puts "Started at: #{t1}"
38
+ FileUtils.mkdir_p output unless Dir.exist? output
39
+ new(output, format).fetch
40
+ t2 = Time.now
41
+ puts "Stopped at: #{t2}"
42
+ puts "Done in: #{(t2 - t1).round} sec."
43
+ end
44
+
45
+ #
46
+ # Parse documents
47
+ #
48
+ def fetch
49
+ query.each { |sl| save_doc DataParser.parse(sl, self) }
50
+ end
51
+
52
+ #
53
+ # Query RDF source for documents
54
+ #
55
+ # @return [RDF::Query::Solutions] query results
56
+ #
57
+ def query # rubocop:disable Metrics/MethodLength
58
+ sse = SPARQL.parse(%(
59
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
60
+ PREFIX dc: <http://purl.org/dc/elements/1.1/>
61
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
62
+ # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
63
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
64
+ SELECT ?link ?title ?date
65
+ WHERE {
66
+ ?link dc:title ?title ; dc:date ?date . # ; doc:versionOf ?version_of .
67
+ }
68
+ ))
69
+ data.query sse
70
+ end
71
+
72
+ #
73
+ # Save document to file
74
+ #
75
+ # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
76
+ #
77
+ def save_doc(bib) # rubocop:disable Metrics/MethodLength
78
+ return unless bib
79
+
80
+ c = case @format
81
+ when "xml" then bib.to_xml(bibdata: true)
82
+ when "yaml" then bib.to_hash.to_yaml
83
+ else bib.send("to_#{@format}")
84
+ end
85
+ file = file_name(bib)
86
+ if @files.include? file
87
+ warn "File #{file} already exists. Document: #{bib.docnumber}"
88
+ else
89
+ @files << file
90
+ end
91
+ File.write file, c, encoding: "UTF-8"
92
+ end
93
+
94
+ #
95
+ # Generate file name
96
+ #
97
+ # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
98
+ #
99
+ # @return [String] file name
100
+ #
101
+ def file_name(bib)
102
+ name = bib.docnumber.gsub(/[\s,:\/]/, "_").squeeze("_").upcase
103
+ File.join @output, "#{name}.#{@ext}"
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,205 @@
1
+ module RelatonW3c
2
+ class DataParser
3
+ #
4
+ # Document parser initialization
5
+ #
6
+ # @param [RDF::Query::Solution] sol entry from the SPARQL query
7
+ # @param [RelatonW3c::DataFetcher] fetcher data fetcher
8
+ #
9
+ def initialize(sol, fetcher)
10
+ @sol = sol
11
+ @fetcher = fetcher
12
+ end
13
+
14
+ #
15
+ # Initialize document parser and run it
16
+ #
17
+ # @param [RDF::Query::Solution] sol entry from the SPARQL query
18
+ # @param [RelatonW3c::DataFetcher] fetcher data fetcher
19
+ #
20
+ # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
21
+ #
22
+ def self.parse(sol, fetcher)
23
+ new(sol, fetcher).parse
24
+ end
25
+
26
+ #
27
+ # Parse document
28
+ #
29
+ # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
30
+ #
31
+ def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
32
+ return unless @fetcher.class::USED_TYPES.include? type
33
+
34
+ RelatonW3c::W3cBibliographicItem.new(
35
+ type: "standard",
36
+ doctype: parse_doctype,
37
+ fetched: Date.today.to_s,
38
+ language: ["en"],
39
+ script: ["Latn"],
40
+ title: parse_title,
41
+ link: parse_link,
42
+ docid: parse_docid,
43
+ docnumber: identifier(@sol.link.to_s),
44
+ series: parse_series,
45
+ date: parse_date,
46
+ relation: parse_relation,
47
+ contributor: parse_contrib,
48
+ editorialgroup: parse_editorialgroup,
49
+ )
50
+ end
51
+
52
+ #
53
+ # Parse title
54
+ #
55
+ # @return [RelatonBib::TypedTitleStringCollection] title
56
+ #
57
+ def parse_title
58
+ t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
59
+ RelatonBib::TypedTitleStringCollection.new [t]
60
+ end
61
+
62
+ #
63
+ # Parse link
64
+ #
65
+ # @return [Array<RelatonBib::TypedUri>] link
66
+ #
67
+ def parse_link
68
+ [RelatonBib::TypedUri.new(type: "src", content: @sol.link.to_s)]
69
+ end
70
+
71
+ #
72
+ # Parse docidentifier
73
+ #
74
+ # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
75
+ #
76
+ def parse_docid
77
+ id = pub_id(@sol.link.to_s)
78
+ [RelatonBib::DocumentIdentifier.new(type: "W3C", id: id)]
79
+ end
80
+
81
+ #
82
+ # Generate PubID
83
+ #
84
+ # @param [String] url url
85
+ #
86
+ # @return [String] PubID
87
+ #
88
+ def pub_id(url)
89
+ "W3C #{identifier(url)}"
90
+ end
91
+
92
+ def identifier(url)
93
+ /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/.match(url)[1].to_s
94
+ end
95
+
96
+ #
97
+ # Parse series
98
+ #
99
+ # @return [Array<RelatonBib::Series>] series
100
+ #
101
+ def parse_series
102
+ title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
103
+ [RelatonBib::Series.new(title: title, number: identifier(@sol.link.to_s))]
104
+ end
105
+
106
+ def type # rubocop:disable Metrics/MethodLength
107
+ @type ||= begin
108
+ sse = SPARQL.parse(%(
109
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
110
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
111
+ SELECT ?type
112
+ WHERE {
113
+ { <#{@sol.link}> rdf:type ?type }
114
+ }
115
+ ))
116
+ tps = @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
117
+ tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
118
+ end
119
+ end
120
+
121
+ #
122
+ # Parse doctype
123
+ #
124
+ # @return [String] doctype
125
+ #
126
+ def parse_doctype
127
+ Scrapper::DOCTYPES[type]
128
+ end
129
+
130
+ def parse_date
131
+ [RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
132
+ end
133
+
134
+ #
135
+ # Parse relation
136
+ #
137
+ # @return [Array<RelatonBib::DocumentRelation>] relation
138
+ #
139
+ def parse_relation # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
140
+ sse = SPARQL.parse(%(
141
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
142
+ SELECT ?obsoletes
143
+ WHERE {
144
+ VALUES ?p { doc:obsoletes }
145
+ { <#{@sol.link}> ?p ?obsoletes }
146
+ }
147
+ ))
148
+ @fetcher.data.query(sse).order_by(:obsoletes).map do |r|
149
+ tp, url = r.to_h.first
150
+ fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
151
+ bib = W3cBibliographicItem.new formattedref: fr
152
+ RelatonBib::DocumentRelation.new(type: tp.to_s, bibitem: bib)
153
+ end
154
+ end
155
+
156
+ #
157
+ # Parse contributor
158
+ #
159
+ # @return [Array<RelatonBib::ContributionInfo>] contributor
160
+ #
161
+ def parse_contrib # rubocop:disable Metrics/MethodLength
162
+ sse = SPARQL.parse(%(
163
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
164
+ PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
165
+ SELECT ?full_name
166
+ WHERE {
167
+ <#{@sol.link}> :editor/contact:fullName ?full_name
168
+ }
169
+ ))
170
+ @fetcher.data.query(sse).order_by(:full_name).map do |ed|
171
+ cn = RelatonBib::LocalizedString.new(ed.full_name.to_s, "en", "Latn")
172
+ n = RelatonBib::FullName.new completename: cn
173
+ p = RelatonBib::Person.new name: n
174
+ RelatonBib::ContributionInfo.new entity: p, role: [type: "editor"]
175
+ end
176
+ end
177
+
178
+ #
179
+ # Parse editorialgroup
180
+ #
181
+ # @return [RelatonBib::EditorialGroup] editorialgroup
182
+ #
183
+ def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
184
+ sse = SPARQL.parse(%(
185
+ PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
186
+ PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
187
+ SELECT ?home_page
188
+ WHERE {
189
+ <#{@sol.link}> org:deliveredBy/contact:homePage ?home_page
190
+ }
191
+ ))
192
+ res = @fetcher.data.query(sse).order_by(:home_page)
193
+ tc = res.each_with_object([]) do |edg, obj|
194
+ wg = @fetcher.group_names[edg.home_page.to_s.sub(/\/$/, "")]
195
+ if wg
196
+ rwg = RelatonBib::WorkGroup.new name: wg["name"]
197
+ obj << RelatonBib::TechnicalCommittee.new(rwg)
198
+ else
199
+ warn "Working group name not found for #{edg.home_page}"
200
+ end
201
+ end
202
+ RelatonBib::EditorialGroup.new tc
203
+ end
204
+ end
205
+ end
@@ -4,7 +4,7 @@ module RelatonW3c
4
4
  # @param item_hash [Hash]
5
5
  # @return [RelatonW3c::W3cBibliographicItem]
6
6
  def bib_item(item_hash)
7
- W3cBibliographicItem.new **item_hash
7
+ W3cBibliographicItem.new(**item_hash)
8
8
  end
9
9
  end
10
10
  end
@@ -22,8 +22,8 @@ module RelatonW3c
22
22
  # @param ref [String] reference to search
23
23
  def initialize(ref)
24
24
  %r{
25
- ^(W3C\s)?
26
- (?<type>(CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
25
+ ^(?:W3C\s)?
26
+ (?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
27
27
  Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
28
28
  Recommendation|Retired|Working\sDraft))? # type
29
29
  \s?
@@ -41,7 +41,7 @@ module RelatonW3c
41
41
  # @param title_date [String]
42
42
  # @param type [String]
43
43
  # @return [Array<Hash>]
44
- def from_yaml(title_date, type)
44
+ def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
45
45
  /(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
46
46
  title ||= title_date
47
47
  result = data.select do |hit|
@@ -50,7 +50,7 @@ module RelatonW3c
50
50
  type_date_filter(hit, type, date)
51
51
  end
52
52
  if result.empty?
53
- result = data.select { |h| h["link"].split("/").last.match? /#{title}/ }
53
+ result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
54
54
  end
55
55
  result.map { |h| Hit.new(h, self) }
56
56
  end
@@ -60,7 +60,7 @@ module RelatonW3c
60
60
  # @param date [String]
61
61
  # @return [TrueClass, FalseClass]
62
62
  def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
63
- if type && hit["type"] != short_type(type) || date && hit["date"] != date
63
+ if (type && hit["type"] != short_type(type)) || (date && hit["date"] != date)
64
64
  history = get_history hit, type, date
65
65
  return false unless history.any?
66
66
 
@@ -109,7 +109,7 @@ module RelatonW3c
109
109
  # @param type [String]
110
110
  # @return [String]
111
111
  def short_type(type)
112
- tp = TYPES.select { |k,v| v == type }.keys
112
+ tp = TYPES.select { |_, v| v == type }.keys
113
113
  tp.first || type
114
114
  end
115
115
 
@@ -137,7 +137,7 @@ module RelatonW3c
137
137
  # fetch data from server and save it to file.
138
138
  #
139
139
  def fetch_data
140
- resp = Net::HTTP.get_response URI.parse(DOMAIN + "/TR/")
140
+ resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
141
141
  # return if there aren't any changes since last fetching
142
142
  return unless resp.code == "200"
143
143
 
@@ -153,7 +153,7 @@ module RelatonW3c
153
153
  # @param h_el [Nokogiri::XML::Element]
154
154
  # @param link [Nokogiri::XML::Element]
155
155
  # @param pubdetails [Nokogiri::XML::Element]
156
- def fetch_hit(h_el, link, pubdetails)
156
+ def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
157
157
  datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
158
158
  editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
159
159
  keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
@@ -4,11 +4,12 @@ module RelatonW3c
4
4
  class Processor < Relaton::Processor
5
5
  attr_reader :idtype
6
6
 
7
- def initialize
7
+ def initialize # rubocop:disable Lint/MissingSuper
8
8
  @short = :relaton_w3c
9
9
  @prefix = "W3C"
10
10
  @defaultprefix = %r{^W3C\s}
11
11
  @idtype = "W3C"
12
+ @datasets = %w[w3c-rdf]
12
13
  end
13
14
 
14
15
  # @param code [String]
@@ -19,6 +20,18 @@ module RelatonW3c
19
20
  ::RelatonW3c::W3cBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from http://www.w3.org/2002/01/tr-automation/tr.rdf
25
+ #
26
+ # @param [String] _source source name
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format
30
+ #
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
+ end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonCalconnect::CcBibliographicItem]
24
37
  def from_xml(xml)
@@ -29,7 +42,7 @@ module RelatonW3c
29
42
  # @return [RelatonW3c::W3cBibliographicItem]
30
43
  def hash_to_bib(hash)
31
44
  item_hash = ::RelatonW3c::HashConverter.hash_to_bib(hash)
32
- ::RelatonW3c::W3cBibliographicItem.new **item_hash
45
+ ::RelatonW3c::W3cBibliographicItem.new(**item_hash)
33
46
  end
34
47
 
35
48
  # Returns hash of XML grammar