relaton-w3c 1.8.0 → 1.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/grammars/reqt.rng ADDED
@@ -0,0 +1,194 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
3
+ <!--
4
+ Presupposes isodoc.rnc, is included in it
5
+ include "isodoc.rnc" { }
6
+ -->
7
+ <define name="requirement">
8
+ <element name="requirement">
9
+ <ref name="RequirementType"/>
10
+ </element>
11
+ </define>
12
+ <define name="recommendation">
13
+ <element name="recommendation">
14
+ <ref name="RequirementType"/>
15
+ </element>
16
+ </define>
17
+ <define name="permission">
18
+ <element name="permission">
19
+ <ref name="RequirementType"/>
20
+ </element>
21
+ </define>
22
+ <define name="RequirementType">
23
+ <optional>
24
+ <attribute name="obligation">
25
+ <ref name="ObligationType"/>
26
+ </attribute>
27
+ </optional>
28
+ <optional>
29
+ <attribute name="unnumbered">
30
+ <data type="boolean"/>
31
+ </attribute>
32
+ </optional>
33
+ <optional>
34
+ <attribute name="number"/>
35
+ </optional>
36
+ <optional>
37
+ <attribute name="subsequence"/>
38
+ </optional>
39
+ <optional>
40
+ <attribute name="keep-with-next">
41
+ <data type="boolean"/>
42
+ </attribute>
43
+ </optional>
44
+ <optional>
45
+ <attribute name="keep-lines-together">
46
+ <data type="boolean"/>
47
+ </attribute>
48
+ </optional>
49
+ <attribute name="id">
50
+ <data type="ID"/>
51
+ </attribute>
52
+ <optional>
53
+ <attribute name="filename"/>
54
+ </optional>
55
+ <optional>
56
+ <attribute name="model"/>
57
+ </optional>
58
+ <optional>
59
+ <attribute name="type"/>
60
+ </optional>
61
+ <optional>
62
+ <ref name="reqtitle"/>
63
+ </optional>
64
+ <optional>
65
+ <ref name="label"/>
66
+ </optional>
67
+ <optional>
68
+ <ref name="subject"/>
69
+ </optional>
70
+ <zeroOrMore>
71
+ <ref name="reqinherit"/>
72
+ </zeroOrMore>
73
+ <zeroOrMore>
74
+ <ref name="classification"/>
75
+ </zeroOrMore>
76
+ <zeroOrMore>
77
+ <choice>
78
+ <ref name="measurementtarget"/>
79
+ <ref name="specification"/>
80
+ <ref name="verification"/>
81
+ <ref name="import"/>
82
+ <ref name="description"/>
83
+ </choice>
84
+ </zeroOrMore>
85
+ <optional>
86
+ <ref name="reqt_references"/>
87
+ </optional>
88
+ <zeroOrMore>
89
+ <choice>
90
+ <ref name="requirement"/>
91
+ <ref name="recommendation"/>
92
+ <ref name="permission"/>
93
+ </choice>
94
+ </zeroOrMore>
95
+ </define>
96
+ <define name="reqtitle">
97
+ <element name="title">
98
+ <ref name="FormattedString"/>
99
+ </element>
100
+ </define>
101
+ <define name="label">
102
+ <element name="label">
103
+ <text/>
104
+ </element>
105
+ </define>
106
+ <define name="subject">
107
+ <element name="subject">
108
+ <text/>
109
+ </element>
110
+ </define>
111
+ <define name="reqinherit">
112
+ <element name="inherit">
113
+ <text/>
114
+ </element>
115
+ </define>
116
+ <define name="measurementtarget">
117
+ <element name="measurement-target">
118
+ <ref name="RequirementSubpart"/>
119
+ </element>
120
+ </define>
121
+ <define name="specification">
122
+ <element name="specification">
123
+ <ref name="RequirementSubpart"/>
124
+ </element>
125
+ </define>
126
+ <define name="verification">
127
+ <element name="verification">
128
+ <ref name="RequirementSubpart"/>
129
+ </element>
130
+ </define>
131
+ <define name="import">
132
+ <element name="import">
133
+ <ref name="RequirementSubpart"/>
134
+ </element>
135
+ </define>
136
+ <define name="description">
137
+ <element name="description">
138
+ <ref name="RequirementSubpart"/>
139
+ </element>
140
+ </define>
141
+ <define name="reqt_references">
142
+ <element name="references">
143
+ <oneOrMore>
144
+ <ref name="bibitem"/>
145
+ </oneOrMore>
146
+ </element>
147
+ </define>
148
+ <define name="RequirementSubpart">
149
+ <optional>
150
+ <attribute name="type"/>
151
+ </optional>
152
+ <optional>
153
+ <attribute name="exclude">
154
+ <data type="boolean"/>
155
+ </attribute>
156
+ </optional>
157
+ <optional>
158
+ <attribute name="keep-with-next">
159
+ <data type="boolean"/>
160
+ </attribute>
161
+ </optional>
162
+ <optional>
163
+ <attribute name="keep-lines-together">
164
+ <data type="boolean"/>
165
+ </attribute>
166
+ </optional>
167
+ <oneOrMore>
168
+ <ref name="BasicBlock"/>
169
+ </oneOrMore>
170
+ </define>
171
+ <define name="ObligationType">
172
+ <choice>
173
+ <value>requirement</value>
174
+ <value>recommendation</value>
175
+ <value>permission</value>
176
+ </choice>
177
+ </define>
178
+ <define name="classification">
179
+ <element name="classification">
180
+ <ref name="classification_tag"/>
181
+ <ref name="classification_value"/>
182
+ </element>
183
+ </define>
184
+ <define name="classification_tag">
185
+ <element name="tag">
186
+ <text/>
187
+ </element>
188
+ </define>
189
+ <define name="classification_value">
190
+ <element name="value">
191
+ <text/>
192
+ </element>
193
+ </define>
194
+ </grammar>
@@ -0,0 +1,106 @@
1
+ require "rdf"
2
+ require "linkeddata"
3
+ require "sparql"
4
+ require "mechanize"
5
+ require "relaton_w3c/data_parser"
6
+
7
+ module RelatonW3c
8
+ class DataFetcher
9
+ USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
10
+
11
+ attr_reader :data, :group_names
12
+
13
+ #
14
+ # Data fetcher initializer
15
+ #
16
+ # @param [String] output directory to save files
17
+ # @param [String] format format of output files (xml, yaml, bibxml)
18
+ #
19
def initialize(output, format)
  # Remember where to write and which serialization to produce.
  @output = output
  @format = format
  # A leading "bib" is dropped from the format to get the file extension.
  @ext = format.sub(/^bib/, "")
  # Working group names are read from the YAML file located next to this source file.
  groups_path = File.join(File.dirname(File.expand_path(__FILE__)), "workgroups.yaml")
  @group_names = YAML.load_file(groups_path)
  # RDF dataset that the SPARQL queries run against.
  @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
  # File paths written so far; consulted to report duplicates.
  @files = []
end
28
+
29
+ #
30
+ # Initialize fetcher and run fetch
31
+ #
32
+ # @param [String] output directory to save files, default: "data"
33
+ # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
34
+ #
35
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"
  # Make sure the output directory is there before anything is written.
  FileUtils.mkdir_p(output) unless Dir.exist?(output)
  fetcher = new(output, format)
  fetcher.fetch
  finished_at = Time.now
  puts "Stopped at: #{finished_at}"
  elapsed = (finished_at - started_at).round
  puts "Done in: #{elapsed} sec."
end
44
+
45
+ #
46
+ # Parse documents
47
+ #
48
def fetch
  # Each query solution is parsed into a bibliographic item (or nil) and saved.
  query.each do |solution|
    bib = DataParser.parse(solution, self)
    save_doc(bib)
  end
end
51
+
52
+ #
53
+ # Query RDF source for documents
54
+ #
55
+ # @return [RDF::Query::Solutions] query results
56
+ #
57
def query # rubocop:disable Metrics/MethodLength
  # Selects every resource that carries both a dc:title and a dc:date.
  sparql = %(
    PREFIX : <http://www.w3.org/2001/02pd/rec54#>
    PREFIX dc: <http://purl.org/dc/elements/1.1/>
    PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
    # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    SELECT ?link ?title ?date
    WHERE {
      ?link dc:title ?title ; dc:date ?date . # ; doc:versionOf ?version_of .
    }
  )
  data.query(SPARQL.parse(sparql))
end
71
+
72
+ #
73
+ # Save document to file
74
+ #
75
+ # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
76
+ #
77
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  # Serialize according to the requested format; any format other than
  # "xml" and "yaml" is dispatched to the matching `to_<format>` method.
  content =
    case @format
    when "xml" then bib.to_xml(bibdata: true)
    when "yaml" then bib.to_hash.to_yaml
    else bib.send("to_#{@format}")
    end
  path = file_name(bib)
  # A repeated path is reported, but the file is still (over)written below.
  duplicate = @files.include?(path)
  @files << path unless duplicate
  warn "File #{path} already exists. Document: #{bib.docnumber}" if duplicate
  File.write(path, content, encoding: "UTF-8")
end
93
+
94
+ #
95
+ # Generate file name
96
+ #
97
+ # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
98
+ #
99
+ # @return [String] file name
100
+ #
101
def file_name(bib)
  # Whitespace, commas, colons and slashes become underscores; runs of
  # underscores are then collapsed and the result is upper-cased.
  sanitized = bib.docnumber.gsub(%r{[\s,:/]}, "_")
  basename = sanitized.squeeze("_").upcase
  File.join(@output, "#{basename}.#{@ext}")
end
105
+ end
106
+ end
@@ -0,0 +1,205 @@
1
+ module RelatonW3c
2
+ class DataParser
3
+ #
4
+ # Document parser initialization
5
+ #
6
+ # @param [RDF::Query::Solution] sol entry from the SPARQL query
7
+ # @param [RelatonW3c::DataFetcher] fetcher data fetcher
8
+ #
9
def initialize(sol, fetcher)
  # One row of the document query; its link, title and date bindings are read later.
  @sol = sol
  # Owning fetcher; supplies the RDF data and the working group names.
  @fetcher = fetcher
end
13
+
14
+ #
15
+ # Initialize document parser and run it
16
+ #
17
+ # @param [RDF::Query::Solution] sol entry from the SPARQL query
18
+ # @param [RelatonW3c::DataFetcher] fetcher data fetcher
19
+ #
20
+ # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
21
+ #
22
def self.parse(sol, fetcher)
  # Build a parser for this single solution and run it right away.
  parser = new(sol, fetcher)
  parser.parse
end
25
+
26
+ #
27
+ # Parse document
28
+ #
29
+ # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
30
+ #
31
def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  # Documents whose type is not listed in the fetcher's USED_TYPES are skipped.
  used_types = @fetcher.class::USED_TYPES
  return unless used_types.include?(type)

  attrs = {
    type: "standard",
    doctype: parse_doctype,
    fetched: Date.today.to_s,
    language: ["en"],
    script: ["Latn"],
    title: parse_title,
    link: parse_link,
    docid: parse_docid,
    docnumber: identifier(@sol.link.to_s),
    series: parse_series,
    date: parse_date,
    relation: parse_relation,
    contributor: parse_contrib,
    editorialgroup: parse_editorialgroup,
  }
  RelatonW3c::W3cBibliographicItem.new(**attrs)
end
51
+
52
+ #
53
+ # Parse title
54
+ #
55
+ # @return [RelatonBib::TypedTitleStringCollection] title
56
+ #
57
def parse_title
  # The collection holds a single title built from the solution's title binding.
  title = RelatonBib::TypedTitleString.new(content: @sol.title.to_s)
  RelatonBib::TypedTitleStringCollection.new([title])
end
61
+
62
+ #
63
+ # Parse link
64
+ #
65
+ # @return [Array<RelatonBib::TypedUri>] link
66
+ #
67
def parse_link
  # The document URL itself is recorded as the "src" link.
  source = RelatonBib::TypedUri.new(type: "src", content: @sol.link.to_s)
  [source]
end
70
+
71
+ #
72
+ # Parse docidentifier
73
+ #
74
+ # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
75
+ #
76
def parse_docid
  # The identifier is the PubID derived from the document URL.
  docid = RelatonBib::DocumentIdentifier.new(type: "W3C", id: pub_id(@sol.link.to_s))
  [docid]
end
80
+
81
+ #
82
+ # Generate PubID
83
+ #
84
+ # @param [String] url url
85
+ #
86
+ # @return [String] PubID
87
+ #
88
def pub_id(url)
  # PubID is the "W3C" prefix and the identifier from the URL, separated by a space.
  ["W3C", identifier(url)].join(" ")
end
91
+
92
#
# Extract the document identifier from a document URL
#
# The identifier is the last path segment of the form "word-part[-part...]",
# optionally followed by one more "/word" segment.
#
# @param [String] url url
#
# @return [String] identifier, or an empty string when the URL does not match
#
def identifier(url)
  # String#[] with a capture index yields nil (instead of raising on a nil
  # MatchData) when the pattern does not match, so #to_s covers that case.
  url[%r{.+/(\w+(?:-[\w.]+)+(?:/\w+)?)}, 1].to_s
end
95
+
96
+ #
97
+ # Parse series
98
+ #
99
+ # @return [Array<RelatonBib::Series>] series
100
+ #
101
def parse_series
  # Series title is "W3C <type>"; the series number is the document identifier.
  series_title = RelatonBib::TypedTitleString.new(content: "W3C #{type}")
  series = RelatonBib::Series.new(title: series_title, number: identifier(@sol.link.to_s))
  [series]
end
105
+
106
#
# Document type taken from the rdf:type statements of the document
#
# @return [String, nil] first type name that is a key of Scrapper::DOCTYPES
#
def type # rubocop:disable Metrics/MethodLength
  # A truthy result is memoized; a nil result is looked up again on the next call.
  return @type if @type

  sparql = %(
    PREFIX : <http://www.w3.org/2001/02pd/rec54#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    SELECT ?type
    WHERE {
      { <#{@sol.link}> rdf:type ?type }
    }
  )
  solutions = @fetcher.data.query(SPARQL.parse(sparql))
  # Only the fragment after "#" of each type URI is compared.
  names = solutions.map { |solution| solution.type.to_s.split("#").last }
  @type = names.find { |name| Scrapper::DOCTYPES.key?(name) }
end
120
+
121
+ #
122
+ # Parse doctype
123
+ #
124
+ # @return [String] doctype
125
+ #
126
def parse_doctype
  # Look the document type up in the Scrapper::DOCTYPES table.
  doctypes = Scrapper::DOCTYPES
  doctypes[type]
end
129
+
130
#
# Parse date
#
# @return [Array<RelatonBib::BibliographicDate>] date of type "published"
#
def parse_date
  published = RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)
  [published]
end
133
+
134
+ #
135
+ # Parse relation
136
+ #
137
+ # @return [Array<RelatonBib::DocumentRelation>] relation
138
+ #
139
def parse_relation # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  sparql = %(
    PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
    SELECT ?obsoletes
    WHERE {
      VALUES ?p { doc:obsoletes }
      { <#{@sol.link}> ?p ?obsoletes }
    }
  )
  solutions = @fetcher.data.query(SPARQL.parse(sparql)).order_by(:obsoletes)
  solutions.map do |solution|
    # The query selects only ?obsoletes, so the first pair of the solution
    # hash is presumably [variable name, target URL]; the name becomes the
    # relation type.
    relation_type, target = solution.to_h.first
    ref = RelatonBib::LocalizedString.new(pub_id(target.to_s))
    item = W3cBibliographicItem.new(formattedref: ref)
    RelatonBib::DocumentRelation.new(type: relation_type.to_s, bibitem: item)
  end
end
155
+
156
+ #
157
+ # Parse contributor
158
+ #
159
+ # @return [Array<RelatonBib::ContributionInfo>] contributor
160
+ #
161
def parse_contrib # rubocop:disable Metrics/MethodLength
  sparql = %(
    PREFIX : <http://www.w3.org/2001/02pd/rec54#>
    PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
    SELECT ?full_name
    WHERE {
      <#{@sol.link}> :editor/contact:fullName ?full_name
    }
  )
  editors = @fetcher.data.query(SPARQL.parse(sparql)).order_by(:full_name)
  # Every editor's full name becomes a person contributor with the "editor" role.
  editors.map do |editor|
    completename = RelatonBib::LocalizedString.new(editor.full_name.to_s, "en", "Latn")
    name = RelatonBib::FullName.new(completename: completename)
    person = RelatonBib::Person.new(name: name)
    RelatonBib::ContributionInfo.new(entity: person, role: [{ type: "editor" }])
  end
end
177
+
178
+ #
179
+ # Parse editorialgroup
180
+ #
181
+ # @return [RelatonBib::EditorialGroup] editorialgroup
182
+ #
183
def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  sparql = %(
    PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
    PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
    SELECT ?home_page
    WHERE {
      <#{@sol.link}> org:deliveredBy/contact:homePage ?home_page
    }
  )
  groups = @fetcher.data.query(SPARQL.parse(sparql)).order_by(:home_page)
  committees = []
  groups.each do |group|
    # Group names are keyed by home page URL without a trailing slash.
    home_page = group.home_page.to_s.sub(%r{/$}, "")
    known = @fetcher.group_names[home_page]
    unless known
      warn "Working group name not found for #{group.home_page}"
      next
    end

    workgroup = RelatonBib::WorkGroup.new(name: known["name"])
    committees << RelatonBib::TechnicalCommittee.new(workgroup)
  end
  RelatonBib::EditorialGroup.new(committees)
end
204
+ end
205
+ end
@@ -4,7 +4,7 @@ module RelatonW3c
4
4
  # @param item_hash [Hash]
5
5
  # @return [RelatonW3c::W3cBibliographicItem]
6
6
  def bib_item(item_hash)
7
- W3cBibliographicItem.new **item_hash
7
+ W3cBibliographicItem.new(**item_hash)
8
8
  end
9
9
  end
10
10
  end
@@ -22,8 +22,8 @@ module RelatonW3c
22
22
  # @param ref [String] reference to search
23
23
  def initialize(ref)
24
24
  %r{
25
- ^(W3C\s)?
26
- (?<type>(CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
25
+ ^(?:W3C\s)?
26
+ (?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
27
27
  Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
28
28
  Recommendation|Retired|Working\sDraft))? # type
29
29
  \s?
@@ -41,7 +41,7 @@ module RelatonW3c
41
41
  # @param title_date [String]
42
42
  # @param type [String]
43
43
  # @return [Array<Hash>]
44
- def from_yaml(title_date, type)
44
+ def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
45
45
  /(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
46
46
  title ||= title_date
47
47
  result = data.select do |hit|
@@ -50,7 +50,7 @@ module RelatonW3c
50
50
  type_date_filter(hit, type, date)
51
51
  end
52
52
  if result.empty?
53
- result = data.select { |h| h["link"].split("/").last.match? /#{title}/ }
53
+ result = data.select { |h| h["link"].split("/").last.match?(/#{title}/) }
54
54
  end
55
55
  result.map { |h| Hit.new(h, self) }
56
56
  end
@@ -60,7 +60,7 @@ module RelatonW3c
60
60
  # @param date [String]
61
61
  # @return [TrueClass, FalseClass]
62
62
  def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
63
- if type && hit["type"] != short_type(type) || date && hit["date"] != date
63
+ if (type && hit["type"] != short_type(type)) || (date && hit["date"] != date)
64
64
  history = get_history hit, type, date
65
65
  return false unless history.any?
66
66
 
@@ -109,7 +109,7 @@ module RelatonW3c
109
109
  # @param type [String]
110
110
  # @return [String]
111
111
  def short_type(type)
112
- tp = TYPES.select { |k,v| v == type }.keys
112
+ tp = TYPES.select { |_, v| v == type }.keys
113
113
  tp.first || type
114
114
  end
115
115
 
@@ -137,7 +137,7 @@ module RelatonW3c
137
137
  # fetch data from server and save it to file.
138
138
  #
139
139
  def fetch_data
140
- resp = Net::HTTP.get_response URI.parse(DOMAIN + "/TR/")
140
+ resp = Net::HTTP.get_response URI.parse("#{DOMAIN}/TR/")
141
141
  # return if there aren't any changes since last fetching
142
142
  return unless resp.code == "200"
143
143
 
@@ -153,7 +153,7 @@ module RelatonW3c
153
153
  # @param h_el [Nokogiri::XML::Element]
154
154
  # @param link [Nokogiri::XML::Element]
155
155
  # @param pubdetails [Nokogiri::XML::Element]
156
- def fetch_hit(h_el, link, pubdetails)
156
+ def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
157
157
  datepub = pubdetails.at("text()").text.match(/\d{4}-\d{2}-\d{2}/).to_s
158
158
  editor = h_el.xpath("ul[@class='editorlist']/li").map { |e| e.text.strip }
159
159
  keyword = h_el.xpath("ul[@class='taglist']/li").map { |e| e.text.strip }
@@ -4,11 +4,12 @@ module RelatonW3c
4
4
  class Processor < Relaton::Processor
5
5
  attr_reader :idtype
6
6
 
7
- def initialize
7
+ def initialize # rubocop:disable Lint/MissingSuper
8
8
  @short = :relaton_w3c
9
9
  @prefix = "W3C"
10
10
  @defaultprefix = %r{^W3C\s}
11
11
  @idtype = "W3C"
12
+ @datasets = %w[w3c-rdf]
12
13
  end
13
14
 
14
15
  # @param code [String]
@@ -19,6 +20,18 @@ module RelatonW3c
19
20
  ::RelatonW3c::W3cBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from http://www.w3.org/2002/01/tr-automation/tr.rdf
25
+ #
26
+ # @param [String] _source source name
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format
30
+ #
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
+ end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonCalconnect::CcBibliographicItem]
24
37
  def from_xml(xml)
@@ -29,7 +42,7 @@ module RelatonW3c
29
42
  # @return [RelatonW3c::W3cBibliographicItem]
30
43
  def hash_to_bib(hash)
31
44
  item_hash = ::RelatonW3c::HashConverter.hash_to_bib(hash)
32
- ::RelatonW3c::W3cBibliographicItem.new **item_hash
45
+ ::RelatonW3c::W3cBibliographicItem.new(**item_hash)
33
46
  end
34
47
 
35
48
  # Returns hash of XML grammar