relaton-w3c 1.11.0 → 1.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ce3bb960545ad6330d96829be3142732ba1c756e24b0e339ca52a094d7c9447
4
- data.tar.gz: 8659ebfc94938514ca01a3f202f8b70fe4926268eedf7cc8cb83ef521eda5c8a
3
+ metadata.gz: 4c96856baa51c84d8397b62be6dee1b8225e7ab854cd70176187e68fc269a23c
4
+ data.tar.gz: 54d5cc018e0de6ecca6d402ccbe41947047190d08e922c8f46b7cb20164cea22
5
5
  SHA512:
6
- metadata.gz: 5620a3934fd79f4307489fc8c8129cf8f3c601aa71e3ef0ca568db8ced117bdc2260b392f8711c80f605930fa8fcc5a7f4b0fdf2cd72b551a7747d5bff902ce2
7
- data.tar.gz: 71deae107a6f79a4633822855cdf70f4f6e693601f96bbb5f323642cdb78b8f686057793f8483dd012c1ea530adcce8d1d1ab405b8faae30d94a7e1c71def229
6
+ metadata.gz: 070eb14907a49f99b7c0f45841d83250c1c051cd9a16b9cddfc83f6d874274f401ac95c9554f6994c6702e557ef8a200e576ee5fe1d0b7c7f7583afb53074794
7
+ data.tar.gz: cab9e48e248b889c5d15e3449a93dc5cb100b7680a87f025ebfd090a1afba36e77730d9e0bc85a10bc9208b9ea2d813e444e0a28b2ac77d412650395479468f3
@@ -1,5 +1,5 @@
1
1
  <?xml version="1.0" encoding='UTF-8'?>
2
- <reference anchor="W3C.P3P" taret="http://www.w3.org/TR/P3P/">
2
+ <reference anchor="W3C.P3P" target="http://www.w3.org/TR/P3P/">
3
3
  <front>
4
4
  <title>The Platform for Privacy Preferences 1.0 (P3P1.0) Specification</title>
5
5
  <author initials="M." surname="Marchiori" fullname="Massimo Marchiori">
@@ -13,5 +13,12 @@ module RelatonW3c
13
13
  def pubid_type(_)
14
14
  "W3C"
15
15
  end
16
+
17
+ def docids(reference, ver)
18
+ ids = super
19
+ ids.reject! &:primary
20
+ id = "W3C #{reference[:target].split('/').last}"
21
+ ids.unshift RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)
22
+ end
16
23
  end
17
24
  end
@@ -0,0 +1,188 @@
1
+ require "rdf"
2
+ require "linkeddata"
3
+ require "sparql"
4
+ require "mechanize"
5
+ require "relaton_w3c/data_parser"
6
+
7
module RelatonW3c
  #
  # Queries the W3C RDF data set, converts each document into a
  # bibliographic item, writes the items to the output directory and
  # maintains an index of the written files.
  #
  class DataFetcher
    attr_reader :data, :group_names

    #
    # Data fetcher initializer
    #
    # @param [String] output directory to save files
    # @param [String] format format of output files (xml, yaml, bibxml)
    #
    def initialize(output, format)
      @output = output
      @format = format
      # "bibxml" documents are stored with the plain "xml" extension
      @ext = format.sub(/^bib/, "")
      dir = File.dirname(File.expand_path(__FILE__))
      @group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
      @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
      # Hash used as a set of the file names written so far; gives a
      # constant-time duplicate check in #save_doc
      @files = {}
      @index = DataIndex.new
    end

    #
    # Initialize fetcher and run fetch
    #
    # @param [String] output directory to save files, default: "data"
    # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
    #
    def self.fetch(output: "data", format: "yaml")
      t1 = Time.now
      puts "Started at: #{t1}"
      FileUtils.mkdir_p output unless Dir.exist? output
      new(output, format).fetch
      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    end

    #
    # Parse and save documents: first the versioned documents, then the
    # unversioned ones, then the BibXML files found in the "data" directory
    # two levels above this file. Finally the index is sorted and saved.
    # An error raised for a single document is reported with `warn` and
    # does not stop the run.
    #
    def fetch # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      query_versioned_docs.each do |sl|
        save_doc DataParser.parse(sl, self)
      rescue StandardError => e
        warn "Error: document #{sl.link} #{e.message}"
        warn e.backtrace.join("\n")
      end
      query_unversioned_docs.each do |sl|
        save_doc DataParser.parse(sl, self)
      rescue StandardError => e
        warn "Error: document #{sl.version_of} #{e.message}"
        warn e.backtrace.join("\n")
      end
      Dir[File.expand_path("../../data/*", __dir__)].each do |file|
        xml = File.read file, encoding: "UTF-8"
        # documents already written from the RDF data win; skip silently
        save_doc BibXMLParser.parse(xml), warn_duplicate: false
      rescue StandardError => e
        warn "Error: document #{file} #{e.message}"
        warn e.backtrace.join("\n")
      end
      @index.sort!.save
    end

    #
    # Query RDF source for documents which have a title, a date and a
    # `doc:versionOf` link
    #
    # @return [RDF::Query::Solutions] query results
    #
    def query_versioned_docs # rubocop:disable Metrics/MethodLength
      sse = SPARQL.parse(%(
        PREFIX : <http://www.w3.org/2001/02pd/rec54#>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
        # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        SELECT ?link ?title ?date ?version_of
        WHERE {
          ?link dc:title ?title ; dc:date ?date ; doc:versionOf ?version_of .
        }
      ))
      data.query sse
    end

    #
    # Query RDF source for the distinct targets of `doc:versionOf`
    #
    # @return [Array] solutions, unique by their `version_of` value
    #
    def query_unversioned_docs
      sse = SPARQL.parse(%(
        PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
        SELECT ?version_of
        WHERE { ?x doc:versionOf ?version_of . }
      ))
      data.query(sse).uniq(&:version_of)
    end

    #
    # Save document to file and register it in the index. A document whose
    # file name was already written during this run is skipped.
    #
    # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
    # @param [Boolean] warn_duplicate warn when the file was already written
    #
    def save_doc(bib, warn_duplicate: true)
      return unless bib

      file = file_name(bib.docnumber)
      if @files.key?(file)
        warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
        return
      end

      # serialize only documents which are really going to be written
      content = serialize(bib)
      @index.add bib.docnumber, file
      @files[file] = true
      File.write file, content, encoding: "UTF-8"
    end

    #
    # Generate file name
    #
    # @param [String] id document id
    #
    # @return [String] file name
    #
    def file_name(id)
      name = id.sub(/^W3C\s/, "").gsub(/[\s,:\/+]/, "_").squeeze("_").downcase
      File.join @output, "#{name}.#{@ext}"
    end

    private

    #
    # Serialize a bibliographic item in the configured output format
    #
    # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
    #
    # @return [String] serialized document
    #
    def serialize(bib)
      case @format
      when "xml" then bib.to_xml(bibdata: true)
      when "yaml" then bib.to_hash.to_yaml
      else bib.send("to_#{@format}")
      end
    end
  end
end
@@ -0,0 +1,146 @@
1
+ require "zip"
2
+
3
module RelatonW3c
  #
  # Index which maps the parts of a document number (code, stage, type,
  # date, suffix) to the file holding the document.
  #
  class DataIndex
    # Splits a document number into optional stage or type prefix, code,
    # optional date (8 or 6 digits) and optional "/suffix".
    DOCNUMBER = %r{
      ^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE))-)?
      (?<code>\w+(?:[+-][\w.]+)*?)
      (?:-(?<date>\d{8}|\d{6}))?
      (?:/(?<suff>\w+))?$
    }xi.freeze

    #
    # Initialize data index.
    #
    # @param [String] index_file path to index file
    # @param [Array<Hash>] index index data
    #
    def initialize(index_file: "index-w3c.yaml", index: [])
      @index_file = index_file
      @index = index
    end

    #
    # Create index from a GitHub repository
    #
    # @return [RelatonW3c::DataIndex] data index
    #
    def self.create_from_repo # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      resp = Zip::InputStream.new URI("#{W3cBibliography::SOURCE}index-w3c.zip").open
      zip = resp.get_next_entry
      yaml = zip.get_input_stream.read

      # Newer versions of Psych use the `permitted_classes:` parameter
      index = if YAML.method(:safe_load).parameters.map(&:last).include?(:permitted_classes)
                YAML.safe_load(yaml, permitted_classes: [Symbol])
              else
                YAML.safe_load(yaml, [Symbol])
              end

      DataIndex.new index: index
    end

    #
    # Add document to index
    #
    # @param [String] docnumber document number
    # @param [String] file path to document file
    #
    def add(docnumber, file)
      @index << docnumber_to_parts(docnumber, file)
    end

    #
    # Save index to file.
    #
    def save
      File.write @index_file, @index.to_yaml, encoding: "UTF-8"
    end

    #
    # Sort index in place
    #
    # @return [RelatonW3c::DataIndex] self
    #
    def sort!
      @index.sort! { |a, b| compare_index_items a, b }
      self
    end

    #
    # Search filename in index. The code is matched as a case-insensitive
    # prefix; stage, type, date and suffix are compared only when the
    # reference contains them.
    #
    # @param [String] ref reference
    #
    # @return [String, nil] document's filename, nil when the reference
    #   can not be parsed or nothing matches
    #
    def search(ref) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
      dparts = docnumber_to_parts(ref)
      code = dparts[:code]
      return unless code # unparsable reference, nothing to look for

      # built once instead of once per index entry
      code_re = /\A#{Regexp.escape code}/i
      @index.detect do |parts|
        # entries without a code can never match
        parts[:code]&.match?(code_re) &&
          %i[stage type suff].all? { |k| dparts[k].nil? || dparts[k].casecmp?(parts[k].to_s) } &&
          (dparts[:date].nil? || dparts[:date] == parts[:date])
      end&.fetch(:file)
    end

    #
    # Compare index items: by code ascending, then by stage weight
    # descending, then by date weight descending.
    #
    # @param [Hash] aid first item
    # @param [Hash] bid second item
    #
    # @return [Integer] comparison result
    #
    def compare_index_items(aid, bid)
      # `to_s` keeps entries without a code comparable
      (aid[:code].to_s <=> bid[:code].to_s).nonzero? ||
        (stage_weight(bid[:stage]) <=> stage_weight(aid[:stage])).nonzero? ||
        (date_weight(bid[:date]) <=> date_weight(aid[:date]))
      # ret = aid[:type] <=> bid[:type] if ret.zero?
    end

    #
    # Weight of stage: position of the stage in DataParser::STAGES. A
    # missing or unknown stage gets the highest weight.
    #
    # @param [String, nil] stage stage
    #
    # @return [Integer] weight
    #
    def stage_weight(stage)
      # stages are parsed case-insensitively, the STAGES keys are upper case
      DataParser::STAGES.keys.index(stage.to_s.upcase) || DataParser::STAGES.size
    end

    #
    # Weight of date
    #
    # @param [String, nil] date date
    #
    # @return [String] weight, "99999999" when there is no date
    #
    def date_weight(date)
      return "99999999" if date.nil?

      date
    end

    #
    # Parse document number to parts
    #
    # @param [String] docnumber document number
    # @param [String, nil] file path to document file
    #
    # @return [Hash{Symbol=>String}] document parts; `:code` is nil when
    #   the document number can not be parsed
    #
    def docnumber_to_parts(docnumber, file = nil)
      m = DOCNUMBER.match(docnumber.to_s)
      # key order (code, file, stage, type, date, suff) is kept stable
      # because the hash is written to the index file as is
      entry = { code: m && m[:code] }
      entry[:file] = file if file
      %i[stage type date suff].each do |key|
        value = m && m[key]
        entry[key] = value if value
      end
      entry
    end
  end
end
@@ -1,5 +1,24 @@
1
1
  module RelatonW3c
2
2
  class DataParser
3
+ USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
4
+
5
+ DOCTYPES = {
6
+ "TR" => "technicalReport",
7
+ "NOTE" => "groupNote",
8
+ }.freeze
9
+
10
+ STAGES = {
11
+ "RET" => "retired",
12
+ "SPSD" => "supersededRecommendation",
13
+ "OBSL" => "obsoletedRecommendation",
14
+ "WD" => "workingDraft",
15
+ "CRD" => "candidateRecommendationDraft",
16
+ "CR" => "candidateRecommendation",
17
+ "PR" => "proposedRecommendation",
18
+ "PER" => "proposedEditedRecommendation",
19
+ "REC" => "recommendation",
20
+ }.freeze
21
+
3
22
  #
4
23
  # Document parser initialization
5
24
  #
@@ -29,7 +48,7 @@ module RelatonW3c
29
48
  # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
30
49
  #
31
50
  def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
32
- return unless @fetcher.class::USED_TYPES.include? type
51
+ return if @sol.respond_to?(:link) && !types_stages.detect { |ts| USED_TYPES.include?(ts) }
33
52
 
34
53
  RelatonW3c::W3cBibliographicItem.new(
35
54
  type: "standard",
@@ -37,10 +56,12 @@ module RelatonW3c
37
56
  fetched: Date.today.to_s,
38
57
  language: ["en"],
39
58
  script: ["Latn"],
59
+ docstatus: parse_docstatus,
40
60
  title: parse_title,
41
61
  link: parse_link,
42
62
  docid: parse_docid,
43
- docnumber: identifier(@sol.link.to_s),
63
+ formattedref: parse_formattedref,
64
+ docnumber: identifier,
44
65
  series: parse_series,
45
66
  date: parse_date,
46
67
  relation: parse_relation,
@@ -49,12 +70,24 @@ module RelatonW3c
49
70
  )
50
71
  end
51
72
 
73
+ #
74
+ # Extract document status
75
+ #
76
+ # @return [RelatonBib::DocumentStatus, nil] document status
77
+ #
78
+ def parse_docstatus
79
+ stage = types_stages&.detect { |st| STAGES.include?(st) }
80
+ RelatonBib::DocumentStatus.new stage: STAGES[stage] if stage
81
+ end
82
+
52
83
  #
53
84
  # Parse title
54
85
  #
55
86
  # @return [RelatonBib::TypedTitleStringCollection] title
56
87
  #
57
88
  def parse_title
89
+ return [] unless @sol.respond_to?(:title)
90
+
58
91
  t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
59
92
  RelatonBib::TypedTitleStringCollection.new [t]
60
93
  end
@@ -65,7 +98,9 @@ module RelatonW3c
65
98
  # @return [Array<RelatonBib::TypedUri>] link
66
99
  #
67
100
  def parse_link
68
- [RelatonBib::TypedUri.new(type: "src", content: @sol.link.to_s)]
101
+ link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
102
+
103
+ [RelatonBib::TypedUri.new(type: "src", content: link.to_s)]
69
104
  end
70
105
 
71
106
  #
@@ -74,23 +109,45 @@ module RelatonW3c
74
109
  # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
75
110
  #
76
111
  def parse_docid
77
- id = pub_id(@sol.link.to_s)
112
+ return [] unless @sol.respond_to?(:link)
113
+
114
+ id = pub_id(@sol.link)
78
115
  [RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)]
79
116
  end
80
117
 
81
118
  #
82
119
  # Generate PubID
83
120
  #
84
- # @param [String] url url
85
- #
86
- # @return [String] PubID
121
+ # @return [String] PubID
87
122
  #
88
123
  def pub_id(url)
89
124
  "W3C #{identifier(url)}"
90
125
  end
91
126
 
92
- def identifier(url)
93
- /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/.match(url)[1].to_s
127
+ #
128
+ # Generate identifier from URL
129
+ #
130
+ # @param [RDF::URI, nil] link
131
+ #
132
+ # @return [String] identifier
133
+ #
134
+ def identifier(link = nil)
135
+ url = link || (@sol.respond_to?(:link) ? @sol.link : @sol.version_of)
136
+ self.class.parse_identifier(url.to_s)
137
+ end
138
+
139
+ #
140
+ # Parse identifier from URL
141
+ #
142
+ # @param [String] url URL
143
+ #
144
+ # @return [String] identifier
145
+ #
146
def self.parse_identifier(url)
  str = url.to_s
  # last path segment that contains at least one hyphen, optionally
  # followed by one more "/segment"
  match = %r{.+/(\w+(?:-[\w.]+)+(?:/\w+)?)}.match(str)
  return match[1] if match

  # otherwise fall back to the last path segment; `to_s` keeps the
  # documented String return for an empty URL
  str.split("/").last.to_s
end
95
152
 
96
153
  #
@@ -99,12 +156,31 @@ module RelatonW3c
99
156
  # @return [Array<RelatonBib::Series>] series
100
157
  #
101
158
  def parse_series
159
+ return [] unless type
160
+
102
161
  title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
103
- [RelatonBib::Series.new(title: title, number: identifier(@sol.link.to_s))]
162
+ [RelatonBib::Series.new(title: title, number: identifier)]
104
163
  end
105
164
 
106
- def type # rubocop:disable Metrics/MethodLength
107
- @type ||= begin
165
+ #
166
+ # Extract type
167
+ #
168
+ # @return [String, nil] type
169
+ #
170
+ def type
171
+ # there are many types, we need to find the right one
172
+ @type ||= types_stages&.detect { |t| USED_TYPES.include?(t) }
173
+ end
174
+
175
+ #
176
+ # Fetches types and stages
177
+ #
178
+ # @return [Array<String>] types and stages
179
+ #
180
+ def types_stages # rubocop:disable Metrics/MethodLength
181
+ return unless @sol.respond_to?(:link)
182
+
183
+ @types_stages ||= begin
108
184
  sse = SPARQL.parse(%(
109
185
  PREFIX : <http://www.w3.org/2001/02pd/rec54#>
110
186
  PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
@@ -113,8 +189,7 @@ module RelatonW3c
113
189
  { <#{@sol.link}> rdf:type ?type }
114
190
  }
115
191
  ))
116
- tps = @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
117
- tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
192
+ @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
118
193
  end
119
194
  end
120
195
 
@@ -124,10 +199,17 @@ module RelatonW3c
124
199
  # @return [String] doctype
125
200
  #
126
201
  def parse_doctype
127
- Scrapper::DOCTYPES[type]
202
+ DOCTYPES[type] || "recommendation"
128
203
  end
129
204
 
205
+ #
206
+ # Parse date
207
+ #
208
+ # @return [Array<RelatonBib::BibliographicDate>] date
209
+ #
130
210
  def parse_date
211
+ return [] unless @sol.respond_to?(:date)
212
+
131
213
  [RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
132
214
  end
133
215
 
@@ -136,29 +218,90 @@ module RelatonW3c
136
218
  #
137
219
  # @return [Array<RelatonBib::DocumentRelation>] relation
138
220
  #
139
- def parse_relation # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
221
+ def parse_relation
222
+ if @sol.respond_to?(:link)
223
+ relations + editor_drafts
224
+ else document_versions
225
+ end
226
+ end
227
+
228
#
# Build relations for the predicates listed below. Each predicate is
# queried with `relation_query` and every result becomes one relation.
#
# @return [Array<RelatonBib::DocumentRelation>] relations
#
def relations # rubocop:disable Metrics/MethodLength
  {
    "doc:obsoletes" => { type: "obsoletes" },
    "mat:hasErrata" => { type: "updatedBy", description: "errata" },
    # "mat:hasTranslations" => "hasTranslation",
    # "mat:hasImplReport" => "hasImpReport",
    ":previousEdition" => { type: "editionOf" },
  }.flat_map do |predicate, tp|
    relation_query(predicate).map do |r|
      fr = RelatonBib::LocalizedString.new pub_id(r.rel.to_s)
      bib = W3cBibliographicItem.new formattedref: fr
      # Work on a copy: `tp` is shared by every result of this predicate,
      # so wrapping the description in place would wrap it again for each
      # further result.
      attrs = tp.merge(bibitem: bib)
      attrs[:description] = RelatonBib::FormattedString.new(content: tp[:description]) if tp[:description]
      RelatonBib::DocumentRelation.new(**attrs)
    end
  end
end
244
+
245
+ def editor_drafts # rubocop:disable Metrics/MethodLength
140
246
  sse = SPARQL.parse(%(
141
- PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
142
- SELECT ?obsoletes
143
- WHERE {
144
- VALUES ?p { doc:obsoletes }
145
- { <#{@sol.link}> ?p ?obsoletes }
146
- }
247
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
248
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
249
+ SELECT ?rel
250
+ WHERE { <#{@sol.link}> :ED ?rel . }
147
251
  ))
148
- @fetcher.data.query(sse).order_by(:obsoletes).map do |r|
149
- tp, url = r.to_h.first
150
- fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
252
+ @fetcher.data.query(sse).map do |s|
253
+ fr = RelatonBib::LocalizedString.new pub_id(s.rel.to_s)
151
254
  bib = W3cBibliographicItem.new formattedref: fr
152
- RelatonBib::DocumentRelation.new(type: tp.to_s, bibitem: bib)
255
+ desc = RelatonBib::FormattedString.new content: "Editor's draft"
256
+ RelatonBib::DocumentRelation.new(
257
+ type: "hasDraft", description: desc, bibitem: bib,
258
+ )
259
+ end
260
+ end
261
+
262
+ def relation_query(predicate)
263
+ sse = SPARQL.parse(%(
264
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
265
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
266
+ PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
267
+ SELECT ?rel
268
+ WHERE { <#{@sol.link}> #{predicate} ?rel . }
269
+ ))
270
+ @fetcher.data.query(sse).order_by(:rel)
271
+ end
272
+
273
+ def document_versions
274
+ sse = SPARQL.parse(%(
275
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
276
+ SELECT ?link
277
+ WHERE { ?link doc:versionOf <#{@sol.version_of}> }
278
+ ))
279
+ @fetcher.data.query(sse).map do |r|
280
+ fref = RelatonBib::FormattedRef.new content: pub_id(r.link)
281
+ bib = W3cBibliographicItem.new formattedref: fref
282
+ RelatonBib::DocumentRelation.new(type: "hasEdition", bibitem: bib)
153
283
  end
154
284
  end
155
285
 
286
+ #
287
+ # Parse formattedref
288
+ #
289
+ # @return [RelatonBib::FormattedRef, nil] formattedref
290
+ #
291
+ def parse_formattedref
292
+ return if @sol.respond_to?(:link)
293
+
294
+ RelatonBib::FormattedRef.new(content: pub_id(@sol.version_of))
295
+ end
296
+
156
297
  #
157
298
  # Parse contributor
158
299
  #
159
300
  # @return [Array<RelatonBib::ContributionInfo>] contributor
160
301
  #
161
302
  def parse_contrib # rubocop:disable Metrics/MethodLength
303
+ return [] unless @sol.respond_to?(:link)
304
+
162
305
  sse = SPARQL.parse(%(
163
306
  PREFIX : <http://www.w3.org/2001/02pd/rec54#>
164
307
  PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
@@ -181,6 +324,8 @@ module RelatonW3c
181
324
  # @return [RelatonBib::EditorialGroup] editorialgroup
182
325
  #
183
326
  def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
327
+ return unless @sol.respond_to?(:link)
328
+
184
329
  sse = SPARQL.parse(%(
185
330
  PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
186
331
  PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.11.0".freeze
2
+ VERSION = "1.11.3".freeze
3
3
  end
@@ -5,15 +5,17 @@ require "net/http"
5
5
  module RelatonW3c
6
6
  # Class methods for search W3C standards.
7
7
  class W3cBibliography
8
- SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/data/"
8
+ SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/"
9
9
 
10
10
  class << self
11
11
  # @param text [String]
12
12
  # @return [RelatonW3c::HitCollection]
13
13
  def search(text) # rubocop:disable Metrics/MethodLength
14
- # HitCollection.new text
15
- file = text.sub(/^W3C\s/, "").gsub(/[\s,:\/]/, "_").squeeze("_").upcase
16
- url = "#{SOURCE}#{file}.yaml"
14
+ ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
15
+ file = DataIndex.create_from_repo.search(ref)
16
+ return unless file
17
+
18
+ url = "#{SOURCE}#{file}"
17
19
  resp = Net::HTTP.get_response(URI.parse(url))
18
20
  return unless resp.code == "200"
19
21
 
@@ -24,7 +26,7 @@ module RelatonW3c
24
26
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
25
27
  Net::ProtocolError, Errno::ETIMEDOUT
26
28
  raise RelatonBib::RequestError,
27
- "Could not access #{HitCollection::DOMAIN}"
29
+ "Could not access #{url}"
28
30
  end
29
31
 
30
32
  # @param ref [String] the W3C standard Code to look up
@@ -39,8 +41,8 @@ module RelatonW3c
39
41
  return
40
42
  end
41
43
 
42
- # ret = result.first.fetch
43
- warn "[relaton-w3c] (\"#{ref}\") found #{result.title.first.title.content}"
44
+ found = result.docnumber
45
+ warn "[relaton-w3c] (\"#{ref}\") found #{found}"
44
46
  result
45
47
  end
46
48
  end
@@ -32,6 +32,9 @@
32
32
  'https://www.w3.org/WAI/EO':
33
33
  name: Education and Outreach Working Group
34
34
  abbrev: EOWG
35
+ 'https://www.w3.org/WAI/about/groups/eowg':
36
+ name: Education and Outreach Working Group
37
+ abbrev: EOWG
35
38
  'https://www.w3.org/2001/sw/WebOnt':
36
39
  name: Web-Ontology Working Group
37
40
  'http://www.w3.org/MarkUp/Forms':
@@ -54,6 +57,8 @@
54
57
  name: Web Applications Working Group
55
58
  'https://www.w3.org/2008/webapps':
56
59
  name: Web Applications Working Group
60
+ 'https://www.w3.org/groups/wg/webapps':
61
+ name: Web Applications Working Group
57
62
  'https://www.w3.org/das':
58
63
  name: Devices and Sensors Working Group
59
64
  abbrev: DAS WG
@@ -226,6 +231,8 @@
226
231
  abbrev: ARIA WG
227
232
  'https://www.w3.org/wasm':
228
233
  name: WebAssembly Working Group
234
+ 'https://www.w3.org/groups/wg/wasm':
235
+ name: WebAssembly Working Group
229
236
  'https://www.w3.org/groups/wg/webediting':
230
237
  name: Web Editing Working Group
231
238
  'https://www.w3.org/2014/data-shapes':
data/lib/relaton_w3c.rb CHANGED
@@ -2,13 +2,14 @@ require "relaton_bib"
2
2
  require "relaton_w3c/version"
3
3
  require "relaton_w3c/w3c_bibliography"
4
4
  require "relaton_w3c/w3c_bibliographic_item"
5
- require "relaton_w3c/hit_collection"
6
- require "relaton_w3c/hit"
7
- require "relaton_w3c/scrapper"
5
+ # require "relaton_w3c/hit_collection"
6
+ # require "relaton_w3c/hit"
7
+ # require "relaton_w3c/scrapper"
8
8
  require "relaton_w3c/xml_parser"
9
9
  require "relaton_w3c/bibxml_parser"
10
10
  require "relaton_w3c/hash_converter"
11
- require "relaton_w3c/data_fethcer"
11
+ require "relaton_w3c/data_fetcher"
12
+ require "relaton_w3c/data_index"
12
13
 
13
14
  module RelatonW3c
14
15
  class Error < StandardError; end
data/relaton_w3c.gemspec CHANGED
@@ -40,6 +40,9 @@ Gem::Specification.new do |spec|
40
40
  spec.add_dependency "linkeddata", "~> 3.1.0"
41
41
  spec.add_dependency "mechanize", "~> 2.8.0"
42
42
  spec.add_dependency "rdf", "~> 3.1.0"
43
+ spec.add_dependency "rdf-normalize", "~> 0.4.0"
43
44
  spec.add_dependency "relaton-bib", "~> 1.11.0"
45
+ spec.add_dependency "rubyzip", "~> 2.3.0"
46
+ spec.add_dependency "shex", "~> 0.6.0"
44
47
  spec.add_dependency "sparql", "~> 3.1.0"
45
48
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.0
4
+ version: 1.11.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-04-10 00:00:00.000000000 Z
11
+ date: 2022-04-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: 3.1.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: rdf-normalize
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 0.4.0
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 0.4.0
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: relaton-bib
127
141
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +150,34 @@ dependencies:
136
150
  - - "~>"
137
151
  - !ruby/object:Gem::Version
138
152
  version: 1.11.0
153
+ - !ruby/object:Gem::Dependency
154
+ name: rubyzip
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 2.3.0
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 2.3.0
167
+ - !ruby/object:Gem::Dependency
168
+ name: shex
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: 0.6.0
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: 0.6.0
139
181
  - !ruby/object:Gem::Dependency
140
182
  name: sparql
141
183
  requirement: !ruby/object:Gem::Requirement
@@ -201,7 +243,8 @@ files:
201
243
  - grammars/reqt.rng
202
244
  - lib/relaton_w3c.rb
203
245
  - lib/relaton_w3c/bibxml_parser.rb
204
- - lib/relaton_w3c/data_fethcer.rb
246
+ - lib/relaton_w3c/data_fetcher.rb
247
+ - lib/relaton_w3c/data_index.rb
205
248
  - lib/relaton_w3c/data_parser.rb
206
249
  - lib/relaton_w3c/hash_converter.rb
207
250
  - lib/relaton_w3c/hit.rb
@@ -1,110 +0,0 @@
1
- require "rdf"
2
- require "linkeddata"
3
- require "sparql"
4
- require "mechanize"
5
- require "relaton_w3c/data_parser"
6
-
7
- module RelatonW3c
8
- class DataFetcher
9
- USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
10
-
11
- attr_reader :data, :group_names
12
-
13
- #
14
- # Data fetcher initializer
15
- #
16
- # @param [String] output directory to save files
17
- # @param [String] format format of output files (xml, yaml, bibxml)
18
- #
19
- def initialize(output, format)
20
- @output = output
21
- @format = format
22
- @ext = format.sub(/^bib/, "")
23
- dir = File.dirname(File.expand_path(__FILE__))
24
- @group_names = YAML.load_file(File.join(dir , "workgroups.yaml"))
25
- @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
26
- @files = []
27
- end
28
-
29
- #
30
- # Initialize fetcher and run fetch
31
- #
32
- # @param [Strin] output directory to save files, default: "data"
33
- # @param [Strin] format format of output files (xml, yaml, bibxml), default: yaml
34
- #
35
- def self.fetch(output: "data", format: "yaml")
36
- t1 = Time.now
37
- puts "Started at: #{t1}"
38
- FileUtils.mkdir_p output unless Dir.exist? output
39
- new(output, format).fetch
40
- t2 = Time.now
41
- puts "Stopped at: #{t2}"
42
- puts "Done in: #{(t2 - t1).round} sec."
43
- end
44
-
45
- #
46
- # Parse documents
47
- #
48
- def fetch
49
- query.each { |sl| save_doc DataParser.parse(sl, self) }
50
- Dir[File.expand_path("../../data/*", __dir__)].each do |file|
51
- xml = File.read file, encoding: "UTF-8"
52
- save_doc BibXMLParser.parse(xml)
53
- end
54
- end
55
-
56
- #
57
- # Query RDF source for documents
58
- #
59
- # @return [RDF::Query::Solutions] query results
60
- #
61
- def query # rubocop:disable Metrics/MethodLength
62
- sse = SPARQL.parse(%(
63
- PREFIX : <http://www.w3.org/2001/02pd/rec54#>
64
- PREFIX dc: <http://purl.org/dc/elements/1.1/>
65
- PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
66
- # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
67
- PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
68
- SELECT ?link ?title ?date
69
- WHERE {
70
- ?link dc:title ?title ; dc:date ?date . # ; doc:versionOf ?version_of .
71
- }
72
- ))
73
- data.query sse
74
- end
75
-
76
- #
77
- # Save document to file
78
- #
79
- # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
80
- #
81
- def save_doc(bib) # rubocop:disable Metrics/MethodLength
82
- return unless bib
83
-
84
- c = case @format
85
- when "xml" then bib.to_xml(bibdata: true)
86
- when "yaml" then bib.to_hash.to_yaml
87
- else bib.send("to_#{@format}")
88
- end
89
- file = file_name(bib)
90
- if @files.include? file
91
- warn "File #{file} already exists. Document: #{bib.docnumber}"
92
- else
93
- @files << file
94
- end
95
- File.write file, c, encoding: "UTF-8"
96
- end
97
-
98
- #
99
- # Generate file name
100
- #
101
- # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
102
- #
103
- # @return [String] file name
104
- #
105
- def file_name(bib)
106
- name = bib.docnumber.gsub(/[\s,:\/]/, "_").squeeze("_").upcase
107
- File.join @output, "#{name}.#{@ext}"
108
- end
109
- end
110
- end