relaton-w3c 1.11.0 → 1.11.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ce3bb960545ad6330d96829be3142732ba1c756e24b0e339ca52a094d7c9447
4
- data.tar.gz: 8659ebfc94938514ca01a3f202f8b70fe4926268eedf7cc8cb83ef521eda5c8a
3
+ metadata.gz: 4c96856baa51c84d8397b62be6dee1b8225e7ab854cd70176187e68fc269a23c
4
+ data.tar.gz: 54d5cc018e0de6ecca6d402ccbe41947047190d08e922c8f46b7cb20164cea22
5
5
  SHA512:
6
- metadata.gz: 5620a3934fd79f4307489fc8c8129cf8f3c601aa71e3ef0ca568db8ced117bdc2260b392f8711c80f605930fa8fcc5a7f4b0fdf2cd72b551a7747d5bff902ce2
7
- data.tar.gz: 71deae107a6f79a4633822855cdf70f4f6e693601f96bbb5f323642cdb78b8f686057793f8483dd012c1ea530adcce8d1d1ab405b8faae30d94a7e1c71def229
6
+ metadata.gz: 070eb14907a49f99b7c0f45841d83250c1c051cd9a16b9cddfc83f6d874274f401ac95c9554f6994c6702e557ef8a200e576ee5fe1d0b7c7f7583afb53074794
7
+ data.tar.gz: cab9e48e248b889c5d15e3449a93dc5cb100b7680a87f025ebfd090a1afba36e77730d9e0bc85a10bc9208b9ea2d813e444e0a28b2ac77d412650395479468f3
@@ -1,5 +1,5 @@
1
1
  <?xml version="1.0" encoding='UTF-8'?>
2
- <reference anchor="W3C.P3P" taret="http://www.w3.org/TR/P3P/">
2
+ <reference anchor="W3C.P3P" target="http://www.w3.org/TR/P3P/">
3
3
  <front>
4
4
  <title>The Platform for Privacy Preferences 1.0 (P3P1.0) Specification</title>
5
5
  <author initials="M." surname="Marchiori" fullname="Massimo Marchiori">
@@ -13,5 +13,12 @@ module RelatonW3c
13
13
  def pubid_type(_)
14
14
  "W3C"
15
15
  end
16
+
17
+ def docids(reference, ver)
18
+ ids = super
19
+ ids.reject! &:primary
20
+ id = "W3C #{reference[:target].split('/').last}"
21
+ ids.unshift RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)
22
+ end
16
23
  end
17
24
  end
@@ -0,0 +1,188 @@
1
+ require "rdf"
2
+ require "linkeddata"
3
+ require "sparql"
4
+ require "mechanize"
5
+ require "relaton_w3c/data_parser"
6
+
7
+ module RelatonW3c
8
+ class DataFetcher
9
+ attr_reader :data, :group_names
10
+
11
+ #
12
+ # Data fetcher initializer
13
+ #
14
+ # @param [String] output directory to save files
15
+ # @param [String] format format of output files (xml, yaml, bibxml)
16
+ #
17
+ def initialize(output, format)
18
+ @output = output
19
+ @format = format
20
+ @ext = format.sub(/^bib/, "")
21
+ dir = File.dirname(File.expand_path(__FILE__))
22
+ @group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
23
+ @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
24
+ @files = []
25
+ @index = DataIndex.new
26
+ end
27
+
28
+ #
29
+ # Initialize fetcher and run fetch
30
+ #
31
+ # @param [String] output directory to save files, default: "data"
32
+ # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
33
+ #
34
+ def self.fetch(output: "data", format: "yaml")
35
+ t1 = Time.now
36
+ puts "Started at: #{t1}"
37
+ FileUtils.mkdir_p output unless Dir.exist? output
38
+ new(output, format).fetch
39
+ t2 = Time.now
40
+ puts "Stopped at: #{t2}"
41
+ puts "Done in: #{(t2 - t1).round} sec."
42
+ end
43
+
44
+ #
45
+ # Parse documents
46
+ #
47
+ def fetch # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
48
+ query_versioned_docs.each do |sl|
49
+ save_doc DataParser.parse(sl, self)
50
+ rescue StandardError => e
51
+ warn "Error: document #{sl.link} #{e.message}"
52
+ warn e.backtrace.join("\n")
53
+ end
54
+ query_unversioned_docs.each do |sl|
55
+ save_doc DataParser.parse(sl, self)
56
+ rescue StandardError => e
57
+ warn "Error: document #{sl.version_of} #{e.message}"
58
+ warn e.backtrace.join("\n")
59
+ end
60
+ Dir[File.expand_path("../../data/*", __dir__)].each do |file|
61
+ xml = File.read file, encoding: "UTF-8"
62
+ save_doc BibXMLParser.parse(xml), warn_duplicate: false
63
+ rescue StandardError => e
64
+ warn "Error: document #{file} #{e.message}"
65
+ warn e.backtrace.join("\n")
66
+ end
67
+ @index.sort!.save
68
+ end
69
+
70
+ #
71
+ # Create index file
72
+ #
73
+ # def create_index
74
+ # index_file = "index-w3c.yaml"
75
+ # index_yaml = @index.sort do |a, b|
76
+ # compare_index_items a, b
77
+ # end.to_yaml
78
+ # File.write index_file, index_yaml, encoding: "UTF-8"
79
+ # end
80
+
81
+ #
82
+ # Compare index items
83
+ #
84
+ # @param [Hash] aid first item
85
+ # @param [Hash] bid second item
86
+ #
87
+ # @return [Integer] comparison result
88
+ #
89
+ # def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
90
+ # ret = aid[:code] <=> bid[:code]
91
+ # ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
92
+ # ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
93
+ # # ret = aid[:type] <=> bid[:type] if ret.zero?
94
+ # ret
95
+ # end
96
+
97
+ #
98
+ # Weight of stage
99
+ #
100
+ # @param [String, nil] stage stage
101
+ #
102
+ # @return [Integer] weight
103
+ #
104
+ # def stage_weight(stage)
105
+ # return DataParser::STAGES.size if stage.nil?
106
+
107
+ # DataParser::STAGES.keys.index(stage)
108
+ # end
109
+
110
+ #
111
+ # Weight of date
112
+ #
113
+ # @param [String] date date
114
+ #
115
+ # @return [String] weight
116
+ #
117
+ # def date_weight(date)
118
+ # return "99999999" if date.nil?
119
+
120
+ # date
121
+ # end
122
+
123
+ #
124
+ # Query RDF source for documents
125
+ #
126
+ # @return [RDF::Query::Solutions] query results
127
+ #
128
+ def query_versioned_docs # rubocop:disable Metrics/MethodLength
129
+ sse = SPARQL.parse(%(
130
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
131
+ PREFIX dc: <http://purl.org/dc/elements/1.1/>
132
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
133
+ # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
134
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
135
+ SELECT ?link ?title ?date ?version_of
136
+ WHERE {
137
+ ?link dc:title ?title ; dc:date ?date ; doc:versionOf ?version_of .
138
+ }
139
+ ))
140
+ data.query sse
141
+ end
142
+
143
+ def query_unversioned_docs
144
+ sse = SPARQL.parse(%(
145
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
146
+ SELECT ?version_of
147
+ WHERE { ?x doc:versionOf ?version_of . }
148
+ ))
149
+ data.query(sse).uniq &:version_of
150
+ end
151
+
152
+ #
153
+ # Save document to file
154
+ #
155
+ # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
156
+ #
157
+ def save_doc(bib, warn_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
158
+ return unless bib
159
+
160
+ c = case @format
161
+ when "xml" then bib.to_xml(bibdata: true)
162
+ when "yaml" then bib.to_hash.to_yaml
163
+ else bib.send("to_#{@format}")
164
+ end
165
+ # id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
166
+ file = file_name(bib.docnumber)
167
+ if @files.include?(file)
168
+ warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
169
+ else
170
+ @index.add bib.docnumber, file
171
+ @files << file
172
+ File.write file, c, encoding: "UTF-8"
173
+ end
174
+ end
175
+
176
+ #
177
+ # Generate file name
178
+ #
179
+ # @param [String] id document id
180
+ #
181
+ # @return [String] file name
182
+ #
183
+ def file_name(id)
184
+ name = id.sub(/^W3C\s/, "").gsub(/[\s,:\/+]/, "_").squeeze("_").downcase
185
+ File.join @output, "#{name}.#{@ext}"
186
+ end
187
+ end
188
+ end
@@ -0,0 +1,146 @@
1
+ require "zip"
2
+
3
+ module RelatonW3c
4
+ class DataIndex
5
+ #
6
+ # Initialize data index.
7
+ #
8
+ # @param [String] index_file path to index file
9
+ # @param [Array<Hash>] index index data
10
+ #
11
+ def initialize(index_file: "index-w3c.yaml", index: [])
12
+ @index_file = index_file
13
+ @index = index
14
+ end
15
+
16
+ #
17
+ # Create index from a GitHub repository
18
+ #
19
+ # @return [RelatonW3c::DataIndex] data index
20
+ #
21
+ def self.create_from_repo # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
22
+ resp = Zip::InputStream.new URI("#{W3cBibliography::SOURCE}index-w3c.zip").open
23
+ zip = resp.get_next_entry
24
+
25
+ # Newer versions of Psych use the `permitted_classes:` parameter
26
+ index = if YAML.method(:safe_load).parameters.collect(&:last).index(:permitted_classes)
27
+ YAML.safe_load(zip.get_input_stream.read, permitted_classes: [Symbol])
28
+ else
29
+ YAML.safe_load(zip.get_input_stream.read, [Symbol])
30
+ end
31
+
32
+ DataIndex.new index: index
33
+ end
34
+
35
+ #
36
+ # Add document to index
37
+ #
38
+ # @param [String] docnumber document number
39
+ # @param [String] file path to document file
40
+ #
41
+ def add(docnumber, file)
42
+ @index << docnumber_to_parts(docnumber, file)
43
+ end
44
+
45
+ #
46
+ # Save index to file.
47
+ #
48
+ def save
49
+ File.write @index_file, @index.to_yaml, encoding: "UTF-8"
50
+ end
51
+
52
+ #
53
+ # Sort index
54
+ #
55
+ # @return [RelatonW3c::DataIndex] self, with the index sorted in place
56
+ #
57
+ def sort!
58
+ @index.sort! { |a, b| compare_index_items a, b }
59
+ self
60
+ end
61
+
62
+ #
63
+ # Search filename in index
64
+ #
65
+ # @param [String] ref reference
66
+ #
67
+ # @return [String] document's filename
68
+ #
69
+ def search(ref) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
70
+ dparts = docnumber_to_parts(ref)
71
+ @index.detect do |parts|
72
+ parts[:code].match?(/^#{Regexp.escape dparts[:code]}/i) &&
73
+ (dparts[:stage].nil? || dparts[:stage].casecmp?(parts[:stage])) &&
74
+ (dparts[:type].nil? || dparts[:type].casecmp?(parts[:type])) &&
75
+ (dparts[:date].nil? || dparts[:date] == parts[:date]) &&
76
+ (dparts[:suff].nil? || dparts[:suff].casecmp?(parts[:suff]))
77
+ end&.fetch(:file)
78
+ end
79
+
80
+ #
81
+ # Compare index items
82
+ #
83
+ # @param [Hash] aid first item
84
+ # @param [Hash] bid second item
85
+ #
86
+ # @return [Integer] comparison result
87
+ #
88
+ def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
89
+ ret = aid[:code] <=> bid[:code]
90
+ ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
91
+ ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
92
+ # ret = aid[:type] <=> bid[:type] if ret.zero?
93
+ ret
94
+ end
95
+
96
+ #
97
+ # Weight of stage
98
+ #
99
+ # @param [String, nil] stage stage
100
+ #
101
+ # @return [Integer] weight
102
+ #
103
+ def stage_weight(stage)
104
+ return DataParser::STAGES.size if stage.nil?
105
+
106
+ DataParser::STAGES.keys.index(stage)
107
+ end
108
+
109
+ #
110
+ # Weight of date
111
+ #
112
+ # @param [String] date date
113
+ #
114
+ # @return [String] weight
115
+ #
116
+ def date_weight(date)
117
+ return "99999999" if date.nil?
118
+
119
+ date
120
+ end
121
+
122
+ #
123
+ # Parse document number to parts
124
+ #
125
+ # @param [String] docnumber document number
126
+ # @param [String, nil] file path to document file
127
+ #
128
+ # @return [Hash{Symbol=>String}] document parts
129
+ #
130
+ def docnumber_to_parts(docnumber, file = nil) # rubocop:disable Metrics/MethodLength
131
+ %r{
132
+ ^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE))-)?
133
+ (?<code>\w+(?:[+-][\w.]+)*?)
134
+ (?:-(?<date>\d{8}|\d{6}))?
135
+ (?:/(?<suff>\w+))?$
136
+ }xi =~ docnumber
137
+ entry = { code: code }
138
+ entry[:file] = file if file
139
+ entry[:stage] = stage if stage
140
+ entry[:type] = type if type
141
+ entry[:date] = date if date
142
+ entry[:suff] = suff if suff
143
+ entry
144
+ end
145
+ end
146
+ end
@@ -1,5 +1,24 @@
1
1
  module RelatonW3c
2
2
  class DataParser
3
+ USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
4
+
5
+ DOCTYPES = {
6
+ "TR" => "technicalReport",
7
+ "NOTE" => "groupNote",
8
+ }.freeze
9
+
10
+ STAGES = {
11
+ "RET" => "retired",
12
+ "SPSD" => "supersededRecommendation",
13
+ "OBSL" => "obsoletedRecommendation",
14
+ "WD" => "workingDraft",
15
+ "CRD" => "candidateRecommendationDraft",
16
+ "CR" => "candidateRecommendation",
17
+ "PR" => "proposedRecommendation",
18
+ "PER" => "proposedEditedRecommendation",
19
+ "REC" => "recommendation",
20
+ }.freeze
21
+
3
22
  #
4
23
  # Document parser initialization
5
24
  #
@@ -29,7 +48,7 @@ module RelatonW3c
29
48
  # @return [RelatonW3c::W3cBibliographicItem, nil] bibliographic item
30
49
  #
31
50
  def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
32
- return unless @fetcher.class::USED_TYPES.include? type
51
+ return if @sol.respond_to?(:link) && !types_stages.detect { |ts| USED_TYPES.include?(ts) }
33
52
 
34
53
  RelatonW3c::W3cBibliographicItem.new(
35
54
  type: "standard",
@@ -37,10 +56,12 @@ module RelatonW3c
37
56
  fetched: Date.today.to_s,
38
57
  language: ["en"],
39
58
  script: ["Latn"],
59
+ docstatus: parse_docstatus,
40
60
  title: parse_title,
41
61
  link: parse_link,
42
62
  docid: parse_docid,
43
- docnumber: identifier(@sol.link.to_s),
63
+ formattedref: parse_formattedref,
64
+ docnumber: identifier,
44
65
  series: parse_series,
45
66
  date: parse_date,
46
67
  relation: parse_relation,
@@ -49,12 +70,24 @@ module RelatonW3c
49
70
  )
50
71
  end
51
72
 
73
+ #
74
+ # Extract document status
75
+ #
76
+ # @return [RelatonBib::DocumentStatus, nil] document status
77
+ #
78
+ def parse_docstatus
79
+ stage = types_stages&.detect { |st| STAGES.include?(st) }
80
+ RelatonBib::DocumentStatus.new stage: STAGES[stage] if stage
81
+ end
82
+
52
83
  #
53
84
  # Parse title
54
85
  #
55
86
  # @return [RelatonBib::TypedTitleStringCollection] title
56
87
  #
57
88
  def parse_title
89
+ return [] unless @sol.respond_to?(:title)
90
+
58
91
  t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
59
92
  RelatonBib::TypedTitleStringCollection.new [t]
60
93
  end
@@ -65,7 +98,9 @@ module RelatonW3c
65
98
  # @return [Array<RelatonBib::TypedUri>] link
66
99
  #
67
100
  def parse_link
68
- [RelatonBib::TypedUri.new(type: "src", content: @sol.link.to_s)]
101
+ link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
102
+
103
+ [RelatonBib::TypedUri.new(type: "src", content: link.to_s)]
69
104
  end
70
105
 
71
106
  #
@@ -74,23 +109,45 @@ module RelatonW3c
74
109
  # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
75
110
  #
76
111
  def parse_docid
77
- id = pub_id(@sol.link.to_s)
112
+ return [] unless @sol.respond_to?(:link)
113
+
114
+ id = pub_id(@sol.link)
78
115
  [RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)]
79
116
  end
80
117
 
81
118
  #
82
119
  # Generate PubID
83
120
  #
84
- # @param [String] url url
85
- #
86
- # @return [String] PubID
121
+ # @return [String] PubID
87
122
  #
88
123
  def pub_id(url)
89
124
  "W3C #{identifier(url)}"
90
125
  end
91
126
 
92
- def identifier(url)
93
- /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/.match(url)[1].to_s
127
+ #
128
+ # Generate identifier from URL
129
+ #
130
+ # @param [RDF::URI, nil] link
131
+ #
132
+ # @return [String] identifier
133
+ #
134
+ def identifier(link = nil)
135
+ url = link || (@sol.respond_to?(:link) ? @sol.link : @sol.version_of)
136
+ self.class.parse_identifier(url.to_s)
137
+ end
138
+
139
+ #
140
+ # Parse identifier from URL
141
+ #
142
+ # @param [String] url URL
143
+ #
144
+ # @return [String] identifier
145
+ #
146
+ def self.parse_identifier(url)
147
+ if /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/ =~ url.to_s
148
+ $1.to_s
149
+ else url.to_s.split("/").last
150
+ end
94
151
  end
95
152
 
96
153
  #
@@ -99,12 +156,31 @@ module RelatonW3c
99
156
  # @return [Array<RelatonBib::Series>] series
100
157
  #
101
158
  def parse_series
159
+ return [] unless type
160
+
102
161
  title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
103
- [RelatonBib::Series.new(title: title, number: identifier(@sol.link.to_s))]
162
+ [RelatonBib::Series.new(title: title, number: identifier)]
104
163
  end
105
164
 
106
- def type # rubocop:disable Metrics/MethodLength
107
- @type ||= begin
165
+ #
166
+ # Extract type
167
+ #
168
+ # @return [String, nil] type
169
+ #
170
+ def type
171
+ # there are many types, we need to find the right one
172
+ @type ||= types_stages&.detect { |t| USED_TYPES.include?(t) }
173
+ end
174
+
175
+ #
176
+ # Fetches types and stages
177
+ #
178
+ # @return [Array<String>] types and stages
179
+ #
180
+ def types_stages # rubocop:disable Metrics/MethodLength
181
+ return unless @sol.respond_to?(:link)
182
+
183
+ @types_stages ||= begin
108
184
  sse = SPARQL.parse(%(
109
185
  PREFIX : <http://www.w3.org/2001/02pd/rec54#>
110
186
  PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
@@ -113,8 +189,7 @@ module RelatonW3c
113
189
  { <#{@sol.link}> rdf:type ?type }
114
190
  }
115
191
  ))
116
- tps = @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
117
- tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
192
+ @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
118
193
  end
119
194
  end
120
195
 
@@ -124,10 +199,17 @@ module RelatonW3c
124
199
  # @return [String] doctype
125
200
  #
126
201
  def parse_doctype
127
- Scrapper::DOCTYPES[type]
202
+ DOCTYPES[type] || "recommendation"
128
203
  end
129
204
 
205
+ #
206
+ # Parse date
207
+ #
208
+ # @return [Array<RelatonBib::BibliographicDate>] date
209
+ #
130
210
  def parse_date
211
+ return [] unless @sol.respond_to?(:date)
212
+
131
213
  [RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
132
214
  end
133
215
 
@@ -136,29 +218,90 @@ module RelatonW3c
136
218
  #
137
219
  # @return [Array<RelatonBib::DocumentRelation>] relation
138
220
  #
139
- def parse_relation # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
221
+ def parse_relation
222
+ if @sol.respond_to?(:link)
223
+ relations + editor_drafts
224
+ else document_versions
225
+ end
226
+ end
227
+
228
+ def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
229
+ {
230
+ "doc:obsoletes" => { type: "obsoletes" },
231
+ "mat:hasErrata" => { type: "updatedBy", description: "errata" },
232
+ # "mat:hasTranslations" => "hasTranslation",
233
+ # "mat:hasImplReport" => "hasImpReport",
234
+ ":previousEdition" => { type: "editionOf" },
235
+ }.reduce([]) do |acc, (predicate, tp)|
236
+ acc + relation_query(predicate).map do |r|
237
+ fr = RelatonBib::LocalizedString.new pub_id(r.rel.to_s)
238
+ bib = W3cBibliographicItem.new formattedref: fr
239
+ tp[:description] = RelatonBib::FormattedString.new content: tp[:description] if tp[:description]
240
+ RelatonBib::DocumentRelation.new(**tp, bibitem: bib)
241
+ end
242
+ end
243
+ end
244
+
245
+ def editor_drafts # rubocop:disable Metrics/MethodLength
140
246
  sse = SPARQL.parse(%(
141
- PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
142
- SELECT ?obsoletes
143
- WHERE {
144
- VALUES ?p { doc:obsoletes }
145
- { <#{@sol.link}> ?p ?obsoletes }
146
- }
247
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
248
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
249
+ SELECT ?rel
250
+ WHERE { <#{@sol.link}> :ED ?rel . }
147
251
  ))
148
- @fetcher.data.query(sse).order_by(:obsoletes).map do |r|
149
- tp, url = r.to_h.first
150
- fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
252
+ @fetcher.data.query(sse).map do |s|
253
+ fr = RelatonBib::LocalizedString.new pub_id(s.rel.to_s)
151
254
  bib = W3cBibliographicItem.new formattedref: fr
152
- RelatonBib::DocumentRelation.new(type: tp.to_s, bibitem: bib)
255
+ desc = RelatonBib::FormattedString.new content: "Editor's draft"
256
+ RelatonBib::DocumentRelation.new(
257
+ type: "hasDraft", description: desc, bibitem: bib,
258
+ )
259
+ end
260
+ end
261
+
262
+ def relation_query(predicate)
263
+ sse = SPARQL.parse(%(
264
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
265
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
266
+ PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
267
+ SELECT ?rel
268
+ WHERE { <#{@sol.link}> #{predicate} ?rel . }
269
+ ))
270
+ @fetcher.data.query(sse).order_by(:rel)
271
+ end
272
+
273
+ def document_versions
274
+ sse = SPARQL.parse(%(
275
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
276
+ SELECT ?link
277
+ WHERE { ?link doc:versionOf <#{@sol.version_of}> }
278
+ ))
279
+ @fetcher.data.query(sse).map do |r|
280
+ fref = RelatonBib::FormattedRef.new content: pub_id(r.link)
281
+ bib = W3cBibliographicItem.new formattedref: fref
282
+ RelatonBib::DocumentRelation.new(type: "hasEdition", bibitem: bib)
153
283
  end
154
284
  end
155
285
 
286
+ #
287
+ # Parse formattedref
288
+ #
289
+ # @return [RelatonBib::FormattedRef] formattedref
290
+ #
291
+ def parse_formattedref
292
+ return if @sol.respond_to?(:link)
293
+
294
+ RelatonBib::FormattedRef.new(content: pub_id(@sol.version_of))
295
+ end
296
+
156
297
  #
157
298
  # Parse contributor
158
299
  #
159
300
  # @return [Array<RelatonBib::ContributionInfo>] contributor
160
301
  #
161
302
  def parse_contrib # rubocop:disable Metrics/MethodLength
303
+ return [] unless @sol.respond_to?(:link)
304
+
162
305
  sse = SPARQL.parse(%(
163
306
  PREFIX : <http://www.w3.org/2001/02pd/rec54#>
164
307
  PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
@@ -181,6 +324,8 @@ module RelatonW3c
181
324
  # @return [RelatonBib::EditorialGroup] editorialgroup
182
325
  #
183
326
  def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
327
+ return unless @sol.respond_to?(:link)
328
+
184
329
  sse = SPARQL.parse(%(
185
330
  PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
186
331
  PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.11.0".freeze
2
+ VERSION = "1.11.3".freeze
3
3
  end
@@ -5,15 +5,17 @@ require "net/http"
5
5
  module RelatonW3c
6
6
  # Class methods for search W3C standards.
7
7
  class W3cBibliography
8
- SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/data/"
8
+ SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/"
9
9
 
10
10
  class << self
11
11
  # @param text [String]
12
12
  # @return [RelatonW3c::HitCollection]
13
13
  def search(text) # rubocop:disable Metrics/MethodLength
14
- # HitCollection.new text
15
- file = text.sub(/^W3C\s/, "").gsub(/[\s,:\/]/, "_").squeeze("_").upcase
16
- url = "#{SOURCE}#{file}.yaml"
14
+ ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
15
+ file = DataIndex.create_from_repo.search(ref)
16
+ return unless file
17
+
18
+ url = "#{SOURCE}#{file}"
17
19
  resp = Net::HTTP.get_response(URI.parse(url))
18
20
  return unless resp.code == "200"
19
21
 
@@ -24,7 +26,7 @@ module RelatonW3c
24
26
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
25
27
  Net::ProtocolError, Errno::ETIMEDOUT
26
28
  raise RelatonBib::RequestError,
27
- "Could not access #{HitCollection::DOMAIN}"
29
+ "Could not access #{url}"
28
30
  end
29
31
 
30
32
  # @param ref [String] the W3C standard Code to look up
@@ -39,8 +41,8 @@ module RelatonW3c
39
41
  return
40
42
  end
41
43
 
42
- # ret = result.first.fetch
43
- warn "[relaton-w3c] (\"#{ref}\") found #{result.title.first.title.content}"
44
+ found = result.docnumber
45
+ warn "[relaton-w3c] (\"#{ref}\") found #{found}"
44
46
  result
45
47
  end
46
48
  end
@@ -32,6 +32,9 @@
32
32
  'https://www.w3.org/WAI/EO':
33
33
  name: Education and Outreach Working Group
34
34
  abbrev: EOWG
35
+ 'https://www.w3.org/WAI/about/groups/eowg':
36
+ name: Education and Outreach Working Group
37
+ abbrev: EOWG
35
38
  'https://www.w3.org/2001/sw/WebOnt':
36
39
  name: Web-Ontology Working Group
37
40
  'http://www.w3.org/MarkUp/Forms':
@@ -54,6 +57,8 @@
54
57
  name: Web Applications Working Group
55
58
  'https://www.w3.org/2008/webapps':
56
59
  name: Web Applications Working Group
60
+ 'https://www.w3.org/groups/wg/webapps':
61
+ name: Web Applications Working Group
57
62
  'https://www.w3.org/das':
58
63
  name: Devices and Sensors Working Group
59
64
  abbrev: DAS WG
@@ -226,6 +231,8 @@
226
231
  abbrev: ARIA WG
227
232
  'https://www.w3.org/wasm':
228
233
  name: WebAssembly Working Group
234
+ 'https://www.w3.org/groups/wg/wasm':
235
+ name: WebAssembly Working Group
229
236
  'https://www.w3.org/groups/wg/webediting':
230
237
  name: Web Editing Working Group
231
238
  'https://www.w3.org/2014/data-shapes':
data/lib/relaton_w3c.rb CHANGED
@@ -2,13 +2,14 @@ require "relaton_bib"
2
2
  require "relaton_w3c/version"
3
3
  require "relaton_w3c/w3c_bibliography"
4
4
  require "relaton_w3c/w3c_bibliographic_item"
5
- require "relaton_w3c/hit_collection"
6
- require "relaton_w3c/hit"
7
- require "relaton_w3c/scrapper"
5
+ # require "relaton_w3c/hit_collection"
6
+ # require "relaton_w3c/hit"
7
+ # require "relaton_w3c/scrapper"
8
8
  require "relaton_w3c/xml_parser"
9
9
  require "relaton_w3c/bibxml_parser"
10
10
  require "relaton_w3c/hash_converter"
11
- require "relaton_w3c/data_fethcer"
11
+ require "relaton_w3c/data_fetcher"
12
+ require "relaton_w3c/data_index"
12
13
 
13
14
  module RelatonW3c
14
15
  class Error < StandardError; end
data/relaton_w3c.gemspec CHANGED
@@ -40,6 +40,9 @@ Gem::Specification.new do |spec|
40
40
  spec.add_dependency "linkeddata", "~> 3.1.0"
41
41
  spec.add_dependency "mechanize", "~> 2.8.0"
42
42
  spec.add_dependency "rdf", "~> 3.1.0"
43
+ spec.add_dependency "rdf-normalize", "~> 0.4.0"
43
44
  spec.add_dependency "relaton-bib", "~> 1.11.0"
45
+ spec.add_dependency "rubyzip", "~> 2.3.0"
46
+ spec.add_dependency "shex", "~> 0.6.0"
44
47
  spec.add_dependency "sparql", "~> 3.1.0"
45
48
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.0
4
+ version: 1.11.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-04-10 00:00:00.000000000 Z
11
+ date: 2022-04-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -122,6 +122,20 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: 3.1.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: rdf-normalize
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 0.4.0
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 0.4.0
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: relaton-bib
127
141
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +150,34 @@ dependencies:
136
150
  - - "~>"
137
151
  - !ruby/object:Gem::Version
138
152
  version: 1.11.0
153
+ - !ruby/object:Gem::Dependency
154
+ name: rubyzip
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 2.3.0
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 2.3.0
167
+ - !ruby/object:Gem::Dependency
168
+ name: shex
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: 0.6.0
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: 0.6.0
139
181
  - !ruby/object:Gem::Dependency
140
182
  name: sparql
141
183
  requirement: !ruby/object:Gem::Requirement
@@ -201,7 +243,8 @@ files:
201
243
  - grammars/reqt.rng
202
244
  - lib/relaton_w3c.rb
203
245
  - lib/relaton_w3c/bibxml_parser.rb
204
- - lib/relaton_w3c/data_fethcer.rb
246
+ - lib/relaton_w3c/data_fetcher.rb
247
+ - lib/relaton_w3c/data_index.rb
205
248
  - lib/relaton_w3c/data_parser.rb
206
249
  - lib/relaton_w3c/hash_converter.rb
207
250
  - lib/relaton_w3c/hit.rb
@@ -1,110 +0,0 @@
1
- require "rdf"
2
- require "linkeddata"
3
- require "sparql"
4
- require "mechanize"
5
- require "relaton_w3c/data_parser"
6
-
7
- module RelatonW3c
8
- class DataFetcher
9
- USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
10
-
11
- attr_reader :data, :group_names
12
-
13
- #
14
- # Data fetcher initializer
15
- #
16
- # @param [String] output directory to save files
17
- # @param [String] format format of output files (xml, yaml, bibxml)
18
- #
19
- def initialize(output, format)
20
- @output = output
21
- @format = format
22
- @ext = format.sub(/^bib/, "")
23
- dir = File.dirname(File.expand_path(__FILE__))
24
- @group_names = YAML.load_file(File.join(dir , "workgroups.yaml"))
25
- @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
26
- @files = []
27
- end
28
-
29
- #
30
- # Initialize fetcher and run fetch
31
- #
32
- # @param [Strin] output directory to save files, default: "data"
33
- # @param [Strin] format format of output files (xml, yaml, bibxml), default: yaml
34
- #
35
- def self.fetch(output: "data", format: "yaml")
36
- t1 = Time.now
37
- puts "Started at: #{t1}"
38
- FileUtils.mkdir_p output unless Dir.exist? output
39
- new(output, format).fetch
40
- t2 = Time.now
41
- puts "Stopped at: #{t2}"
42
- puts "Done in: #{(t2 - t1).round} sec."
43
- end
44
-
45
- #
46
- # Parse documents
47
- #
48
- def fetch
49
- query.each { |sl| save_doc DataParser.parse(sl, self) }
50
- Dir[File.expand_path("../../data/*", __dir__)].each do |file|
51
- xml = File.read file, encoding: "UTF-8"
52
- save_doc BibXMLParser.parse(xml)
53
- end
54
- end
55
-
56
- #
57
- # Query RDF source for documents
58
- #
59
- # @return [RDF::Query::Solutions] query results
60
- #
61
- def query # rubocop:disable Metrics/MethodLength
62
- sse = SPARQL.parse(%(
63
- PREFIX : <http://www.w3.org/2001/02pd/rec54#>
64
- PREFIX dc: <http://purl.org/dc/elements/1.1/>
65
- PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
66
- # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
67
- PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
68
- SELECT ?link ?title ?date
69
- WHERE {
70
- ?link dc:title ?title ; dc:date ?date . # ; doc:versionOf ?version_of .
71
- }
72
- ))
73
- data.query sse
74
- end
75
-
76
- #
77
- # Save document to file
78
- #
79
- # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
80
- #
81
- def save_doc(bib) # rubocop:disable Metrics/MethodLength
82
- return unless bib
83
-
84
- c = case @format
85
- when "xml" then bib.to_xml(bibdata: true)
86
- when "yaml" then bib.to_hash.to_yaml
87
- else bib.send("to_#{@format}")
88
- end
89
- file = file_name(bib)
90
- if @files.include? file
91
- warn "File #{file} already exists. Document: #{bib.docnumber}"
92
- else
93
- @files << file
94
- end
95
- File.write file, c, encoding: "UTF-8"
96
- end
97
-
98
- #
99
- # Generate file name
100
- #
101
- # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
102
- #
103
- # @return [String] file name
104
- #
105
- def file_name(bib)
106
- name = bib.docnumber.gsub(/[\s,:\/]/, "_").squeeze("_").upcase
107
- File.join @output, "#{name}.#{@ext}"
108
- end
109
- end
110
- end