relaton-w3c 1.10.0 → 1.11.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bb96ed72b04b04404d6c20fe1c3d83e918580e8721083bb0d08602d22762576b
4
- data.tar.gz: 775aefb0d4f807e6002b691448eefbed4687d0a27bae1152371385dbb5bcb6b4
3
+ metadata.gz: 9b9169cf7e9b747f47505f8ff279b33104ab64128d053013afa9307872ffa498
4
+ data.tar.gz: '09e56a18aa8db35ed2747f508ce9b786cc540bd28d4802f4cdbd41ac490a98ab'
5
5
  SHA512:
6
- metadata.gz: 4f768f5a43524e9a159b66b0646bf8ac0643ed3e9dfa5d0adb7444d08e01967de02ff3001d948cc5289685a7108f456eb5fc6ea7536134ef3c2557d057aa5536
7
- data.tar.gz: db77e522c3b6aa6b035ae32589528d75365d9d82b4f1bc9b9daa38af03ba044503e312a8175060f1c825d0d8fb63d3d64313fae33992ea5ce5a4035ca3be297b
6
+ metadata.gz: 137b95e319714c074fe712e28d1b58123fb1c82d4a2ce155533c5c3d125e01c29b35496954eb7055bbe3badb0bdce3143e6b5735c3cacab8d08ab019971504a0
7
+ data.tar.gz: 477eaeba6d35b38835275cc765c88ab24366cadae358421cb396ab2a2a28e2000d0a67ff742ac4bbb1947fa569129fdd9b961221c71803cab7c0cd826042dadf
@@ -1,38 +1,38 @@
1
- <?xml version="1.0" encoding='UTF-8'?>
2
- <reference anchor="W3C.P3P" taret="http://www.w3.org/TR/P3P/">
3
- <front>
4
- <title>The Platform for Privacy Preferences 1.0 (P3P1.0) Specification</title>
5
- <author initials="M." surname="Marchiori" fullname="Massimo Marchiori">
6
- <organization abbrev="W3C">World Wide Web Consortium</organization>
7
- <address>
8
- <email>massimo@w3.or</email>
9
- </address>
10
- </author>
11
- <author initials="L." surname="Cranor" fullname="Lorrie Cranor">
12
- <organization>AT&amp;T</organization>
13
- <address>
14
- <url>http://lorrie.cranor.or/</url>
15
- </address>
16
- </author>
17
- <author initials="M." surname="Lanheinrich" fullname="Marc Langheinrich">
18
- <organization>ETH Zurich</organization>
19
- <address>
20
- <url>http://www.inf.ethz.ch/~lanhein/</url>
21
- </address>
22
- </author>
23
- <author initials="M." surname="Presler-Marshall" fullname="Martin Presler-Marshall">
24
- <organization>IBM</organization>
25
- <address>
26
- <email>mpresler@us.ibm.com</email>
27
- </address>
28
- </author>
29
- <author initials="J." surname="Reale" fullname="Joseph Reagle">
30
- <organization>W3C</organization>
31
- <address>
32
- <url>http://www.w3.or/People/Reagle/Overview.html</url>
33
- </address>
34
- </author>
35
- <date day="16" month="April" year="2002"/>
36
- </front>
37
- <seriesInfo name="W3C Recommendation" value="REC-PICS-services"/>
38
- </reference>
1
+ <?xml version="1.0" encoding='UTF-8'?>
2
+ <reference anchor="W3C.P3P" target="http://www.w3.org/TR/P3P/">
3
+ <front>
4
+ <title>The Platform for Privacy Preferences 1.0 (P3P1.0) Specification</title>
5
+ <author initials="M." surname="Marchiori" fullname="Massimo Marchiori">
6
+ <organization abbrev="W3C">World Wide Web Consortium</organization>
7
+ <address>
8
+ <email>massimo@w3.or</email>
9
+ </address>
10
+ </author>
11
+ <author initials="L." surname="Cranor" fullname="Lorrie Cranor">
12
+ <organization>AT&amp;T</organization>
13
+ <address>
14
+ <url>http://lorrie.cranor.or/</url>
15
+ </address>
16
+ </author>
17
+ <author initials="M." surname="Lanheinrich" fullname="Marc Langheinrich">
18
+ <organization>ETH Zurich</organization>
19
+ <address>
20
+ <url>http://www.inf.ethz.ch/~lanhein/</url>
21
+ </address>
22
+ </author>
23
+ <author initials="M." surname="Presler-Marshall" fullname="Martin Presler-Marshall">
24
+ <organization>IBM</organization>
25
+ <address>
26
+ <email>mpresler@us.ibm.com</email>
27
+ </address>
28
+ </author>
29
+ <author initials="J." surname="Reale" fullname="Joseph Reagle">
30
+ <organization>W3C</organization>
31
+ <address>
32
+ <url>http://www.w3.or/People/Reagle/Overview.html</url>
33
+ </address>
34
+ </author>
35
+ <date day="16" month="April" year="2002"/>
36
+ </front>
37
+ <seriesInfo name="W3C Recommendation" value="REC-PICS-services"/>
38
+ </reference>
data/grammars/biblio.rng CHANGED
@@ -209,9 +209,6 @@
209
209
  <zeroOrMore>
210
210
  <ref name="contact"/>
211
211
  </zeroOrMore>
212
- <zeroOrMore>
213
- <ref name="uri"/>
214
- </zeroOrMore>
215
212
  </element>
216
213
  </define>
217
214
  <define name="fullname">
@@ -680,6 +677,9 @@
680
677
  <zeroOrMore>
681
678
  <ref name="extent"/>
682
679
  </zeroOrMore>
680
+ <optional>
681
+ <ref name="bibliographic_size"/>
682
+ </optional>
683
683
  <zeroOrMore>
684
684
  <ref name="accesslocation"/>
685
685
  </zeroOrMore>
@@ -828,6 +828,11 @@
828
828
  <optional>
829
829
  <attribute name="scope"/>
830
830
  </optional>
831
+ <optional>
832
+ <attribute name="primary">
833
+ <data type="boolean"/>
834
+ </attribute>
835
+ </optional>
831
836
  <text/>
832
837
  </element>
833
838
  </define>
@@ -920,9 +925,29 @@
920
925
  <text/>
921
926
  </element>
922
927
  </define>
928
+ <define name="sizevalue">
929
+ <element name="value">
930
+ <attribute name="type"/>
931
+ <text/>
932
+ </element>
933
+ </define>
934
+ <define name="bibliographic_size">
935
+ <element name="size">
936
+ <oneOrMore>
937
+ <ref name="sizevalue"/>
938
+ </oneOrMore>
939
+ </element>
940
+ </define>
923
941
  <define name="extent">
924
942
  <element name="extent">
925
- <ref name="BibItemLocality"/>
943
+ <choice>
944
+ <zeroOrMore>
945
+ <ref name="locality"/>
946
+ </zeroOrMore>
947
+ <zeroOrMore>
948
+ <ref name="localityStack"/>
949
+ </zeroOrMore>
950
+ </choice>
926
951
  </element>
927
952
  </define>
928
953
  <define name="series">
data/grammars/isodoc.rng CHANGED
@@ -152,9 +152,7 @@
152
152
  <data type="boolean"/>
153
153
  </attribute>
154
154
  </optional>
155
- <oneOrMore>
156
- <ref name="PureTextElement"/>
157
- </oneOrMore>
155
+ <ref name="XrefBody"/>
158
156
  </element>
159
157
  </define>
160
158
  <define name="erefType">
@@ -188,6 +186,42 @@
188
186
  <ref name="PureTextElement"/>
189
187
  </oneOrMore>
190
188
  </define>
189
+ <define name="localityStack">
190
+ <element name="localityStack">
191
+ <optional>
192
+ <attribute name="connective">
193
+ <choice>
194
+ <value>and</value>
195
+ <value>or</value>
196
+ <value>from</value>
197
+ <value>to</value>
198
+ <value/>
199
+ </choice>
200
+ </attribute>
201
+ </optional>
202
+ <zeroOrMore>
203
+ <ref name="locality"/>
204
+ </zeroOrMore>
205
+ </element>
206
+ </define>
207
+ <define name="sourceLocalityStack">
208
+ <element name="sourceLocalityStack">
209
+ <optional>
210
+ <attribute name="connective">
211
+ <choice>
212
+ <value>and</value>
213
+ <value>or</value>
214
+ <value>from</value>
215
+ <value>to</value>
216
+ <value/>
217
+ </choice>
218
+ </attribute>
219
+ </optional>
220
+ <zeroOrMore>
221
+ <ref name="sourceLocality"/>
222
+ </zeroOrMore>
223
+ </element>
224
+ </define>
191
225
  <define name="ul">
192
226
  <element name="ul">
193
227
  <attribute name="id">
@@ -1098,6 +1132,16 @@
1098
1132
  </define>
1099
1133
  </include>
1100
1134
  <!-- end overrides -->
1135
+ <define name="image" combine="choice">
1136
+ <element name="svg">
1137
+ <oneOrMore>
1138
+ <choice>
1139
+ <text/>
1140
+ <ref name="AnyElement"/>
1141
+ </choice>
1142
+ </oneOrMore>
1143
+ </element>
1144
+ </define>
1101
1145
  <define name="MultilingualRenderingType">
1102
1146
  <choice>
1103
1147
  <value>common</value>
@@ -2631,4 +2675,30 @@
2631
2675
  </zeroOrMore>
2632
2676
  </element>
2633
2677
  </define>
2678
+ <define name="XrefBody">
2679
+ <zeroOrMore>
2680
+ <ref name="XrefTarget"/>
2681
+ </zeroOrMore>
2682
+ <oneOrMore>
2683
+ <ref name="PureTextElement"/>
2684
+ </oneOrMore>
2685
+ </define>
2686
+ <define name="XrefTarget">
2687
+ <element name="location">
2688
+ <attribute name="target">
2689
+ <data type="string">
2690
+ <param name="pattern">\i\c*|\c+#\c+</param>
2691
+ </data>
2692
+ </attribute>
2693
+ <attribute name="connective">
2694
+ <choice>
2695
+ <value>and</value>
2696
+ <value>or</value>
2697
+ <value>from</value>
2698
+ <value>to</value>
2699
+ <value/>
2700
+ </choice>
2701
+ </attribute>
2702
+ </element>
2703
+ </define>
2634
2704
  </grammar>
@@ -13,5 +13,12 @@ module RelatonW3c
13
13
  def pubid_type(_)
14
14
  "W3C"
15
15
  end
16
+
17
+ def docids(reference, ver)
18
+ ids = super
19
+ ids.reject! &:primary
20
+ id = "W3C #{reference[:target].split('/').last}"
21
+ ids.unshift RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)
22
+ end
16
23
  end
17
24
  end
@@ -6,8 +6,6 @@ require "relaton_w3c/data_parser"
6
6
 
7
7
  module RelatonW3c
8
8
  class DataFetcher
9
- USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
10
-
11
9
  attr_reader :data, :group_names
12
10
 
13
11
  #
@@ -21,9 +19,10 @@ module RelatonW3c
21
19
  @format = format
22
20
  @ext = format.sub(/^bib/, "")
23
21
  dir = File.dirname(File.expand_path(__FILE__))
24
- @group_names = YAML.load_file(File.join(dir , "workgroups.yaml"))
22
+ @group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
25
23
  @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
26
24
  @files = []
25
+ @index = DataIndex.new
27
26
  end
28
27
 
29
28
  #
@@ -45,40 +44,117 @@ module RelatonW3c
45
44
  #
46
45
  # Parse documents
47
46
  #
48
- def fetch
49
- query.each { |sl| save_doc DataParser.parse(sl, self) }
47
+ def fetch # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
48
+ query_versioned_docs.each do |sl|
49
+ save_doc DataParser.parse(sl, self)
50
+ rescue StandardError => e
51
+ warn "Error: document #{sl.link} #{e.message}"
52
+ warn e.backtrace.join("\n")
53
+ end
54
+ query_unversioned_docs.each do |sl|
55
+ save_doc DataParser.parse(sl, self)
56
+ rescue StandardError => e
57
+ warn "Error: document #{sl.version_of} #{e.message}"
58
+ warn e.backtrace.join("\n")
59
+ end
50
60
  Dir[File.expand_path("../../data/*", __dir__)].each do |file|
51
61
  xml = File.read file, encoding: "UTF-8"
52
- save_doc BibXMLParser.parse(xml)
62
+ save_doc BibXMLParser.parse(xml), warn_duplicate: false
63
+ rescue StandardError => e
64
+ warn "Error: document #{file} #{e.message}"
65
+ warn e.backtrace.join("\n")
53
66
  end
67
+ @index.sort!.save
54
68
  end
55
69
 
70
+ #
71
+ # Create index file
72
+ #
73
+ # def create_index
74
+ # index_file = "index-w3c.yaml"
75
+ # index_yaml = @index.sort do |a, b|
76
+ # compare_index_items a, b
77
+ # end.to_yaml
78
+ # File.write index_file, index_yaml, encoding: "UTF-8"
79
+ # end
80
+
81
+ #
82
+ # Compare index items
83
+ #
84
+ # @param [Hash] aid first item
85
+ # @param [Hash] bid second item
86
+ #
87
+ # @return [Integer] comparison result
88
+ #
89
+ # def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
90
+ # ret = aid[:code] <=> bid[:code]
91
+ # ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
92
+ # ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
93
+ # # ret = aid[:type] <=> bid[:type] if ret.zero?
94
+ # ret
95
+ # end
96
+
97
+ #
98
+ # Weight of stage
99
+ #
100
+ # @param [String, nil] stage stage
101
+ #
102
+ # @return [Integer] weight
103
+ #
104
+ # def stage_weight(stage)
105
+ # return DataParser::STAGES.size if stage.nil?
106
+
107
+ # DataParser::STAGES.keys.index(stage)
108
+ # end
109
+
110
+ #
111
+ # Weight of date
112
+ #
113
+ # @param [String] date date
114
+ #
115
+ # @return [String] weight
116
+ #
117
+ # def date_weight(date)
118
+ # return "99999999" if date.nil?
119
+
120
+ # date
121
+ # end
122
+
56
123
  #
57
124
  # Query RDF source for documents
58
125
  #
59
126
  # @return [RDF::Query::Solutions] query results
60
127
  #
61
- def query # rubocop:disable Metrics/MethodLength
128
+ def query_versioned_docs # rubocop:disable Metrics/MethodLength
62
129
  sse = SPARQL.parse(%(
63
130
  PREFIX : <http://www.w3.org/2001/02pd/rec54#>
64
131
  PREFIX dc: <http://purl.org/dc/elements/1.1/>
65
132
  PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
66
133
  # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
67
134
  PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
68
- SELECT ?link ?title ?date
135
+ SELECT ?link ?title ?date ?version_of
69
136
  WHERE {
70
- ?link dc:title ?title ; dc:date ?date . # ; doc:versionOf ?version_of .
137
+ ?link dc:title ?title ; dc:date ?date ; doc:versionOf ?version_of .
71
138
  }
72
139
  ))
73
140
  data.query sse
74
141
  end
75
142
 
143
+ def query_unversioned_docs
144
+ sse = SPARQL.parse(%(
145
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
146
+ SELECT ?version_of
147
+ WHERE { ?x doc:versionOf ?version_of . }
148
+ ))
149
+ data.query(sse).uniq &:version_of
150
+ end
151
+
76
152
  #
77
153
  # Save document to file
78
154
  #
79
155
  # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
80
156
  #
81
- def save_doc(bib) # rubocop:disable Metrics/MethodLength
157
+ def save_doc(bib, warn_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
82
158
  return unless bib
83
159
 
84
160
  c = case @format
@@ -86,24 +162,26 @@ module RelatonW3c
86
162
  when "yaml" then bib.to_hash.to_yaml
87
163
  else bib.send("to_#{@format}")
88
164
  end
89
- file = file_name(bib)
90
- if @files.include? file
91
- warn "File #{file} already exists. Document: #{bib.docnumber}"
165
+ # id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
166
+ file = file_name(bib.docnumber)
167
+ if @files.include?(file)
168
+ warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
92
169
  else
170
+ @index.add bib.docnumber, file
93
171
  @files << file
172
+ File.write file, c, encoding: "UTF-8"
94
173
  end
95
- File.write file, c, encoding: "UTF-8"
96
174
  end
97
175
 
98
176
  #
99
177
  # Generate file name
100
178
  #
101
- # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
179
+ # @param [String] id document id
102
180
  #
103
181
  # @return [String] file name
104
182
  #
105
- def file_name(bib)
106
- name = bib.docnumber.gsub(/[\s,:\/]/, "_").squeeze("_").upcase
183
+ def file_name(id)
184
+ name = id.sub(/^W3C\s/, "").gsub(/[\s,:\/+]/, "_").squeeze("_").downcase
107
185
  File.join @output, "#{name}.#{@ext}"
108
186
  end
109
187
  end
@@ -0,0 +1,135 @@
1
+ module RelatonW3c
2
+ class DataIndex
3
+ #
4
+ # Initialize data index.
5
+ #
6
+ # @param [String] index_file path to index file
7
+ # @param [Array<Hash>] index index data
8
+ #
9
+ def initialize(index_file: "index-w3c.yaml", index: [])
10
+ @index_file = index_file
11
+ @index = index
12
+ end
13
+
14
+ #
15
+ # Create index from a GitHub repository
16
+ #
17
+ # @return [RelatonW3c::DataIndex] data index
18
+ #
19
+ def self.create_from_repo
20
+ resp_index = Net::HTTP.get(URI("#{W3cBibliography::SOURCE}index-w3c.yaml"))
21
+ DataIndex.new index: YAML.safe_load(resp_index, [Symbol])
22
+ end
23
+
24
+ #
25
+ # Add document to index
26
+ #
27
+ # @param [String] docnumber document number
28
+ # @param [String] file path to document file
29
+ #
30
+ def add(docnumber, file)
31
+ @index << docnumber_to_parts(docnumber, file)
32
+ end
33
+
34
+ #
35
+ # Save index to file.
36
+ #
37
+ def save
38
+ File.write @index_file, @index.to_yaml, encoding: "UTF-8"
39
+ end
40
+
41
+ #
42
+ # Sort index
43
+ #
44
+ # @return [RelatonW3c::DataIndex] self, with the index sorted in place
45
+ #
46
+ def sort!
47
+ @index.sort! { |a, b| compare_index_items a, b }
48
+ self
49
+ end
50
+
51
+ #
52
+ # Search filename in index
53
+ #
54
+ # @param [String] ref reference
55
+ #
56
+ # @return [String] document's filename
57
+ #
58
+ def search(ref) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
59
+ dparts = docnumber_to_parts(ref)
60
+ @index.detect do |parts|
61
+ parts[:code].match?(/^#{Regexp.escape dparts[:code]}/i) &&
62
+ (dparts[:stage].nil? || dparts[:stage].casecmp?(parts[:stage])) &&
63
+ (dparts[:type].nil? || dparts[:type].casecmp?(parts[:type])) &&
64
+ (dparts[:date].nil? || dparts[:date] == parts[:date]) &&
65
+ (dparts[:suff].nil? || dparts[:suff].casecmp?(parts[:suff]))
66
+ end&.fetch(:file)
67
+ end
68
+
69
+ #
70
+ # Compare index items
71
+ #
72
+ # @param [Hash] aid first item
73
+ # @param [Hash] bid second item
74
+ #
75
+ # @return [Integer] comparison result
76
+ #
77
+ def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
78
+ ret = aid[:code] <=> bid[:code]
79
+ ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
80
+ ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
81
+ # ret = aid[:type] <=> bid[:type] if ret.zero?
82
+ ret
83
+ end
84
+
85
+ #
86
+ # Weight of stage
87
+ #
88
+ # @param [String, nil] stage stage
89
+ #
90
+ # @return [Integer] weight
91
+ #
92
+ def stage_weight(stage)
93
+ return DataParser::STAGES.size if stage.nil?
94
+
95
+ DataParser::STAGES.keys.index(stage)
96
+ end
97
+
98
+ #
99
+ # Weight of date
100
+ #
101
+ # @param [String] date date
102
+ #
103
+ # @return [String] weight
104
+ #
105
+ def date_weight(date)
106
+ return "99999999" if date.nil?
107
+
108
+ date
109
+ end
110
+
111
+ #
112
+ # Parse document number to parts
113
+ #
114
+ # @param [String] docnumber document number
115
+ # @param [String, nil] file path to document file
116
+ #
117
+ # @return [Hash{Symbol=>String}] document parts
118
+ #
119
+ def docnumber_to_parts(docnumber, file = nil) # rubocop:disable Metrics/MethodLength
120
+ %r{
121
+ ^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE))-)?
122
+ (?<code>\w+(?:[+-][\w.]+)*?)
123
+ (?:-(?<date>\d{8}|\d{6}))?
124
+ (?:/(?<suff>\w+))?$
125
+ }xi =~ docnumber
126
+ entry = { code: code }
127
+ entry[:file] = file if file
128
+ entry[:stage] = stage if stage
129
+ entry[:type] = type if type
130
+ entry[:date] = date if date
131
+ entry[:suff] = suff if suff
132
+ entry
133
+ end
134
+ end
135
+ end
@@ -1,5 +1,24 @@
1
1
  module RelatonW3c
2
2
  class DataParser
3
+ USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
4
+
5
+ DOCTYPES = {
6
+ "TR" => "technicalReport",
7
+ "NOTE" => "groupNote",
8
+ }.freeze
9
+
10
+ STAGES = {
11
+ "RET" => "retired",
12
+ "SPSD" => "supersededRecommendation",
13
+ "OBSL" => "obsoletedRecommendation",
14
+ "WD" => "workingDraft",
15
+ "CRD" => "candidateRecommendationDraft",
16
+ "CR" => "candidateRecommendation",
17
+ "PR" => "proposedRecommendation",
18
+ "PER" => "proposedEditedRecommendation",
19
+ "REC" => "recommendation",
20
+ }.freeze
21
+
3
22
  #
4
23
  # Document parser initialization
5
24
  #
@@ -29,7 +48,7 @@ module RelatonW3c
29
48
  # @return [RelatonW3c:W3cBibliographicItem, nil] bibliographic item
30
49
  #
31
50
  def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
32
- return unless @fetcher.class::USED_TYPES.include? type
51
+ return if @sol.respond_to?(:link) && !types_stages.detect { |ts| USED_TYPES.include?(ts) }
33
52
 
34
53
  RelatonW3c::W3cBibliographicItem.new(
35
54
  type: "standard",
@@ -37,10 +56,12 @@ module RelatonW3c
37
56
  fetched: Date.today.to_s,
38
57
  language: ["en"],
39
58
  script: ["Latn"],
59
+ docstatus: parse_docstatus,
40
60
  title: parse_title,
41
61
  link: parse_link,
42
62
  docid: parse_docid,
43
- docnumber: identifier(@sol.link.to_s),
63
+ formattedref: parse_formattedref,
64
+ docnumber: identifier,
44
65
  series: parse_series,
45
66
  date: parse_date,
46
67
  relation: parse_relation,
@@ -49,12 +70,24 @@ module RelatonW3c
49
70
  )
50
71
  end
51
72
 
73
+ #
74
+ # Extract document status
75
+ #
76
+ # @return [RelatonBib::DocumentStatus, nil] document status
77
+ #
78
+ def parse_docstatus
79
+ stage = types_stages&.detect { |st| STAGES.include?(st) }
80
+ RelatonBib::DocumentStatus.new stage: STAGES[stage] if stage
81
+ end
82
+
52
83
  #
53
84
  # Parse title
54
85
  #
55
86
  # @return [RelatonBib::TypedTitleStringCollection] title
56
87
  #
57
88
  def parse_title
89
+ return [] unless @sol.respond_to?(:title)
90
+
58
91
  t = RelatonBib::TypedTitleString.new content: @sol.title.to_s
59
92
  RelatonBib::TypedTitleStringCollection.new [t]
60
93
  end
@@ -65,7 +98,9 @@ module RelatonW3c
65
98
  # @return [Array<RelatonBib::TypedUri>] link
66
99
  #
67
100
  def parse_link
68
- [RelatonBib::TypedUri.new(type: "src", content: @sol.link.to_s)]
101
+ link = @sol.respond_to?(:link) ? @sol.link : @sol.version_of
102
+
103
+ [RelatonBib::TypedUri.new(type: "src", content: link.to_s)]
69
104
  end
70
105
 
71
106
  #
@@ -74,23 +109,45 @@ module RelatonW3c
74
109
  # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
75
110
  #
76
111
  def parse_docid
77
- id = pub_id(@sol.link.to_s)
78
- [RelatonBib::DocumentIdentifier.new(type: "W3C", id: id)]
112
+ return [] unless @sol.respond_to?(:link)
113
+
114
+ id = pub_id(@sol.link)
115
+ [RelatonBib::DocumentIdentifier.new(type: "W3C", id: id, primary: true)]
79
116
  end
80
117
 
81
118
  #
82
119
  # Generate PubID
83
120
  #
84
- # @param [String] url url
85
- #
86
- # @return [String] PubID
121
+ # @return [String] PubID
87
122
  #
88
123
  def pub_id(url)
89
124
  "W3C #{identifier(url)}"
90
125
  end
91
126
 
92
- def identifier(url)
93
- /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/.match(url)[1].to_s
127
+ #
128
+ # Generate identifier from URL
129
+ #
130
+ # @param [RDF::URI, nil] link
131
+ #
132
+ # @return [String] identifier
133
+ #
134
+ def identifier(link = nil)
135
+ url = link || (@sol.respond_to?(:link) ? @sol.link : @sol.version_of)
136
+ self.class.parse_identifier(url.to_s)
137
+ end
138
+
139
+ #
140
+ # Parse identifier from URL
141
+ #
142
+ # @param [String] url URL
143
+ #
144
+ # @return [String] identifier
145
+ #
146
+ def self.parse_identifier(url)
147
+ if /.+\/(\w+(?:-[\w.]+)+(?:\/\w+)?)/ =~ url.to_s
148
+ $1.to_s
149
+ else url.to_s.split("/").last
150
+ end
94
151
  end
95
152
 
96
153
  #
@@ -99,12 +156,31 @@ module RelatonW3c
99
156
  # @return [Array<RelatonBib::Series>] series
100
157
  #
101
158
  def parse_series
159
+ return [] unless type
160
+
102
161
  title = RelatonBib::TypedTitleString.new content: "W3C #{type}"
103
- [RelatonBib::Series.new(title: title, number: identifier(@sol.link.to_s))]
162
+ [RelatonBib::Series.new(title: title, number: identifier)]
104
163
  end
105
164
 
106
- def type # rubocop:disable Metrics/MethodLength
107
- @type ||= begin
165
+ #
166
+ # Extract type
167
+ #
168
+ # @return [String] type
169
+ #
170
+ def type
171
+ # there are many types, we need to find the right one
172
+ @type ||= types_stages&.detect { |t| USED_TYPES.include?(t) }
173
+ end
174
+
175
+ #
176
+ # Fetches types and stages
177
+ #
178
+ # @return [Array<String>] types and stages
179
+ #
180
+ def types_stages # rubocop:disable Metrics/MethodLength
181
+ return unless @sol.respond_to?(:link)
182
+
183
+ @types_stages ||= begin
108
184
  sse = SPARQL.parse(%(
109
185
  PREFIX : <http://www.w3.org/2001/02pd/rec54#>
110
186
  PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
@@ -113,8 +189,7 @@ module RelatonW3c
113
189
  { <#{@sol.link}> rdf:type ?type }
114
190
  }
115
191
  ))
116
- tps = @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
117
- tps.detect { |t| Scrapper::DOCTYPES.key?(t) }
192
+ @fetcher.data.query(sse).map { |s| s.type.to_s.split("#").last }
118
193
  end
119
194
  end
120
195
 
@@ -124,10 +199,17 @@ module RelatonW3c
124
199
  # @return [String] doctype
125
200
  #
126
201
  def parse_doctype
127
- Scrapper::DOCTYPES[type]
202
+ DOCTYPES[type] || "recommendation"
128
203
  end
129
204
 
205
+ #
206
+ # Parse date
207
+ #
208
+ # @return [Array<RelatonBib::BibliographicDate>] date
209
+ #
130
210
  def parse_date
211
+ return [] unless @sol.respond_to?(:date)
212
+
131
213
  [RelatonBib::BibliographicDate.new(type: "published", on: @sol.date.to_s)]
132
214
  end
133
215
 
@@ -136,29 +218,90 @@ module RelatonW3c
136
218
  #
137
219
  # @return [Array<RelatonBib::DocumentRelation>] relation
138
220
  #
139
- def parse_relation # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
221
+ def parse_relation
222
+ if @sol.respond_to?(:link)
223
+ relations + editor_drafts
224
+ else document_versions
225
+ end
226
+ end
227
+
228
+ def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
229
+ {
230
+ "doc:obsoletes" => { type: "obsoletes" },
231
+ "mat:hasErrata" => { type: "updatedBy", description: "errata" },
232
+ # "mat:hasTranslations" => "hasTranslation",
233
+ # "mat:hasImplReport" => "hasImpReport",
234
+ ":previousEdition" => { type: "editionOf" },
235
+ }.reduce([]) do |acc, (predicate, tp)|
236
+ acc + relation_query(predicate).map do |r|
237
+ fr = RelatonBib::LocalizedString.new pub_id(r.rel.to_s)
238
+ bib = W3cBibliographicItem.new formattedref: fr
239
+ tp[:description] = RelatonBib::FormattedString.new content: tp[:description] if tp[:description]
240
+ RelatonBib::DocumentRelation.new(**tp, bibitem: bib)
241
+ end
242
+ end
243
+ end
244
+
245
+ def editor_drafts # rubocop:disable Metrics/MethodLength
140
246
  sse = SPARQL.parse(%(
141
- PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
142
- SELECT ?obsoletes
143
- WHERE {
144
- VALUES ?p { doc:obsoletes }
145
- { <#{@sol.link}> ?p ?obsoletes }
146
- }
247
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
248
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
249
+ SELECT ?rel
250
+ WHERE { <#{@sol.link}> :ED ?rel . }
147
251
  ))
148
- @fetcher.data.query(sse).order_by(:obsoletes).map do |r|
149
- tp, url = r.to_h.first
150
- fr = RelatonBib::LocalizedString.new pub_id(url.to_s)
252
+ @fetcher.data.query(sse).map do |s|
253
+ fr = RelatonBib::LocalizedString.new pub_id(s.rel.to_s)
151
254
  bib = W3cBibliographicItem.new formattedref: fr
152
- RelatonBib::DocumentRelation.new(type: tp.to_s, bibitem: bib)
255
+ desc = RelatonBib::FormattedString.new content: "Editor's draft"
256
+ RelatonBib::DocumentRelation.new(
257
+ type: "hasDraft", description: desc, bibitem: bib,
258
+ )
259
+ end
260
+ end
261
+
262
+ def relation_query(predicate)
263
+ sse = SPARQL.parse(%(
264
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
265
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
266
+ PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
267
+ SELECT ?rel
268
+ WHERE { <#{@sol.link}> #{predicate} ?rel . }
269
+ ))
270
+ @fetcher.data.query(sse).order_by(:rel)
271
+ end
272
+
273
+ def document_versions
274
+ sse = SPARQL.parse(%(
275
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
276
+ SELECT ?link
277
+ WHERE { ?link doc:versionOf <#{@sol.version_of}> }
278
+ ))
279
+ @fetcher.data.query(sse).map do |r|
280
+ fref = RelatonBib::FormattedRef.new content: pub_id(r.link)
281
+ bib = W3cBibliographicItem.new formattedref: fref
282
+ RelatonBib::DocumentRelation.new(type: "hasEdition", bibitem: bib)
153
283
  end
154
284
  end
155
285
 
286
+ #
287
+ # Parse formattedref
288
+ #
289
+ # @return [RelatonBib::FormattedRef] formattedref
290
+ #
291
+ def parse_formattedref
292
+ return if @sol.respond_to?(:link)
293
+
294
+ RelatonBib::FormattedRef.new(content: pub_id(@sol.version_of))
295
+ end
296
+
156
297
  #
157
298
  # Parse contributor
158
299
  #
159
300
  # @return [Array<RelatonBib::ContributionInfo>] contributor
160
301
  #
161
302
  def parse_contrib # rubocop:disable Metrics/MethodLength
303
+ return [] unless @sol.respond_to?(:link)
304
+
162
305
  sse = SPARQL.parse(%(
163
306
  PREFIX : <http://www.w3.org/2001/02pd/rec54#>
164
307
  PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
@@ -181,6 +324,8 @@ module RelatonW3c
181
324
  # @return [RelatonBib::EditorialGroup] editorialgroup
182
325
  #
183
326
  def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
327
+ return unless @sol.respond_to?(:link)
328
+
184
329
  sse = SPARQL.parse(%(
185
330
  PREFIX org: <http://www.w3.org/2001/04/roadmap/org#>
186
331
  PREFIX contact: <http://www.w3.org/2000/10/swap/pim/contact#>
@@ -39,7 +39,7 @@ module RelatonW3c
39
39
  # @return [Array<RelatonBib::DocumentIdentifier>]
40
40
  def fetch_docid(hit)
41
41
  id = hit["link"].split("/").last
42
- [RelatonBib::DocumentIdentifier.new(id: id, type: "W3C")]
42
+ [RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)]
43
43
  end
44
44
 
45
45
  # @param hit [Hash]
@@ -1,3 +1,3 @@
1
1
  module RelatonW3c
2
- VERSION = "1.10.0".freeze
2
+ VERSION = "1.11.1".freeze
3
3
  end
@@ -5,15 +5,17 @@ require "net/http"
5
5
  module RelatonW3c
6
6
  # Class methods for searching W3C standards.
7
7
  class W3cBibliography
8
- SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/data/"
8
+ SOURCE = "https://raw.githubusercontent.com/relaton/relaton-data-w3c/main/"
9
9
 
10
10
  class << self
11
11
  # @param text [String]
12
12
  # @return [RelatonW3c::HitCollection]
13
13
  def search(text) # rubocop:disable Metrics/MethodLength
14
- # HitCollection.new text
15
- file = text.sub(/^W3C\s/, "").gsub(/[\s,:\/]/, "_").squeeze("_").upcase
16
- url = "#{SOURCE}#{file}.yaml"
14
+ ref = DataParser.parse_identifier text.sub(/^W3C\s/, "")
15
+ file = DataIndex.create_from_repo.search(ref)
16
+ return unless file
17
+
18
+ url = "#{SOURCE}#{file}"
17
19
  resp = Net::HTTP.get_response(URI.parse(url))
18
20
  return unless resp.code == "200"
19
21
 
@@ -24,7 +26,7 @@ module RelatonW3c
24
26
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
25
27
  Net::ProtocolError, Errno::ETIMEDOUT
26
28
  raise RelatonBib::RequestError,
27
- "Could not access #{HitCollection::DOMAIN}"
29
+ "Could not access #{url}"
28
30
  end
29
31
 
30
32
  # @param ref [String] the W3C standard Code to look up
@@ -39,8 +41,8 @@ module RelatonW3c
39
41
  return
40
42
  end
41
43
 
42
- # ret = result.first.fetch
43
- warn "[relaton-w3c] (\"#{ref}\") found #{result.title.first.title.content}"
44
+ found = result.docnumber
45
+ warn "[relaton-w3c] (\"#{ref}\") found #{found}"
44
46
  result
45
47
  end
46
48
  end
@@ -32,6 +32,9 @@
32
32
  'https://www.w3.org/WAI/EO':
33
33
  name: Education and Outreach Working Group
34
34
  abbrev: EOWG
35
+ 'https://www.w3.org/WAI/about/groups/eowg':
36
+ name: Education and Outreach Working Group
37
+ abbrev: EOWG
35
38
  'https://www.w3.org/2001/sw/WebOnt':
36
39
  name: Web-Ontology Working Group
37
40
  'http://www.w3.org/MarkUp/Forms':
@@ -54,6 +57,8 @@
54
57
  name: Web Applications Working Group
55
58
  'https://www.w3.org/2008/webapps':
56
59
  name: Web Applications Working Group
60
+ 'https://www.w3.org/groups/wg/webapps':
61
+ name: Web Applications Working Group
57
62
  'https://www.w3.org/das':
58
63
  name: Devices and Sensors Working Group
59
64
  abbrev: DAS WG
@@ -226,6 +231,8 @@
226
231
  abbrev: ARIA WG
227
232
  'https://www.w3.org/wasm':
228
233
  name: WebAssembly Working Group
234
+ 'https://www.w3.org/groups/wg/wasm':
235
+ name: WebAssembly Working Group
229
236
  'https://www.w3.org/groups/wg/webediting':
230
237
  name: Web Editing Working Group
231
238
  'https://www.w3.org/2014/data-shapes':
data/lib/relaton_w3c.rb CHANGED
@@ -2,13 +2,14 @@ require "relaton_bib"
2
2
  require "relaton_w3c/version"
3
3
  require "relaton_w3c/w3c_bibliography"
4
4
  require "relaton_w3c/w3c_bibliographic_item"
5
- require "relaton_w3c/hit_collection"
6
- require "relaton_w3c/hit"
7
- require "relaton_w3c/scrapper"
5
+ # require "relaton_w3c/hit_collection"
6
+ # require "relaton_w3c/hit"
7
+ # require "relaton_w3c/scrapper"
8
8
  require "relaton_w3c/xml_parser"
9
9
  require "relaton_w3c/bibxml_parser"
10
10
  require "relaton_w3c/hash_converter"
11
11
  require "relaton_w3c/data_fethcer"
12
+ require "relaton_w3c/data_index"
12
13
 
13
14
  module RelatonW3c
14
15
  class Error < StandardError; end
data/relaton_w3c.gemspec CHANGED
@@ -39,7 +39,10 @@ Gem::Specification.new do |spec|
39
39
 
40
40
  spec.add_dependency "linkeddata", "~> 3.1.0"
41
41
  spec.add_dependency "mechanize", "~> 2.8.0"
42
+ # spec.add_dependency "picky"
42
43
  spec.add_dependency "rdf", "~> 3.1.0"
43
- spec.add_dependency "relaton-bib", "~> 1.10.0"
44
+ spec.add_dependency "rdf-normalize", "~> 0.4.0"
45
+ spec.add_dependency "relaton-bib", "~> 1.11.0"
46
+ spec.add_dependency "shex", "~> 0.6.0"
44
47
  spec.add_dependency "sparql", "~> 3.1.0"
45
48
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-w3c
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.0
4
+ version: 1.11.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-30 00:00:00.000000000 Z
11
+ date: 2022-04-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -122,20 +122,48 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: 3.1.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: rdf-normalize
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 0.4.0
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 0.4.0
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: relaton-bib
127
141
  requirement: !ruby/object:Gem::Requirement
128
142
  requirements:
129
143
  - - "~>"
130
144
  - !ruby/object:Gem::Version
131
- version: 1.10.0
145
+ version: 1.11.0
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 1.11.0
153
+ - !ruby/object:Gem::Dependency
154
+ name: shex
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 0.6.0
132
160
  type: :runtime
133
161
  prerelease: false
134
162
  version_requirements: !ruby/object:Gem::Requirement
135
163
  requirements:
136
164
  - - "~>"
137
165
  - !ruby/object:Gem::Version
138
- version: 1.10.0
166
+ version: 0.6.0
139
167
  - !ruby/object:Gem::Dependency
140
168
  name: sparql
141
169
  requirement: !ruby/object:Gem::Requirement
@@ -202,6 +230,7 @@ files:
202
230
  - lib/relaton_w3c.rb
203
231
  - lib/relaton_w3c/bibxml_parser.rb
204
232
  - lib/relaton_w3c/data_fethcer.rb
233
+ - lib/relaton_w3c/data_index.rb
205
234
  - lib/relaton_w3c/data_parser.rb
206
235
  - lib/relaton_w3c/hash_converter.rb
207
236
  - lib/relaton_w3c/hit.rb