relaton-w3c 1.10.1 → 1.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/grammars/isodoc.rng CHANGED
@@ -152,9 +152,7 @@
152
152
  <data type="boolean"/>
153
153
  </attribute>
154
154
  </optional>
155
- <oneOrMore>
156
- <ref name="PureTextElement"/>
157
- </oneOrMore>
155
+ <ref name="XrefBody"/>
158
156
  </element>
159
157
  </define>
160
158
  <define name="erefType">
@@ -188,6 +186,42 @@
188
186
  <ref name="PureTextElement"/>
189
187
  </oneOrMore>
190
188
  </define>
189
+ <define name="localityStack">
190
+ <element name="localityStack">
191
+ <optional>
192
+ <attribute name="connective">
193
+ <choice>
194
+ <value>and</value>
195
+ <value>or</value>
196
+ <value>from</value>
197
+ <value>to</value>
198
+ <value/>
199
+ </choice>
200
+ </attribute>
201
+ </optional>
202
+ <zeroOrMore>
203
+ <ref name="locality"/>
204
+ </zeroOrMore>
205
+ </element>
206
+ </define>
207
+ <define name="sourceLocalityStack">
208
+ <element name="sourceLocalityStack">
209
+ <optional>
210
+ <attribute name="connective">
211
+ <choice>
212
+ <value>and</value>
213
+ <value>or</value>
214
+ <value>from</value>
215
+ <value>to</value>
216
+ <value/>
217
+ </choice>
218
+ </attribute>
219
+ </optional>
220
+ <zeroOrMore>
221
+ <ref name="sourceLocality"/>
222
+ </zeroOrMore>
223
+ </element>
224
+ </define>
191
225
  <define name="ul">
192
226
  <element name="ul">
193
227
  <attribute name="id">
@@ -1098,6 +1132,16 @@
1098
1132
  </define>
1099
1133
  </include>
1100
1134
  <!-- end overrides -->
1135
+ <define name="image" combine="choice">
1136
+ <element name="svg">
1137
+ <oneOrMore>
1138
+ <choice>
1139
+ <text/>
1140
+ <ref name="AnyElement"/>
1141
+ </choice>
1142
+ </oneOrMore>
1143
+ </element>
1144
+ </define>
1101
1145
  <define name="MultilingualRenderingType">
1102
1146
  <choice>
1103
1147
  <value>common</value>
@@ -2631,4 +2675,30 @@
2631
2675
  </zeroOrMore>
2632
2676
  </element>
2633
2677
  </define>
2678
+ <define name="XrefBody">
2679
+ <zeroOrMore>
2680
+ <ref name="XrefTarget"/>
2681
+ </zeroOrMore>
2682
+ <oneOrMore>
2683
+ <ref name="PureTextElement"/>
2684
+ </oneOrMore>
2685
+ </define>
2686
+ <define name="XrefTarget">
2687
+ <element name="location">
2688
+ <attribute name="target">
2689
+ <data type="string">
2690
+ <param name="pattern">\i\c*|\c+#\c+</param>
2691
+ </data>
2692
+ </attribute>
2693
+ <attribute name="connective">
2694
+ <choice>
2695
+ <value>and</value>
2696
+ <value>or</value>
2697
+ <value>from</value>
2698
+ <value>to</value>
2699
+ <value/>
2700
+ </choice>
2701
+ </attribute>
2702
+ </element>
2703
+ </define>
2634
2704
  </grammar>
@@ -13,5 +13,12 @@ module RelatonW3c
13
13
  def pubid_type(_)
14
14
  "W3C"
15
15
  end
16
+
17
+ def docids(reference, ver)
18
+ ids = super
19
+ ids.reject! &:primary
20
+ id = "W3C #{reference[:target].split('/').last}"
21
+ ids.unshift RelatonBib::DocumentIdentifier.new(id: id, type: "W3C", primary: true)
22
+ end
16
23
  end
17
24
  end
@@ -0,0 +1,188 @@
1
+ require "rdf"
2
+ require "linkeddata"
3
+ require "sparql"
4
+ require "mechanize"
5
+ require "relaton_w3c/data_parser"
6
+
7
+ module RelatonW3c
8
+ class DataFetcher
9
+ attr_reader :data, :group_names
10
+
11
+ #
12
+ # Data fetcher initializer
13
+ #
14
+ # @param [String] output directory to save files
15
+ # @param [String] format format of output files (xml, yaml, bibxml)
16
+ #
17
+ def initialize(output, format)
18
+ @output = output
19
+ @format = format
20
+ @ext = format.sub(/^bib/, "")
21
+ dir = File.dirname(File.expand_path(__FILE__))
22
+ @group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
23
+ @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
24
+ @files = []
25
+ @index = DataIndex.new
26
+ end
27
+
28
+ #
29
+ # Initialize fetcher and run fetch
30
+ #
31
+ # @param [String] output directory to save files, default: "data"
32
+ # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
33
+ #
34
+ def self.fetch(output: "data", format: "yaml")
35
+ t1 = Time.now
36
+ puts "Started at: #{t1}"
37
+ FileUtils.mkdir_p output unless Dir.exist? output
38
+ new(output, format).fetch
39
+ t2 = Time.now
40
+ puts "Stopped at: #{t2}"
41
+ puts "Done in: #{(t2 - t1).round} sec."
42
+ end
43
+
44
+ #
45
+ # Parse documents
46
+ #
47
+ def fetch # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
48
+ query_versioned_docs.each do |sl|
49
+ save_doc DataParser.parse(sl, self)
50
+ rescue StandardError => e
51
+ warn "Error: document #{sl.link} #{e.message}"
52
+ warn e.backtrace.join("\n")
53
+ end
54
+ query_unversioned_docs.each do |sl|
55
+ save_doc DataParser.parse(sl, self)
56
+ rescue StandardError => e
57
+ warn "Error: document #{sl.version_of} #{e.message}"
58
+ warn e.backtrace.join("\n")
59
+ end
60
+ Dir[File.expand_path("../../data/*", __dir__)].each do |file|
61
+ xml = File.read file, encoding: "UTF-8"
62
+ save_doc BibXMLParser.parse(xml), warn_duplicate: false
63
+ rescue StandardError => e
64
+ warn "Error: document #{file} #{e.message}"
65
+ warn e.backtrace.join("\n")
66
+ end
67
+ @index.sort!.save
68
+ end
69
+
70
+ #
71
+ # Create index file
72
+ #
73
+ # def create_index
74
+ # index_file = "index-w3c.yaml"
75
+ # index_yaml = @index.sort do |a, b|
76
+ # compare_index_items a, b
77
+ # end.to_yaml
78
+ # File.write index_file, index_yaml, encoding: "UTF-8"
79
+ # end
80
+
81
+ #
82
+ # Compare index items
83
+ #
84
+ # @param [Hash] aid first item
85
+ # @param [Hash] bid second item
86
+ #
87
+ # @return [Integer] comparison result
88
+ #
89
+ # def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
90
+ # ret = aid[:code] <=> bid[:code]
91
+ # ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
92
+ # ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
93
+ # # ret = aid[:type] <=> bid[:type] if ret.zero?
94
+ # ret
95
+ # end
96
+
97
+ #
98
+ # Weight of stage
99
+ #
100
+ # @param [String, nil] stage stage
101
+ #
102
+ # @return [Integer] weight
103
+ #
104
+ # def stage_weight(stage)
105
+ # return DataParser::STAGES.size if stage.nil?
106
+
107
+ # DataParser::STAGES.keys.index(stage)
108
+ # end
109
+
110
+ #
111
+ # Weight of date
112
+ #
113
+ # @param [String] date date
114
+ #
115
+ # @return [String] weight
116
+ #
117
+ # def date_weight(date)
118
+ # return "99999999" if date.nil?
119
+
120
+ # date
121
+ # end
122
+
123
+ #
124
+ # Query RDF source for documents
125
+ #
126
+ # @return [RDF::Query::Solutions] query results
127
+ #
128
+ def query_versioned_docs # rubocop:disable Metrics/MethodLength
129
+ sse = SPARQL.parse(%(
130
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
131
+ PREFIX dc: <http://purl.org/dc/elements/1.1/>
132
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
133
+ # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
134
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
135
+ SELECT ?link ?title ?date ?version_of
136
+ WHERE {
137
+ ?link dc:title ?title ; dc:date ?date ; doc:versionOf ?version_of .
138
+ }
139
+ ))
140
+ data.query sse
141
+ end
142
+
143
+ def query_unversioned_docs
144
+ sse = SPARQL.parse(%(
145
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
146
+ SELECT ?version_of
147
+ WHERE { ?x doc:versionOf ?version_of . }
148
+ ))
149
+ data.query(sse).uniq &:version_of
150
+ end
151
+
152
+ #
153
+ # Save document to file
154
+ #
155
+ # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
156
+ #
157
+ def save_doc(bib, warn_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
158
+ return unless bib
159
+
160
+ c = case @format
161
+ when "xml" then bib.to_xml(bibdata: true)
162
+ when "yaml" then bib.to_hash.to_yaml
163
+ else bib.send("to_#{@format}")
164
+ end
165
+ # id = bib.docidentifier.detect(&:primary)&.id || bib.formattedref.content
166
+ file = file_name(bib.docnumber)
167
+ if @files.include?(file)
168
+ warn "File #{file} already exists. Document: #{bib.docnumber}" if warn_duplicate
169
+ else
170
+ @index.add bib.docnumber, file
171
+ @files << file
172
+ File.write file, c, encoding: "UTF-8"
173
+ end
174
+ end
175
+
176
+ #
177
+ # Generate file name
178
+ #
179
+ # @param [String] id document id
180
+ #
181
+ # @return [String] file name
182
+ #
183
+ def file_name(id)
184
+ name = id.sub(/^W3C\s/, "").gsub(/[\s,:\/+]/, "_").squeeze("_").downcase
185
+ File.join @output, "#{name}.#{@ext}"
186
+ end
187
+ end
188
+ end
@@ -0,0 +1,143 @@
1
+ module RelatonW3c
2
+ class DataIndex
3
+ #
4
+ # Initialize data index.
5
+ #
6
+ # @param [String] index_file path to index file
7
+ # @param [Array<Hash>] index index data
8
+ #
9
+ def initialize(index_file: "index-w3c.yaml", index: [])
10
+ @index_file = index_file
11
+ @index = index
12
+ end
13
+
14
+ #
15
+ # Create index from a GitHub repository
16
+ #
17
+ # @return [RelatonW3c::DataIndex] data index
18
+ #
19
+ def self.create_from_repo
20
+ resp_index = Net::HTTP.get(URI("#{W3cBibliography::SOURCE}index-w3c.yaml"))
21
+
22
+ # Newer versions of Psych use the `permitted_classes:` parameter
23
+ index = if YAML.method(:safe_load).parameters.collect(&:last).index(:permitted_classes)
24
+ YAML.safe_load(resp_index, permitted_classes: [Symbol])
25
+ else
26
+ YAML.safe_load(resp_index, [Symbol])
27
+ end
28
+
29
+ DataIndex.new index: index
30
+ end
31
+
32
+ #
33
+ # Add document to index
34
+ #
35
+ # @param [String] docnumber document number
36
+ # @param [String] file path to document file
37
+ #
38
+ def add(docnumber, file)
39
+ @index << docnumber_to_parts(docnumber, file)
40
+ end
41
+
42
+ #
43
+ # Save index to file.
44
+ #
45
+ def save
46
+ File.write @index_file, @index.to_yaml, encoding: "UTF-8"
47
+ end
48
+
49
+ #
50
+ # Sort index
51
+ #
52
+ # @return [RelatonW3c::DataIndex] self, with the index sorted in place
53
+ #
54
+ def sort!
55
+ @index.sort! { |a, b| compare_index_items a, b }
56
+ self
57
+ end
58
+
59
+ #
60
+ # Search filename in index
61
+ #
62
+ # @param [String] ref reference
63
+ #
64
+ # @return [String, nil] document's filename, or nil if no index entry matches
65
+ #
66
+ def search(ref) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
67
+ dparts = docnumber_to_parts(ref)
68
+ @index.detect do |parts|
69
+ parts[:code].match?(/^#{Regexp.escape dparts[:code]}/i) &&
70
+ (dparts[:stage].nil? || dparts[:stage].casecmp?(parts[:stage])) &&
71
+ (dparts[:type].nil? || dparts[:type].casecmp?(parts[:type])) &&
72
+ (dparts[:date].nil? || dparts[:date] == parts[:date]) &&
73
+ (dparts[:suff].nil? || dparts[:suff].casecmp?(parts[:suff]))
74
+ end&.fetch(:file)
75
+ end
76
+
77
+ #
78
+ # Compare index items
79
+ #
80
+ # @param [Hash] aid first item
81
+ # @param [Hash] bid second item
82
+ #
83
+ # @return [Integer] comparison result
84
+ #
85
+ def compare_index_items(aid, bid) # rubocop:disable Metrics/AbcSize
86
+ ret = aid[:code] <=> bid[:code]
87
+ ret = stage_weight(bid[:stage]) <=> stage_weight(aid[:stage]) if ret.zero?
88
+ ret = date_weight(bid[:date]) <=> date_weight(aid[:date]) if ret.zero?
89
+ # ret = aid[:type] <=> bid[:type] if ret.zero?
90
+ ret
91
+ end
92
+
93
+ #
94
+ # Weight of stage
95
+ #
96
+ # @param [String, nil] stage stage
97
+ #
98
+ # @return [Integer] weight
99
+ #
100
+ def stage_weight(stage)
101
+ return DataParser::STAGES.size if stage.nil?
102
+
103
+ DataParser::STAGES.keys.index(stage)
104
+ end
105
+
106
+ #
107
+ # Weight of date
108
+ #
109
+ # @param [String, nil] date date
110
+ #
111
+ # @return [String] weight
112
+ #
113
+ def date_weight(date)
114
+ return "99999999" if date.nil?
115
+
116
+ date
117
+ end
118
+
119
+ #
120
+ # Parse document number to parts
121
+ #
122
+ # @param [String] docnumber document number
123
+ # @param [String, nil] file path to document file
124
+ #
125
+ # @return [Hash{Symbol=>String}] document parts
126
+ #
127
+ def docnumber_to_parts(docnumber, file = nil) # rubocop:disable Metrics/MethodLength
128
+ %r{
129
+ ^(?:(?:(?<stage>WD|CRD|CR|PR|PER|REC|SPSD|OBSL|RET)|(?<type>D?NOTE))-)?
130
+ (?<code>\w+(?:[+-][\w.]+)*?)
131
+ (?:-(?<date>\d{8}|\d{6}))?
132
+ (?:/(?<suff>\w+))?$
133
+ }xi =~ docnumber
134
+ entry = { code: code }
135
+ entry[:file] = file if file
136
+ entry[:stage] = stage if stage
137
+ entry[:type] = type if type
138
+ entry[:date] = date if date
139
+ entry[:suff] = suff if suff
140
+ entry
141
+ end
142
+ end
143
+ end