schemaforge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+ require "net/http"
5
+
6
module SchemaForge
  # Reads an XSD document from a local path or an http(s) location and folds
  # every schema reachable through its includes and imports into one
  # Schema::Document.
  class Loader
    # Mutable buckets collecting the components of the document being merged.
    Accumulators = Struct.new(:types, :elements, :groups, :attribute_groups)

    # Builds a fresh loader and loads a single root document with it.
    #
    # @param path_or_uri local file path or http(s) location of the root schema
    # @param fetch_remote [Boolean] when false, reading an http(s) location raises FetchError
    # @return the merged document produced by {#load}
    def self.load(path_or_uri, fetch_remote: true)
      new(fetch_remote: fetch_remote).load(path_or_uri)
    end

    # @param fetch_remote [Boolean] whether http(s) locations may be downloaded
    def initialize(fetch_remote: true)
      @fetch_remote = fetch_remote
      @visited = {}
    end

    # Loads the root document and merges everything it includes or imports.
    #
    # @param path_or_uri local file path or http(s) location of the root schema
    # @return the merged document (its +includes+ list is emptied, +imports+ are kept)
    def load(path_or_uri)
      # The visited set only exists to break include/import cycles within one
      # load. Resetting it here keeps a reused loader from returning nil (or a
      # document with shared includes missing) on its second call.
      @visited = {}
      root = load_one(path_or_uri)
      merge_includes(root, base_for(path_or_uri))
    end

    private

    # Parses one location, or returns nil when that location was already
    # loaded during the current #load call.
    def load_one(path_or_uri)
      key = canonicalize(path_or_uri)
      return nil if @visited.key?(key)

      @visited[key] = true
      Parser.parse(read(path_or_uri), base_path: path_or_uri.to_s)
    end

    # Returns a new document holding +doc+'s own components followed by those
    # of every schema it includes and imports. A nil +doc+ is passed through.
    def merge_includes(doc, base)
      return doc if doc.nil?

      acc = Accumulators.new(
        doc.types.dup,
        doc.elements.dup,
        doc.groups.dup,
        doc.attribute_groups.dup
      )

      # Includes are merged before imports; the order decides the order of
      # the accumulated component lists.
      apply_includes(doc.includes, base, acc)
      apply_imports(doc.imports, base, acc)

      Schema::Document.new(
        target_namespace: doc.target_namespace,
        namespaces: doc.namespaces,
        types: acc.types,
        elements: acc.elements,
        groups: acc.groups,
        attribute_groups: acc.attribute_groups,
        imports: doc.imports.dup,
        includes: []
      )
    end

    # Merges every included schema into +acc+.
    def apply_includes(includes, base, acc)
      merge_references(includes, base, acc)
    end

    # Merges every imported schema that names a schema location into +acc+.
    def apply_imports(imports, base, acc)
      merge_references(imports, base, acc)
    end

    # Shared worker for includes and imports: resolves each reference against
    # +base+, loads it (recursively merging its own references) and appends
    # its components to +acc+.
    def merge_references(references, base, acc)
      references.each do |reference|
        location = reference.schema_location
        # Nothing to load without a location; a nil here would otherwise
        # reach File.expand_path and raise a TypeError.
        next if location.nil? || location.empty?

        target = resolve(base, location)
        loaded = load_one(target)
        next if loaded.nil? # already merged elsewhere in this load

        merged = merge_includes(loaded, base_for(target))
        acc.types.concat(merged.types)
        acc.elements.concat(merged.elements)
        acc.groups.concat(merged.groups)
        acc.attribute_groups.concat(merged.attribute_groups)
      end
    end

    # Returns the raw schema text for a local file or an http(s) location.
    # Raises FetchError for http(s) locations when remote fetching is disabled.
    def read(path_or_uri)
      return File.read(path_or_uri) unless remote?(path_or_uri)

      unless @fetch_remote
        raise FetchError.new("remote fetch disabled for #{path_or_uri}", source: path_or_uri.to_s)
      end

      fetch_remote(path_or_uri)
    end

    # Performs a single GET and returns the response body. Any response that
    # is not a Net::HTTPSuccess (redirects included) raises FetchError.
    def fetch_remote(uri)
      uri = URI(uri)
      use_ssl = uri.scheme == "https"
      response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: use_ssl, read_timeout: 30) do |http|
        http.get(uri.request_uri)
      end
      raise FetchError.new("HTTP #{response.code} for #{uri}", source: uri.to_s) unless response.is_a?(Net::HTTPSuccess)

      response.body
    end

    # True when the location's string form starts with http:// or https://.
    def remote?(loc)
      loc.to_s.match?(%r{\Ahttps?://})
    end

    # Base used to resolve relative references found inside a document: the
    # document's own URI for remote documents, its directory for local files.
    def base_for(path_or_uri)
      remote?(path_or_uri) ? URI(path_or_uri) : File.dirname(File.expand_path(path_or_uri))
    end

    # Resolves +location+ against +base+. Absolute http(s) locations are
    # returned untouched.
    def resolve(base, location)
      return location if remote?(location)

      if base.is_a?(URI)
        URI.join(base.to_s, location).to_s
      else
        File.expand_path(location, base)
      end
    end

    # Key used to recognise a location that was already loaded.
    def canonicalize(loc)
      remote?(loc) ? URI(loc).to_s : File.expand_path(loc)
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module SchemaForge
  # Helpers that turn schema names into Ruby-style identifiers.
  module Naming
    # Reserved words that get a trailing underscore when used as a name.
    RUBY_KEYWORDS = %w[
      BEGIN END alias and begin break case class def defined? do else elsif end
      ensure false for if in module next nil not or redo rescue retry return self
      super then true undef unless until when while yield
    ].freeze

    module_function

    # Builds a PascalCase name: the input is split on every character that is
    # not an ASCII letter or digit and the first letter of each piece is
    # upcased. An input without any usable piece yields "_". When the
    # original input is a Ruby keyword, an underscore is appended.
    #
    # @param name [#to_s]
    # @return [String]
    def pascal_case(name)
      text = name.to_s
      pieces = text.split(/[^A-Za-z0-9]/).reject(&:empty?)
      base = pieces.map { |piece| piece[0].to_s.upcase + piece[1..].to_s }.join
      base = "_" if base.empty?
      RUBY_KEYWORDS.include?(text) ? "#{base}_" : base
    end

    # Builds a snake_case name: underscores are inserted at lower/upper and
    # acronym boundaries, "-" and "." become "_", and the result is
    # downcased. A result that is a Ruby keyword gets a trailing underscore.
    #
    # @param name [#to_s]
    # @return [String]
    def snake_case(name)
      s = name.to_s
              .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
              .gsub(/([a-z\d])([A-Z])/, '\1_\2')
              # "." is legal in XML names but not in Ruby identifiers, so it
              # is translated exactly like "-".
              .tr(".-", "_")
              .downcase
      append_underscore_if_keyword(s)
    end

    # Name used for a field backing an attribute: the snake_case name with
    # one trailing underscore removed (such as the one added for keywords),
    # followed by "_attr".
    #
    # @param name [#to_s]
    # @return [String]
    def attr_field(name)
      "#{snake_case(name).chomp('_')}_attr"
    end

    # Returns +name+ with "_" appended when it is a Ruby keyword, otherwise
    # +name+ itself.
    def append_underscore_if_keyword(name)
      RUBY_KEYWORDS.include?(name) ? "#{name}_" : name
    end
  end
end
@@ -0,0 +1,346 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+
5
module SchemaForge
  # Turns the text of one XSD file into a Schema::Document. Only components
  # declared in this file are returned; includes and imports are recorded,
  # not followed.
  class Parser
    XSD_NS = "http://www.w3.org/2001/XMLSchema"

    # Facets whose value is stored as an Integer.
    NUMERIC_FACETS = %w[length minLength maxLength totalDigits fractionDigits].freeze
    # Facets stored as a single value (bounds, whiteSpace, pattern).
    VALUE_FACETS = %w[minInclusive maxInclusive minExclusive maxExclusive whiteSpace pattern].freeze

    # XSD facet element name => key used in the facets hash.
    FACET_KEY = {
      "length" => :length,
      "minLength" => :min_length,
      "maxLength" => :max_length,
      "totalDigits" => :total_digits,
      "fractionDigits" => :fraction_digits,
      "minInclusive" => :min_inclusive,
      "maxInclusive" => :max_inclusive,
      "minExclusive" => :min_exclusive,
      "maxExclusive" => :max_exclusive,
      "whiteSpace" => :white_space,
      "pattern" => :pattern,
      "enumeration" => :enumeration
    }.freeze

    # Lexical forms of xs:boolean that mean true.
    XSD_TRUE = %w[true 1].freeze

    # Parses +source+ and returns the resulting document.
    #
    # @param source [String] XSD text
    # @param base_path [String] origin of the text, reported in ParseError
    def self.parse(source, base_path:)
      new(source, base_path: base_path).parse
    end

    # @param source [String] XSD text
    # @param base_path [String] origin of the text, reported in ParseError
    def initialize(source, base_path:)
      @source = source
      @base_path = base_path
    end

    # Parses the source given to the constructor.
    #
    # Raises ParseError when the XML is malformed or the root element is not
    # named "schema". Occurrence counts and integer facets that are not
    # base-10 integers raise ArgumentError from Kernel#Integer.
    def parse
      xml = Nokogiri::XML(@source, &:strict)
      schema_node = xml.root
      unless schema_node && schema_node.name == "schema"
        raise ParseError.new("missing <schema> root", source: @base_path)
      end

      # Both are needed by parse_type_ref, so set them before any component
      # is parsed.
      @namespaces = build_namespaces(schema_node)
      @target_namespace = schema_node["targetNamespace"]

      types = collect_types(schema_node)
      groups = collect_groups(schema_node)
      attribute_groups = collect_attribute_groups(schema_node)

      Schema::Document.new(
        target_namespace: @target_namespace,
        namespaces: @namespaces,
        types: types,
        elements: collect_elements(schema_node),
        groups: groups,
        attribute_groups: attribute_groups,
        imports: collect_imports(schema_node),
        includes: collect_includes(schema_node)
      )
    rescue Nokogiri::XML::SyntaxError => e
      raise ParseError.new(e.message, source: @base_path)
    end

    private

    # All XSD-namespace children of +node+ matching the XPath step +step+.
    def xs_children(node, step)
      node.xpath("./xs:#{step}", "xs" => XSD_NS)
    end

    # First XSD-namespace child of +node+ matching +step+, or nil.
    def xs_child(node, step)
      node.at_xpath("./xs:#{step}", "xs" => XSD_NS)
    end

    # Truthy when +node+ lives in the XSD namespace.
    def xsd_node?(node)
      node.namespace && node.namespace.href == XSD_NS
    end

    # xs:boolean accepts "true" and "1" for true.
    def xsd_true?(value)
      XSD_TRUE.include?(value.to_s.strip)
    end

    # Converts a count to an Integer. Strings are read strictly as base 10 so
    # that a leading zero is not taken as an octal prefix ("010" is 10).
    def decimal_integer(text)
      text.is_a?(String) ? Integer(text, 10) : Integer(text)
    end

    # Value of a min/max bound facet: a Float when the text is numeric,
    # otherwise the raw text (bounds on date-like types are not numbers).
    def bound_value(raw)
      (raw && Float(raw, exception: false)) || raw
    end

    # Prefix => namespace URI for the declarations Nokogiri reports on
    # +node+; the default namespace is stored under the empty string.
    def build_namespaces(node)
      node.namespaces.transform_keys { |k| k.sub(/\Axmlns:?/, "") }
    end

    # Top-level simple types followed by top-level complex types.
    def collect_types(schema_node)
      simple = xs_children(schema_node, "simpleType").map { |n| parse_simple_type(n, anonymous: false) }
      complex = xs_children(schema_node, "complexType").map { |n| parse_complex_type(n, anonymous: false) }
      simple + complex
    end

    # Top-level model groups.
    def collect_groups(schema_node)
      xs_children(schema_node, "group").map do |n|
        Schema::Group.new(name: n["name"], content: parse_content_model(n))
      end
    end

    # Top-level element declarations.
    def collect_elements(schema_node)
      xs_children(schema_node, "element").map { |n| parse_element(n) }
    end

    # xs:include directives; schemaLocation is passed on as found (may be nil).
    def collect_includes(schema_node)
      xs_children(schema_node, "include").map do |n|
        Schema::Include.new(schema_location: n["schemaLocation"])
      end
    end

    # xs:import directives; namespace and schemaLocation may each be nil.
    def collect_imports(schema_node)
      xs_children(schema_node, "import").map do |n|
        Schema::Import.new(namespace: n["namespace"], schema_location: n["schemaLocation"])
      end
    end

    # Top-level attribute groups with their attributes and nested group refs.
    def collect_attribute_groups(schema_node)
      xs_children(schema_node, "attributeGroup").map do |n|
        Schema::AttributeGroup.new(
          name: n["name"],
          attributes: xs_children(n, "attribute").map { |a| parse_attribute(a) },
          attribute_groups: collect_attribute_group_refs(n)
        )
      end
    end

    # References to attribute groups found directly under +node+.
    def collect_attribute_group_refs(node)
      xs_children(node, "attributeGroup[@ref]").map do |r|
        Schema::AttributeGroupRef.new(ref: parse_type_ref(r["ref"]))
      end
    end

    # Builds a simple type from its restriction, list or union child (checked
    # in that order); the body is nil when none is present.
    def parse_simple_type(node, anonymous:)
      body =
        if (restriction = xs_child(node, "restriction"))
          parse_restriction(restriction)
        elsif (list = xs_child(node, "list"))
          parse_list(list)
        elsif (union = xs_child(node, "union"))
          parse_union(union)
        end

      Schema::SimpleType.new(
        name: node["name"],
        anonymous: anonymous,
        body: body,
        doc: parse_annotation(node)
      )
    end

    # Collects the facets declared by XSD-namespace children of a restriction.
    def parse_restriction(node)
      facets = {}
      node.element_children.each do |child|
        accumulate_facet(facets, child) if xsd_node?(child)
      end
      Schema::Restriction.new(base: parse_type_ref(node["base"]), facets: facets)
    end

    # Stores one facet element into +facets+. Enumerations accumulate in an
    # array; every other facet keeps a single value, so a repeated facet
    # (e.g. several patterns) overwrites the earlier one. Children that are
    # not known facets are ignored.
    def accumulate_facet(facets, child)
      case child.name
      when "enumeration"
        (facets[:enumeration] ||= []) << child["value"]
      when *NUMERIC_FACETS
        facets[FACET_KEY[child.name]] = decimal_integer(child["value"])
      when *VALUE_FACETS
        raw = child["value"]
        facets[FACET_KEY[child.name]] = numeric_facet?(child.name) ? bound_value(raw) : raw
      end
    end

    # True for the four min/max bound facets.
    def numeric_facet?(name)
      %w[minInclusive maxInclusive minExclusive maxExclusive].include?(name)
    end

    # xs:list; only the itemType attribute is read.
    def parse_list(node)
      Schema::List.new(item_type: parse_type_ref(node["itemType"]))
    end

    # xs:union; only the whitespace-separated memberTypes attribute is read.
    def parse_union(node)
      names = (node["memberTypes"] || "").split(/\s+/).reject(&:empty?)
      Schema::Union.new(member_types: names.map { |s| parse_type_ref(s) })
    end

    # Builds a complex type from whichever body form it uses.
    def parse_complex_type(node, anonymous:)
      derivation, base, attributes, attribute_groups, content = resolve_complex_type_body(node)

      Schema::ComplexType.new(
        name: node["name"],
        anonymous: anonymous,
        base: base,
        derivation: derivation,
        attributes: attributes,
        attribute_groups: attribute_groups,
        content: content,
        mixed: xsd_true?(node["mixed"]),
        doc: parse_annotation(node)
      )
    end

    # Returns [derivation, base, attributes, attribute_group_refs, content]
    # for simpleContent, complexContent, or a plain body (in that priority).
    def resolve_complex_type_body(node)
      if (simple = xs_child(node, "simpleContent"))
        parse_simple_content(simple)
      elsif (complex = xs_child(node, "complexContent"))
        parse_complex_content(complex)
      else
        attrs = xs_children(node, "attribute").map { |a| parse_attribute(a) }
        [nil, nil, attrs, collect_attribute_group_refs(node), parse_content_model(node)]
      end
    end

    # simpleContent by extension; any other form raises ParseError.
    def parse_simple_content(node)
      ext = xs_child(node, "extension")
      raise ParseError.new("simpleContent without extension not yet supported", source: @base_path) unless ext

      attrs = xs_children(ext, "attribute").map { |a| parse_attribute(a) }
      [:extension, parse_type_ref(ext["base"]), attrs, collect_attribute_group_refs(ext), nil]
    end

    # complexContent by extension; any other form raises ParseError.
    def parse_complex_content(node)
      ext = xs_child(node, "extension")
      raise ParseError.new("complexContent without extension not yet supported", source: @base_path) unless ext

      attrs = xs_children(ext, "attribute").map { |a| parse_attribute(a) }
      [:extension, parse_type_ref(ext["base"]), attrs, collect_attribute_group_refs(ext), parse_content_model(ext)]
    end

    # Content model directly under +node+: its first xs:sequence, else its
    # first xs:choice, else nil.
    def parse_content_model(node)
      if (sequence = xs_child(node, "sequence"))
        parse_sequence(sequence)
      elsif (choice = xs_child(node, "choice"))
        parse_choice(choice)
      end
    end

    def parse_sequence(node)
      Schema::Sequence.new(
        items: parse_particles(node),
        min_occurs: occurs(node["minOccurs"], 1),
        max_occurs: occurs(node["maxOccurs"], 1)
      )
    end

    def parse_choice(node)
      Schema::Choice.new(
        items: parse_particles(node),
        min_occurs: occurs(node["minOccurs"], 1),
        max_occurs: occurs(node["maxOccurs"], 1)
      )
    end

    # Particles for the XSD-namespace children of +parent+, in document
    # order; children parse_particle does not recognise are dropped.
    def parse_particles(parent)
      parent.element_children.each_with_object([]) do |child, particles|
        next unless xsd_node?(child)

        particle = parse_particle(child)
        particles << particle if particle
      end
    end

    # Maps one child of a sequence/choice to its particle, or nil when the
    # child is of a kind that is not handled here.
    def parse_particle(child)
      case child.name
      when "element" then parse_element(child)
      when "sequence" then parse_sequence(child)
      when "choice" then parse_choice(child)
      when "group" then parse_group_particle(child)
      when "any"
        Schema::Any.new(
          namespace: child["namespace"],
          process_contents: child["processContents"],
          min_occurs: occurs(child["minOccurs"], 1),
          max_occurs: occurs(child["maxOccurs"], 1)
        )
      end
    end

    # Group reference particle; a group element without ref yields nil.
    def parse_group_particle(node)
      return unless node["ref"]

      Schema::GroupRef.new(
        ref: parse_type_ref(node["ref"]),
        min_occurs: occurs(node["minOccurs"], 1),
        max_occurs: occurs(node["maxOccurs"], 1)
      )
    end

    # Element declaration. An inline complexType takes precedence over an
    # inline simpleType. Only name/type are read, so an element that uses
    # ref= ends up with a nil name.
    def parse_element(node)
      inline_type =
        if (complex = xs_child(node, "complexType"))
          parse_complex_type(complex, anonymous: true)
        elsif (simple = xs_child(node, "simpleType"))
          parse_simple_type(simple, anonymous: true)
        end

      Schema::Element.new(
        name: node["name"],
        type_ref: parse_type_ref(node["type"]),
        inline_type: inline_type,
        min_occurs: occurs(node["minOccurs"], 1),
        max_occurs: occurs(node["maxOccurs"], 1),
        nillable: xsd_true?(node["nillable"]),
        default: node["default"],
        fixed: node["fixed"],
        doc: parse_annotation(node)
      )
    end

    # Attribute declaration; +use+ defaults to :optional when absent.
    def parse_attribute(node)
      use = node["use"]&.to_sym || :optional
      simple = xs_child(node, "simpleType")
      inline_type = simple ? parse_simple_type(simple, anonymous: true) : nil

      Schema::Attribute.new(
        name: node["name"],
        type_ref: parse_type_ref(node["type"]),
        inline_type: inline_type,
        use: use,
        default: node["default"],
        fixed: node["fixed"],
        doc: parse_annotation(node)
      )
    end

    # Returns a hash with :documentation and/or :appinfo (stripped text of
    # the first such child of the annotation), or nil when there is nothing.
    def parse_annotation(node)
      ann = xs_child(node, "annotation")
      return nil unless ann

      doc = xs_child(ann, "documentation")
      app = xs_child(ann, "appinfo")
      result = {}
      result[:documentation] = doc.content.strip if doc
      result[:appinfo] = app.content.strip if app
      result.empty? ? nil : result
    end

    # Interprets a minOccurs/maxOccurs attribute: +default+ when missing or
    # empty, :unbounded for "unbounded", otherwise a base-10 Integer.
    def occurs(value, default)
      return default if value.nil? || value.empty?
      return :unbounded if value == "unbounded"

      decimal_integer(value)
    end

    # Resolves a QName to a Schema::TypeRef, or nil for a nil/empty name.
    # Prefixed names use the namespace declared for the prefix on the schema
    # root. Unprefixed names go to the XSD namespace when that is the default
    # namespace or the name is a builtin type, otherwise to the target
    # namespace.
    def parse_type_ref(qname)
      return nil if qname.nil? || qname.empty?

      if qname.include?(":")
        prefix, local = qname.split(":", 2)
        Schema::TypeRef.new(namespace: @namespaces[prefix], local_name: local)
      else
        ns = @namespaces[""] == XSD_NS ? XSD_NS : @target_namespace
        ns = XSD_NS if BuiltinTypes.builtin?(XSD_NS, qname)
        Schema::TypeRef.new(namespace: ns, local_name: qname)
      end
    end
  end
end