schemaforge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6c62b91be85dba818c0260c7ba4b48dcc3f07ba741b723e88dae713b7b145f96
4
+ data.tar.gz: 72d41eef0776df16199b1e41caaf468c2d43550806fd16ac51021d79e56db581
5
+ SHA512:
6
+ metadata.gz: 1e6be4fdcee6980b73b2eda3d682b9d3c8399b16ed9965b1bbac21d7526ca3f2bcff36abbc7385280fca9c22ddcc8ae3f79104c0da249adca1eedcbd27b66da3
7
+ data.tar.gz: 541defa841034c0c28ab0ac2642d0fc91ce88c838cbbf25066c751cc5b7648f6b953f5d90da827b299f894222c2bea934f41ff75ff374baf99d22f7fa483edc8
data/LICENSE.txt ADDED
@@ -0,0 +1,19 @@
1
+ MIT License
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included
11
+ in all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
19
+ DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,25 @@
1
+ # SchemaForge
2
+
3
+ Generate Ruby `Data.define` value objects from XSD (XML Schema) files.
4
+
5
+ ## Status
6
+
7
+ Released. The design is documented in the repository.
8
+
9
+ ## Usage
10
+
11
+ ```ruby
12
+ require "schema_forge"
13
+
14
+ SchemaForge.generate(
15
+ input: "schemas/",
16
+ output: "lib/generated",
17
+ module_name: "Schema",
18
+ )
19
+ ```
20
+
21
+ CLI:
22
+
23
+ ```sh
24
+ schemaforge schemas/ --out lib/generated --module Schema
25
+ ```
data/exe/schemaforge ADDED
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
5
+
6
+ require "optparse"
7
+ require "schema_forge"
8
+
9
# Command-line entry point: parses flags, then hands the input path to
# SchemaForge.generate. Exits 0 on success (and for --version / --help),
# 1 on a usage error or any SchemaForge::Error.

# Defaults; each flag handler below overwrites one entry.
options = { module_name: "Schema", out: "schema_forge_out", fetch_remote: true }
parser = OptionParser.new do |o|
  o.banner = "Usage: schemaforge <input> [options]"
  o.on("--out DIR", "Output directory (default: schema_forge_out)") { |v| options[:out] = v }
  o.on("--module NAME", "Module wrapping generated types (default: Schema)") { |v| options[:module_name] = v }
  o.on("--[no-]fetch-remote", "Fetch remote xs:import/include (default: yes)") { |v| options[:fetch_remote] = v }
  o.on("--version", "Print version and exit") do
    puts SchemaForge::VERSION
    exit 0
  end
  o.on("-h", "--help", "Print this help and exit") do
    puts o
    exit 0
  end
end

# An unknown flag or a flag missing its argument raises a subclass of
# OptionParser::ParseError; report it as a usage error instead of letting the
# exception escape as a stack trace.
begin
  remaining = parser.parse!
rescue OptionParser::ParseError => e
  warn "schema_forge: #{e.message}"
  warn parser
  exit 1
end

# The first positional argument is the schema input; it is mandatory.
input = remaining.shift
if input.nil?
  warn parser
  exit 1
end

begin
  SchemaForge.generate(
    input: input,
    output: options[:out],
    module_name: options[:module_name],
    fetch_remote: options[:fetch_remote]
  )
  puts "done"
rescue SchemaForge::Error => e
  warn "schema_forge: #{e.message}"
  # SchemaForge::Error optionally carries the origin of the failure.
  warn "  source: #{e.source}" if e.source
  exit 1
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module SchemaForge
  # Lookup table from XSD built-in simple types to the name of the Ruby class
  # used to represent them in generated code.
  module BuiltinTypes
    XSD_NAMESPACE = "http://www.w3.org/2001/XMLSchema"

    # Textual, binary and date/time types are all carried as String.
    STRING_TYPES = %w[
      string anyURI QName NOTATION normalizedString token language
      Name NCName ID IDREF IDREFS ENTITY ENTITIES NMTOKEN NMTOKENS
      base64Binary hexBinary
      date dateTime time duration gDay gMonth gMonthDay gYear gYearMonth
    ].freeze

    INTEGER_TYPES = %w[
      byte short int long integer negativeInteger nonNegativeInteger
      nonPositiveInteger positiveInteger
      unsignedByte unsignedShort unsignedInt unsignedLong
    ].freeze

    FLOAT_TYPES = %w[decimal float double].freeze

    BOOLEAN_TYPES = %w[boolean].freeze

    # XSD local name => Ruby class name.
    # NOTE(review): boolean maps to "Object", presumably because true and false
    # share no narrower common class in Ruby - confirm this is intended.
    MAPPING = {
      "String" => STRING_TYPES,
      "Integer" => INTEGER_TYPES,
      "Float" => FLOAT_TYPES,
      "Object" => BOOLEAN_TYPES
    }.each_with_object({}) do |(ruby_class, xsd_names), table|
      xsd_names.each { |xsd_name| table[xsd_name] = ruby_class }
    end.freeze

    module_function

    # @param namespace [String] namespace URI of the referenced type
    # @param local_name [String] local part of the referenced type name
    # @return [String, nil] Ruby class name, or nil when the type is not an
    #   XSD built-in known to MAPPING
    def ruby_type_for(namespace, local_name)
      namespace == XSD_NAMESPACE ? MAPPING[local_name] : nil
    end

    # @return [Boolean] whether ruby_type_for knows the given type
    def builtin?(namespace, local_name)
      ruby_type_for(namespace, local_name).nil? ? false : true
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module SchemaForge
  # Base class of every SchemaForge exception. Besides the usual message it
  # can carry a +source+ describing where the failure originated.
  class Error < StandardError
    # @return [Object, nil] origin of the failure as given to the constructor
    attr_reader :source

    # @param message [String, nil] human-readable description
    # @param source [Object, nil] optional origin of the failure
    def initialize(message = nil, source: nil)
      @source = source
      super(message)
    end
  end

  # Specialised failures; they add no behaviour beyond Error.
  class ParseError < Error
  end

  class ResolveError < Error
  end

  class FetchError < Error
  end

  class GenerateError < Error
  end
end
@@ -0,0 +1,391 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "naming"
4
+
5
+ module SchemaForge
6
+ class Generator
7
# Banner written at the top of every source string produced by #emit:
# a do-not-edit notice followed by the frozen-string-literal magic comment.
HEADER = "# Code generated by SchemaForge. DO NOT EDIT.\n" \
         "# frozen_string_literal: true\n"
11
+
12
# @param module_name [String] name of the module that wraps the emitted types
# @param hooks [Object, nil] optional callbacks; #emit calls on_generate and
#   on_emit_source on it when present
def initialize(module_name:, hooks: nil)
  @hooks = hooks
  @module_name = module_name
end
16
+
17
# Renders the Ruby source for one parsed schema document.
#
# The output is HEADER, a blank line, then "module <module_name>" wrapping one
# definition per entry of doc.types. When hooks are present, a type is emitted
# only if on_generate returns a truthy value, and the finished source is passed
# through on_emit_source, whose return value becomes the result.
#
# @param doc [#types, #elements] parsed schema document
# @return [String] generated source (or whatever on_emit_source returns)
def emit(doc)
  source = +""
  source << HEADER << "\n" << "module #{@module_name}\n"

  if doc.types.empty? && doc.elements.empty?
    source << "  # (no types)\n"
  else
    doc.types.each do |type|
      next unless @hooks.nil? || @hooks.on_generate(type, self)

      source << emit_type(type) << "\n"
    end
  end

  source << "end\n"
  return source unless @hooks

  @hooks.on_emit_source(source)
end
35
+
36
+ private
37
+
38
# Dispatches a schema type to the matching emitter.
#
# @return [String] generated source, or "" for anything that is neither a
#   Schema::SimpleType nor a Schema::ComplexType
def emit_type(type)
  return emit_simple_type(type) if type.is_a?(Schema::SimpleType)
  return emit_complex_type(type) if type.is_a?(Schema::ComplexType)

  ""
end
45
+
46
# Emits a simple type. Without restriction facets the type becomes a plain
# constant alias of its Ruby base class; with facets it becomes a validating
# wrapper (see emit_simple_wrapper).
#
# @param simple [#name, #body] simple type node
# @return [String] generated source
def emit_simple_type(simple)
  ruby_type = ruby_type_for_simple(simple)
  body = simple.body
  facets = body.is_a?(Schema::Restriction) ? (body.facets || {}) : {}
  return emit_simple_wrapper(simple, ruby_type, facets) unless facets.empty?

  # The provenance comment is only written when the type has a body at all.
  comment = body ? "  # simpleType: #{simple_base_label(simple)}\n" : ""
  "#{comment}  #{Naming.pascal_case(simple.name)} = #{ruby_type}\n"
end
59
+
60
# Emits a restricted simple type as a single-member Data class whose
# constructor validates the value against the base Ruby type and the facets.
#
# Enumeration values are written as an array of quoted string literals
# (String#inspect). A %w[] literal would split values containing whitespace
# into several entries and would break on values containing brackets.
#
# @param simple [#name, #body] simple type node
# @param base_ruby_type [String] Ruby class name the value must be an instance of
# @param facets [Hash{Symbol => Object}] restriction facets; :enumeration, when
#   present, holds the allowed values
# @return [String] generated source
def emit_simple_wrapper(simple, base_ruby_type, facets)
  type_name = Naming.pascal_case(simple.name)
  enumeration = facets[:enumeration]
  lines = +""
  lines << "  # simpleType: #{simple_base_label(simple)}\n"
  facets.each { |k, v| lines << "  # #{k}: #{Array(v).join(', ')}\n" }
  lines << "  #{type_name} = Data.define(:value) do\n"
  lines << "    def initialize(value:)\n"
  lines << type_check_line(type_name, base_ruby_type)
  lines << enum_check_line(type_name) if enumeration
  lines << emit_facet_validations(type_name, "value", facets)
  lines << "      super\n"
  lines << "    end\n"
  lines << "  end\n"
  if enumeration
    # The constant is assigned after the class body; the generated constructor
    # only reads it when an instance is built, so the ordering is safe.
    literals = Array(enumeration).map { |v| v.to_s.inspect }.join(", ")
    lines << "  #{type_name}::VALUES = [#{literals}].freeze\n"
  end
  lines
end
76
+
77
# Generated guard that rejects a +value+ which is not an instance of the base
# Ruby type.
#
# @return [String] one line of generated source
def type_check_line(type_name, base_ruby_type)
  "      raise ArgumentError, \"#{type_name}#value must be a #{base_ruby_type}\" " \
    "unless value.is_a?(#{base_ruby_type})\n"
end
81
+
82
# Generated guard that rejects a +value+ not listed in <Type>::VALUES. The
# emitted error message interpolates the VALUES constant when it is raised,
# hence the escaped "\#{" below.
#
# @return [String] one line of generated source
def enum_check_line(type_name)
  values_const = "#{type_name}::VALUES"
  "      raise ArgumentError, \"#{type_name}#value must be one of \#{#{values_const}}\" " \
    "unless #{values_const}.include?(value)\n"
end
86
+
87
# Concatenates the generated guards for every facet that has one; facets
# without a guard (facet_validation_line returns nil) are skipped.
#
# @return [String] zero or more lines of generated source
def emit_facet_validations(type_name, field, facets)
  guards = facets.filter_map do |facet, limit|
    facet_validation_line(type_name, field, facet, limit)
  end
  guards.join
end
95
+
96
# Generated guard for one facet: the string-facet emitter is tried first, then
# the numeric one. :enumeration is handled separately through the VALUES
# constant (T18), so it yields nil here.
#
# @return [String, nil] one line of generated source, or nil when the facet
#   has no guard
def facet_validation_line(type_name, field, key, value)
  label = "#{type_name}##{field}"
  guard = string_facet_line(label, field, key, value)
  guard || numeric_facet_line(label, field, key, value)
end
102
+
103
# Generated guard for the string facets :length, :min_length, :max_length and
# :pattern.
#
# The pattern is wrapped in a non-capturing group before anchoring, so an
# alternation such as "a|b" must match the whole value; "\Aa|b\z" would accept
# anything that merely starts with "a". Both the message and the regexp source
# are emitted through String#inspect, so a pattern containing "/", a double
# quote, a backslash or "#{" cannot break or inject into the generated code.
#
# @param pfx [String] "Type#field" label used in the error message
# @param field [String] name of the local variable being validated
# @param key [Symbol] facet name
# @param value [Object] facet value
# @return [String, nil] one line of generated source, or nil for other facets
def string_facet_line(pfx, field, key, value)
  case key
  when :length
    "      raise ArgumentError, \"#{pfx} length must be #{value}\" " \
      "unless #{field}.length == #{value}\n"
  when :min_length
    "      raise ArgumentError, \"#{pfx} length must be >= #{value}\" " \
      "if #{field}.length < #{value}\n"
  when :max_length
    "      raise ArgumentError, \"#{pfx} length must be <= #{value}\" " \
      "if #{field}.length > #{value}\n"
  when :pattern
    message = "#{pfx} must match /#{value}/"
    anchored = "\\A(?:#{value})\\z"
    "      raise ArgumentError, #{message.inspect} " \
      "unless #{field}.match?(Regexp.new(#{anchored.inspect}))\n"
  end
end
119
+
120
# Generated guard for the numeric facets: the four range bounds plus
# :total_digits and :fraction_digits, which have their own emitters.
#
# @return [String, nil] one line of generated source, or nil for other facets
def numeric_facet_line(pfx, field, key, value)
  return total_digits_line(pfx, field, value) if key == :total_digits
  return fraction_digits_line(pfx, field, value) if key == :fraction_digits

  # facet => [relation named in the message, comparison that detects a violation]
  relation, violation = {
    min_inclusive: [">=", "<"],
    max_inclusive: ["<=", ">"],
    min_exclusive: [">", "<="],
    max_exclusive: ["<", ">="]
  }[key]
  return nil unless relation

  "      raise ArgumentError, \"#{pfx} must be #{relation} #{value}\" if #{field} #{violation} #{value}\n"
end
134
+
135
# Generated guard for :total_digits: the emitted code counts the characters of
# the value's string form after removing "-" and ".".
#
# @return [String] one line of generated source
def total_digits_line(pfx, field, value)
  "      raise ArgumentError, \"#{pfx} must have at most #{value} digits\" " \
    "if #{field}.to_s.gsub('-', '').gsub('.', '').length > #{value}\n"
end
139
+
140
# Generated guard for :fraction_digits: the emitted code measures what follows
# the first "." in the value's string form (nothing when there is no ".").
#
# @return [String] one line of generated source
def fraction_digits_line(pfx, field, value)
  fraction_length = "(#{field}.to_s.split('.')[1] || '').length"
  "      raise ArgumentError, \"#{pfx} must have at most #{value} fraction digits\" " \
    "if #{fraction_length} > #{value}\n"
end
145
+
146
# Short human-readable description of what a simple type is derived from, for
# the comment placed above the generated definition.
#
# @return [String]
def simple_base_label(simple)
  body = simple.body
  case body
  when Schema::Restriction
    base = body.base
    base ? base.local_name : "(unknown)"
  when Schema::List
    "list of #{body.item_type.local_name}"
  when Schema::Union
    members = body.member_types.map(&:local_name)
    "union of #{members.join(', ')}"
  else
    "(no body)"
  end
end
154
+
155
# Ruby class name backing a simple type. Only a restriction of an XSD built-in
# resolves to something other than "String"; lists, unions, bodiless types and
# restrictions of non-built-in bases all fall back to "String".
#
# @return [String]
def ruby_type_for_simple(simple)
  body = simple.body
  return "String" unless body.is_a?(Schema::Restriction)

  base = body.base
  return "String" unless base

  BuiltinTypes.ruby_type_for(base.namespace, base.local_name) || "String"
end
164
+
165
# Emits a complex type as a Data class whose members are, in order, the
# attribute fields, the simple-content value field (if any) and the element
# fields. The generated constructor runs the choice check (when the content is
# a non-repeating choice) and one validation per field before calling super.
#
# @param complex_type [#name, #attributes, #content] complex type node
# @return [String] generated source
def emit_complex_type(complex_type)
  const_name = Naming.pascal_case(complex_type.name)

  from_elements = collect_element_fields(complex_type)
  from_attributes = complex_type.attributes.map { |attribute| build_attr_field(attribute) }
  fields = from_attributes + simple_content_value_fields(complex_type) + from_elements
  choice_names = collect_choice_field_names(complex_type)

  source = +""
  source << emit_complex_doc(complex_type)
  source << "  # choice: exactly one of #{choice_names.join(', ')}\n" if choice_names

  members = fields.map { |field| ":#{field[:name]}" }.join(", ")
  source << "  #{const_name} = Data.define(#{members}) do\n"
  source << "    def initialize(#{kwargs_for(fields)})\n"
  source << emit_choice_validation(const_name, choice_names) if choice_names
  fields.each { |field| source << emit_field_validation(const_name, field) }
  source << "      super\n" << "    end\n" << "  end\n"
  source
end
188
+
189
# Field names taking part in the "exactly one of" check of a complex type whose
# content is a non-repeating choice.
#
# Returns nil, not an empty array, when the choice has no direct element
# children: callers treat any non-nil result as "emit the check", and a check
# over zero fields can never count exactly one set value, so the generated
# constructor would always raise.
#
# @param complex_type [#content] complex type node
# @return [Array<String>, nil] snake_case field names, or nil when no choice
#   check applies
def collect_choice_field_names(complex_type)
  content = complex_type.content
  return nil unless content.is_a?(Schema::Choice) && !repeating?(content)

  names = content.items.filter_map { |item| Naming.snake_case(item.name) if item.is_a?(Schema::Element) }
  names.empty? ? nil : names
end
194
+
195
# Generated check that exactly one of the choice fields is non-nil.
#
# @param type_name [String] name of the generated class, used in the message
# @param choice_field_names [Array<String>] keyword names taking part in the choice
# @return [String] two lines of generated source
def emit_choice_validation(type_name, choice_field_names)
  listed = choice_field_names.join(", ")
  [
    "      set = [#{listed}].count { |v| !v.nil? }\n",
    "      raise ArgumentError, \"#{type_name}: exactly one of #{listed} must be set\" unless set == 1\n"
  ].join
end
203
+
204
# Comment block placed above a generated complex type: schema annotations,
# a mixed-content marker and, for extensions, the name of the base type.
#
# @return [String] zero or more comment lines
def emit_complex_doc(complex_type)
  comment = +""
  emit_complex_doc_annotations(comment, complex_type)
  comment << "  # mixed content\n" if complex_type.mixed

  extends_base = complex_type.derivation == :extension && complex_type.base
  comment << "  # extends #{complex_type.base.local_name}\n" if extends_base
  comment
end
213
+
214
# Appends the type's :appinfo and :documentation annotations to +out+ as
# comment lines, appinfo first.
#
# Every line of a multi-line annotation gets its own "  # " prefix. Prefixing
# only the first line would leave the remaining lines as bare text in the
# generated file, which then no longer parses. Single-line text produces the
# same output as before, including "  # " for an empty string.
#
# @param out [String] buffer that is appended to in place
# @param complex_type [#doc] node whose doc is nil or responds to [] with
#   :appinfo / :documentation
# @return [String, nil] +out+, or nil when the type has no doc; callers in
#   this class ignore the result
def emit_complex_doc_annotations(out, complex_type)
  annotations = complex_type.doc
  return unless annotations

  %i[appinfo documentation].each do |key|
    text = annotations[key]
    next unless text

    rows = text.to_s.lines(chomp: true)
    rows = [""] if rows.empty?
    rows.each { |row| out << "  # #{row}\n" }
  end
  out
end
220
+
221
# Whether the type extends a base and has no element content, which is the
# case treated as simple content carrying a single +value+.
#
# @return [Boolean, nil] truthy when all three conditions hold
def simple_content_extension?(complex_type)
  return false unless complex_type.derivation == :extension

  complex_type.base && complex_type.content.nil?
end
224
+
225
# Field list for the text value of a simple-content extension: one required,
# non-list field named "value" typed after the base, or no fields at all for
# any other complex type.
#
# @return [Array<Hash>] zero or one field descriptor
def simple_content_value_fields(complex_type)
  return [] unless simple_content_extension?(complex_type)

  base = complex_type.base
  builtin = BuiltinTypes.ruby_type_for(base.namespace, base.local_name)
  value_field = {
    name: "value",
    ruby_type: builtin || Naming.pascal_case(base.local_name),
    list: false,
    optional: false,
    source: :element,
    min_occurs: 1,
    max_occurs: 1
  }
  [value_field]
end
240
+
241
# Field descriptors for the particles of a type's content model. Sequences
# pass their own occurrence bounds down to each particle; choices instead mark
# their particles optional unless the choice itself repeats. Any other content
# yields no fields.
#
# @return [Array<Hash>] field descriptors in document order
def collect_element_fields(complex_type)
  content = complex_type.content
  is_sequence = content.is_a?(Schema::Sequence)
  return [] unless is_sequence || content.is_a?(Schema::Choice)

  repeats = repeating?(content)
  extra = if is_sequence
            { seq_min: content.min_occurs, seq_max: content.max_occurs }
          else
            { choice_optional: !repeats }
          end
  content.items.flat_map do |particle|
    element_field_for(particle, sequence_repeats: repeats, **extra)
  end
end
260
+
261
# Whether a content group may occur more than once: its max_occurs is
# :unbounded or an Integer greater than 1.
#
# @return [Boolean]
def repeating?(content)
  limit = content.max_occurs
  return true if limit == :unbounded

  limit.is_a?(Integer) && limit > 1
end
264
+
265
# Field descriptors for a single particle of a content model. Elements and
# nested choices are delegated; a wildcard becomes one optional, untyped "any"
# field; every other particle kind contributes nothing.
#
# @param particle [Object] content-model particle
# @param sequence_repeats [Boolean] whether the enclosing group repeats
# @param choice_optional [Boolean] whether the enclosing choice makes the field optional
# @param seq_min [Integer, nil] enclosing group's min_occurs, if passed down
# @param seq_max [Integer, Symbol, nil] enclosing group's max_occurs, if passed down
# @return [Array<Hash>] field descriptors
def element_field_for(particle, sequence_repeats:, choice_optional: false, seq_min: nil, seq_max: nil)
  if particle.is_a?(Schema::Element)
    return element_fields_for_element(particle, sequence_repeats, choice_optional, seq_min, seq_max)
  end
  return element_fields_for_choice(particle) if particle.is_a?(Schema::Choice)
  return [] unless particle.is_a?(Schema::Any)

  [{ name: "any", ruby_type: nil, list: false, optional: true, source: :any, min_occurs: 0, max_occurs: 1 }]
end
277
+
278
# Field descriptor for one element particle.
#
# A list field takes the enclosing group's occurrence bounds whenever they are
# given; otherwise the element's own bounds apply. A field is optional when the
# enclosing choice says so, or when it is a non-list element with minOccurs 0.
#
# NOTE(review): the group bounds override the element's bounds even when the
# element is a list only because of its own maxOccurs (group not repeating) -
# confirm that is intended for groups with explicit, non-repeating bounds.
#
# @return [Array<Hash>] a single-element array holding the descriptor
def element_fields_for_element(particle, sequence_repeats, choice_optional, seq_min, seq_max)
  is_list = list_element?(particle, sequence_repeats)
  nullable = choice_optional || (particle.min_occurs.zero? && !is_list)

  lower = particle.min_occurs
  upper = particle.max_occurs
  if is_list
    lower = seq_min unless seq_min.nil?
    upper = seq_max unless seq_max.nil?
  end

  descriptor = {
    name: Naming.snake_case(particle.name),
    ruby_type: ruby_type_for_ref(particle.type_ref),
    list: is_list,
    optional: nullable,
    source: :element,
    min_occurs: lower,
    max_occurs: upper
  }
  [descriptor]
end
293
+
294
# Field descriptors for the branches of a nested choice. The branches inherit
# the choice's occurrence bounds and are optional unless the choice repeats.
#
# @return [Array<Hash>] field descriptors in document order
def element_fields_for_choice(particle)
  repeats = repeating?(particle)
  inherited = {
    sequence_repeats: repeats,
    choice_optional: !repeats,
    seq_min: particle.min_occurs,
    seq_max: particle.max_occurs
  }
  particle.items.flat_map { |branch| element_field_for(branch, **inherited) }
end
306
+
307
# Whether an element becomes an Array field: its enclosing group repeats, or
# its own max_occurs is :unbounded or an Integer greater than 1.
#
# @return [Boolean] (or the truthy +sequence_repeats+ value itself)
def list_element?(particle, sequence_repeats)
  return sequence_repeats if sequence_repeats

  limit = particle.max_occurs
  limit == :unbounded || (limit.is_a?(Integer) && limit > 1)
end
311
+
312
# Field descriptor for an XML attribute. The Ruby type is the built-in mapping
# of the attribute's type reference, else the PascalCase form of its local
# name, else nil when the attribute has no type reference. The field is
# optional unless the attribute's use is :required.
#
# @param attr [#name, #type_ref, #use] attribute node
# @return [Hash] field descriptor
def build_attr_field(attr)
  ref = attr.type_ref
  resolved = if ref
               builtin = BuiltinTypes.ruby_type_for(ref.namespace, ref.local_name)
               builtin || Naming.pascal_case(ref.local_name)
             end
  usage = attr.use

  {
    name: Naming.attr_field(attr.name),
    ruby_type: resolved,
    list: false,
    optional: usage != :required,
    source: :attribute,
    use: usage
  }
end
326
+
327
# Keyword-parameter list for the generated constructor: list fields default to
# [], optional fields to nil, and everything else is a required keyword.
#
# @param fields [Array<Hash>] field descriptors
# @return [String] comma-separated parameter list
def kwargs_for(fields)
  parameters = fields.map do |field|
    default = if field[:list]
                " []"
              elsif field[:optional]
                " nil"
              else
                ""
              end
    "#{field[:name]}:#{default}"
  end
  parameters.join(", ")
end
338
+
339
# Generated validation for one field, chosen by its shape: list, optional or
# required. Fields without a Ruby type (such as the "any" wildcard) are not
# validated.
#
# @return [String] zero or more lines of generated source
def emit_field_validation(type_name, field)
  return "" if field[:ruby_type].nil?
  return emit_list_field_validation(type_name, field) if field[:list]
  return emit_optional_field_validation(type_name, field) if field[:optional]

  emit_required_field_validation(type_name, field)
end
350
+
351
# Generated validation for a list field: the value must be an Array, every
# element must be an instance of the field type, and the size must respect the
# occurrence bounds. A lower bound is only checked when it is a positive
# Integer, an upper bound only when it is an Integer.
#
# @return [String] two to four lines of generated source
def emit_list_field_validation(type_name, field)
  name = field[:name]
  element_type = field[:ruby_type]
  minimum = field[:min_occurs]
  maximum = field[:max_occurs]
  label = "#{type_name}##{name}"
  guard = "      raise ArgumentError, "

  checks = [
    "#{guard}\"#{label} must be an Array\" unless #{name}.is_a?(Array)\n",
    "#{guard}\"#{label} elements must be #{element_type}\" " \
      "unless #{name}.all? { |e| e.is_a?(#{element_type}) }\n"
  ]
  if minimum.is_a?(Integer) && minimum.positive?
    checks << "#{guard}\"#{label} must have at least #{minimum} element(s)\" if #{name}.size < #{minimum}\n"
  end
  if maximum.is_a?(Integer)
    checks << "#{guard}\"#{label} must have at most #{maximum} element(s)\" if #{name}.size > #{maximum}\n"
  end
  checks.join
end
368
+
369
# Generated validation for an optional field: nil is accepted, anything else
# must be an instance of the field type.
#
# The message uses "an" before a type name starting with an uppercase vowel and
# "a" otherwise, matching emit_required_field_validation. Omitting the article
# for vowel-initial types gave "must be Integer or nil" next to
# "must be a String or nil".
#
# @param type_name [String] name of the generated class, used in the message
# @param field [Hash] field descriptor with :name and :ruby_type
# @return [String] one line of generated source
def emit_optional_field_validation(type_name, field)
  ruby_type = field[:ruby_type]
  label = "#{type_name}##{field[:name]}"
  article = /\A[AEIOU]/.match?(ruby_type) ? "an" : "a"
  "      raise ArgumentError, \"#{label} must be #{article} #{ruby_type} or nil\" " \
    "unless #{field[:name]}.nil? || #{field[:name]}.is_a?(#{ruby_type})\n"
end
376
+
377
# Generated validation for a required field: the value must be an instance of
# the field type. The article in the message is "an" before a type name that
# starts with an uppercase vowel, "a" otherwise.
#
# @return [String] one line of generated source
def emit_required_field_validation(type_name, field)
  name, klass = field.values_at(:name, :ruby_type)
  article = /\A[AEIOU]/.match?(klass) ? "an" : "a"
  "      raise ArgumentError, \"#{type_name}##{name} must be #{article} #{klass}\" " \
    "unless #{name}.is_a?(#{klass})\n"
end
384
+
385
# Ruby class name for a type reference: the built-in mapping when there is
# one, otherwise the PascalCase form of the reference's local name.
#
# @param ref [#namespace, #local_name, nil] type reference
# @return [String, nil] nil when there is no reference
def ruby_type_for_ref(ref)
  return nil if ref.nil?

  builtin = BuiltinTypes.ruby_type_for(ref.namespace, ref.local_name)
  builtin || Naming.pascal_case(ref.local_name)
end
390
+ end
391
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module SchemaForge
  module Hooks
    # No-op hook set: every predicate answers true and the emitted source is
    # passed through untouched. Subclass and override the callbacks of interest.
    class Base
      # The five node callbacks below all answer true by default.
      # NOTE(review): presumably called while the schema is read, with a falsy
      # answer dropping the node - their callers are not visible here; confirm.

      def on_complex_type(_node, _schema)
        true
      end

      def on_simple_type(_node, _schema)
        true
      end

      def on_element(_node, _schema)
        true
      end

      def on_attribute(_node, _schema)
        true
      end

      def on_annotation(_node, _schema)
        true
      end

      # Asked by Generator#emit once per type; the type is emitted only when
      # the answer is truthy.
      #
      # @return [Boolean]
      def on_generate(_type, _generator)
        true
      end

      # Receives the complete source built by Generator#emit; the return value
      # becomes the result of #emit.
      #
      # @param source [String] generated source
      # @return [String]
      def on_emit_source(source)
        source
      end
    end
  end
end