metaschema 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,483 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Metaschema
4
+ class ConstraintValidator
5
+ attr_reader :errors
6
+
7
# Create a validator with an empty error list.
#
# The list is exposed through the +errors+ reader and is replaced on every
# call to #validate.
def initialize
  @errors = []
end
10
+
11
# Check a generated class instance against its metaschema constraints.
#
# The error list is reset at the start of every call, so the result only
# reflects this invocation. allowed-values and matches are always checked;
# the remaining constraint kinds are checked only when +constraint_def+
# exposes the corresponding reader.
#
# @param instance [Object] the instance being validated
# @param constraint_def [Object, nil] constraint definition; a falsy value
#   means there is nothing to check
# @return [Array<ConstraintError>] the errors collected during this call
def validate(instance, constraint_def)
  @errors = []

  if constraint_def
    validate_allowed_values(instance, constraint_def)
    validate_matches(instance, constraint_def)

    # Optional constraint kinds, checked in this fixed order.
    %i[has_cardinality is_unique expect index_has_key].each do |kind|
      next unless constraint_def.respond_to?(kind)

      send(:"validate_#{kind}", instance, constraint_def)
    end
  end

  @errors
end
38
+
39
# Validate a whole instance tree, depth first.
#
# For a Lutaml::Model::Serializable node this collects, in order: the
# node's own +validate_constraints+ results, its +validate_occurrences+
# results (each only when the node responds to the method), and then the
# results for every serializable value reachable through the attributes
# declared on the node's class. Anything that is not serializable
# contributes no errors.
#
# @param instance [Object] root of the tree to validate
# @return [Array] all errors found in the tree
def self.validate_tree(instance)
  return [] unless instance.is_a?(Lutaml::Model::Serializable)

  found = []
  found.concat(instance.validate_constraints) if instance.respond_to?(:validate_constraints)
  found.concat(instance.validate_occurrences) if instance.respond_to?(:validate_occurrences)

  instance.class.attributes.each_key do |attr_name|
    value = instance.send(attr_name)
    # Treat a single value like a one-element collection; nil and plain
    # values are filtered out by the type check below.
    children = value.is_a?(Array) ? value : [value]
    children.each do |child|
      found.concat(validate_tree(child)) if child.is_a?(Lutaml::Model::Serializable)
    end
  end

  found
end
70
+
71
+ private
72
+
73
+ # ── allowed-values ────────────────────────────────────────────────
74
+
75
# Check every allowed-values constraint in +constraint_def+.
#
# For each constraint the target (default ".") is resolved to a list of
# values. Nil and empty values are skipped. A value is rejected when the
# constraint does not permit other values (allow_other is not "yes") and
# its string form is not among the enumerated values. Rejections are
# appended to @errors with the constraint's level (default "ERROR").
#
# @param instance [Object] instance whose values are checked
# @param constraint_def [Object] object responding to +allowed_values+
def validate_allowed_values(instance, constraint_def)
  Array(constraint_def.allowed_values).each do |constraint|
    target = constraint.target || "."
    candidates = resolve_target_values(instance, target)
    permitted = Array(constraint.enum).filter_map(&:value)
    open_ended = constraint.allow_other == "yes"
    severity = constraint.level || "ERROR"

    candidates.each do |candidate|
      next if candidate.nil? || candidate.to_s.empty?
      next if open_ended || permitted.include?(candidate.to_s)

      @errors << ConstraintError.new(
        constraint_type: :allowed_values,
        level: severity,
        message: "Value '#{candidate}' not in allowed values: #{permitted.join(', ')}",
        target: target,
      )
    end
  end
end
98
+
99
+ # ── matches ───────────────────────────────────────────────────────
100
+
101
# Check every matches constraint in +constraint_def+.
#
# For each constraint the target (default ".") is resolved to a list of
# values; nil and empty values are skipped. When the constraint has a
# regex, each value's string form must match it; otherwise, when it has a
# datatype, the value must satisfy #datatype_matches?. A constraint with
# neither produces no errors. Failures are appended to @errors with the
# constraint's level (default "ERROR").
#
# @param instance [Object] instance whose values are checked
# @param constraint_def [Object] object responding to +matches+
def validate_matches(instance, constraint_def)
  Array(constraint_def.matches).each do |constraint|
    target = constraint.target || "."
    values = resolve_target_values(instance, target)
    level = constraint.level || "ERROR"
    # Compiled on first use and reused for the remaining values, so the
    # pattern is built at most once per constraint and never when there is
    # nothing to check.
    pattern = nil

    values.each do |val|
      next if val.nil? || val.to_s.empty?

      unmet =
        if constraint.regex
          pattern ||= Regexp.new(constraint.regex)
          "regex '#{constraint.regex}'" unless val.to_s.match?(pattern)
        elsif constraint.datatype
          "datatype '#{constraint.datatype}'" unless datatype_matches?(val, constraint.datatype)
        end
      next unless unmet

      @errors << ConstraintError.new(
        constraint_type: :matches,
        level: level,
        message: "Value '#{val}' does not match #{unmet}",
        target: target,
      )
    end
  end
end
133
+
134
+ # ── has-cardinality ──────────────────────────────────────────────
135
+
136
# Check every has-cardinality constraint in +constraint_def+.
#
# The number of items at the constraint's target (default ".") is compared
# against min_occurs and max_occurs. Both bounds are coerced with +to_i+
# before comparing, so they may be given as Integers or numeric Strings; a
# max_occurs of "unbounded" disables the upper bound. Violations are
# appended to @errors with the constraint's level (default "ERROR").
#
# @param instance [Object] instance whose items are counted
# @param constraint_def [Object] object responding to +has_cardinality+
def validate_has_cardinality(instance, constraint_def)
  Array(constraint_def.has_cardinality).each do |c|
    target = c.target || "."
    level = c.level || "ERROR"
    count = count_target_items(instance, target)

    # to_i keeps the lower bound consistent with the upper bound: comparing
    # an Integer count with a String bound would raise.
    if c.min_occurs && count < c.min_occurs.to_i
      @errors << ConstraintError.new(
        constraint_type: :has_cardinality,
        level: level,
        message: "Expected at least #{c.min_occurs} items at '#{target}', got #{count}",
        target: target,
      )
    end

    next unless c.max_occurs && c.max_occurs != "unbounded" && count > c.max_occurs.to_i

    @errors << ConstraintError.new(
      constraint_type: :has_cardinality,
      level: level,
      message: "Expected at most #{c.max_occurs} items at '#{target}', got #{count}",
      target: target,
    )
  end
end
162
+
163
+ # ── is-unique ────────────────────────────────────────────────────
164
+
165
# Check every is-unique constraint in +constraint_def+.
#
# The items at the constraint's target (default ".") must have distinct
# keys. An item's key is built from the constraint's key fields (resolved
# as flags on the item) or, when there are none, from the item's extracted
# value; the parts are joined with "|". Each item whose key was already
# seen yields one error naming the first item with that key and the
# duplicate. Targets that resolve to fewer than two items are skipped.
#
# @param instance [Object] instance holding the collection
# @param constraint_def [Object] object responding to +is_unique+
def validate_is_unique(instance, constraint_def)
  Array(constraint_def.is_unique).each do |constraint|
    target = constraint.target || "."
    severity = constraint.level || "ERROR"
    key_targets = Array(constraint.key_field).map(&:target)

    items = resolve_target_collection(instance, target)
    next unless items.is_a?(Array) && items.length > 1

    # key string => index of the first item carrying it
    first_seen = {}
    items.each_with_index do |item, position|
      parts =
        if key_targets.empty?
          extract_value(item)
        else
          key_targets.map { |key_target| resolve_flag_value(item, key_target) }
        end
      key_str = Array(parts).join("|")

      unless first_seen.key?(key_str)
        first_seen[key_str] = position
        next
      end

      @errors << ConstraintError.new(
        constraint_type: :is_unique,
        level: severity,
        message: "Duplicate key '#{key_str}' at '#{target}' (items #{first_seen[key_str]} and #{position})",
        target: target,
      )
    end
  end
end
200
+
201
+ # ── expect ───────────────────────────────────────────────────────
202
+
203
# Placeholder for expect constraints.
#
# expect constraints carry XPath-style test expressions that would need an
# expression engine to evaluate. Nothing is evaluated and nothing is
# reported yet; the constraints are only enumerated.
#
# @param _instance [Object] unused
# @param constraint_def [Object] object responding to +expect+
def validate_expect(_instance, constraint_def)
  Array(constraint_def.expect).each do |_constraint|
    # Future: evaluate the constraint's test expression against the instance.
  end
end
211
+
212
+ # ── index-has-key ────────────────────────────────────────────────
213
+
214
# Placeholder for index-has-key constraints.
#
# Checking these needs an index registry, which does not exist here yet.
# Nothing is evaluated and nothing is reported; the constraints are only
# enumerated.
#
# @param _instance [Object] unused
# @param constraint_def [Object] object responding to +index_has_key+
def validate_index_has_key(_instance, constraint_def)
  Array(constraint_def.index_has_key).each do |_constraint|
    # Future: look up the index by the constraint's name and validate keys.
  end
end
222
+
223
+ # ── Target Resolution ────────────────────────────────────────────
224
+
225
# Resolve a Metaschema target expression to a list of values.
#
# Forms are tried in this order:
#   "."                  the instance's own extracted value
#   complex expressions  delegated to MetapathEvaluator (see #complex_target?)
#   ".//path"            values found by following +path+ below the instance
#   ".[@flag='v']/rest"  +rest+ resolved only when the flag equals v
#   "@flag-name"         the value of a flag
#   anything else        the extracted value of the child with that name
#
# @param instance [Object] the context item
# @param target [String] the target expression
# @return [Array] the resolved values (entries may be nil)
def resolve_target_values(instance, target)
  if target == "."
    [extract_value(instance)]
  elsif complex_target?(target)
    MetapathEvaluator.new(instance).resolve(target)
  elsif target.start_with?(".//")
    resolve_descendant_values(instance, target[3..])
  elsif target.start_with?(".[@") && target.include?("]/")
    resolve_conditional_path(instance, target)
  elsif target.start_with?("@")
    # Flag names use underscores on the Ruby side.
    [resolve_flag_value(instance, target[1..].gsub("-", "_"))]
  else
    [resolve_child_value(instance, target)]
  end
end
256
+
257
# Tell whether a target expression has to be handed to MetapathEvaluator.
#
# A target is complex when it contains one of the marker substrings below,
# has a predicate attached to a name ("name[...]"), or addresses a flag
# through a path ("/@") without being a leading ".[@...]" condition.
#
# @param target [String] the target expression
# @return [Boolean]
def complex_target?(target)
  markers = ["has-oscal-namespace", "starts-with", " and ", " or ", "(.)"]
  return true if markers.any? { |marker| target.include?(marker) }
  return true if target.match?(/\w+\[.*\]/)

  target.include?("/@") && !target.start_with?(".[@")
end
267
+
268
# Count the items found at a target path (used by cardinality checks).
#
# Forms are handled as follows:
#   complex expressions  non-nil items returned by MetapathEvaluator
#   "."                  1, the context item itself
#   ".//path"            number of values found by following +path+
#   ".[@flag='v']/name"  children called +name+, or 0 when the flag differs
#   "@flag"              1 when the flag resolves to a non-nil value, else 0
#   "name"               number of children called +name+
# Any other path (one containing "/" or starting with ".") counts as 0.
#
# A child holding an Array counts as its length, any other present child as
# 1 and a missing child as 0.
#
# @param instance [Object] the context item
# @param target [String] the target expression
# @return [Integer] number of items at the target
def count_target_items(instance, target)
  if complex_target?(target)
    return MetapathEvaluator.new(instance).resolve_collection(target).compact.length
  end

  return 1 if target == "."
  return resolve_descendant_values(instance, target[3..]).length if target.start_with?(".//")

  child_name = target
  if target.start_with?(".[@") && target.include?("]/")
    # Conditional path such as ".[@type='quatrain']/line".
    filter_attr, filter_val, child_name = parse_conditional(target)
    return 0 unless resolve_flag_value(instance, filter_attr).to_s == filter_val
  elsif target.start_with?("@")
    return resolve_flag_value(instance, target[1..]).nil? ? 0 : 1
  elsif target.include?("/") || target.start_with?(".")
    return 0
  end

  # Plain child name: count what is actually stored on the instance instead
  # of assuming exactly one item.
  child = get_child(instance, child_name.tr("-", "_").to_sym)
  return 0 unless child

  child.is_a?(Array) ? child.length : 1
end
301
+
302
# Resolve the collection of items at a target path (used by uniqueness
# checks).
#
# "." yields the instance itself as a one-element list; complex expressions
# are delegated to MetapathEvaluator; anything else is treated as a child
# name. A child that is already an Array is returned as is, any other
# present child is wrapped in a list, and a missing child gives an empty
# list.
#
# @param instance [Object] the context item
# @param target [String] the target expression
# @return [Array, Object] the items, or whatever the evaluator returns for
#   a complex expression
def resolve_target_collection(instance, target)
  return [instance] if target == "."
  return MetapathEvaluator.new(instance).resolve_collection(target) if complex_target?(target)

  child = get_child(instance, target.gsub("-", "_").to_sym)
  case child
  when Array then child
  when nil, false then []
  else [child]
  end
end
318
+
319
# Reduce an item to the value that constraints should be checked against.
#
# Plain (non-serializable) items are returned unchanged. A serializable
# item that responds to +content+ yields its content, unless that content
# is reported as being the attribute's default; in that case, and when
# there is no +content+ at all, the item itself is returned.
#
# @param item [Object] a model object or a plain value
# @return [Object] the content, or +item+ itself
def extract_value(item)
  serializable = item.is_a?(Lutaml::Model::Serializable)

  if serializable && item.respond_to?(:content)
    content = item.content
    return content unless using_default?(item, :content)
  end

  item
end
330
+
331
# Read a flag from an instance.
#
# Non-serializable instances are returned unchanged. Otherwise the flag
# name is converted to its Ruby reader name (hyphens become underscores)
# and the reader is called when the instance responds to it.
#
# @param instance [Object] the item carrying the flag
# @param flag_name [String, Symbol] flag name, hyphenated or not
# @return [Object, nil] the flag value, nil when there is no such reader,
#   or +instance+ itself when it is not serializable
def resolve_flag_value(instance, flag_name)
  return instance unless instance.is_a?(Lutaml::Model::Serializable)

  reader = flag_name.to_s.gsub("-", "_").to_sym
  instance.respond_to?(reader) ? instance.send(reader) : nil
end
339
+
340
# Read the value of a named child field.
#
# Non-serializable instances are returned unchanged. Otherwise the child
# name is converted to its Ruby reader name (hyphens become underscores);
# a present child is reduced with #extract_value, a missing one gives nil.
#
# @param instance [Object] the parent item
# @param child_name [String, Symbol] child field name, hyphenated or not
# @return [Object, nil] the child's value, nil when absent, or +instance+
#   itself when it is not serializable
def resolve_child_value(instance, child_name)
  return instance unless instance.is_a?(Lutaml::Model::Serializable)

  child = get_child(instance, child_name.to_s.gsub("-", "_").to_sym)
  extract_value(child) if child
end
349
+
350
# Resolve a "/"-separated path below an instance to the values at its end.
#
# Simplified: the path is split into its segments and followed segment by
# segment via #collect_descendants.
#
# @param instance [Object] the context item
# @param path [String] path such as "stanza/line"
# @return [Array] the values found
def resolve_descendant_values(instance, path)
  collect_descendants(instance, path.split("/"))
end
355
+
356
# Follow a list of path segments below an instance and collect the values
# at the end of the path.
#
# Each segment names a child (hyphens become underscores). A child holding
# an Array fans out over its elements. At the last segment every item is
# reduced with #extract_value. An empty segment list, a non-serializable
# instance or a missing child all give an empty result.
#
# @param instance [Object] the current item
# @param parts [Array<String>] remaining path segments
# @return [Array] the values found
def collect_descendants(instance, parts)
  # An empty path (e.g. from the target ".//") addresses nothing.
  return [] if parts.empty?
  return [] unless instance.is_a?(Lutaml::Model::Serializable)

  head, *tail = parts
  child = get_child(instance, head.gsub("-", "_").to_sym)
  return [] unless child

  items = child.is_a?(Array) ? child : [child]
  return items.map { |item| extract_value(item) } if tail.empty?

  items.flat_map { |item| collect_descendants(item, tail) }
end
373
+
374
# Resolve a conditional target of the form ".[@flag='value']/rest".
#
# The part after the condition is resolved only when the flag's string
# form equals the expected value; otherwise the result is empty.
#
# @param instance [Object] the context item
# @param target [String] the conditional target expression
# @return [Array] the resolved values, or [] when the condition fails
def resolve_conditional_path(instance, target)
  flag_name, expected, remainder = parse_conditional(target)
  actual = resolve_flag_value(instance, flag_name)

  actual.to_s == expected ? resolve_target_values(instance, remainder) : []
end
382
+
383
# Split a conditional target of the form ".[@attr='value']/rest".
#
# The attribute name may contain hyphen-separated words; it is returned
# with hyphens replaced by underscores. The value and the remaining path
# are returned as written.
#
# @param target [String] the target expression
# @return [Array] [attribute name, expected value, remaining path], or
#   [nil, nil, target] when the target is not of that form
def parse_conditional(target)
  match = target.match(%r{\.\[@(\w+(?:-\w+)*)='([^']+)'\]/(.+)})
  return [nil, nil, target] unless match

  [match[1].gsub("-", "_"), match[2], match[3]]
end
393
+
394
# Call a reader on an instance if it has one.
#
# @param instance [Object] the object to read from
# @param sym [Symbol] reader name
# @return [Object, nil] the reader's result, or nil when the instance does
#   not respond to +sym+
def get_child(instance, sym)
  instance.respond_to?(sym) ? instance.send(sym) : nil
end
399
+
400
# Ask an instance whether one of its attributes still holds its default.
#
# Instances that do not respond to +using_default?+ are treated as not
# using a default, and so is a NoMethodError raised while asking.
#
# @param instance [Object] the object to ask
# @param attr_name [Symbol] attribute name
# @return [Object] the instance's answer, or false when it cannot be asked
def using_default?(instance, attr_name)
  return false unless instance.respond_to?(:using_default?)

  instance.using_default?(attr_name)
rescue NoMethodError
  false
end
405
+
406
# Check a value's string form against a named datatype.
#
# Recognised names and what they accept:
#   string            anything
#   integer, int      optional minus sign followed by digits
#   positive-integer  digits without a leading zero
#   boolean           "true", "false", "1" or "0"
#   date              YYYY-MM-DD with an optional "Z" or +hh:mm/-hh:mm offset
#   datetime          anything starting with "YYYY-MM-DDT"
#   uri               anything starting with a scheme and ":"
#   uuid              five hexadecimal groups of 8-4-4-4-12 digits
# Unknown datatype names are accepted, so values of unsupported types never
# produce errors.
#
# @param value [Object] the value to check (its +to_s+ is inspected)
# @param datatype [String] datatype name
# @return [Boolean]
def datatype_matches?(value, datatype)
  text = value.to_s

  case datatype
  when "string" then true
  when "integer", "int" then text.match?(/\A-?\d+\z/)
  when "positive-integer" then text.match?(/\A[1-9]\d*\z/)
  when "boolean" then %w[true false 1 0].include?(text)
  # A date may carry a timezone suffix.
  when "date" then text.match?(/\A\d{4}-\d{2}-\d{2}(?:Z|[+-]\d{2}:\d{2})?\z/)
  when "datetime" then text.match?(/\A\d{4}-\d{2}-\d{2}T/)
  when "uri" then text.match?(/\A[a-zA-Z][a-zA-Z0-9+\-.]*:/)
  # Validate the whole value, not just its first two groups.
  when "uuid" then text.match?(/\A\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/)
  else true # Unknown datatype, pass by default
  end
end
419
+
420
# Check min/max occurrence limits on an instance's attributes.
#
# +occurrence_constraints+ maps attribute names to a Hash with +:min+ and
# +:max+ entries. An attribute counts as 0 items when its value is nil (or
# the instance has no such reader), as the Array length for arrays and as
# 1 otherwise. A minimum is enforced only when it is positive; a maximum
# only when it is set. All violations are reported at level "ERROR".
#
# @param instance [Object] the instance to inspect
# @param occurrence_constraints [Hash, nil] {attr_name => {min: N, max: N}}
# @return [Array<ConstraintError>] one error per violated limit
def self.validate_occurrences(instance, occurrence_constraints)
  return [] if !occurrence_constraints || occurrence_constraints.empty?

  occurrence_constraints.each_with_object([]) do |(attr_name, limits), found|
    value = instance.respond_to?(attr_name) ? instance.send(attr_name) : nil
    count =
      if value.nil?
        0
      elsif value.is_a?(Array)
        value.length
      else
        1
      end

    lower = limits[:min]
    upper = limits[:max]

    if lower&.positive? && count < lower
      found << ConstraintError.new(
        constraint_type: :occurrence,
        level: "ERROR",
        message: "Expected at least #{lower} '#{attr_name}', got #{count}",
        target: attr_name.to_s,
      )
    end

    next unless upper && count > upper

    found << ConstraintError.new(
      constraint_type: :occurrence,
      level: "ERROR",
      message: "Expected at most #{upper} '#{attr_name}', got #{count}",
      target: attr_name.to_s,
    )
  end
end
458
+
459
# Value holder describing one constraint violation: which kind of
# constraint failed, at what level, with what message and for which target.
class ConstraintError
  attr_reader :constraint_type, :level, :message, :target

  # @param constraint_type [Symbol] kind of constraint, e.g. :matches
  # @param level [String] severity such as "ERROR" or "WARNING"
  # @param message [String] human-readable description
  # @param target [String] target expression the constraint applied to
  def initialize(constraint_type:, level:, message:, target:)
    @constraint_type, @level, @message, @target = constraint_type, level, message, target
  end

  # @return [String] "[LEVEL] type: message (target: target)"
  def to_s
    format("[%s] %s: %s (target: %s)", level, constraint_type, message, target)
  end

  # @return [Boolean] true when the level is exactly "ERROR"
  def error?
    level == "ERROR"
  end

  # @return [Boolean] true when the level is exactly "WARNING"
  def warning?
    level == "WARNING"
  end
end
482
+ end
483
+ end
@@ -9,7 +9,7 @@ module Metaschema
9
9
  end
10
10
 
11
11
  class InlineMarkupType < Lutaml::Model::Serializable
12
- attribute :content, :string
12
+ attribute :content, :string, collection: true
13
13
  attribute :a, AnchorType, collection: true
14
14
  attribute :insert, InsertType, collection: true
15
15
  attribute :br, :string, collection: true