edoxen 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,297 +2,209 @@
2
2
 
3
3
  require "json_schemer"
4
4
  require "yaml"
5
- require "json"
6
- require "psych"
7
5
 
8
6
  module Edoxen
7
+ # Validates Edoxen YAML files against `schema/edoxen.yaml`.
8
+ #
9
+ # The SchemaValidator is intentionally small and only owns one concern:
10
+ # JSON-Schema validation with line-accurate error reporting. It does not
11
+ # touch the Ruby model — CLI layers SchemaValidator and
12
+ # `ResolutionCollection.from_yaml` to catch both schema-level violations
13
+ # (additionalProperties, required, enum) and structural issues
14
+ # (numeric/date coercion, missing nested classes) the schema can't express.
15
+ #
16
+ # Line tracking is built from indentation heuristics over the source text.
17
+ # That's a deliberate trade-off: a 30-line heuristic versus introducing a
18
+ # full line-tracking YAML parser dependency, in exchange for error reports
19
+ # that are accurate on real-world 2-space-indented fixture files (which is
20
+ # all we use). The lookup is OCP-compliant (longest-prefix match — no
21
+ # hard-coded path shapes) so adding new collection fields never requires
22
+ # touching this class.
9
23
  class SchemaValidator
10
24
  class ValidationError < StandardError
11
- attr_reader :file, :line, :column, :message, :data_pointer
25
+ attr_reader :file, :line, :column, :pointer, :message_text
12
26
 
13
- def initialize(file, line, column, message, data_pointer = nil)
27
+ def initialize(file:, line:, column:, pointer:, message_text:)
14
28
  @file = file
15
29
  @line = line
16
30
  @column = column
17
- @message = message
18
- @data_pointer = data_pointer
19
- super("#{file}:#{line}:#{column}: #{message}")
31
+ @pointer = pointer
32
+ @message_text = message_text
33
+ super(format_line(file, line, column, message_text, pointer))
20
34
  end
21
35
 
22
36
  def to_clickable_format
23
- "#{file}:#{line}:#{column}: #{message}"
24
- end
25
- end
26
-
27
- # Custom YAML handler to track line numbers
28
- class LineTrackingHandler < Psych::Handler
29
- attr_reader :line_map, :path_stack, :array_indices
30
-
31
- def initialize
32
- @line_map = {}
33
- @path_stack = []
34
- @array_indices = {}
35
- @current_path = ""
36
- end
37
-
38
- def start_document(version, tag_directives, implicit)
39
- # Document start
40
- end
41
-
42
- def start_mapping(anchor, tag, implicit, style)
43
- # Starting a new mapping/object
44
- end
45
-
46
- def start_sequence(_anchor, _tag, _implicit, _style)
47
- # Starting a new sequence/array
48
- parent_path = "/#{@path_stack.join("/")}"
49
- parent_path = "" if parent_path == "/"
50
- @array_indices[parent_path] = -1
51
- end
52
-
53
- def scalar(value, anchor, tag, plain, quoted, style)
54
- # This is called for each scalar value
55
- # We need to track this in context of the current path
56
- end
57
-
58
- def alias(anchor)
59
- # Handle YAML aliases
60
- end
61
-
62
- def end_sequence
63
- # End of sequence/array
37
+ format_line(@file, @line, @column, @message_text, @pointer)
64
38
  end
65
39
 
66
- def end_mapping
67
- # End of mapping/object
68
- end
40
+ private
69
41
 
70
- def end_document(implicit)
71
- # Document end
42
+ def format_line(file, line, column, message_text, pointer)
43
+ suffix = pointer.to_s.empty? ? "" : " at `#{pointer}`"
44
+ "#{file}:#{line}:#{column}: #{message_text}#{suffix}"
72
45
  end
73
46
  end
74
47
 
75
- def initialize(schema_path = nil)
76
- @schema_path = schema_path || File.join(__dir__, "..", "..", "schema", "edoxen.yaml")
77
- @schemer = nil
78
- load_schema
48
+ def initialize(schema_path = default_schema_path)
49
+ @schema_path = schema_path
50
+ @schemer = load_schemer(schema_path)
79
51
  end
80
52
 
53
+ # Validate a YAML file. Returns an array of ValidationError (empty = ok).
81
54
  def validate_file(file_path)
82
- return [ValidationError.new(file_path, 1, 1, "File not found")] unless File.exist?(file_path)
83
-
84
- content = File.read(file_path)
85
- validate_content(content, file_path)
55
+ validate_content(File.read(file_path), file_path)
56
+ rescue Errno::ENOENT
57
+ [ValidationError.new(
58
+ file: file_path, line: 1, column: 1,
59
+ pointer: "", message_text: "File not found"
60
+ )]
86
61
  end
87
62
 
63
+ # Validate a YAML string. Returns an array of ValidationError.
88
64
  def validate_content(content, file_path)
89
- errors = []
90
-
91
- begin
92
- # Parse YAML and build line map
93
- yaml_data, line_map = parse_yaml_with_line_tracking(content)
94
-
95
- # Validate against schema
96
- if @schemer
97
- schema_errors = @schemer.validate(yaml_data).to_a
98
- errors.concat(convert_schema_errors(schema_errors, file_path, line_map))
99
- else
100
- errors << ValidationError.new(file_path, 1, 1, "Schema not available for validation")
101
- end
102
- rescue Psych::SyntaxError => e
103
- line = e.line || 1
104
- column = e.column || 1
105
- errors << ValidationError.new(file_path, line, column, "YAML syntax error: #{e.problem}")
106
- rescue StandardError => e
107
- errors << ValidationError.new(file_path, 1, 1, "Validation error: #{e.message}")
108
- end
109
-
110
- errors
65
+ data = normalize_dates(YAML.safe_load(content, permitted_classes: [Date, Time]))
66
+ line_map = LineMap.build(content)
67
+
68
+ @schemer.validate(data).to_a.map do |err|
69
+ pointer = err.fetch("data_pointer", "")
70
+ message = format_message(err)
71
+ line, column = LineMap.locate(pointer, line_map)
72
+ ValidationError.new(
73
+ file: file_path, line: line, column: column,
74
+ pointer: pointer, message_text: message
75
+ )
76
+ end
77
+ rescue Psych::SyntaxError => e
78
+ [ValidationError.new(
79
+ file: file_path, line: e.line || 1, column: e.column || 1,
80
+ pointer: "", message_text: "YAML syntax error: #{e.problem}"
81
+ )]
111
82
  end
112
83
 
113
84
  private
114
85
 
115
- def load_schema
116
- return unless File.exist?(@schema_path)
117
-
118
- begin
119
- schema_content = File.read(@schema_path)
120
- schema_data = YAML.safe_load(schema_content)
121
- @schemer = JSONSchemer.schema(schema_data)
122
- rescue StandardError => e
123
- warn "Warning: Could not load schema from #{@schema_path}: #{e.message}"
124
- @schemer = nil
125
- end
86
+ def default_schema_path
87
+ File.expand_path("../../schema/edoxen.yaml", __dir__)
126
88
  end
127
89
 
128
- def parse_yaml_with_line_tracking(content)
129
- # Parse YAML normally
130
- yaml_data = YAML.safe_load(content)
131
-
132
- # Build line map by parsing the content line by line
133
- line_map = build_line_map(content)
134
-
135
- [yaml_data, line_map]
90
+ def load_schemer(path)
91
+ JSONSchemer.schema(YAML.safe_load(File.read(path), permitted_classes: [Date, Time]))
136
92
  end
137
93
 
138
- def build_line_map(content)
139
- line_map = {}
140
- lines = content.split("\n")
141
- path_stack = []
142
- array_indices = {}
143
-
144
- lines.each_with_index do |line, index|
145
- line_number = index + 1
146
- stripped = line.strip
147
-
148
- # Skip empty lines and comments
149
- next if stripped.empty? || stripped.start_with?("#")
150
-
151
- # Calculate indentation
152
- indent = line.length - line.lstrip.length
153
- level = indent / 2 # Assuming 2-space indentation
154
-
155
- # Adjust path stack based on indentation level
156
- path_stack = path_stack[0, level] if level < path_stack.length
157
-
158
- if stripped.start_with?("- ")
159
- # Array item
160
- parent_path = path_stack.empty? ? "" : "/#{path_stack.join("/")}"
161
- array_indices[parent_path] ||= -1
162
- array_indices[parent_path] += 1
163
-
164
- array_index = array_indices[parent_path]
165
- current_path = "#{parent_path}/#{array_index}"
166
- line_map[current_path] = line_number
167
-
168
- # Check if array item has a key
169
- item_content = stripped[2..].strip
170
- if item_content.include?(":")
171
- key = item_content.split(":").first.strip.gsub(/["']/, "")
172
- if key.empty?
173
- path_stack = path_stack[0, level] + [array_index.to_s]
174
- else
175
- key_path = "#{current_path}/#{key}"
176
- line_map[key_path] = line_number
177
- path_stack = path_stack[0, level] + [array_index.to_s, key]
178
- end
179
- else
180
- path_stack = path_stack[0, level] + [array_index.to_s]
181
- end
182
- elsif stripped.include?(":")
183
- # Regular key-value pair
184
- key = stripped.split(":").first.strip.gsub(/["']/, "")
185
- next if key.empty?
186
-
187
- path_stack = path_stack[0, level] + [key]
188
- current_path = "/#{path_stack.join("/")}"
189
- line_map[current_path] = line_number
190
-
191
- # Reset array indices for this path and deeper
192
- array_indices.each_key do |path|
193
- array_indices.delete(path) if path.start_with?(current_path)
194
- end
195
- end
94
+ # Coerce Date/Time instances back to ISO strings before handing the
95
+ # data to json_schemer — the schema declares them as `type: string,
96
+ # format: date` because that's also the wire form. Walking the hash
97
+ # here keeps the gem OCP-compliant (no json_schemer plugin/tweak).
98
+ def normalize_dates(value)
99
+ case value
100
+ when Date then value.iso8601
101
+ when Time then value.iso8601
102
+ when Hash then value.transform_values { |v| normalize_dates(v) }
103
+ when Array then value.map { |v| normalize_dates(v) }
104
+ else value
196
105
  end
197
-
198
- line_map
199
106
  end
200
107
 
201
- def convert_schema_errors(schema_errors, file_path, line_map)
202
- schema_errors.map do |error|
203
- data_pointer = error["data_pointer"] || ""
204
- line = find_line_for_pointer(data_pointer, line_map)
205
- column = 1
206
-
207
- message = build_error_message(error)
208
-
209
- ValidationError.new(file_path, line, column, message, data_pointer)
108
+ def format_message(err)
109
+ type = err["type"]
110
+ details = err["details"] || {}
111
+ pointer = err["data_pointer"].to_s
112
+ case type
113
+ when "required"
114
+ missing = Array(details["missing_keys"])
115
+ if missing.size == 1
116
+ "object is missing required property: #{missing.first}"
117
+ else
118
+ "object is missing required properties: #{missing.join(", ")}"
119
+ end
120
+ when "additionalProperties"
121
+ extra = Array(details["extra_keys"])
122
+ case extra.size
123
+ when 0 then "object has disallowed additional properties"
124
+ when 1 then "object property '#{extra.first}' is a disallowed additional property"
125
+ else "object properties #{extra.map { |k| "'#{k}'" }.join(", ")} are disallowed additional properties"
126
+ end
127
+ when "enum"
128
+ vals = Array(details["valid_values"])
129
+ enums = vals.empty? ? "(see schema)" : vals.join(", ")
130
+ "value #{details["value"].inspect} is not one of: #{enums}"
131
+ when "type"
132
+ expected = Array(details["expected_types"])
133
+ "value is not #{expected.empty? ? "the expected type" : expected.join(" or ")}"
134
+ when "pattern"
135
+ "value #{details["value"].inspect} does not match pattern #{details["pattern"].inspect}"
136
+ when "minItems"
137
+ "array has fewer than #{details["minimum"]} items"
138
+ when "maxItems"
139
+ "array has more than #{details["maximum"]} items"
140
+ else
141
+ err["error"] || "validation failed"
210
142
  end
211
143
  end
212
144
 
213
- def find_line_for_pointer(pointer, line_map)
214
- return 1 if pointer.empty?
215
-
216
- # Try exact match first
217
- return line_map[pointer] if line_map[pointer]
218
-
219
- # Try progressively shorter paths
220
- parts = pointer.split("/").reject(&:empty?)
221
- (parts.length - 1).downto(0) do |i|
222
- partial_path = "/#{parts[0..i].join("/")}"
223
- return line_map[partial_path] if line_map[partial_path]
224
- end
225
-
226
- # For paths like /resolutions/0/actions/0/type, try to find the specific action
227
- if parts.length >= 4 && parts[0] == "resolutions" && parts[2] == "actions"
228
- # Try to find the specific action line
229
- resolution_index = parts[1]
230
- action_index = parts[3]
231
- parts[4] if parts.length > 4
232
-
233
- # Look for patterns like /resolutions/0/actions/0
234
- action_path = "/resolutions/#{resolution_index}/actions/#{action_index}"
235
- return line_map[action_path] if line_map[action_path]
236
-
237
- # Look for the actions array start
238
- actions_path = "/resolutions/#{resolution_index}/actions"
239
- return line_map[actions_path] if line_map[actions_path]
240
- end
241
-
242
- # Try to find the closest match by looking for the last non-numeric part
243
- if parts.any?
244
- parts.reverse.each do |part|
245
- next if part.match?(/^\d+$/) # Skip array indices
246
-
247
- line_map.each do |path, line|
248
- return line if path.end_with?("/#{part}")
145
+ # Builds a {path => line_no} map for a YAML source. The path is the
146
+ # JSON-Schema-style pointer: "/metadata", "/resolutions/0/title", etc.
147
+ # This is a tag-class that is allocated freely — no `instance_variable_set`
148
+ # ever crosses another object's boundary.
149
+ module LineMap
150
+ module_function
151
+
152
+ # @return [Hash{String => Integer}]
153
+ def build(content)
154
+ map = {}
155
+ stack = []
156
+ array_counter = Hash.new(-1)
157
+ array_path_for = {}
158
+
159
+ content.each_line.with_index(1) do |raw, line_no|
160
+ line = raw.chomp
161
+ stripped = line.strip
162
+ next if stripped.empty? || stripped.start_with?("#") || stripped == "---"
163
+
164
+ indent = line.index(/\S/) || 0
165
+ level = indent / 2
166
+ stack = stack.first(level)
167
+
168
+ if stripped.start_with?("- ")
169
+ parent = stack.empty? ? "" : "/#{stack.join("/")}"
170
+ parent_key = parent.empty? ? parent : parent.dup
171
+ array_counter[parent_key] = -1 unless array_counter.key?(parent_key)
172
+ array_counter[parent_key] += 1
173
+ array_index = array_counter[parent_key]
174
+ array_path = "#{parent}/#{array_index}"
175
+ map[array_path] = line_no
176
+
177
+ remainder = stripped[2..].to_s.strip
178
+ if remainder.match(/\A(.+?):(\s|$)/)
179
+ key = Regexp.last_match(1).strip.gsub(/["']/, "")
180
+ map["#{array_path}/#{key}"] = line_no
181
+ stack = stack.first(level) + [array_index.to_s, key]
182
+ else
183
+ stack = stack.first(level) + [array_index.to_s]
184
+ end
185
+ elsif (md = stripped.match(/\A(.+?):(\s|$)/))
186
+ key = md[1].strip.gsub(/["']/, "")
187
+ stack = stack.first(level) + [key]
188
+ full = "/#{stack.join("/")}"
189
+ map[full] = line_no
190
+ array_counter.delete_if { |p, _| p.start_with?(full) }
249
191
  end
250
192
  end
193
+ map
251
194
  end
252
195
 
253
- # Default to line 1
254
- 1
255
- end
256
-
257
- def build_error_message(error)
258
- type = error["type"]
259
- details = error["details"] || {}
260
- data_pointer = error["data_pointer"] || ""
196
+ # @return [Array(Integer, Integer)] [line, column] for the given
197
+ # JSON-Schema data pointer. Picks the longest line-map path that either prefixes the pointer or extends it
198
+ # — pure longest-match, no knowledge of specific path shapes.
199
+ def locate(pointer, line_map)
200
+ pointer = pointer.to_s
201
+ return [1, 1] if pointer.empty?
261
202
 
262
- base_message = case type
263
- when "required"
264
- missing = details["missing_keys"] || []
265
- if missing.length == 1
266
- "object is missing required property: #{missing.first}"
267
- else
268
- "object is missing required properties: #{missing.join(", ")}"
269
- end
270
- when "additionalProperties"
271
- extra = details["extra_keys"] || []
272
- if extra.length == 1
273
- "object property '#{extra.first}' is a disallowed additional property"
274
- else
275
- "object properties #{extra.map do |k|
276
- "'#{k}'"
277
- end.join(", ")} are disallowed additional properties"
278
- end
279
- when "enum"
280
- value = details["value"]
281
- valid_values = details["valid_values"] || []
282
- "value '#{value}' is not one of: #{valid_values}"
283
- when "type"
284
- details["actual_type"]
285
- expected = details["expected_types"] || []
286
- "value is not #{expected.join(" or ")}"
287
- else
288
- error["error"] || "validation failed"
289
- end
203
+ match_key = line_map.keys
204
+ .select { |path| pointer.start_with?(path) || path.start_with?(pointer) }
205
+ .max_by(&:length)
290
206
 
291
- # Add data pointer for debugging if it's not empty
292
- if !data_pointer.empty?
293
- "#{base_message} at `#{data_pointer}`"
294
- else
295
- base_message
207
+ match_key ? [line_map[match_key], 1] : [1, 1]
296
208
  end
297
209
  end
298
210
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Edoxen
4
+ # Per-language canonical source URL (e.g. one PDF per language).
5
+ # Carries the URL ref, its format, and the ISO 639-3 language_code the URL
6
+ # is the canonical source for.
7
+ class SourceUrl < Lutaml::Model::Serializable
8
+ attribute :ref, :string
9
+ attribute :format, :string
10
+ attribute :language_code, :string
11
+
12
+ key_value do
13
+ map "ref", to: :ref
14
+ map "format", to: :format
15
+ map "language_code", to: :language_code
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Edoxen
4
+ # A structured resolution identifier, e.g. prefix "ISO" + number "2019-01".
5
+ # A Resolution carries 1..* StructuredIdentifier so a single resolution can
6
+ # hold its TC number, its SC number, and any cross-cutting reference number
7
+ # without forcing callers to flatten them into one opaque string.
8
+ class StructuredIdentifier < Lutaml::Model::Serializable
9
+ attribute :prefix, :string
10
+ attribute :number, :string
11
+
12
+ key_value do
13
+ map "prefix", to: :prefix
14
+ map "number", to: :number
15
+ end
16
+ end
17
+ end
data/lib/edoxen/url.rb CHANGED
@@ -1,10 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "lutaml/model"
4
-
5
3
  module Edoxen
4
+ # URL with a kind (access / report) and an optional format hint (pdf, html).
6
5
  class Url < Lutaml::Model::Serializable
7
- attribute :kind, :string, values: %w[access report]
6
+ attribute :kind, :string, values: Enums::URL_KIND
8
7
  attribute :ref, :string
9
8
  attribute :format, :string
10
9
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Edoxen
4
- VERSION = "0.1.2"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/edoxen.rb CHANGED
@@ -1,23 +1,45 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "lutaml/model"
4
- require_relative "edoxen/version"
5
4
 
6
- # Configure lutaml-model for YAML serialization
7
- Lutaml::Model::Config.configure do |config|
8
- config.yaml_adapter_type = :standard_yaml
9
- config.json_adapter_type = :standard_json
5
+ # Configure the lutaml-model serialization framework used throughout the
6
+ # Edoxen information-model gem.
7
+ Lutaml::Model::Config.configure do |c|
8
+ c.yaml_adapter_type = :standard_yaml
9
+ c.json_adapter_type = :standard_json
10
10
  end
11
11
 
12
12
  module Edoxen
13
- class Error < StandardError; end
13
+ # Autoload every constant defined under the Edoxen namespace from its
14
+ # native `lib/edoxen/<name>.rb` file. This is the only place where file
15
+ # paths are tied to constants; everywhere else, models reference each
16
+ # other by class name (resolved lazily by Ruby).
17
+ #
18
+ # There are intentionally NO `require_relative` calls in this gem —
19
+ # autoload keeps load-order semantics clean and lets us tolerate the
20
+ # extensive cross-references between model classes
21
+ # (Resolution <-> Localization, ResolutionMetadata <-> Localization, etc.).
22
+ autoload :VERSION, "edoxen/version"
23
+ autoload :Error, "edoxen/error"
24
+ autoload :Enums, "edoxen/enums"
14
25
 
15
- # Load all model classes
16
- require_relative "edoxen/action"
17
- require_relative "edoxen/approval"
18
- require_relative "edoxen/consideration"
19
- require_relative "edoxen/metadata"
20
- require_relative "edoxen/resolution"
21
- require_relative "edoxen/resolution_set"
22
- require_relative "edoxen/cli"
26
+ # Information-model classes (one per file, one concept per class).
27
+ # Names mirror ../edoxen-model/models/*.lutaml.
28
+ autoload :StructuredIdentifier, "edoxen/structured_identifier"
29
+ autoload :MeetingIdentifier, "edoxen/meeting_identifier"
30
+ autoload :ResolutionDate, "edoxen/resolution_date"
31
+ autoload :Action, "edoxen/action"
32
+ autoload :Approval, "edoxen/approval"
33
+ autoload :Consideration, "edoxen/consideration"
34
+ autoload :SourceUrl, "edoxen/source_url"
35
+ autoload :Localization, "edoxen/localization"
36
+ autoload :Url, "edoxen/url"
37
+ autoload :ResolutionRelation, "edoxen/resolution_relation"
38
+ autoload :Resolution, "edoxen/resolution"
39
+ autoload :ResolutionMetadata, "edoxen/resolution_metadata"
40
+ autoload :ResolutionCollection, "edoxen/resolution_collection"
41
+
42
+ # Services.
43
+ autoload :SchemaValidator, "edoxen/schema_validator"
44
+ autoload :Cli, "edoxen/cli"
23
45
  end