edoxen 0.1.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Edoxen
4
+ # Top-level container for a published resolution collection: metadata
5
+ # plus the list of resolutions.
6
+ class ResolutionCollection < Lutaml::Model::Serializable
7
+ attribute :metadata, ResolutionMetadata
8
+ attribute :resolutions, Resolution, collection: true
9
+ end
10
+ end
@@ -1,17 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "lutaml/model"
4
-
5
3
  module Edoxen
4
+ # Date with semantic kind (adoption, drafted, discussed).
5
+ # ResolutionDate is the only carrier of a *typed* date in the model —
6
+ # plain `Date` in lutaml-model has no semantic context.
6
7
  class ResolutionDate < Lutaml::Model::Serializable
7
- attribute :start, :date
8
- attribute :end, :date
9
- attribute :kind, :string, values: %w[ballot enactment effective decision meeting]
10
-
11
- key_value do
12
- map "start", to: :start
13
- map "end", to: :end
14
- map "kind", to: :kind
15
- end
8
+ attribute :date, :date
9
+ attribute :type, :string, values: Enums::RESOLUTION_DATE_TYPE
16
10
  end
17
11
  end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Edoxen
4
+ # Collection-level metadata: the title (string for default / single-language
5
+ # collections, or `title_localized[]` for multilingual), the meeting date,
6
+ # the source secretariat, per-language source PDFs, and the host venue.
7
+ class ResolutionMetadata < Lutaml::Model::Serializable
8
+ attribute :title, :string
9
+ attribute :title_localized, Localization, collection: true
10
+ attribute :date, :date
11
+ attribute :source, :string
12
+ attribute :source_urls, SourceUrl, collection: true
13
+ attribute :city, :string
14
+ attribute :country_code, :string
15
+ end
16
+ end
@@ -1,51 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # class ResolutionRelation {
4
- # source: StructuredIdentifier (Resolution)
5
- # destination: StructuredIdentifier (Resolution)
6
- # type: ResolutionRelationType
7
- # }
8
-
9
- # enum ResolutionRelationType {
10
- # annexOf {
11
- # This resolution is an annex to the target resolution.
12
- # }
13
-
14
- # hasAnnex {
15
- # The target resolution is an annex of the source resolution.
16
- # }
17
-
18
- # updates {
19
- # This resolution updates the target resolution.
20
- # }
21
-
22
- # refines {
23
- # This resolution refines the target resolution.
24
- # }
25
-
26
- # replaces/obsoletes {
27
- # This resolution replaces/obsoletes the target resolution.
28
- # }
29
-
30
- # considers {
31
- # This resolution is made in consideration of the target resolution.
32
- # }
33
- # }
34
-
35
- require "lutaml/model"
36
-
37
3
  module Edoxen
4
+ # Directed relation between two resolutions, identified by their
5
+ # StructuredIdentifier (prefix + number).
38
6
  class ResolutionRelation < Lutaml::Model::Serializable
39
- RESOLUTION_RELATIONSHIP_ENUM = %w[annexOf hasAnnex updates refines replaces obsoletes considers].freeze
40
-
41
- attribute :source, :string
42
- attribute :destination, :string
43
- attribute :type, :string, values: RESOLUTION_RELATIONSHIP_ENUM
44
-
45
- key_value do
46
- map "source", to: :source
47
- map "destination", to: :destination
48
- map "type", to: :type
49
- end
7
+ attribute :source, StructuredIdentifier
8
+ attribute :destination, StructuredIdentifier
9
+ attribute :type, :string, values: Enums::RESOLUTION_RELATION_TYPE
50
10
  end
51
11
  end
@@ -2,297 +2,193 @@
2
2
 
3
3
  require "json_schemer"
4
4
  require "yaml"
5
- require "json"
6
- require "psych"
7
5
 
8
6
  module Edoxen
7
+ # Validates Edoxen YAML files against `schema/edoxen.yaml`.
8
+ #
9
+ # The SchemaValidator is intentionally small and only owns one concern:
10
+ # JSON-Schema validation with line-accurate error reporting. It does not
11
+ # touch the Ruby model — CLI layers SchemaValidator and
12
+ # `ResolutionCollection.from_yaml` to catch both schema-level violations
13
+ # (additionalProperties, required, enum) and structural issues
14
+ # (numeric/date coercion, missing nested classes) the schema can't express.
15
+ #
16
+ # Line tracking is built from indentation heuristics over the source text.
17
+ # That's a deliberate trade-off: a 30-line heuristic versus introducing a
18
+ # full line-tracking YAML parser dependency, in exchange for error reports
19
+ # that are accurate on real-world 2-space-indented fixture files (which is
20
+ # all we use). The lookup is OCP-compliant (longest-prefix match — no
21
+ # hard-coded path shapes) so adding new collection fields never requires
22
+ # touching this class.
9
23
  class SchemaValidator
10
- class ValidationError < StandardError
11
- attr_reader :file, :line, :column, :message, :data_pointer
12
-
13
- def initialize(file, line, column, message, data_pointer = nil)
14
- @file = file
15
- @line = line
16
- @column = column
17
- @message = message
18
- @data_pointer = data_pointer
19
- super("#{file}:#{line}:#{column}: #{message}")
20
- end
21
-
22
- def to_clickable_format
23
- "#{file}:#{line}:#{column}: #{message}"
24
- end
25
- end
26
-
27
- # Custom YAML handler to track line numbers
28
- class LineTrackingHandler < Psych::Handler
29
- attr_reader :line_map, :path_stack, :array_indices
30
-
31
- def initialize
32
- @line_map = {}
33
- @path_stack = []
34
- @array_indices = {}
35
- @current_path = ""
36
- end
37
-
38
- def start_document(version, tag_directives, implicit)
39
- # Document start
40
- end
41
-
42
- def start_mapping(anchor, tag, implicit, style)
43
- # Starting a new mapping/object
44
- end
45
-
46
- def start_sequence(_anchor, _tag, _implicit, _style)
47
- # Starting a new sequence/array
48
- parent_path = "/#{@path_stack.join("/")}"
49
- parent_path = "" if parent_path == "/"
50
- @array_indices[parent_path] = -1
51
- end
52
-
53
- def scalar(value, anchor, tag, plain, quoted, style)
54
- # This is called for each scalar value
55
- # We need to track this in context of the current path
56
- end
57
-
58
- def alias(anchor)
59
- # Handle YAML aliases
60
- end
61
-
62
- def end_sequence
63
- # End of sequence/array
64
- end
65
-
66
- def end_mapping
67
- # End of mapping/object
68
- end
69
-
70
- def end_document(implicit)
71
- # Document end
72
- end
73
- end
74
-
75
- def initialize(schema_path = nil)
76
- @schema_path = schema_path || File.join(__dir__, "..", "..", "schema", "edoxen.yaml")
77
- @schemer = nil
78
- load_schema
24
+ # Back-compat alias. The canonical type is Edoxen::ValidationError;
25
+ # this constant lets existing callers keep writing
26
+ # `SchemaValidator::ValidationError` after the unification.
27
+ ValidationError = Edoxen::ValidationError
28
+
29
+ def initialize(schema_path = default_schema_path)
30
+ @schema_path = schema_path
31
+ @schemer = load_schemer(schema_path)
79
32
  end
80
33
 
34
+ # Validate a YAML file. Returns an array of Edoxen::ValidationError
35
+ # (empty = ok).
81
36
  def validate_file(file_path)
82
- return [ValidationError.new(file_path, 1, 1, "File not found")] unless File.exist?(file_path)
83
-
84
- content = File.read(file_path)
85
- validate_content(content, file_path)
37
+ validate_content(File.read(file_path), file_path)
38
+ rescue Errno::ENOENT
39
+ [ValidationError.new(
40
+ file: file_path, line: 1, column: 1,
41
+ message_text: "File not found", source: Edoxen::ValidationError::SOURCE_SCHEMA
42
+ )]
86
43
  end
87
44
 
45
+ # Validate a YAML string. Returns an array of Edoxen::ValidationError.
88
46
  def validate_content(content, file_path)
89
- errors = []
90
-
91
- begin
92
- # Parse YAML and build line map
93
- yaml_data, line_map = parse_yaml_with_line_tracking(content)
94
-
95
- # Validate against schema
96
- if @schemer
97
- schema_errors = @schemer.validate(yaml_data).to_a
98
- errors.concat(convert_schema_errors(schema_errors, file_path, line_map))
99
- else
100
- errors << ValidationError.new(file_path, 1, 1, "Schema not available for validation")
101
- end
102
- rescue Psych::SyntaxError => e
103
- line = e.line || 1
104
- column = e.column || 1
105
- errors << ValidationError.new(file_path, line, column, "YAML syntax error: #{e.problem}")
106
- rescue StandardError => e
107
- errors << ValidationError.new(file_path, 1, 1, "Validation error: #{e.message}")
108
- end
109
-
110
- errors
47
+ data = normalize_dates(YAML.safe_load(content, permitted_classes: [Date, Time]))
48
+ line_map = LineMap.build(content)
49
+
50
+ @schemer.validate(data).to_a.map do |err|
51
+ pointer = err.fetch("data_pointer", "")
52
+ message = format_message(err)
53
+ line, column = LineMap.locate(pointer, line_map)
54
+ ValidationError.new(
55
+ file: file_path, line: line, column: column,
56
+ message_text: message, pointer: pointer,
57
+ source: Edoxen::ValidationError::SOURCE_SCHEMA
58
+ )
59
+ end
60
+ rescue Psych::SyntaxError => e
61
+ [ValidationError.new(
62
+ file: file_path, line: e.line || 1, column: e.column || 1,
63
+ message_text: "YAML syntax error: #{e.problem}",
64
+ source: Edoxen::ValidationError::SOURCE_SYNTAX
65
+ )]
111
66
  end
112
67
 
113
68
  private
114
69
 
115
- def load_schema
116
- return unless File.exist?(@schema_path)
117
-
118
- begin
119
- schema_content = File.read(@schema_path)
120
- schema_data = YAML.safe_load(schema_content)
121
- @schemer = JSONSchemer.schema(schema_data)
122
- rescue StandardError => e
123
- warn "Warning: Could not load schema from #{@schema_path}: #{e.message}"
124
- @schemer = nil
125
- end
70
+ def default_schema_path
71
+ File.expand_path("../../schema/edoxen.yaml", __dir__)
126
72
  end
127
73
 
128
- def parse_yaml_with_line_tracking(content)
129
- # Parse YAML normally
130
- yaml_data = YAML.safe_load(content)
131
-
132
- # Build line map by parsing the content line by line
133
- line_map = build_line_map(content)
134
-
135
- [yaml_data, line_map]
74
+ def load_schemer(path)
75
+ JSONSchemer.schema(YAML.safe_load(File.read(path), permitted_classes: [Date, Time]))
136
76
  end
137
77
 
138
- def build_line_map(content)
139
- line_map = {}
140
- lines = content.split("\n")
141
- path_stack = []
142
- array_indices = {}
143
-
144
- lines.each_with_index do |line, index|
145
- line_number = index + 1
146
- stripped = line.strip
147
-
148
- # Skip empty lines and comments
149
- next if stripped.empty? || stripped.start_with?("#")
150
-
151
- # Calculate indentation
152
- indent = line.length - line.lstrip.length
153
- level = indent / 2 # Assuming 2-space indentation
154
-
155
- # Adjust path stack based on indentation level
156
- path_stack = path_stack[0, level] if level < path_stack.length
157
-
158
- if stripped.start_with?("- ")
159
- # Array item
160
- parent_path = path_stack.empty? ? "" : "/#{path_stack.join("/")}"
161
- array_indices[parent_path] ||= -1
162
- array_indices[parent_path] += 1
163
-
164
- array_index = array_indices[parent_path]
165
- current_path = "#{parent_path}/#{array_index}"
166
- line_map[current_path] = line_number
167
-
168
- # Check if array item has a key
169
- item_content = stripped[2..].strip
170
- if item_content.include?(":")
171
- key = item_content.split(":").first.strip.gsub(/["']/, "")
172
- if key.empty?
173
- path_stack = path_stack[0, level] + [array_index.to_s]
174
- else
175
- key_path = "#{current_path}/#{key}"
176
- line_map[key_path] = line_number
177
- path_stack = path_stack[0, level] + [array_index.to_s, key]
178
- end
179
- else
180
- path_stack = path_stack[0, level] + [array_index.to_s]
181
- end
182
- elsif stripped.include?(":")
183
- # Regular key-value pair
184
- key = stripped.split(":").first.strip.gsub(/["']/, "")
185
- next if key.empty?
186
-
187
- path_stack = path_stack[0, level] + [key]
188
- current_path = "/#{path_stack.join("/")}"
189
- line_map[current_path] = line_number
190
-
191
- # Reset array indices for this path and deeper
192
- array_indices.each_key do |path|
193
- array_indices.delete(path) if path.start_with?(current_path)
194
- end
195
- end
78
+ # Coerce Date/Time instances back to ISO strings before handing the
79
+ # data to json_schemer — the schema declares them as `type: string,
80
+ # format: date` because that's also the wire form. Walking the hash
81
+ # here keeps the gem OCP-compliant (no json_schemer plugin/tweak).
82
+ def normalize_dates(value)
83
+ case value
84
+ when Date then value.iso8601
85
+ when Time then value.iso8601
86
+ when Hash then value.transform_values { |v| normalize_dates(v) }
87
+ when Array then value.map { |v| normalize_dates(v) }
88
+ else value
196
89
  end
197
-
198
- line_map
199
90
  end
200
91
 
201
- def convert_schema_errors(schema_errors, file_path, line_map)
202
- schema_errors.map do |error|
203
- data_pointer = error["data_pointer"] || ""
204
- line = find_line_for_pointer(data_pointer, line_map)
205
- column = 1
206
-
207
- message = build_error_message(error)
208
-
209
- ValidationError.new(file_path, line, column, message, data_pointer)
92
+ def format_message(err)
93
+ type = err["type"]
94
+ details = err["details"] || {}
95
+ pointer = err["data_pointer"].to_s
96
+ case type
97
+ when "required"
98
+ missing = Array(details["missing_keys"])
99
+ if missing.size == 1
100
+ "object is missing required property: #{missing.first}"
101
+ else
102
+ "object is missing required properties: #{missing.join(", ")}"
103
+ end
104
+ when "additionalProperties"
105
+ extra = Array(details["extra_keys"])
106
+ case extra.size
107
+ when 0 then "object has disallowed additional properties"
108
+ when 1 then "object property '#{extra.first}' is a disallowed additional property"
109
+ else "object properties #{extra.map { |k| "'#{k}'" }.join(", ")} are disallowed additional properties"
110
+ end
111
+ when "enum"
112
+ vals = Array(details["valid_values"])
113
+ enums = vals.empty? ? "(see schema)" : vals.join(", ")
114
+ "value #{details["value"].inspect} is not one of: #{enums}"
115
+ when "type"
116
+ expected = Array(details["expected_types"])
117
+ "value is not #{expected.empty? ? "the expected type" : expected.join(" or ")}"
118
+ when "pattern"
119
+ "value #{details["value"].inspect} does not match pattern #{details["pattern"].inspect}"
120
+ when "minItems"
121
+ "array has fewer than #{details["minimum"]} items"
122
+ when "maxItems"
123
+ "array has more than #{details["maximum"]} items"
124
+ else
125
+ err["error"] || "validation failed"
210
126
  end
211
127
  end
212
128
 
213
- def find_line_for_pointer(pointer, line_map)
214
- return 1 if pointer.empty?
215
-
216
- # Try exact match first
217
- return line_map[pointer] if line_map[pointer]
218
-
219
- # Try progressively shorter paths
220
- parts = pointer.split("/").reject(&:empty?)
221
- (parts.length - 1).downto(0) do |i|
222
- partial_path = "/#{parts[0..i].join("/")}"
223
- return line_map[partial_path] if line_map[partial_path]
224
- end
225
-
226
- # For paths like /resolutions/0/actions/0/type, try to find the specific action
227
- if parts.length >= 4 && parts[0] == "resolutions" && parts[2] == "actions"
228
- # Try to find the specific action line
229
- resolution_index = parts[1]
230
- action_index = parts[3]
231
- parts[4] if parts.length > 4
232
-
233
- # Look for patterns like /resolutions/0/actions/0
234
- action_path = "/resolutions/#{resolution_index}/actions/#{action_index}"
235
- return line_map[action_path] if line_map[action_path]
236
-
237
- # Look for the actions array start
238
- actions_path = "/resolutions/#{resolution_index}/actions"
239
- return line_map[actions_path] if line_map[actions_path]
240
- end
241
-
242
- # Try to find the closest match by looking for the last non-numeric part
243
- if parts.any?
244
- parts.reverse.each do |part|
245
- next if part.match?(/^\d+$/) # Skip array indices
246
-
247
- line_map.each do |path, line|
248
- return line if path.end_with?("/#{part}")
129
+ # Builds a {path => line_no} map for a YAML source. The path is the
130
+ # JSON-Schema-style pointer: "/metadata", "/resolutions/0/title", etc.
131
+ # LineMap is a stateless helper module, callable freely — no `instance_variable_set`
132
+ # ever crosses another object's boundary.
133
+ module LineMap
134
+ module_function
135
+
136
+ # @return [Hash{String => Integer}]
137
+ def build(content)
138
+ map = {}
139
+ stack = []
140
+ array_counter = Hash.new(-1)
141
+ array_path_for = {}
142
+
143
+ content.each_line.with_index(1) do |raw, line_no|
144
+ line = raw.chomp
145
+ stripped = line.strip
146
+ next if stripped.empty? || stripped.start_with?("#") || stripped == "---"
147
+
148
+ indent = line.index(/\S/) || 0
149
+ level = indent / 2
150
+ stack = stack.first(level)
151
+
152
+ if stripped.start_with?("- ")
153
+ parent = stack.empty? ? "" : "/#{stack.join("/")}"
154
+ parent_key = parent.empty? ? parent : parent.dup
155
+ array_counter[parent_key] = -1 unless array_counter.key?(parent_key)
156
+ array_counter[parent_key] += 1
157
+ array_index = array_counter[parent_key]
158
+ array_path = "#{parent}/#{array_index}"
159
+ map[array_path] = line_no
160
+
161
+ remainder = stripped[2..].to_s.strip
162
+ if remainder.match(/\A(.+?):(\s|$)/)
163
+ key = Regexp.last_match(1).strip.gsub(/["']/, "")
164
+ map["#{array_path}/#{key}"] = line_no
165
+ stack = stack.first(level) + [array_index.to_s, key]
166
+ else
167
+ stack = stack.first(level) + [array_index.to_s]
168
+ end
169
+ elsif (md = stripped.match(/\A(.+?):(\s|$)/))
170
+ key = md[1].strip.gsub(/["']/, "")
171
+ stack = stack.first(level) + [key]
172
+ full = "/#{stack.join("/")}"
173
+ map[full] = line_no
174
+ array_counter.delete_if { |p, _| p.start_with?(full) }
249
175
  end
250
176
  end
177
+ map
251
178
  end
252
179
 
253
- # Default to line 1
254
- 1
255
- end
256
-
257
- def build_error_message(error)
258
- type = error["type"]
259
- details = error["details"] || {}
260
- data_pointer = error["data_pointer"] || ""
180
+ # @return [Array(Integer, Integer)] [line, column] for the given
181
+ # JSON-Schema data pointer. Picks the longest prefix in the line map
182
+ # (or the longest map path extending the pointer) — pure longest-match, no knowledge of specific path shapes.
183
+ def locate(pointer, line_map)
184
+ pointer = pointer.to_s
185
+ return [1, 1] if pointer.empty?
261
186
 
262
- base_message = case type
263
- when "required"
264
- missing = details["missing_keys"] || []
265
- if missing.length == 1
266
- "object is missing required property: #{missing.first}"
267
- else
268
- "object is missing required properties: #{missing.join(", ")}"
269
- end
270
- when "additionalProperties"
271
- extra = details["extra_keys"] || []
272
- if extra.length == 1
273
- "object property '#{extra.first}' is a disallowed additional property"
274
- else
275
- "object properties #{extra.map do |k|
276
- "'#{k}'"
277
- end.join(", ")} are disallowed additional properties"
278
- end
279
- when "enum"
280
- value = details["value"]
281
- valid_values = details["valid_values"] || []
282
- "value '#{value}' is not one of: #{valid_values}"
283
- when "type"
284
- details["actual_type"]
285
- expected = details["expected_types"] || []
286
- "value is not #{expected.join(" or ")}"
287
- else
288
- error["error"] || "validation failed"
289
- end
187
+ match_key = line_map.keys
188
+ .select { |path| pointer.start_with?(path) || path.start_with?(pointer) }
189
+ .max_by(&:length)
290
190
 
291
- # Add data pointer for debugging if it's not empty
292
- if !data_pointer.empty?
293
- "#{base_message} at `#{data_pointer}`"
294
- else
295
- base_message
191
+ match_key ? [line_map[match_key], 1] : [1, 1]
296
192
  end
297
193
  end
298
194
  end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Edoxen
4
+ # Per-language canonical source URL (e.g. one PDF per language).
5
+ # Carries the URL ref, its format, and the ISO 639-3 language_code the URL
6
+ # is the canonical source for.
7
+ class SourceUrl < Lutaml::Model::Serializable
8
+ attribute :ref, :string
9
+ attribute :format, :string
10
+ attribute :language_code, :string
11
+ end
12
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Edoxen
4
+ # A structured resolution identifier, e.g. prefix "ISO" + number "2019-01".
5
+ # A Resolution carries 1..* StructuredIdentifier so a single resolution can
6
+ # hold its TC number, its SC number, and any cross-cutting reference number
7
+ # without forcing callers to flatten them into one opaque string.
8
+ class StructuredIdentifier < Lutaml::Model::Serializable
9
+ attribute :prefix, :string
10
+ attribute :number, :string
11
+ end
12
+ end
data/lib/edoxen/url.rb CHANGED
@@ -1,17 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "lutaml/model"
4
-
5
3
  module Edoxen
4
+ # URL with a kind (access / report) and an optional format hint (pdf, html).
6
5
  class Url < Lutaml::Model::Serializable
7
- attribute :kind, :string, values: %w[access report]
6
+ attribute :kind, :string, values: Enums::URL_KIND
8
7
  attribute :ref, :string
9
8
  attribute :format, :string
10
-
11
- key_value do
12
- map "kind", to: :kind
13
- map "ref", to: :ref
14
- map "format", to: :format
15
- end
16
9
  end
17
10
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Edoxen
4
- VERSION = "0.1.2"
4
+ VERSION = "0.3.1"
5
5
  end