edoxen 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,211 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json_schemer"
4
+ require "yaml"
5
+
6
+ module Edoxen
7
+ # Validates Edoxen YAML files against `schema/edoxen.yaml`.
8
+ #
9
+ # The SchemaValidator is intentionally small and only owns one concern:
10
+ # JSON-Schema validation with line-accurate error reporting. It does not
11
+ # touch the Ruby model — CLI layers SchemaValidator and
12
+ # `ResolutionCollection.from_yaml` to catch both schema-level violations
13
+ # (additionalProperties, required, enum) and structural issues
14
+ # (numeric/date coercion, missing nested classes) the schema can't express.
15
+ #
16
+ # Line tracking is built from indentation heuristics over the source text.
17
+ # That's a deliberate trade-off: a 30-line heuristic versus introducing a
18
+ # full line-tracking YAML parser dependency, in exchange for error reports
19
+ # that are accurate on real-world 2-space-indented fixture files (which is
20
+ # all we use). The lookup is OCP-compliant (longest-prefix match — no
21
+ # hard-coded path shapes) so adding new collection fields never requires
22
+ # touching this class.
23
+ class SchemaValidator
24
# A single validation finding pinned to a position in a source file.
#
# The exception message is the same string #to_clickable_format returns:
# "file:line:column: text", followed by " at `pointer`" when the pointer
# is not empty.
class ValidationError < StandardError
  attr_reader :file, :line, :column, :pointer, :message_text

  # @param file [String] path used as the location prefix
  # @param line [Integer] line number shown in the location prefix
  # @param column [Integer] column number shown in the location prefix
  # @param pointer [String, nil] data pointer of the offending value
  # @param message_text [String] description without location prefix
  def initialize(file:, line:, column:, pointer:, message_text:)
    @file = file
    @line = line
    @column = column
    @pointer = pointer
    @message_text = message_text
    # All fields are set, so the public formatter can produce the message.
    super(to_clickable_format)
  end

  # @return [String] "file:line:column: text[ at `pointer`]"
  def to_clickable_format
    pointer_note = " at `#{@pointer}`" unless @pointer.to_s.empty?
    "#{@file}:#{@line}:#{@column}: #{@message_text}#{pointer_note}"
  end
end
47
+
48
# Builds a validator bound to one schema file.
#
# @param schema_path [String] path of the YAML-encoded schema; when
#   omitted, the result of #default_schema_path is used
def initialize(schema_path = default_schema_path)
  @schemer = load_schemer(schema_path)
  @schema_path = schema_path
end
52
+
53
# Reads a YAML file from disk and validates its content.
#
# @param file_path [String] path of the file to check
# @return [Array<ValidationError>] empty when the file is valid; a single
#   "File not found" entry when the file does not exist
def validate_file(file_path)
  source = File.read(file_path)
  validate_content(source, file_path)
rescue Errno::ENOENT
  missing = ValidationError.new(
    file: file_path, line: 1, column: 1,
    pointer: "", message_text: "File not found"
  )
  [missing]
end
62
+
63
# Validate a YAML string. Returns an array of ValidationError (empty = ok).
#
# YAML that cannot be loaded is reported as a single ValidationError
# instead of raising: syntax errors carry the parser's line/column, while
# disallowed tags and aliases (rejected by `YAML.safe_load`) are reported
# at 1:1 because those exceptions carry no position.
#
# @param content [String] YAML source text
# @param file_path [String] name used in the reported locations
# @return [Array<ValidationError>]
def validate_content(content, file_path)
  data = normalize_dates(YAML.safe_load(content, permitted_classes: [Date, Time]))
  line_map = LineMap.build(content)

  @schemer.validate(data).to_a.map do |err|
    pointer = err.fetch("data_pointer", "")
    line, column = LineMap.locate(pointer, line_map)
    ValidationError.new(
      file: file_path, line: line, column: column,
      pointer: pointer, message_text: format_message(err)
    )
  end
rescue Psych::SyntaxError => e
  [ValidationError.new(
    file: file_path, line: e.line || 1, column: e.column || 1,
    pointer: "", message_text: "YAML syntax error: #{e.problem}"
  )]
rescue Psych::DisallowedClass, Psych::BadAlias => e
  # Well-formed YAML that safe_load refuses (custom tags, anchors/aliases).
  # Surface it as a finding so one bad file cannot abort a validation run.
  [ValidationError.new(
    file: file_path, line: 1, column: 1,
    pointer: "", message_text: "YAML load error: #{e.message}"
  )]
end
83
+
84
+ private
85
+
86
+ def default_schema_path
87
+ File.expand_path("../../schema/edoxen.yaml", __dir__)
88
+ end
89
+
90
# Reads the YAML schema at +path+ and wraps it in a JSONSchemer schema.
#
# @param path [String] schema file to read
# @return [Object] whatever `JSONSchemer.schema` returns for the document
def load_schemer(path)
  raw_schema = File.read(path)
  document = YAML.safe_load(raw_schema, permitted_classes: [Date, Time])
  JSONSchemer.schema(document)
end
93
+
94
# Recursively replaces Date/Time instances with their ISO 8601 string so
# the data matches a schema that declares such fields as strings. Hash
# values and Array elements are walked; hash keys and every other value
# are returned untouched.
#
# @param value [Object] a node of the loaded YAML document
# @return [Object] the same structure with Date/Time leaves stringified
def normalize_dates(value)
  return value.iso8601 if value.is_a?(Date) || value.is_a?(Time)
  return value.transform_values { |nested| normalize_dates(nested) } if value.is_a?(Hash)
  return value.map { |element| normalize_dates(element) } if value.is_a?(Array)

  value
end
107
+
108
# Turns one schema-validation error hash into a short sentence.
#
# Reads the "type", "details" and "error" keys of +err+. Types without a
# dedicated wording fall back to the "error" text, or to
# "validation failed" when that is absent.
#
# @param err [Hash] error entry with string keys
# @return [String]
def format_message(err)
  details = err["details"] || {}

  case err["type"]
  when "required"
    missing = Array(details["missing_keys"])
    noun = missing.size == 1 ? "property" : "properties"
    "object is missing required #{noun}: #{missing.join(", ")}"
  when "additionalProperties"
    extra = Array(details["extra_keys"])
    case extra.size
    when 0 then "object has disallowed additional properties"
    when 1 then "object property '#{extra.first}' is a disallowed additional property"
    else "object properties #{extra.map { |k| "'#{k}'" }.join(", ")} are disallowed additional properties"
    end
  when "enum"
    allowed = Array(details["valid_values"])
    listing = allowed.empty? ? "(see schema)" : allowed.join(", ")
    "value #{details["value"].inspect} is not one of: #{listing}"
  when "type"
    expected = Array(details["expected_types"])
    "value is not #{expected.empty? ? "the expected type" : expected.join(" or ")}"
  when "pattern"
    "value #{details["value"].inspect} does not match pattern #{details["pattern"].inspect}"
  when "minItems"
    "array has fewer than #{details["minimum"]} items"
  when "maxItems"
    "array has more than #{details["maximum"]} items"
  else
    err["error"] || "validation failed"
  end
end
144
+
145
# Builds and queries a {pointer => line_no} map for a YAML source. Keys are
# slash-separated data pointers such as "/metadata" or
# "/resolutions/0/title". The module is stateless: `build` returns a fresh
# Hash on every call, and no `instance_variable_set` ever crosses another
# object's boundary.
#
# The map is derived from indentation alone (two columns per nesting
# level), so it is a heuristic: a sequence whose dashes sit at the same
# column as the parent key is not attributed to that key, and `key: value`
# text inside a block scalar is recorded like a real key.
module LineMap
  # A `key:` mapping entry at the start of the text (the colon must be
  # followed by whitespace or the end of the line).
  KEY_PATTERN = /\A(.+?):(?:\s|\z)/.freeze

  module_function

  # Scans +content+ line by line and records the first line of every
  # mapping key and sequence item it recognises.
  #
  # @param content [String] YAML source text
  # @return [Hash{String => Integer}] pointer => 1-based line number
  def build(content)
    map = {}
    stack = []
    next_index = Hash.new(-1) # parent pointer => last index handed out

    content.each_line.with_index(1) do |raw, line_no|
      stripped = raw.strip
      next if stripped.empty? || stripped.start_with?("#") || stripped == "---"

      level = (raw.index(/\S/) || 0) / 2
      stack = stack.first(level)

      # A lone "-" opens an item whose content starts on the next line.
      if stripped == "-" || stripped.start_with?("- ")
        parent = pointer_for(stack)
        index = (next_index[parent] += 1)
        item_path = "#{parent}/#{index}"
        map[item_path] = line_no
        stack += [index.to_s]

        # `- key: value` also starts the item's first key on this line.
        key = key_of(stripped[1..].strip)
        if key
          map["#{item_path}/#{key}"] = line_no
          stack += [key]
        end
      elsif (key = key_of(stripped))
        stack += [key]
        path = pointer_for(stack)
        map[path] = line_no
        # Restart sequence numbering at and below a key that shows up again.
        next_index.delete_if { |counted, _| counted == path || counted.start_with?("#{path}/") }
      end
    end
    map
  end

  # Resolves a data pointer to a source position.
  #
  # Lookup order: the pointer itself, then its nearest recorded ancestor
  # (walking up one path segment at a time), then the earliest recorded
  # line below the pointer. Matching is per segment, so "/items/1" never
  # resolves through "/items/10".
  #
  # @param pointer [String, nil] data pointer; empty or nil means the root
  # @param line_map [Hash{String => Integer}] map produced by `build`
  # @return [Array(Integer, Integer)] [line, column]; column is always 1
  #   and the result is [1, 1] when nothing matches
  def locate(pointer, line_map)
    pointer = pointer.to_s
    return [1, 1] if pointer.empty?

    candidate = pointer
    until candidate.empty?
      return [line_map[candidate], 1] if line_map.key?(candidate)

      cut = candidate.rindex("/")
      break unless cut

      candidate = candidate[0...cut]
    end

    below = "#{pointer}/"
    first_child_line = line_map.filter_map { |path, line| line if path.start_with?(below) }.min
    first_child_line ? [first_child_line, 1] : [1, 1]
  end

  # Joins stack segments into a pointer; an empty stack is the root "".
  def pointer_for(stack)
    stack.map { |segment| "/#{segment}" }.join
  end

  # Extracts the key of a `key: ...` entry with quote characters removed,
  # or nil when +text+ does not start with such an entry.
  def key_of(text)
    match = KEY_PATTERN.match(text)
    match && match[1].strip.delete("\"'")
  end

  private_class_method :pointer_for, :key_of
end
210
+ end
211
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Edoxen
4
# Per-language canonical source URL (e.g. one PDF per language).
#
# Three string attributes, each serialized under a key of the same name:
# - ref: the URL reference
# - format: the format of the referenced document
# - language_code: ISO 639-3 code of the language this URL is the
#   canonical source for
class SourceUrl < Lutaml::Model::Serializable
  attribute :ref, :string
  attribute :format, :string
  attribute :language_code, :string

  key_value do
    map "ref", to: :ref
    map "format", to: :format
    map "language_code", to: :language_code
  end
end
18
+ end
@@ -1,20 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # StructuredIdentifier {
4
- # number:
5
- # prefix:
6
- # }
7
-
8
- require "lutaml/model"
9
-
10
3
  module Edoxen
4
+ # A structured resolution identifier, e.g. prefix "ISO" + number "2019-01".
5
+ # A Resolution carries 1..* StructuredIdentifier so a single resolution can
6
+ # hold its TC number, its SC number, and any cross-cutting reference number
7
+ # without forcing callers to flatten them into one opaque string.
11
8
  class StructuredIdentifier < Lutaml::Model::Serializable
12
- attribute :number, :string
13
9
  attribute :prefix, :string
10
+ attribute :number, :string
14
11
 
15
12
  key_value do
16
- map "number", to: :number
17
13
  map "prefix", to: :prefix
14
+ map "number", to: :number
18
15
  end
19
16
  end
20
17
  end
data/lib/edoxen/url.rb ADDED
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Edoxen
4
# URL with a kind (access / report) and an optional format hint (pdf, html).
#
# Three string attributes, each serialized under a key of the same name:
# - kind: declared with `values: Enums::URL_KIND`
# - ref: the URL reference
# - format: the format hint
class Url < Lutaml::Model::Serializable
  attribute :kind, :string, values: Enums::URL_KIND
  attribute :ref, :string
  attribute :format, :string

  key_value do
    map "kind", to: :kind
    map "ref", to: :ref
    map "format", to: :format
  end
end
16
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Edoxen
4
- VERSION = "0.1.1"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/edoxen.rb CHANGED
@@ -1,27 +1,45 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "lutaml/model"
4
- require_relative "edoxen/version"
5
4
 
6
- # Configure lutaml-model for YAML serialization
7
- Lutaml::Model::Config.configure do |config|
8
- config.yaml_adapter_type = :standard_yaml
9
- config.json_adapter_type = :standard_json
5
+ # Configure the lutaml-model serialization framework used throughout the
6
+ # Edoxen information-model gem.
7
+ Lutaml::Model::Config.configure do |c|
8
+ c.yaml_adapter_type = :standard_yaml
9
+ c.json_adapter_type = :standard_json
10
10
  end
11
11
 
12
12
  module Edoxen
13
- class Error < StandardError; end
13
+ # Autoload every constant defined under the Edoxen namespace from its
14
+ # native `lib/edoxen/<name>.rb` file. This is the only place where file
15
+ # paths are tied to constants; everywhere else, models reference each
16
+ # other by class name (resolved lazily by Ruby).
17
+ #
18
+ # There are intentionally NO `require_relative` calls in this gem —
19
+ # autoload keeps load-order semantics clean and lets us tolerate the
20
+ # extensive cross-references between model classes
21
+ # (Resolution <-> Localization, ResolutionMetadata <-> Localization, etc.).
22
+ autoload :VERSION, "edoxen/version"
23
+ autoload :Error, "edoxen/error"
24
+ autoload :Enums, "edoxen/enums"
14
25
 
15
- # Load all model classes
16
- require_relative "edoxen/action"
17
- require_relative "edoxen/approval"
18
- require_relative "edoxen/consideration"
19
- require_relative "edoxen/meeting"
20
- require_relative "edoxen/resolution"
21
- require_relative "edoxen/resolution_collection"
22
- require_relative "edoxen/resolution_date"
23
- require_relative "edoxen/resolution_relationship"
24
- require_relative "edoxen/structured_identifier"
25
- require_relative "edoxen/subject_body"
26
- require_relative "edoxen/meeting_identfier"
26
+ # Information-model classes (one per file, one concept per class).
27
+ # Names mirror ../edoxen-model/models/*.lutaml.
28
+ autoload :StructuredIdentifier, "edoxen/structured_identifier"
29
+ autoload :MeetingIdentifier, "edoxen/meeting_identifier"
30
+ autoload :ResolutionDate, "edoxen/resolution_date"
31
+ autoload :Action, "edoxen/action"
32
+ autoload :Approval, "edoxen/approval"
33
+ autoload :Consideration, "edoxen/consideration"
34
+ autoload :SourceUrl, "edoxen/source_url"
35
+ autoload :Localization, "edoxen/localization"
36
+ autoload :Url, "edoxen/url"
37
+ autoload :ResolutionRelation, "edoxen/resolution_relation"
38
+ autoload :Resolution, "edoxen/resolution"
39
+ autoload :ResolutionMetadata, "edoxen/resolution_metadata"
40
+ autoload :ResolutionCollection, "edoxen/resolution_collection"
41
+
42
+ # Services.
43
+ autoload :SchemaValidator, "edoxen/schema_validator"
44
+ autoload :Cli, "edoxen/cli"
27
45
  end