odin-foundation 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. checksums.yaml +7 -0
  2. data/lib/odin/diff/differ.rb +115 -0
  3. data/lib/odin/diff/patcher.rb +64 -0
  4. data/lib/odin/export.rb +330 -0
  5. data/lib/odin/parsing/parser.rb +1193 -0
  6. data/lib/odin/parsing/token.rb +26 -0
  7. data/lib/odin/parsing/token_type.rb +40 -0
  8. data/lib/odin/parsing/tokenizer.rb +825 -0
  9. data/lib/odin/parsing/value_parser.rb +322 -0
  10. data/lib/odin/resolver/import_resolver.rb +137 -0
  11. data/lib/odin/serialization/canonicalize.rb +112 -0
  12. data/lib/odin/serialization/stringify.rb +582 -0
  13. data/lib/odin/transform/format_exporters.rb +819 -0
  14. data/lib/odin/transform/source_parsers.rb +385 -0
  15. data/lib/odin/transform/transform_engine.rb +2837 -0
  16. data/lib/odin/transform/transform_parser.rb +979 -0
  17. data/lib/odin/transform/transform_types.rb +278 -0
  18. data/lib/odin/transform/verb_context.rb +87 -0
  19. data/lib/odin/transform/verbs/aggregation_verbs.rb +106 -0
  20. data/lib/odin/transform/verbs/collection_verbs.rb +640 -0
  21. data/lib/odin/transform/verbs/datetime_verbs.rb +602 -0
  22. data/lib/odin/transform/verbs/financial_verbs.rb +356 -0
  23. data/lib/odin/transform/verbs/geo_verbs.rb +125 -0
  24. data/lib/odin/transform/verbs/numeric_verbs.rb +434 -0
  25. data/lib/odin/transform/verbs/object_verbs.rb +123 -0
  26. data/lib/odin/types/array_item.rb +42 -0
  27. data/lib/odin/types/diff.rb +89 -0
  28. data/lib/odin/types/directive.rb +28 -0
  29. data/lib/odin/types/document.rb +92 -0
  30. data/lib/odin/types/document_builder.rb +67 -0
  31. data/lib/odin/types/dyn_value.rb +270 -0
  32. data/lib/odin/types/errors.rb +149 -0
  33. data/lib/odin/types/modifiers.rb +45 -0
  34. data/lib/odin/types/ordered_map.rb +79 -0
  35. data/lib/odin/types/schema.rb +262 -0
  36. data/lib/odin/types/value_type.rb +28 -0
  37. data/lib/odin/types/values.rb +618 -0
  38. data/lib/odin/types.rb +12 -0
  39. data/lib/odin/utils/format_utils.rb +186 -0
  40. data/lib/odin/utils/path_utils.rb +25 -0
  41. data/lib/odin/utils/security_limits.rb +17 -0
  42. data/lib/odin/validation/format_validators.rb +238 -0
  43. data/lib/odin/validation/redos_protection.rb +102 -0
  44. data/lib/odin/validation/schema_parser.rb +813 -0
  45. data/lib/odin/validation/schema_serializer.rb +262 -0
  46. data/lib/odin/validation/validator.rb +1061 -0
  47. data/lib/odin/version.rb +5 -0
  48. data/lib/odin.rb +90 -0
  49. metadata +160 -0
@@ -0,0 +1,385 @@
1
+ # frozen_string_literal: true
2
+
require "bigdecimal"
require "csv"
require "date"
require "json"
require "rexml/document"
require "yaml"
7
+
8
+ module Odin
9
+ module Transform
10
+ module SourceParsers
# Convert a JSON document into a DynValue.
#
# @param input [String, nil] raw JSON text
# @return the value built by Types::DynValue.from_json_value from the decoded JSON
# @raise [ArgumentError] if input is nil or contains only whitespace
# @raise [FormatError] if the JSON library rejects the text
def self.parse_json(input)
  if input.nil? || input.strip.empty?
    raise ArgumentError, "Input cannot be nil or empty"
  end

  decoded = JSON.parse(input)
  Types::DynValue.from_json_value(decoded)
rescue JSON::ParserError => parse_error
  # Surface parser failures under the module's own error type.
  raise FormatError, "Invalid JSON: #{parse_error.message}"
end
20
+
# Parse CSV text into a DynValue array.
#
# With headers (the default) the first row supplies the field names and every
# later row becomes an object; cells missing at the end of a row are read as
# "" and cells beyond the header width are ignored. A header row with no data
# rows yields an empty array. Without headers every row becomes an array of
# inferred values.
#
# @param input [String, nil] CSV text; nil or blank input yields an empty array
# @param headers [Boolean] whether the first row holds the column names
# @param delimiter [String] field separator, compared one character at a time
# @return a DynValue array of objects (headers) or of arrays (no headers)
def self.parse_csv(input, headers: true, delimiter: ",")
  return Types::DynValue.of_array([]) if input.nil? || input.strip.empty?

  # Drop a leading UTF-8 byte order mark, then replace undecodable bytes.
  without_bom = input.sub(/\A\xEF\xBB\xBF/n, "")
  text = without_bom.encode("UTF-8", "UTF-8", invalid: :replace, undef: :replace)

  table = parse_csv_rows(text, delimiter)
  return Types::DynValue.of_array([]) if table.empty?

  unless headers
    arrays = table.map do |cells|
      Types::DynValue.of_array(cells.map { |cell| infer_type(cell) })
    end
    return Types::DynValue.of_array(arrays)
  end

  column_names, *records = table
  # Only the header row was present.
  return Types::DynValue.of_array([]) if records.empty?

  objects = records.map do |cells|
    record = {}
    column_names.each_with_index do |column, idx|
      record[column] = infer_type(cells.fetch(idx, ""))
    end
    Types::DynValue.of_object(record)
  end
  Types::DynValue.of_array(objects)
end
54
+
# Parse an XML string into a DynValue object keyed by the root element name.
#
# REXML does not distinguish <tag/> from <tag></tag> once parsed, so before
# parsing every self-closing tag receives a synthetic __odin_sc="1" attribute
# that parse_xml_element reads and removes.
#
# @param input [String, nil] XML text
# @return a DynValue object of the form { root_name => parsed_root }
# @raise [ArgumentError] if input is nil or contains only whitespace
# @raise [FormatError] if REXML rejects the document or it has no root element
def self.parse_xml(input)
  raise ArgumentError, "Input cannot be nil or empty" if input.nil? || input.strip.empty?

  # Comments, CDATA sections and processing instructions are matched first and
  # returned untouched, so tag-like text inside them (for example
  # <![CDATA[<br/>]]>) is never rewritten with the marker attribute.
  marked = input.gsub(/<!--.*?-->|<!\[CDATA\[.*?\]\]>|<\?.*?\?>|<([a-zA-Z_][\w:.-]*)\s*(\s[^>]*)?\/>/m) do |match|
    tag_name = Regexp.last_match(1)
    next match unless tag_name

    attrs = Regexp.last_match(2) || ""
    "<#{tag_name}#{attrs} __odin_sc=\"1\"/>"
  end

  doc = REXML::Document.new(marked)
  root = doc.root
  raise FormatError, "No root element found" unless root

  root_name = qualified_name(root)
  content = parse_xml_element(root, 0)
  Types::DynValue.of_object({ root_name => content })
rescue REXML::ParseException => e
  raise FormatError, "Invalid XML: #{e.message}"
end
77
+
# Parse fixed-width text into DynValue records.
#
# Each non-empty line becomes an object whose fields are cut out of the line
# according to the column specs. Every field is stored as a string.
#
# @param input [String, nil] text with one record per line; nil or blank input
#   yields an empty DynValue array
# @param columns [Array<Hash>] specs with symbol keys :name, :pos (default 0),
#   :len (default 0) and :trim (default true)
# @return the single record object when exactly one non-empty line is present,
#   otherwise a DynValue array of record objects
# @raise [ArgumentError] if input is non-blank and columns is nil or empty
def self.parse_fixed_width(input, columns:)
  return Types::DynValue.of_array([]) if input.nil? || input.strip.empty?
  raise ArgumentError, "Columns specification required" if columns.nil? || columns.empty?

  records = input.each_line.map(&:chomp).reject(&:empty?).map do |line|
    record = columns.each_with_object({}) do |spec, acc|
      offset = spec[:pos] || 0
      width = spec[:len] || 0

      # Columns starting past the end of the line read as empty text; columns
      # running past the end are clipped to the line length.
      slice = ""
      if offset < line.length
        stop = [offset + width, line.length].min
        slice = line[offset...stop] || ""
      end
      slice = slice.strip if spec.fetch(:trim, true)

      acc[spec[:name]] = Types::DynValue.of_string(slice)
    end
    Types::DynValue.of_object(record)
  end

  return records[0] if records.size == 1

  Types::DynValue.of_array(records)
end
107
+
# Parse flat key=value lines into a DynValue object.
#
# Blank lines, lines starting with "#" or ";" (after leading whitespace) and
# lines without "=" are skipped. The key is the text before the first "=" and
# may use dotted/bracket notation, which set_nested expands into nested
# hashes and arrays.
#
# @param input [String, nil] text with one pair per line; nil or blank input
#   yields an empty DynValue object
# @return a DynValue object built from all accepted lines
def self.parse_flat_kvp(input)
  return Types::DynValue.of_object({}) if input.nil? || input.strip.empty?

  tree = {}
  input.each_line do |raw_line|
    text = raw_line.chomp.sub(/\r$/, "")
    trimmed = text.strip
    next if trimmed.empty? || trimmed.start_with?("#", ";")

    raw_key, separator, raw_value = text.partition("=")
    next if separator.empty?

    set_nested(tree, raw_key.strip, parse_flat_value(raw_value.strip))
  end

  wrapped = tree.transform_values { |node| wrap_nested(node) }
  Types::DynValue.of_object(wrapped)
end
130
+
# Parse a YAML string into a DynValue.
#
# Loading goes through YAML.safe_load, permitting only Date, Time and
# BigDecimal beyond the basic YAML types. Every rejection by the YAML library
# (malformed syntax, a disallowed class such as Symbol, or an alias) is
# reported as FormatError, in line with the JSON and XML parsers.
#
# @param input [String, nil] YAML text; nil or blank input yields an empty
#   DynValue object
# @return the value built by Types::DynValue.from_ruby from the loaded data
# @raise [FormatError] if the YAML library refuses the document
def self.parse_yaml(input)
  return Types::DynValue.of_object({}) if input.nil? || input.strip.empty?

  parsed = YAML.safe_load(input, permitted_classes: [Date, Time, BigDecimal])
  Types::DynValue.from_ruby(parsed)
rescue Psych::SyntaxError, Psych::DisallowedClass, Psych::BadAlias => e
  raise FormatError, "Invalid YAML: #{e.message}"
end
140
+
141
+ # ── Private Helpers ──
142
+
# Split CSV text into rows of raw string cells.
#
# A double quote toggles quoted mode wherever it appears; inside quotes the
# delimiter and line breaks are literal and "" stands for one quote
# character. "\n" always ends a row. "\r" and "\r\n" end a row too, except
# that a blank first row ended this way is dropped. A final row without a
# trailing line break is kept unless it has no cells and an empty field.
#
# @param input [String] CSV text
# @param delimiter [String] separator, compared against single characters
# @return [Array<Array<String>>] rows of cell strings
def self.parse_csv_rows(input, delimiter)
  records = []
  record = []
  cell = +""
  quoted = false
  symbols = input.chars
  pos = 0

  while pos < symbols.length
    sym = symbols[pos]
    pos += 1 # from here on, symbols[pos] is the lookahead character

    if quoted
      if sym != '"'
        cell << sym
      elsif symbols[pos] == '"'
        # Doubled quote inside a quoted field: emit one quote, skip the pair.
        cell << '"'
        pos += 1
      else
        quoted = false
      end
    elsif sym == '"'
      quoted = true
    elsif sym == delimiter
      record << cell
      cell = +""
    elsif sym == "\r" || sym == "\n"
      carriage_return = sym == "\r"
      # Treat "\r\n" as a single line break.
      pos += 1 if carriage_return && symbols[pos] == "\n"

      record << cell
      # Only the CR/CRLF form skips a blank row at the very start.
      leading_blank = carriage_return && records.empty? &&
                      record.size <= 1 && record.all?(&:empty?)
      records << record unless leading_blank
      record = []
      cell = +""
    else
      cell << sym
    end
  end

  # Flush the last row when the text does not end with a line break.
  if !cell.empty? || !record.empty?
    record << cell
    records << record
  end

  records
end
205
+
# Infer a typed DynValue from a raw CSV/flat text value.
#
# nil and "null" become null, "true"/"false" become booleans, an optionally
# negative run of digits becomes an integer, a decimal and/or exponent form
# becomes a float, and anything else stays a string.
#
# @param val [String, nil] raw cell text
# @return the DynValue chosen by the rules above
def self.infer_type(val)
  return Types::DynValue.of_null if val.nil? || val == "null"
  return Types::DynValue.of_bool(val == "true") if %w[true false].include?(val)
  return Types::DynValue.of_integer(val.to_i) if val.match?(/\A-?\d+\z/)
  return Types::DynValue.of_float(val.to_f) if val.match?(/\A-?\d+(\.\d+)?([eE][+-]?\d+)?\z/)

  Types::DynValue.of_string(val)
end
220
+
# Interpret the right-hand side of a flat key=value pair.
#
# An empty value or "~" is null; a value wrapped in double quotes is taken as
# a string with the outer quotes removed (no type inference); everything else
# goes through infer_type.
#
# @param val_str [String] the trimmed value text
# @return the resulting DynValue
def self.parse_flat_value(val_str)
  return Types::DynValue.of_null if val_str.empty? || val_str == "~"

  quoted = val_str.length >= 2 && val_str.start_with?('"') && val_str.end_with?('"')
  quoted ? Types::DynValue.of_string(val_str[1...-1]) : infer_type(val_str)
end
231
+
# Store a value inside nested Hash/Array containers addressed by a
# dotted/bracket path such as "a.b[0].c".
#
# Missing intermediate containers are created on the way down: an Array when
# the following segment is a numeric index, otherwise a Hash.
#
# @param root [Hash] container that is modified in place
# @param path [String] dotted/bracket path
# @param value [Object] value to store at the final segment
# @return [Object, nil] the stored value, or nil when the path has no segments
def self.set_nested(root, path, value)
  segments = parse_path_segments(path)
  # A path without usable segments (e.g. "", "." or "[]") names no location;
  # writing it would create a nil key, so the assignment is skipped.
  return if segments.empty?

  *parents, leaf = segments
  container = root
  parents.each_with_index do |segment, idx|
    container[segment] ||= segments[idx + 1].is_a?(Integer) ? [] : {}
    container = container[segment]
  end

  container[leaf] = value
end
251
+
# Break a dotted/bracket path into its segments.
#
# Names stay strings and bracketed digit runs become integers, e.g.
# "a.b[0].c" -> ["a", "b", 0, "c"].
#
# @param path [String] path expression
# @return [Array<String, Integer>] segments in order of appearance
def self.parse_path_segments(path)
  path.scan(/([^.\[\]]+)|\[(\d+)\]/).map do |name, index|
    index ? index.to_i : name
  end
end
264
+
# Recursively convert plain Hash/Array containers into DynValue objects and
# arrays.
#
# Existing DynValue instances are returned as they are. nil entries (the gaps
# an Array gets when a sparse index such as "a[2]" is assigned first) become
# DynValue nulls so the resulting arrays hold DynValue elements throughout.
# Any other object is passed through unchanged.
#
# @param obj [Object] DynValue, Hash, Array, nil or other value
# @return the wrapped value
def self.wrap_nested(obj)
  case obj
  when Types::DynValue
    obj
  when Hash
    Types::DynValue.of_object(obj.transform_values { |v| wrap_nested(v) })
  when Array
    Types::DynValue.of_array(obj.map { |v| wrap_nested(v) })
  when nil
    Types::DynValue.of_null
  else
    obj
  end
end
277
+
# Convert a REXML element (and its subtree) into a DynValue.
#
# Rules, in order:
# * xsi:nil / nil set to "true" or "1" -> null
# * no children and no attributes -> null when the element carried the
#   synthetic __odin_sc marker (self-closing tag), otherwise its text as a
#   string (possibly empty)
# * attributes only (no children, no text) -> object of "@name" fields, or
#   null when every attribute is filtered out
# * anything else -> object holding "@name" attribute fields, "_text" for
#   non-blank text, and one field per child name; names that repeat, and
#   every child named "item", are collected into arrays
#
# xmlns*, xsi:nil, nil and nillable attributes are never emitted, and
# attribute names lose their namespace prefix. Text is gathered from every
# text/CDATA node of the element, because Element#text only returns the first
# node and would drop content that follows a comment, a CDATA boundary or a
# leading whitespace node.
#
# @param element [REXML::Element] element to convert; the __odin_sc marker
#   attribute is removed from it
# @param depth [Integer] current nesting level, 0 for the root
# @return the DynValue for the element
# @raise [FormatError] if depth exceeds 100
def self.parse_xml_element(element, depth)
  raise FormatError, "XML nesting depth exceeded (max 100)" if depth > 100

  nil_attr = element.attributes["xsi:nil"] || element.attributes["nil"]
  return Types::DynValue.of_null if nil_attr == "true" || nil_attr == "1"

  # Marker injected before parsing; remove it so it does not count as an attribute.
  is_self_closing = element.attributes["__odin_sc"] == "1"
  element.attributes.delete("__odin_sc") if is_self_closing

  children = element.elements.to_a
  text = element.texts.map(&:value).join.strip
  has_text = !text.empty?

  if children.empty? && element.attributes.size == 0
    return is_self_closing ? Types::DynValue.of_null : Types::DynValue.of_string(text)
  end

  # Builds the "@name" fields, skipping namespace declarations and nil markers.
  attribute_fields = lambda do
    attrs = {}
    element.attributes.each do |name, val|
      next if name.start_with?("xmlns") || name == "xsi:nil" || name == "nil" || name == "nillable"

      attrs["@#{strip_ns(name)}"] = Types::DynValue.of_string(val.to_s)
    end
    attrs
  end

  if children.empty? && !element.attributes.empty? && !has_text
    fields = attribute_fields.call
    return fields.empty? ? Types::DynValue.of_null : Types::DynValue.of_object(fields)
  end

  fields = attribute_fields.call
  fields["_text"] = Types::DynValue.of_string(text) if has_text

  # Children are keyed by qualified name (prefix:local) so that repeated
  # names can be detected and grouped.
  child_counts = Hash.new(0)
  children.each { |child| child_counts[qualified_name(child)] += 1 }

  child_arrays = {}
  children.each do |child|
    name = qualified_name(child)
    child_val = parse_xml_element(child, depth + 1)

    if child_counts[name] > 1 || name == "item"
      (child_arrays[name] ||= []) << child_val
    else
      fields[name] = child_val
    end
  end

  child_arrays.each { |name, items| fields[name] = Types::DynValue.of_array(items) }

  Types::DynValue.of_object(fields)
end
363
+
# Build the qualified name of an element: "prefix:name" when the element has
# a non-empty namespace prefix, otherwise just its name.
#
# @param element [#prefix, #name] element to name
# @return [String] the qualified name
def self.qualified_name(element)
  ns_prefix = element.prefix
  return element.name if !ns_prefix || ns_prefix.empty?

  "#{ns_prefix}:#{element.name}"
end
372
+
# Remove the namespace prefix from an element/attribute name: everything up
# to and including the first ":" is dropped. Names without ":" are returned
# unchanged.
#
# @param name [String] possibly prefixed name
# @return [String] the part after the first ":" or the name itself
def self.strip_ns(name)
  _head, separator, local = name.partition(":")
  separator.empty? ? name : local
end
377
+
# Hide the internal helpers from the module's public interface.
# qualified_name is not listed here and therefore stays publicly callable.
private_class_method(
  *%i[
    parse_csv_rows infer_type parse_flat_value
    set_nested parse_path_segments wrap_nested
    parse_xml_element strip_ns
  ]
)
381
+
# Raised by the parsers in this module when source text cannot be read in the
# requested format (invalid JSON/XML/YAML, missing XML root, excessive XML
# nesting depth).
class FormatError < StandardError
end
383
+ end
384
+ end
385
+ end