odin-foundation 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/odin/diff/differ.rb +115 -0
- data/lib/odin/diff/patcher.rb +64 -0
- data/lib/odin/export.rb +330 -0
- data/lib/odin/parsing/parser.rb +1193 -0
- data/lib/odin/parsing/token.rb +26 -0
- data/lib/odin/parsing/token_type.rb +40 -0
- data/lib/odin/parsing/tokenizer.rb +825 -0
- data/lib/odin/parsing/value_parser.rb +322 -0
- data/lib/odin/resolver/import_resolver.rb +137 -0
- data/lib/odin/serialization/canonicalize.rb +112 -0
- data/lib/odin/serialization/stringify.rb +582 -0
- data/lib/odin/transform/format_exporters.rb +819 -0
- data/lib/odin/transform/source_parsers.rb +385 -0
- data/lib/odin/transform/transform_engine.rb +2837 -0
- data/lib/odin/transform/transform_parser.rb +979 -0
- data/lib/odin/transform/transform_types.rb +278 -0
- data/lib/odin/transform/verb_context.rb +87 -0
- data/lib/odin/transform/verbs/aggregation_verbs.rb +106 -0
- data/lib/odin/transform/verbs/collection_verbs.rb +640 -0
- data/lib/odin/transform/verbs/datetime_verbs.rb +602 -0
- data/lib/odin/transform/verbs/financial_verbs.rb +356 -0
- data/lib/odin/transform/verbs/geo_verbs.rb +125 -0
- data/lib/odin/transform/verbs/numeric_verbs.rb +434 -0
- data/lib/odin/transform/verbs/object_verbs.rb +123 -0
- data/lib/odin/types/array_item.rb +42 -0
- data/lib/odin/types/diff.rb +89 -0
- data/lib/odin/types/directive.rb +28 -0
- data/lib/odin/types/document.rb +92 -0
- data/lib/odin/types/document_builder.rb +67 -0
- data/lib/odin/types/dyn_value.rb +270 -0
- data/lib/odin/types/errors.rb +149 -0
- data/lib/odin/types/modifiers.rb +45 -0
- data/lib/odin/types/ordered_map.rb +79 -0
- data/lib/odin/types/schema.rb +262 -0
- data/lib/odin/types/value_type.rb +28 -0
- data/lib/odin/types/values.rb +618 -0
- data/lib/odin/types.rb +12 -0
- data/lib/odin/utils/format_utils.rb +186 -0
- data/lib/odin/utils/path_utils.rb +25 -0
- data/lib/odin/utils/security_limits.rb +17 -0
- data/lib/odin/validation/format_validators.rb +238 -0
- data/lib/odin/validation/redos_protection.rb +102 -0
- data/lib/odin/validation/schema_parser.rb +813 -0
- data/lib/odin/validation/schema_serializer.rb +262 -0
- data/lib/odin/validation/validator.rb +1061 -0
- data/lib/odin/version.rb +5 -0
- data/lib/odin.rb +90 -0
- metadata +160 -0
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "csv"
|
|
5
|
+
require "rexml/document"
|
|
6
|
+
require "yaml"
|
|
7
|
+
|
|
8
|
+
module Odin
|
|
9
|
+
module Transform
|
|
10
|
+
module SourceParsers
|
|
11
|
+
# Parse JSON string into DynValue
|
|
12
|
+
# Decodes JSON text and hands the decoded value to
# Types::DynValue.from_json_value.
#
# @param input [String, nil] raw JSON text
# @return [Types::DynValue] whatever Types::DynValue.from_json_value builds
# @raise [ArgumentError] if +input+ is nil or contains only whitespace
# @raise [FormatError] if JSON.parse rejects the text
def self.parse_json(input)
  if input.nil? || input.strip.empty?
    raise ArgumentError, "Input cannot be nil or empty"
  end

  begin
    Types::DynValue.from_json_value(JSON.parse(input))
  rescue JSON::ParserError => error
    # Surface parser failures under the module's own error class.
    raise FormatError, "Invalid JSON: #{error.message}"
  end
end
|
|
20
|
+
|
|
21
|
+
# Parse CSV string into DynValue (array of objects)
|
|
22
|
+
# Parses delimiter-separated text into a DynValue array.
#
# With +headers+ enabled the first row supplies the keys and every following
# row becomes an object: cells missing from a short row are read as "" and
# cells beyond the header width are dropped. With +headers+ disabled every
# row becomes an array of cells. Each cell is passed through infer_type.
#
# @param input [String, nil] raw CSV text; its bytes are interpreted as UTF-8
# @param headers [Boolean] treat the first row as column names
# @param delimiter [String] field separator; parse_csv_rows compares it with
#   one character at a time, so it has to be a single character
# @return [Types::DynValue] an array value; empty for nil/blank input and
#   when +headers+ is set but no data row follows the header
def self.parse_csv(input, headers: true, delimiter: ",")
  return Types::DynValue.of_array([]) if input.nil?

  # Scrub invalid bytes first, then drop a leading byte-order mark.
  # The earlier binary-flagged regex (/\A\xEF\xBB\xBF/n) could not be matched
  # against UTF-8 text containing non-ASCII characters (Ruby raises
  # Encoding::CompatibilityError), and a BOM is itself non-ASCII.
  cleaned = input.encode("UTF-8", "UTF-8", invalid: :replace, undef: :replace)
  cleaned = cleaned.delete_prefix("\uFEFF")
  return Types::DynValue.of_array([]) if cleaned.strip.empty?

  rows = parse_csv_rows(cleaned, delimiter)
  return Types::DynValue.of_array([]) if rows.empty?

  unless headers
    untyped = rows.map do |row|
      Types::DynValue.of_array(row.map { |cell| infer_type(cell) })
    end
    return Types::DynValue.of_array(untyped)
  end

  # A header-only document leaves data_rows empty and yields an empty array.
  header_row, *data_rows = rows
  items = data_rows.map do |row|
    fields = {}
    header_row.each_with_index do |col, i|
      fields[col] = infer_type(i < row.size ? row[i] : "")
    end
    Types::DynValue.of_object(fields)
  end
  Types::DynValue.of_array(items)
end
|
|
54
|
+
|
|
55
|
+
# Parse XML string into DynValue
|
|
56
|
+
# Parses an XML document into a DynValue object with a single key: the
# root element's qualified name, mapped to the converted root element.
#
# @param input [String, nil] raw XML text
# @return [Types::DynValue] object value { root_name => converted root }
# @raise [ArgumentError] if +input+ is nil or contains only whitespace
# @raise [FormatError] if REXML rejects the document or it has no root element
def self.parse_xml(input)
  raise ArgumentError, "Input cannot be nil or empty" if input.nil? || input.strip.empty?

  # REXML does not distinguish <tag/> from <tag></tag>, so every self-closing
  # tag receives a synthetic __odin_sc="1" attribute that parse_xml_element
  # looks for. Comments, CDATA sections and processing instructions are
  # matched first and passed through unchanged; otherwise literal text such
  # as "<br/>" inside a CDATA section would be rewritten.
  # NOTE(review): an attribute value containing ">" or "/>" can still confuse
  # this textual pre-pass (pre-existing limitation).
  marker_pattern = %r{<!--.*?-->|<!\[CDATA\[.*?\]\]>|<\?.*?\?>|<([a-zA-Z_][\w:.-]*)(\s[^>]*)?/>}m
  marked = input.gsub(marker_pattern) do |chunk|
    tag_name = Regexp.last_match(1)
    next chunk unless tag_name # comment / CDATA / PI: leave untouched

    "<#{tag_name}#{Regexp.last_match(2)} __odin_sc=\"1\"/>"
  end

  doc = REXML::Document.new(marked)
  root = doc.root
  raise FormatError, "No root element found" unless root

  root_name = qualified_name(root)
  content = parse_xml_element(root, 0)
  Types::DynValue.of_object({ root_name => content })
rescue REXML::ParseException => e
  raise FormatError, "Invalid XML: #{e.message}"
end
|
|
77
|
+
|
|
78
|
+
# Parse fixed-width text into DynValue
|
|
79
|
+
# columns: [{name:, pos:, len:, trim: true}]
|
|
80
|
+
# Slices each non-empty line of fixed-width text into named string fields.
#
# Each column spec is a Hash read through the symbol keys :name, :pos
# (start offset, defaults to 0), :len (width, defaults to 0) and :trim
# (defaults to true). A column that starts past the end of a line yields "".
#
# @param input [String, nil] raw text, one record per line
# @param columns [Array<Hash>] column specifications
# @return [Types::DynValue] an empty array for nil/blank input, a single
#   object when exactly one record was read, otherwise an array of objects
# @raise [ArgumentError] if +columns+ is nil or empty (checked only when the
#   input is not blank)
def self.parse_fixed_width(input, columns:)
  return Types::DynValue.of_array([]) if input.nil? || input.strip.empty?
  raise ArgumentError, "Columns specification required" if columns.nil? || columns.empty?

  record_lines = input.each_line.map(&:chomp).reject(&:empty?)

  records = record_lines.map do |text|
    record = columns.each_with_object({}) do |spec, acc|
      offset = spec[:pos] || 0
      width = spec[:len] || 0
      field_name = spec[:name]
      should_trim = spec.fetch(:trim, true)

      cell = ""
      if offset < text.length
        # Clamp the slice so a short line yields a shorter cell.
        stop = [offset + width, text.length].min
        cell = text[offset...stop] || ""
      end
      cell = cell.strip if should_trim

      acc[field_name] = Types::DynValue.of_string(cell)
    end
    Types::DynValue.of_object(record)
  end

  # A lone record is returned unwrapped rather than as a one-element array.
  records.size == 1 ? records[0] : Types::DynValue.of_array(records)
end
|
|
107
|
+
|
|
108
|
+
# Parse flat key=value pairs into DynValue
|
|
109
|
+
# Parses "key = value" lines into a DynValue object.
#
# Blank lines, lines whose first non-blank character is "#" or ";", lines
# without "=" and lines with an empty key are skipped. The text before the
# first "=" is the (possibly dotted/bracketed) path handed to set_nested;
# the text after it is converted by parse_flat_value.
#
# @param input [String, nil] raw key/value text
# @return [Types::DynValue] object value (empty for nil/blank input)
def self.parse_flat_kvp(input)
  return Types::DynValue.of_object({}) if input.nil? || input.strip.empty?

  result = {}
  input.each_line do |raw_line|
    # strip also removes the trailing "\n" / "\r\n" of the line.
    line = raw_line.strip
    next if line.empty? || line.start_with?("#", ";")

    key, separator, raw_value = line.partition("=")
    next if separator.empty?

    key = key.strip
    # "=value" has no key; passing "" on would store the value under nil.
    next if key.empty?

    value = parse_flat_value(raw_value.strip)
    set_nested(result, key, value)
  end

  Types::DynValue.of_object(result.transform_values { |v| wrap_nested(v) })
end
|
|
130
|
+
|
|
131
|
+
# Parse YAML string into DynValue
|
|
132
|
+
# Loads YAML with YAML.safe_load and hands the result to
# Types::DynValue.from_ruby.
#
# Date and Time are permitted in addition to the safe_load defaults;
# BigDecimal is permitted as well when it is loaded. Aliases stay disabled
# (the safe_load default).
#
# @param input [String, nil] raw YAML text
# @return [Types::DynValue] empty object for nil/blank input, otherwise
#   whatever Types::DynValue.from_ruby builds from the loaded data
# @raise [FormatError] for syntax errors, disallowed classes/tags and aliases
def self.parse_yaml(input)
  return Types::DynValue.of_object({}) if input.nil? || input.strip.empty?

  # NOTE(review): this file has no require for "date" or "bigdecimal".
  # Date is presumably loaded by a dependency -- confirm. BigDecimal is only
  # permitted when defined so an unloaded constant cannot raise NameError.
  permitted = [Date, Time]
  permitted << BigDecimal if defined?(BigDecimal)

  parsed = YAML.safe_load(input, permitted_classes: permitted)
  Types::DynValue.from_ruby(parsed)
rescue Psych::SyntaxError, Psych::DisallowedClass, Psych::BadAlias => e
  # DisallowedClass / BadAlias are rejected input too, not internal failures.
  raise FormatError, "Invalid YAML: #{e.message}"
end
|
|
140
|
+
|
|
141
|
+
# ── Private Helpers ──
|
|
142
|
+
|
|
143
|
+
# CSV row parser handling quoted fields, embedded commas, embedded newlines
|
|
144
|
+
# Splits CSV text into rows of raw string cells.
#
# Handles double-quoted fields (with "" as an escaped quote), delimiters and
# line breaks inside quotes, and LF, CRLF or bare CR row terminators. Blank
# lines that precede the first row are dropped for every terminator style;
# previously only CR/CRLF input dropped them, so LF input could end up with
# [""] as its first (header) row. Later blank lines are kept as [""].
#
# @param input [String] CSV text
# @param delimiter [String] field separator; it is compared with one
#   character at a time, so a multi-character delimiter never matches
# @return [Array<Array<String>>] rows of cells, in input order
def self.parse_csv_rows(input, delimiter)
  rows = []
  row = []
  field = +""
  in_quotes = false
  chars = input.chars
  i = 0

  # Closes the current row and resets the accumulators.
  finish_row = lambda do
    row << field
    leading_blank = rows.empty? && row.size <= 1 && row.all?(&:empty?)
    rows << row unless leading_blank
    row = []
    field = +""
  end

  while i < chars.length
    ch = chars[i]

    if in_quotes
      if ch != '"'
        field << ch
      elsif chars[i + 1] == '"'
        # Doubled quote inside a quoted field is a literal quote.
        field << '"'
        i += 1
      else
        in_quotes = false
      end
    elsif ch == '"'
      in_quotes = true
    elsif ch == delimiter
      row << field
      field = +""
    elsif ch == "\r"
      i += 1 if chars[i + 1] == "\n" # treat CRLF as one terminator
      finish_row.call
    elsif ch == "\n"
      finish_row.call
    else
      field << ch
    end

    i += 1
  end

  # Flush a final row that was not terminated by a line break.
  unless field.empty? && row.empty?
    row << field
    rows << row
  end

  rows
end
|
|
205
|
+
|
|
206
|
+
# Type inference for CSV/flat values
|
|
207
|
+
# Maps a raw text cell to a typed DynValue.
#
# nil and "null" become null, "true"/"false" become booleans, an optional
# minus sign followed by digits becomes an integer, a decimal and/or
# exponent form becomes a float, and anything else stays a string.
#
# @param val [String, nil] raw cell text
# @return [Types::DynValue]
def self.infer_type(val)
  return Types::DynValue.of_null if val.nil? || val == "null"

  booleans = { "true" => true, "false" => false }
  return Types::DynValue.of_bool(booleans[val]) if booleans.key?(val)

  # The integer pattern is tested first so "42" is not read as a float.
  return Types::DynValue.of_integer(val.to_i) if val.match?(/\A-?\d+\z/)
  return Types::DynValue.of_float(val.to_f) if val.match?(/\A-?\d+(\.\d+)?([eE][+-]?\d+)?\z/)

  Types::DynValue.of_string(val)
end
|
|
220
|
+
|
|
221
|
+
# Parse flat value (quoted string, null, bool, number, or plain string)
|
|
222
|
+
# Converts the right-hand side of a flat key=value line.
#
# An empty string or "~" is null; text wrapped in double quotes is returned
# as a string with the outer quotes removed (no unescaping is performed);
# everything else is delegated to infer_type.
#
# @param val_str [String] value text
# @return [Types::DynValue]
def self.parse_flat_value(val_str)
  if val_str.empty? || val_str == "~"
    Types::DynValue.of_null
  elsif val_str.length >= 2 && val_str.start_with?('"') && val_str.end_with?('"')
    Types::DynValue.of_string(val_str[1..-2])
  else
    infer_type(val_str)
  end
end
|
|
231
|
+
|
|
232
|
+
# Set nested value using dotted/bracket path
|
|
233
|
+
# Stores +value+ inside +root+ at a dotted/bracketed path such as "a.b[0].c",
# creating intermediate containers on the way: an Array when the following
# segment is an Integer index, a Hash otherwise.
#
# A path that yields no segments (for example "") is ignored; previously the
# value was stored under a nil key.
#
# NOTE(review): array indexes come straight from the path, and Ruby pads an
# Array with nils up to the assigned index, so a very large index in the
# input allocates a very large array -- consider bounding it.
#
# @param root [Hash] container that is mutated in place
# @param path [String] path expression understood by parse_path_segments
# @param value [Object] value to store at the leaf
# @return [Object, nil] the stored value, or nil when the path had no segments
def self.set_nested(root, path, value)
  segments = parse_path_segments(path)
  return if segments.empty?

  *parents, leaf = segments
  container = parents.each_with_index.reduce(root) do |node, (seg, idx)|
    # Integer and String segments are handled identically here; only the
    # kind of the following segment decides which container is created.
    node[seg] ||= segments[idx + 1].is_a?(Integer) ? [] : {}
  end

  container[leaf] = value
end
|
|
251
|
+
|
|
252
|
+
# Parse path into segments: "a.b[0].c" -> ["a", "b", 0, "c"]
|
|
253
|
+
# Tokenises a path expression: "a.b[0].c" -> ["a", "b", 0, "c"].
#
# Bracketed runs of digits become Integer indexes; every other run of
# characters that is not ".", "[" or "]" becomes a String key.
#
# @param path [String] dotted/bracketed path
# @return [Array<String, Integer>] segments in order (empty for "")
def self.parse_path_segments(path)
  path.scan(/([^.\[\]]+)|\[(\d+)\]/).map do |key, position|
    position ? position.to_i : key
  end
end
|
|
264
|
+
|
|
265
|
+
# Wrap nested Hash/Array into DynValue
|
|
266
|
+
# Recursively converts plain Hash/Array containers into DynValue objects and
# arrays. Values that already are DynValues, and any other object, are
# returned unchanged.
#
# @param obj [Object] value to wrap
# @return [Object] wrapped container, or +obj+ itself
def self.wrap_nested(obj)
  return obj if obj.is_a?(Types::DynValue)

  if obj.is_a?(Hash)
    Types::DynValue.of_object(obj.transform_values { |child| wrap_nested(child) })
  elsif obj.is_a?(Array)
    Types::DynValue.of_array(obj.map { |child| wrap_nested(child) })
  else
    obj
  end
end
|
|
277
|
+
|
|
278
|
+
# Parse XML element recursively
|
|
279
|
+
# Converts one REXML element (and, recursively, its children) to a DynValue.
#
# Rules, in order:
# * an "xsi:nil" / "nil" attribute equal to "true" or "1" yields null;
# * no child elements and no attributes: null when the element carries the
#   synthetic self-closing marker, otherwise its trimmed text as a string;
# * no child elements, attributes present, no text: an object of "@name"
#   attribute fields, or null when every attribute was filtered out;
# * otherwise an object holding "@name" attribute fields, the trimmed text
#   under "_text" (when non-blank) and one entry per child name. Children
#   whose qualified name repeats, or is "item", are collected into arrays.
#
# Text is the concatenation of all direct text and CDATA nodes. Using only
# the first text node (element.text) dropped content such as a CDATA section
# that follows leading whitespace or a comment.
#
# @param element [REXML::Element] element to convert; the synthetic
#   "__odin_sc" attribute is removed from it as a side effect
# @param depth [Integer] current nesting depth (0 for the root)
# @return [Types::DynValue]
# @raise [FormatError] when +depth+ is greater than 100
def self.parse_xml_element(element, depth)
  raise FormatError, "XML nesting depth exceeded (max 100)" if depth > 100

  nil_attr = element.attributes["xsi:nil"] || element.attributes["nil"]
  return Types::DynValue.of_null if nil_attr == "true" || nil_attr == "1"

  # Consume the marker so it is not reported as a real attribute below.
  is_self_closing = element.attributes["__odin_sc"] == "1"
  element.attributes.delete("__odin_sc") if is_self_closing

  children = element.elements.to_a
  text = element.texts.map(&:value).join.strip
  has_text = !text.empty?

  # Namespace declarations and nil markers are not data attributes.
  attr_fields = {}
  element.attributes.each do |name, val|
    next if name.start_with?("xmlns") || name == "xsi:nil" || name == "nil" || name == "nillable"

    attr_fields["@#{strip_ns(name)}"] = Types::DynValue.of_string(val.to_s)
  end

  if children.empty?
    if element.attributes.size.zero?
      # <tag/> becomes null; <tag></tag> and <tag>text</tag> become strings.
      return is_self_closing ? Types::DynValue.of_null : Types::DynValue.of_string(text)
    end

    unless has_text
      return attr_fields.empty? ? Types::DynValue.of_null : Types::DynValue.of_object(attr_fields)
    end
  end

  fields = attr_fields
  fields["_text"] = Types::DynValue.of_string(text) if has_text

  # Qualified names (prefix:local) keep same-named elements from different
  # prefixes apart.
  names = children.map { |child| qualified_name(child) }
  counts = Hash.new(0)
  names.each { |name| counts[name] += 1 }

  arrays = {}
  children.zip(names) do |child, name|
    child_val = parse_xml_element(child, depth + 1)

    if counts[name] > 1 || name == "item"
      (arrays[name] ||= []) << child_val
    else
      fields[name] = child_val
    end
  end

  arrays.each { |name, items| fields[name] = Types::DynValue.of_array(items) }

  Types::DynValue.of_object(fields)
end
|
|
363
|
+
|
|
364
|
+
# Get the full qualified name of an element (prefix:localName or just localName)
|
|
365
|
+
# Returns "prefix:name" when the element has a non-empty namespace prefix,
# otherwise just its name.
#
# @param element [#prefix, #name] typically a REXML::Element
# @return [String]
def self.qualified_name(element)
  prefix = element.prefix
  return element.name if prefix.nil? || prefix.empty?

  "#{prefix}:#{element.name}"
end
|
|
372
|
+
|
|
373
|
+
# Strip namespace prefix from element/attribute name
|
|
374
|
+
# Drops everything up to and including the first ":" of a name; a name
# without ":" is returned as is.
#
# @param name [String] element or attribute name, possibly prefixed
# @return [String]
def self.strip_ns(name)
  _prefix, separator, local_part = name.partition(":")
  separator.empty? ? name : local_part
end
|
|
377
|
+
|
|
378
|
+
# Keep the internal helpers out of the module's public interface.
# NOTE(review): qualified_name is not listed and therefore stays public --
# confirm that is intended.
private_class_method(*%i[
  parse_csv_rows infer_type parse_flat_value
  set_nested parse_path_segments wrap_nested
  parse_xml_element strip_ns
])
|
|
381
|
+
|
|
382
|
+
# Raised by the parsers in this module when source text is rejected
# (for example "Invalid JSON: ...", "Invalid XML: ...", "Invalid YAML: ...").
class FormatError < StandardError
end
|
|
383
|
+
end
|
|
384
|
+
end
|
|
385
|
+
end
|