kumi 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/CLAUDE.md +28 -44
  3. data/README.md +188 -108
  4. data/docs/AST.md +8 -1
  5. data/docs/FUNCTIONS.md +52 -8
  6. data/docs/compiler_design_principles.md +86 -0
  7. data/docs/features/README.md +22 -2
  8. data/docs/features/hierarchical-broadcasting.md +349 -0
  9. data/docs/features/javascript-transpiler.md +148 -0
  10. data/docs/features/performance.md +1 -3
  11. data/docs/features/s-expression-printer.md +77 -0
  12. data/docs/schema_metadata.md +7 -7
  13. data/examples/game_of_life.rb +2 -4
  14. data/lib/kumi/analyzer.rb +0 -2
  15. data/lib/kumi/compiler.rb +6 -275
  16. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +600 -42
  17. data/lib/kumi/core/analyzer/passes/input_collector.rb +4 -2
  18. data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +27 -0
  19. data/lib/kumi/core/analyzer/passes/type_checker.rb +6 -2
  20. data/lib/kumi/core/analyzer/passes/unsat_detector.rb +90 -46
  21. data/lib/kumi/core/cascade_executor_builder.rb +132 -0
  22. data/lib/kumi/core/compiler/expression_compiler.rb +146 -0
  23. data/lib/kumi/core/compiler/function_invoker.rb +55 -0
  24. data/lib/kumi/core/compiler/path_traversal_compiler.rb +158 -0
  25. data/lib/kumi/core/compiler/reference_compiler.rb +46 -0
  26. data/lib/kumi/core/compiler_base.rb +137 -0
  27. data/lib/kumi/core/explain.rb +2 -2
  28. data/lib/kumi/core/function_registry/collection_functions.rb +86 -3
  29. data/lib/kumi/core/function_registry/function_builder.rb +5 -3
  30. data/lib/kumi/core/function_registry/logical_functions.rb +171 -1
  31. data/lib/kumi/core/function_registry/stat_functions.rb +156 -0
  32. data/lib/kumi/core/function_registry.rb +32 -10
  33. data/lib/kumi/core/nested_structure_utils.rb +78 -0
  34. data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +2 -2
  35. data/lib/kumi/core/ruby_parser/input_builder.rb +61 -8
  36. data/lib/kumi/core/schema_instance.rb +4 -0
  37. data/lib/kumi/core/vectorized_function_builder.rb +88 -0
  38. data/lib/kumi/errors.rb +2 -0
  39. data/lib/kumi/js/compiler.rb +878 -0
  40. data/lib/kumi/js/function_registry.rb +333 -0
  41. data/lib/kumi/js.rb +23 -0
  42. data/lib/kumi/registry.rb +61 -1
  43. data/lib/kumi/schema.rb +1 -1
  44. data/lib/kumi/support/s_expression_printer.rb +162 -0
  45. data/lib/kumi/syntax/array_expression.rb +6 -6
  46. data/lib/kumi/syntax/call_expression.rb +4 -4
  47. data/lib/kumi/syntax/cascade_expression.rb +4 -4
  48. data/lib/kumi/syntax/case_expression.rb +4 -4
  49. data/lib/kumi/syntax/declaration_reference.rb +4 -4
  50. data/lib/kumi/syntax/hash_expression.rb +4 -4
  51. data/lib/kumi/syntax/input_declaration.rb +6 -5
  52. data/lib/kumi/syntax/input_element_reference.rb +5 -5
  53. data/lib/kumi/syntax/input_reference.rb +5 -5
  54. data/lib/kumi/syntax/literal.rb +4 -4
  55. data/lib/kumi/syntax/node.rb +34 -34
  56. data/lib/kumi/syntax/root.rb +6 -6
  57. data/lib/kumi/syntax/trait_declaration.rb +4 -4
  58. data/lib/kumi/syntax/value_declaration.rb +4 -4
  59. data/lib/kumi/version.rb +1 -1
  60. data/lib/kumi.rb +1 -1
  61. data/scripts/analyze_broadcast_methods.rb +68 -0
  62. data/scripts/analyze_cascade_methods.rb +74 -0
  63. data/scripts/check_broadcasting_coverage.rb +51 -0
  64. data/scripts/find_dead_code.rb +114 -0
  65. metadata +22 -4
  66. data/docs/features/array-broadcasting.md +0 -170
  67. data/lib/kumi/cli.rb +0 -449
  68. data/lib/kumi/core/vectorization_metadata.rb +0 -110
data/docs/features/array-broadcasting.md DELETED
@@ -1,170 +0,0 @@
1
- # Array Broadcasting
2
-
3
- Automatic vectorization of operations over array fields with element-wise computation and aggregation.
4
-
5
- ## Overview
6
-
7
- The array broadcasting system enables natural field access syntax on array inputs (`input.items.price`) that automatically applies operations element-wise across the array, with intelligent detection of map vs reduce operations.
8
-
9
- ## Core Mechanism
10
-
11
- The system uses a three-stage pipeline:
12
-
13
- 1. **Parser** - Creates InputElementReference AST nodes for nested field access
14
- 2. **BroadcastDetector** - Identifies which operations should be vectorized vs scalar
15
- 3. **Compiler** - Generates appropriate map/reduce functions based on usage context
16
-
17
- ## Basic Broadcasting
18
-
19
- ```ruby
20
- schema do
21
- input do
22
- array :line_items do
23
- float :price
24
- integer :quantity
25
- string :category
26
- end
27
- float :tax_rate, type: :float
28
- end
29
-
30
- # Element-wise computation - broadcasts over each item
31
- value :subtotals, input.line_items.price * input.line_items.quantity
32
-
33
- # Element-wise traits - applied to each item
34
- trait :is_taxable, (input.line_items.category != "digital")
35
-
36
- # Conditional logic - element-wise evaluation
37
- value :taxes, fn(:if, is_taxable, subtotals * input.tax_rate, 0.0)
38
- end
39
- ```
40
-
41
- ## Aggregation Operations
42
-
43
- Operations that consume arrays to produce scalars are automatically detected:
44
-
45
- ```ruby
46
- schema do
47
- # These aggregate the vectorized results
48
- value :total_subtotal, fn(:sum, subtotals)
49
- value :total_tax, fn(:sum, taxes)
50
- value :grand_total, total_subtotal + total_tax
51
-
52
- # Statistics over arrays
53
- value :avg_price, fn(:avg, input.line_items.price)
54
- value :max_quantity, fn(:max, input.line_items.quantity)
55
- end
56
- ```
57
-
58
- ## Field Access Nesting
59
-
60
- Supports arbitrary depth field access with path building:
61
-
62
- ```ruby
63
- schema do
64
- input do
65
- array :orders do
66
- array :items do
67
- hash :product do
68
- string :name
69
- float :base_price
70
- end
71
- integer :quantity
72
- end
73
- end
74
- end
75
-
76
- # Deep field access - automatically broadcasts over nested arrays
77
- value :all_product_names, input.orders.items.product.name
78
- value :total_values, input.orders.items.product.base_price * input.orders.items.quantity
79
- end
80
- ```
81
-
82
- ## Type Inference
83
-
84
- The type system automatically infers appropriate types for broadcasted operations:
85
-
86
- - `input.items.price` (float array) → inferred as `:float` per element
87
- - `input.items.price * input.items.quantity` → element-wise `:float` result
88
- - `fn(:sum, input.items.price)` → scalar `:float` result
89
-
90
- ## Implementation Details
91
-
92
- ### Parser Layer
93
- - **InputFieldProxy** - Handles `input.field.subfield...` with path building
94
- - **InputElementReference** - AST node representing array field access paths
95
-
96
- ### Analysis Layer
97
- - **BroadcastDetector** - Identifies vectorized vs scalar operations
98
- - **TypeInferencer** - Infers types for array element access patterns
99
-
100
- ### Compilation Layer
101
- - **Automatic Dispatch** - Maps element-wise operations to array map functions
102
- - **Reduction Detection** - Converts aggregation functions to array reduce operations
103
-
104
- ## Usage Patterns
105
-
106
- ### Element-wise Operations
107
- ```ruby
108
- # All of these broadcast element-wise
109
- value :discounted_prices, input.items.price * 0.9
110
- trait :expensive, (input.items.price > 100.0)
111
- value :categories, input.items.category
112
- ```
113
-
114
- ### Aggregation Operations
115
- ```ruby
116
- # These consume arrays to produce scalars
117
- value :item_count, fn(:size, input.items)
118
- value :total_price, fn(:sum, input.items.price)
119
- value :has_expensive, fn(:any?, expensive)
120
- ```
121
-
122
- ### Mixed Operations
123
- ```ruby
124
- # Element-wise computation followed by aggregation
125
- value :line_totals, input.items.price * input.items.quantity
126
- value :order_total, fn(:sum, line_totals)
127
- value :avg_line_total, fn(:avg, line_totals)
128
- ```
129
-
130
- ## Error Handling
131
-
132
- ### Dimension Mismatch Detection
133
-
134
- Array broadcasting operations are only valid within the same array source. Attempting to broadcast across different arrays generates detailed error messages:
135
-
136
- ```ruby
137
- schema do
138
- input do
139
- array :items do
140
- string :name
141
- end
142
- array :logs do
143
- string :user_name
144
- end
145
- end
146
-
147
- # This will generate a dimension mismatch error
148
- trait :same_name, input.items.name == input.logs.user_name
149
- end
150
-
151
- # Error:
152
- # Cannot broadcast operation across arrays from different sources: items, logs.
153
- # Problem: Multiple operands are arrays from different sources:
154
- # - Operand 1 resolves to array(string) from array 'items'
155
- # - Operand 2 resolves to array(string) from array 'logs'
156
- # Direct operations on arrays from different sources is ambiguous and not supported.
157
- # Vectorized operations can only work on fields from the same array input.
158
- ```
159
-
160
- The error messages provide:
161
- - **Quick Summary**: Identifies the conflicting array sources
162
- - **Type Information**: Shows the resolved types of each operand
163
- - **Clear Explanation**: Why the operation is ambiguous and not supported
164
-
165
- ## Performance Characteristics
166
-
167
- - **Single Pass** - Each array is traversed once per computation chain
168
- - **Lazy Evaluation** - Operations are composed into efficient pipelines
169
- - **Memory Efficient** - No intermediate array allocations for simple operations
170
- - **Type Safe** - Full compile-time type checking for array element operations
data/lib/kumi/cli.rb DELETED
@@ -1,449 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "json"
4
- require "yaml"
5
- require "optparse"
6
- require "irb"
7
-
8
- module Kumi
9
- module CLI
10
- class Application
11
- def initialize
12
- @options = {
13
- interactive: false,
14
- schema_file: nil,
15
- input_file: nil,
16
- output_format: :pretty,
17
- keys: [],
18
- explain: false
19
- }
20
- end
21
-
22
- def run(args = ARGV)
23
- parse_options(args)
24
-
25
- if @options[:interactive]
26
- start_repl
27
- elsif @options[:schema_file]
28
- execute_schema_file
29
- else
30
- show_help_and_exit
31
- end
32
- rescue StandardError => e
33
- puts "Error: #{e.message}"
34
- exit 1
35
- end
36
-
37
- private
38
-
39
- def parse_options(args)
40
- parser = OptionParser.new do |opts|
41
- opts.banner = "Usage: kumi [options]"
42
- opts.separator ""
43
- opts.separator "Options:"
44
-
45
- opts.on("-i", "--interactive", "Start interactive REPL mode") do
46
- @options[:interactive] = true
47
- end
48
-
49
- opts.on("-f", "--file FILE", "Load schema from Ruby file") do |file|
50
- @options[:schema_file] = file
51
- end
52
-
53
- opts.on("-d", "--data FILE", "Load input data from JSON/YAML file") do |file|
54
- @options[:input_file] = file
55
- end
56
-
57
- opts.on("-k", "--keys KEY1,KEY2", Array, "Extract specific keys (comma-separated)") do |keys|
58
- @options[:keys] = keys.map(&:to_sym)
59
- end
60
-
61
- opts.on("-e", "--explain KEY", "Explain how a specific key is computed") do |key|
62
- @options[:explain] = key.to_sym
63
- end
64
-
65
- opts.on("-o", "--format FORMAT", %i[pretty json yaml], "Output format: pretty, json, yaml") do |format|
66
- @options[:output_format] = format
67
- end
68
-
69
- opts.on("-h", "--help", "Show this help message") do
70
- puts opts
71
- exit
72
- end
73
- end
74
-
75
- parser.parse!(args)
76
- end
77
-
78
- def show_help_and_exit
79
- puts <<~HELP
80
- Kumi CLI - Declarative decision modeling for Ruby
81
-
82
- Usage:
83
- kumi -i # Start interactive mode
84
- kumi -f schema.rb -d data.json # Execute schema with data
85
- kumi -f schema.rb -k key1,key2 # Extract specific keys
86
- kumi -f schema.rb -e key_name # Explain computation
87
-
88
- Examples:
89
- # Interactive mode for rapid testing
90
- kumi -i
91
-
92
- # Execute schema file with JSON data
93
- kumi -f my_schema.rb -d input.json
94
-
95
- # Get specific values in JSON format
96
- kumi -f my_schema.rb -d input.yaml -k salary,bonus -o json
97
-
98
- # Debug a specific computation
99
- kumi -f my_schema.rb -d input.json -e total_compensation
100
-
101
- For more information, see: https://github.com/amuta/kumi
102
- HELP
103
- exit
104
- end
105
-
106
- def start_repl
107
- puts "🚀 Kumi Interactive REPL"
108
- puts "Type 'help' for commands, 'exit' to quit"
109
- puts
110
-
111
- repl = InteractiveREPL.new
112
- repl.start
113
- end
114
-
115
- def execute_schema_file
116
- schema_module = load_schema_file(@options[:schema_file])
117
- input_data = load_input_data(@options[:input_file])
118
-
119
- runner = schema_module.from(input_data)
120
-
121
- if @options[:explain]
122
- result = schema_module.explain(input_data, @options[:explain])
123
- puts result
124
- elsif @options[:keys].any?
125
- result = runner.slice(*@options[:keys])
126
- output_result(result)
127
- else
128
- # Show available keys if no specific keys requested
129
- puts "Schema loaded successfully!"
130
- available_bindings = schema_module.__compiled_schema__.bindings.keys
131
- puts "Available keys: #{available_bindings.join(', ')}"
132
- puts "Use -k to extract specific keys or -e to explain computations"
133
- end
134
- end
135
-
136
- def load_schema_file(file_path)
137
- raise "Schema file not found: #{file_path}" unless File.exist?(file_path)
138
-
139
- # Load the file and extract the module
140
- require_relative File.expand_path(file_path)
141
-
142
- # Find the module name from the file
143
- module_name = extract_module_name_from_file(file_path)
144
-
145
- raise "Could not find module extending Kumi::Schema in #{file_path}" unless module_name
146
-
147
- # Get the module constant
148
- schema_module = Object.const_get(module_name)
149
-
150
- raise "Module #{module_name} does not have a compiled schema" unless schema_module.__compiled_schema__
151
-
152
- schema_module
153
- end
154
-
155
- def extract_module_name_from_file(file_path)
156
- content = File.read(file_path)
157
-
158
- # Look for "module ModuleName" pattern
159
- if (match = content.match(/^\s*module\s+(\w+)/))
160
- match[1]
161
- end
162
- end
163
-
164
- def load_input_data(file_path)
165
- return {} unless file_path
166
-
167
- raise "Input file not found: #{file_path}" unless File.exist?(file_path)
168
-
169
- case File.extname(file_path).downcase
170
- when ".json"
171
- JSON.parse(File.read(file_path), symbolize_names: true)
172
- when ".yml", ".yaml"
173
- YAML.safe_load_file(file_path, symbolize_names: true)
174
- else
175
- raise "Unsupported input file format. Use .json or .yaml"
176
- end
177
- end
178
-
179
- def output_result(result)
180
- case @options[:output_format]
181
- when :json
182
- puts JSON.pretty_generate(result)
183
- when :yaml
184
- puts result.to_yaml
185
- else
186
- output_pretty(result)
187
- end
188
- end
189
-
190
- def output_pretty(result)
191
- case result
192
- when Hash
193
- result.each do |key, value|
194
- puts "#{key}: #{format_value(value)}"
195
- end
196
- when Kumi::Explain::Result
197
- puts "Explanation for: #{result.key}"
198
- puts "Value: #{format_value(result.value)}"
199
- puts
200
- puts "Computation trace:"
201
- result.trace.each do |step|
202
- puts " #{step[:operation]} -> #{format_value(step[:result])}"
203
- end
204
- else
205
- puts format_value(result)
206
- end
207
- end
208
-
209
- def format_value(value)
210
- case value
211
- when String
212
- value.inspect
213
- when Numeric
214
- value.is_a?(Float) ? value.round(2) : value
215
- when Array, Hash
216
- value.inspect
217
- else
218
- value.to_s
219
- end
220
- end
221
- end
222
-
223
- class InteractiveREPL
224
- def initialize
225
- @schema_module = nil
226
- @runner = nil
227
- @input_data = {}
228
- end
229
-
230
- def start
231
- loop do
232
- print "kumi> "
233
- input = gets&.chomp
234
- break if input.nil? || input == "exit"
235
-
236
- execute_command(input)
237
- end
238
- puts "Goodbye!"
239
- end
240
-
241
- private
242
-
243
- def execute_command(input)
244
- case input.strip
245
- when "help"
246
- show_help
247
- when /^schema\s+(.+)/
248
- load_schema_command(::Regexp.last_match(1))
249
- when /^data\s+(.+)/
250
- load_data_command(::Regexp.last_match(1))
251
- when /^set\s+(\w+)\s+(.+)/
252
- set_data_command(::Regexp.last_match(1), ::Regexp.last_match(2))
253
- when /^get\s+(.+)/
254
- get_value_command(::Regexp.last_match(1))
255
- when /^explain\s+(.+)/
256
- explain_command(::Regexp.last_match(1))
257
- when /^slice\s+(.+)/
258
- slice_command(::Regexp.last_match(1))
259
- when "keys"
260
- show_keys
261
- when "clear"
262
- clear_data
263
- when ""
264
- # ignore empty input
265
- else
266
- puts "Unknown command. Type 'help' for available commands."
267
- end
268
- rescue StandardError => e
269
- puts "Error: #{e.message}"
270
- puts e.backtrace.first if ENV["DEBUG"]
271
- end
272
-
273
- def show_help
274
- puts <<~HELP
275
- Available commands:
276
-
277
- Schema management:
278
- schema <file> Load schema from Ruby file
279
- schema { ... } Define schema inline (experimental)
280
-
281
- Data management:
282
- data <file> Load input data from JSON/YAML file
283
- set <key> <value> Set individual input value
284
- clear Clear all input data
285
-
286
- Evaluation:
287
- get <key> Get computed value for key
288
- explain <key> Show detailed computation trace
289
- slice <key1,key2> Get multiple values
290
- keys Show available keys
291
-
292
- General:
293
- help Show this help
294
- exit Exit REPL
295
-
296
- Examples:
297
- schema examples/tax_2024.rb
298
- data test_input.json
299
- get total_tax
300
- explain effective_rate
301
- slice income,deductions,total_tax
302
- HELP
303
- end
304
-
305
- def load_schema_command(file_path)
306
- file_path = file_path.strip.gsub(/^["']|["']$/, "") # Remove quotes
307
-
308
- unless File.exist?(file_path)
309
- puts "Schema file not found: #{file_path}"
310
- return
311
- end
312
-
313
- @schema_module = Module.new
314
- @schema_module.extend(Kumi::Schema)
315
-
316
- schema_content = File.read(file_path)
317
- @schema_module.module_eval(schema_content, file_path)
318
-
319
- puts "✅ Schema loaded from #{file_path}"
320
- refresh_runner
321
- rescue StandardError => e
322
- puts "❌ Failed to load schema: #{e.message}"
323
- end
324
-
325
- def load_data_command(file_path)
326
- file_path = file_path.strip.gsub(/^["']|["']$/, "") # Remove quotes
327
-
328
- unless File.exist?(file_path)
329
- puts "Data file not found: #{file_path}"
330
- return
331
- end
332
-
333
- case File.extname(file_path).downcase
334
- when ".json"
335
- @input_data = JSON.parse(File.read(file_path), symbolize_names: true)
336
- when ".yml", ".yaml"
337
- @input_data = YAML.safe_load_file(file_path, symbolize_names: true)
338
- else
339
- puts "Unsupported file format. Use .json or .yaml"
340
- return
341
- end
342
-
343
- puts "✅ Data loaded from #{file_path}"
344
- puts "Keys: #{@input_data.keys.join(', ')}"
345
- refresh_runner
346
- rescue StandardError => e
347
- puts "❌ Failed to load data: #{e.message}"
348
- end
349
-
350
- def set_data_command(key, value)
351
- # Try to parse value as JSON first, then as literal
352
- parsed_value = begin
353
- JSON.parse(value)
354
- rescue JSON::ParserError
355
- # If not valid JSON, treat as string unless it looks like a number/boolean
356
- case value
357
- when /^\d+$/ then value.to_i
358
- when /^\d+\.\d+$/ then value.to_f
359
- when "true" then true
360
- when "false" then false
361
- else value
362
- end
363
- end
364
-
365
- @input_data[key.to_sym] = parsed_value
366
- puts "✅ Set #{key} = #{parsed_value.inspect}"
367
- refresh_runner
368
- end
369
-
370
- def get_value_command(key)
371
- ensure_runner_ready
372
-
373
- key_sym = key.strip.to_sym
374
- result = @runner[key_sym]
375
- puts "#{key_sym}: #{format_value(result)}"
376
- rescue StandardError => e
377
- puts "❌ Error getting #{key}: #{e.message}"
378
- end
379
-
380
- def explain_command(key)
381
- ensure_runner_ready
382
-
383
- key_sym = key.strip.to_sym
384
- puts @schema_module.explain(@input_data, key_sym)
385
- rescue StandardError => e
386
- puts "❌ Error explaining #{key}: #{e.message}"
387
- end
388
-
389
- def slice_command(keys_str)
390
- ensure_runner_ready
391
-
392
- keys = keys_str.split(",").map { |k| k.strip.to_sym }
393
- result = @runner.slice(*keys)
394
-
395
- result.each do |key, value|
396
- puts "#{key}: #{format_value(value)}"
397
- end
398
- rescue StandardError => e
399
- puts "❌ Error getting slice: #{e.message}"
400
- end
401
-
402
- def show_keys
403
- if @schema_module
404
- available_bindings = @schema_module.__compiled_schema__.bindings.keys
405
- puts "Available keys: #{available_bindings.join(', ')}"
406
- else
407
- puts "No schema loaded. Use 'schema <file>' to load a schema."
408
- end
409
- end
410
-
411
- def clear_data
412
- @input_data = {}
413
- @runner = nil
414
- puts "✅ Input data cleared"
415
- end
416
-
417
- def ensure_runner_ready
418
- raise "No schema loaded. Use 'schema <file>' to load a schema." unless @schema_module
419
-
420
- return if @runner
421
-
422
- raise "No runner available. Load data with 'data <file>' or set values with 'set <key> <value>'"
423
- end
424
-
425
- def refresh_runner
426
- return unless @schema_module
427
-
428
- @runner = @schema_module.from(@input_data)
429
- puts "✅ Runner refreshed with current data"
430
- rescue StandardError => e
431
- puts "⚠️ Runner refresh failed: #{e.message}"
432
- @runner = nil
433
- end
434
-
435
- def format_value(value)
436
- case value
437
- when String
438
- value.inspect
439
- when Numeric
440
- value.is_a?(Float) ? value.round(2) : value
441
- when Array, Hash
442
- value.inspect
443
- else
444
- value.to_s
445
- end
446
- end
447
- end
448
- end
449
- end