prompt_objects 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +80 -0
- data/Gemfile.lock +1 -1
- data/README.md +2 -2
- data/exe/prompt_objects +548 -1
- data/frontend/src/App.tsx +11 -3
- data/frontend/src/components/ContextMenu.tsx +67 -0
- data/frontend/src/components/MessageBus.tsx +4 -3
- data/frontend/src/components/ModelSelector.tsx +5 -1
- data/frontend/src/components/ThreadsSidebar.tsx +46 -2
- data/frontend/src/components/UsagePanel.tsx +105 -0
- data/frontend/src/hooks/useWebSocket.ts +53 -0
- data/frontend/src/store/index.ts +10 -0
- data/frontend/src/types/index.ts +4 -1
- data/lib/prompt_objects/cli.rb +1 -0
- data/lib/prompt_objects/connectors/mcp.rb +1 -0
- data/lib/prompt_objects/environment.rb +24 -1
- data/lib/prompt_objects/llm/anthropic_adapter.rb +15 -1
- data/lib/prompt_objects/llm/factory.rb +93 -6
- data/lib/prompt_objects/llm/gemini_adapter.rb +13 -1
- data/lib/prompt_objects/llm/openai_adapter.rb +21 -4
- data/lib/prompt_objects/llm/pricing.rb +49 -0
- data/lib/prompt_objects/llm/response.rb +3 -2
- data/lib/prompt_objects/mcp/server.rb +1 -0
- data/lib/prompt_objects/message_bus.rb +27 -8
- data/lib/prompt_objects/prompt_object.rb +6 -4
- data/lib/prompt_objects/server/api/routes.rb +186 -29
- data/lib/prompt_objects/server/public/assets/index-Bkme6COu.css +1 -0
- data/lib/prompt_objects/server/public/assets/index-CQ7lVDF_.js +77 -0
- data/lib/prompt_objects/server/public/index.html +2 -2
- data/lib/prompt_objects/server/websocket_handler.rb +93 -9
- data/lib/prompt_objects/server.rb +54 -0
- data/lib/prompt_objects/session/store.rb +399 -4
- data/lib/prompt_objects.rb +1 -0
- data/prompt_objects.gemspec +1 -1
- data/templates/arc-agi-1/manifest.yml +22 -0
- data/templates/arc-agi-1/objects/data_manager.md +42 -0
- data/templates/arc-agi-1/objects/observer.md +100 -0
- data/templates/arc-agi-1/objects/solver.md +118 -0
- data/templates/arc-agi-1/objects/verifier.md +79 -0
- data/templates/arc-agi-1/primitives/check_arc_data.rb +53 -0
- data/templates/arc-agi-1/primitives/find_objects.rb +72 -0
- data/templates/arc-agi-1/primitives/grid_diff.rb +70 -0
- data/templates/arc-agi-1/primitives/grid_info.rb +42 -0
- data/templates/arc-agi-1/primitives/grid_transform.rb +50 -0
- data/templates/arc-agi-1/primitives/load_arc_task.rb +68 -0
- data/templates/arc-agi-1/primitives/render_grid.rb +78 -0
- data/templates/arc-agi-1/primitives/test_solution.rb +131 -0
- data/tools/thread-explorer.html +1043 -0
- metadata +21 -3
- data/lib/prompt_objects/server/public/assets/index-CeNJvqLG.js +0 -77
- data/lib/prompt_objects/server/public/assets/index-Vx4-uMOU.css +0 -1
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptObjects
|
|
4
|
+
module Primitives
|
|
5
|
+
class TestSolution < Primitive
|
|
6
|
+
# Name string for this primitive.
#
# @return [String] always "test_solution"
def name
  "test_solution"
end
|
|
9
|
+
|
|
10
|
+
# Human-readable summary of the two supported modes.
#
# The wording mirrors what #receive enforces: 'primitive_name' goes with
# 'train', and 'grid' goes with 'expected'. Direct comparison never reads
# 'train', so the text must not promise a comparison against a training pair.
#
# @return [String]
def description
  "Test a solution against ARC training pairs. Provide either a primitive_name " \
    "with train pairs to run that primitive on every pair, or a grid with an " \
    "expected grid to compare the two directly."
end
|
|
13
|
+
|
|
14
|
+
# JSON-Schema-style description of the arguments accepted by #receive.
#
# Every array property declares its `items`, so the grid-of-rows shape is
# explicit. NOTE(review): some function-calling APIs reject array schemas
# without `items`; confirm against the adapters in use.
# Cells are declared as integers — assumes ARC grids hold integer colour
# codes; TODO confirm no caller sends other cell types.
#
# @return [Hash] schema with :type, :properties and :required keys
def parameters
  # Builds a fresh hash on each call so no two properties share nested objects.
  grid_schema = lambda do |text|
    {
      type: "array",
      description: text,
      items: { type: "array", items: { type: "integer" } }
    }
  end

  {
    type: "object",
    properties: {
      primitive_name: {
        type: "string",
        description: "Name of a primitive that accepts {grid: [[...]]} and returns a transformed grid"
      },
      grid: grid_schema.call("A grid to compare directly against expected output (for quick checks)"),
      expected: grid_schema.call("Expected output grid (used with 'grid' parameter)"),
      train: {
        type: "array",
        description: "Training pairs array (used with 'primitive_name'). Each element has 'input' and 'output' grids.",
        items: {
          type: "object",
          properties: {
            input: grid_schema.call("Input grid"),
            output: grid_schema.call("Expected output grid")
          }
        }
      }
    },
    required: []
  }
end
|
|
38
|
+
|
|
39
|
+
# Entry point: picks the test mode from the keys present in +message+.
#
# A 'grid' key (symbol or string) selects direct comparison and takes
# precedence; otherwise 'primitive_name' selects the per-pair run. With
# neither key present, an error string is returned.
#
# @param message [Hash] arguments keyed by symbol or string
# @param context [Object] passed through unchanged to the selected mode
# @return [String] result or error text
def receive(message, context:)
  grid = message[:grid] || message["grid"]
  return test_direct(grid, message, context) if grid

  target = message[:primitive_name] || message["primitive_name"]
  return test_primitive(target, message, context) if target

  "Error: Provide either 'primitive_name' with 'train', or 'grid' with 'expected'"
end
|
|
51
|
+
|
|
52
|
+
private
|
|
53
|
+
|
|
54
|
+
# Compares a caller-supplied grid against the 'expected' grid in +message+.
#
# @param actual [Object] the grid under test
# @param message [Hash] must carry 'expected' (symbol or string key)
# @param _context [Object] unused
# @return [String] "PASS: ...", "FAIL: <diff>", or an error when 'expected' is absent
def test_direct(actual, message, _context)
  target = message[:expected] || message["expected"]
  return "Error: 'expected' grid is required for direct comparison" unless target
  return "PASS: Grid matches expected output exactly." if actual == target

  "FAIL: #{compute_diff(actual, target)}"
end
|
|
65
|
+
|
|
66
|
+
# Runs the named primitive on every training pair and reports per-pair results.
#
# The primitive is looked up via context.env.registry.get and called with
# { grid: input }. A String result is parsed as JSON before comparison.
# Any StandardError raised while handling one pair is reported on that pair's
# line, so one bad pair never aborts the whole run.
#
# @param prim_name [String] name passed to the registry lookup
# @param message [Hash] must carry 'train' (symbol or string key) as an Array
# @param context [Object] must respond to env.registry.get(name)
# @return [String] "<passed>/<total> passed" followed by one line per pair,
#   or an error string when 'train' is not an Array or the primitive is unknown
def test_primitive(prim_name, message, context)
  train = message[:train] || message["train"]
  # Covers both a missing value and a non-array one (e.g. a Hash or String).
  return "Error: 'train' array is required with primitive_name" unless train.is_a?(Array)

  primitive = context.env.registry.get(prim_name)
  return "Error: Primitive '#{prim_name}' not found" unless primitive

  passed = 0
  outcomes = train.each_with_index.map do |pair, i|
    # A malformed pair is reported in place and does not raise out of the loop.
    next "Pair #{i}: ERROR - expected a hash with 'input' and 'output' grids" unless pair.is_a?(Hash)

    begin
      input = pair["input"] || pair[:input]
      expected = pair["output"] || pair[:output]

      actual = primitive.receive({ grid: input }, context: context)
      actual = JSON.parse(actual) if actual.is_a?(String)

      if actual == expected
        passed += 1
        "Pair #{i}: PASS"
      else
        "Pair #{i}: FAIL - #{compute_diff(actual, expected)}"
      end
    rescue => e
      "Pair #{i}: ERROR - #{e.message}"
    end
  end

  "#{passed}/#{train.length} passed\n#{outcomes.join("\n")}"
end
|
|
98
|
+
|
|
99
|
+
# Describes how +actual+ differs from +expected+ as human-readable text.
#
# Both arguments are validated as arrays of arrays before any cell access, so
# a flat list, a nil row, or a non-array expected value yields a message
# rather than an exception. Overall dimensions are taken from row count and
# first-row width. Each row is then compared across the wider of the two rows,
# so extra or missing cells in ragged rows are counted as wrong.
#
# @param actual [Object] grid produced by the solution
# @param expected [Object] reference grid
# @return [String] a shape/dimension message, or "<n> cells wrong" followed by
#   up to ten "(row,col): expected X, got Y" lines and a "... and N more" tail
def compute_diff(actual, expected)
  return "Output is not a grid (got #{actual.class})" unless actual.is_a?(Array)
  return "Expected output is not a grid (got #{expected.class})" unless expected.is_a?(Array)

  bad_row = actual.index { |row| !row.is_a?(Array) }
  return "Output is not a grid (row #{bad_row} is #{actual[bad_row].class})" if bad_row

  bad_row = expected.index { |row| !row.is_a?(Array) }
  return "Expected output is not a grid (row #{bad_row} is #{expected[bad_row].class})" if bad_row

  actual_rows = actual.length
  actual_cols = actual.first&.length || 0
  exp_rows = expected.length
  exp_cols = expected.first&.length || 0

  if actual_rows != exp_rows || actual_cols != exp_cols
    return "Dimension mismatch: expected #{exp_rows}x#{exp_cols}, got #{actual_rows}x#{actual_cols}"
  end

  max_details = 10
  # Marks a cell that lies beyond the end of a short row in a ragged grid.
  show = ->(row, col) { col < row.length ? row[col] : "(missing)" }

  wrong = 0
  details = []
  expected.each_with_index do |exp_row, r|
    act_row = actual[r]
    # Walk the wider row so cells past the first row's width are not skipped.
    [exp_row.length, act_row.length].max.times do |c|
      next if c < exp_row.length && c < act_row.length && act_row[c] == exp_row[c]

      wrong += 1
      next unless details.length < max_details

      details << "(#{r},#{c}): expected #{show.call(exp_row, c)}, got #{show.call(act_row, c)}"
    end
  end

  msg = "#{wrong} cells wrong"
  msg += "\n " + details.join("\n ") unless details.empty?
  msg += "\n ... and #{wrong - max_details} more" if wrong > max_details
  msg
end
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
end
|