prompt_objects 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +80 -0
- data/Gemfile.lock +1 -1
- data/README.md +2 -2
- data/exe/prompt_objects +548 -1
- data/frontend/src/App.tsx +11 -3
- data/frontend/src/components/ContextMenu.tsx +67 -0
- data/frontend/src/components/MessageBus.tsx +4 -3
- data/frontend/src/components/ModelSelector.tsx +5 -1
- data/frontend/src/components/ThreadsSidebar.tsx +46 -2
- data/frontend/src/components/UsagePanel.tsx +105 -0
- data/frontend/src/hooks/useWebSocket.ts +53 -0
- data/frontend/src/store/index.ts +10 -0
- data/frontend/src/types/index.ts +4 -1
- data/lib/prompt_objects/cli.rb +1 -0
- data/lib/prompt_objects/connectors/mcp.rb +1 -0
- data/lib/prompt_objects/environment.rb +24 -1
- data/lib/prompt_objects/llm/anthropic_adapter.rb +15 -1
- data/lib/prompt_objects/llm/factory.rb +93 -6
- data/lib/prompt_objects/llm/gemini_adapter.rb +13 -1
- data/lib/prompt_objects/llm/openai_adapter.rb +21 -4
- data/lib/prompt_objects/llm/pricing.rb +49 -0
- data/lib/prompt_objects/llm/response.rb +3 -2
- data/lib/prompt_objects/mcp/server.rb +1 -0
- data/lib/prompt_objects/message_bus.rb +27 -8
- data/lib/prompt_objects/prompt_object.rb +6 -4
- data/lib/prompt_objects/server/api/routes.rb +186 -29
- data/lib/prompt_objects/server/public/assets/index-Bkme6COu.css +1 -0
- data/lib/prompt_objects/server/public/assets/index-CQ7lVDF_.js +77 -0
- data/lib/prompt_objects/server/public/index.html +2 -2
- data/lib/prompt_objects/server/websocket_handler.rb +93 -9
- data/lib/prompt_objects/server.rb +54 -0
- data/lib/prompt_objects/session/store.rb +399 -4
- data/lib/prompt_objects.rb +1 -0
- data/prompt_objects.gemspec +1 -1
- data/templates/arc-agi-1/manifest.yml +22 -0
- data/templates/arc-agi-1/objects/data_manager.md +42 -0
- data/templates/arc-agi-1/objects/observer.md +100 -0
- data/templates/arc-agi-1/objects/solver.md +118 -0
- data/templates/arc-agi-1/objects/verifier.md +79 -0
- data/templates/arc-agi-1/primitives/check_arc_data.rb +53 -0
- data/templates/arc-agi-1/primitives/find_objects.rb +72 -0
- data/templates/arc-agi-1/primitives/grid_diff.rb +70 -0
- data/templates/arc-agi-1/primitives/grid_info.rb +42 -0
- data/templates/arc-agi-1/primitives/grid_transform.rb +50 -0
- data/templates/arc-agi-1/primitives/load_arc_task.rb +68 -0
- data/templates/arc-agi-1/primitives/render_grid.rb +78 -0
- data/templates/arc-agi-1/primitives/test_solution.rb +131 -0
- data/tools/thread-explorer.html +1043 -0
- metadata +21 -3
- data/lib/prompt_objects/server/public/assets/index-CeNJvqLG.js +0 -77
- data/lib/prompt_objects/server/public/assets/index-Vx4-uMOU.css +0 -1
|
@@ -9,7 +9,7 @@ module PromptObjects
|
|
|
9
9
|
# SQLite-based session storage for conversation history.
|
|
10
10
|
# Each environment has its own sessions.db file (gitignored for privacy).
|
|
11
11
|
class Store
|
|
12
|
-
SCHEMA_VERSION =
|
|
12
|
+
SCHEMA_VERSION = 6
|
|
13
13
|
|
|
14
14
|
# Thread types for conversation branching
|
|
15
15
|
THREAD_TYPES = %w[root continuation delegation fork].freeze
|
|
@@ -427,7 +427,7 @@ module PromptObjects
|
|
|
427
427
|
# @param tool_results [Array, nil] Tool results data
|
|
428
428
|
# @param source [String, nil] Source interface that added this message
|
|
429
429
|
# @return [Integer] Message ID
|
|
430
|
-
def add_message(session_id:, role:, content: nil, from_po: nil, tool_calls: nil, tool_results: nil, source: nil)
|
|
430
|
+
def add_message(session_id:, role:, content: nil, from_po: nil, tool_calls: nil, tool_results: nil, usage: nil, source: nil)
|
|
431
431
|
now = Time.now.utc.iso8601
|
|
432
432
|
|
|
433
433
|
params = [
|
|
@@ -437,12 +437,13 @@ module PromptObjects
|
|
|
437
437
|
from_po,
|
|
438
438
|
tool_calls&.to_json,
|
|
439
439
|
tool_results&.to_json,
|
|
440
|
+
usage&.to_json,
|
|
440
441
|
now
|
|
441
442
|
]
|
|
442
443
|
|
|
443
444
|
@db.execute(<<~SQL, params)
|
|
444
|
-
INSERT INTO messages (session_id, role, content, from_po, tool_calls, tool_results, created_at)
|
|
445
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
445
|
+
INSERT INTO messages (session_id, role, content, from_po, tool_calls, tool_results, usage, created_at)
|
|
446
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
446
447
|
SQL
|
|
447
448
|
|
|
448
449
|
# Update session's updated_at and optionally last_message_source
|
|
@@ -499,6 +500,124 @@ module PromptObjects
|
|
|
499
500
|
row["count"]
|
|
500
501
|
end
|
|
501
502
|
|
|
503
|
+
# --- Events (Message Bus Persistence) ---
|
|
504
|
+
|
|
505
|
+
# Add an event from the message bus.
|
|
506
|
+
# @param entry [Hash] Bus entry with :timestamp, :from, :to, :message, :summary
|
|
507
|
+
# @param session_id [String, nil] Associated session ID (falls back to entry[:session_id] when nil)
|
|
508
|
+
# @return [Integer] Event ID
|
|
509
|
+
def add_event(entry, session_id: nil)
|
|
510
|
+
message_text = case entry[:message]
|
|
511
|
+
when Hash then entry[:message].to_json
|
|
512
|
+
when String then entry[:message]
|
|
513
|
+
else entry[:message].to_s
|
|
514
|
+
end
|
|
515
|
+
|
|
516
|
+
params = [
|
|
517
|
+
session_id || entry[:session_id],
|
|
518
|
+
entry[:timestamp].iso8601,
|
|
519
|
+
entry[:from],
|
|
520
|
+
entry[:to],
|
|
521
|
+
message_text,
|
|
522
|
+
entry[:summary]
|
|
523
|
+
]
|
|
524
|
+
|
|
525
|
+
@db.execute(<<~SQL, params)
|
|
526
|
+
INSERT INTO events (session_id, timestamp, from_name, to_name, message, summary)
|
|
527
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
528
|
+
SQL
|
|
529
|
+
|
|
530
|
+
@db.last_insert_row_id
|
|
531
|
+
end
|
|
532
|
+
|
|
533
|
+
# Get events for a session.
|
|
534
|
+
# @param session_id [String] Session ID
|
|
535
|
+
# @return [Array<Hash>]
|
|
536
|
+
def get_events(session_id:)
|
|
537
|
+
rows = @db.execute(<<~SQL, [session_id])
|
|
538
|
+
SELECT * FROM events WHERE session_id = ? ORDER BY id ASC
|
|
539
|
+
SQL
|
|
540
|
+
|
|
541
|
+
rows.map { |row| parse_event_row(row) }
|
|
542
|
+
end
|
|
543
|
+
|
|
544
|
+
# Get events recorded strictly after a timestamp (ordered by ascending id).
|
|
545
|
+
# @param timestamp [String] ISO8601 timestamp
|
|
546
|
+
# @param limit [Integer] Maximum events to return
|
|
547
|
+
# @return [Array<Hash>]
|
|
548
|
+
def get_events_since(timestamp, limit: 500)
|
|
549
|
+
rows = @db.execute(<<~SQL, [timestamp, limit])
|
|
550
|
+
SELECT * FROM events WHERE timestamp > ? ORDER BY id ASC LIMIT ?
|
|
551
|
+
SQL
|
|
552
|
+
|
|
553
|
+
rows.map { |row| parse_event_row(row) }
|
|
554
|
+
end
|
|
555
|
+
|
|
556
|
+
# Get events between two timestamps (inclusive of both bounds).
|
|
557
|
+
# @param start_time [String] ISO8601 start timestamp
|
|
558
|
+
# @param end_time [String] ISO8601 end timestamp
|
|
559
|
+
# @return [Array<Hash>]
|
|
560
|
+
def get_events_between(start_time, end_time)
|
|
561
|
+
rows = @db.execute(<<~SQL, [start_time, end_time])
|
|
562
|
+
SELECT * FROM events WHERE timestamp BETWEEN ? AND ? ORDER BY id ASC
|
|
563
|
+
SQL
|
|
564
|
+
|
|
565
|
+
rows.map { |row| parse_event_row(row) }
|
|
566
|
+
end
|
|
567
|
+
|
|
568
|
+
# Get recent events.
|
|
569
|
+
# @param count [Integer] Number of events
|
|
570
|
+
# @return [Array<Hash>]
|
|
571
|
+
def get_recent_events(count = 50)
|
|
572
|
+
rows = @db.execute(<<~SQL, [count])
|
|
573
|
+
SELECT * FROM events ORDER BY id DESC LIMIT ?
|
|
574
|
+
SQL
|
|
575
|
+
|
|
576
|
+
rows.map { |row| parse_event_row(row) }.reverse
|
|
577
|
+
end
|
|
578
|
+
|
|
579
|
+
# Search events by message content.
|
|
580
|
+
# @param query [String] Search text
|
|
581
|
+
# @param limit [Integer] Maximum results
|
|
582
|
+
# @return [Array<Hash>]
|
|
583
|
+
def search_events(query, limit: 100)
|
|
584
|
+
rows = @db.execute(<<~SQL, ["%#{query}%", limit])
|
|
585
|
+
SELECT * FROM events WHERE message LIKE ? ORDER BY id DESC LIMIT ?
|
|
586
|
+
SQL
|
|
587
|
+
|
|
588
|
+
rows.map { |row| parse_event_row(row) }
|
|
589
|
+
end
|
|
590
|
+
|
|
591
|
+
# Get total event count.
|
|
592
|
+
# @return [Integer]
|
|
593
|
+
def total_events
|
|
594
|
+
row = @db.get_first_row("SELECT COUNT(*) as count FROM events")
|
|
595
|
+
row["count"]
|
|
596
|
+
end
|
|
597
|
+
|
|
598
|
+
# --- Usage Aggregation ---
|
|
599
|
+
|
|
600
|
+
# Get total token usage for a session.
|
|
601
|
+
# @param session_id [String] Session ID
|
|
602
|
+
# @return [Hash] Aggregated usage data
|
|
603
|
+
def session_usage(session_id)
|
|
604
|
+
rows = @db.execute(<<~SQL, [session_id])
|
|
605
|
+
SELECT usage FROM messages WHERE session_id = ? AND usage IS NOT NULL
|
|
606
|
+
SQL
|
|
607
|
+
|
|
608
|
+
aggregate_usage_rows(rows)
|
|
609
|
+
end
|
|
610
|
+
|
|
611
|
+
# Get usage for a full thread tree (session + all descendants).
|
|
612
|
+
# @param session_id [String] Root session ID
|
|
613
|
+
# @return [Hash] Aggregated usage across the tree
|
|
614
|
+
def thread_tree_usage(session_id)
|
|
615
|
+
tree = get_thread_tree(session_id)
|
|
616
|
+
return empty_usage unless tree
|
|
617
|
+
|
|
618
|
+
collect_tree_usage(tree)
|
|
619
|
+
end
|
|
620
|
+
|
|
502
621
|
# --- Export ---
|
|
503
622
|
|
|
504
623
|
# Export a session to JSON format.
|
|
@@ -615,6 +734,38 @@ module PromptObjects
|
|
|
615
734
|
end
|
|
616
735
|
end
|
|
617
736
|
|
|
737
|
+
# Export a full thread tree as a single markdown document.
|
|
738
|
+
# Follows all child threads (delegations and any other thread types) recursively.
|
|
739
|
+
# @param session_id [String] Root session ID
|
|
740
|
+
# @return [String, nil] Markdown content
|
|
741
|
+
def export_thread_tree_markdown(session_id)
|
|
742
|
+
tree = get_thread_tree(session_id)
|
|
743
|
+
return nil unless tree
|
|
744
|
+
|
|
745
|
+
lines = []
|
|
746
|
+
lines << "# Thread Export"
|
|
747
|
+
lines << ""
|
|
748
|
+
lines << "- **Root PO**: #{tree[:session][:po_name]}"
|
|
749
|
+
lines << "- **Started**: #{tree[:session][:created_at]&.strftime('%Y-%m-%d %H:%M')}"
|
|
750
|
+
lines << "- **Exported**: #{Time.now.strftime('%Y-%m-%d %H:%M')}"
|
|
751
|
+
lines << ""
|
|
752
|
+
lines << "---"
|
|
753
|
+
lines << ""
|
|
754
|
+
|
|
755
|
+
render_thread_node(tree, lines, depth: 0)
|
|
756
|
+
lines.join("\n")
|
|
757
|
+
end
|
|
758
|
+
|
|
759
|
+
# Export a full thread tree as structured JSON.
|
|
760
|
+
# @param session_id [String] Root session ID
|
|
761
|
+
# @return [Hash, nil] Tree data
|
|
762
|
+
def export_thread_tree_json(session_id)
|
|
763
|
+
tree = get_thread_tree(session_id)
|
|
764
|
+
return nil unless tree
|
|
765
|
+
|
|
766
|
+
serialize_tree_for_export(tree)
|
|
767
|
+
end
|
|
768
|
+
|
|
618
769
|
# --- Import ---
|
|
619
770
|
|
|
620
771
|
# Import a session from JSON data.
|
|
@@ -655,6 +806,136 @@ module PromptObjects
|
|
|
655
806
|
|
|
656
807
|
private
|
|
657
808
|
|
|
809
|
+
TOOL_RESULT_TRUNCATE_LIMIT = 10_000
|
|
810
|
+
|
|
811
|
+
def render_thread_node(node, lines, depth:)
|
|
812
|
+
session = node[:session]
|
|
813
|
+
messages = get_messages(session[:id])
|
|
814
|
+
indent = " " * depth
|
|
815
|
+
po_name = session[:po_name]
|
|
816
|
+
children = node[:children] || []
|
|
817
|
+
|
|
818
|
+
# Build a lookup: child PO name (matched against tool call names) → child delegation nodes
|
|
819
|
+
# so we can render delegations inline where the tool call happened
|
|
820
|
+
delegation_children = {}
|
|
821
|
+
other_children = []
|
|
822
|
+
children.each do |child|
|
|
823
|
+
child_po = child[:session][:po_name]
|
|
824
|
+
if child[:session][:thread_type] == "delegation"
|
|
825
|
+
delegation_children[child_po] ||= []
|
|
826
|
+
delegation_children[child_po] << child
|
|
827
|
+
else
|
|
828
|
+
other_children << child
|
|
829
|
+
end
|
|
830
|
+
end
|
|
831
|
+
|
|
832
|
+
# Thread header
|
|
833
|
+
if depth == 0
|
|
834
|
+
lines << "## #{po_name}"
|
|
835
|
+
else
|
|
836
|
+
type_label = session[:thread_type] == "delegation" ? "Delegation" : (session[:thread_type] || "thread").capitalize
|
|
837
|
+
lines << ""
|
|
838
|
+
lines << "#{indent}### #{type_label} → #{po_name}"
|
|
839
|
+
lines << "#{indent}*Created by #{session[:parent_po]}*" if session[:parent_po]
|
|
840
|
+
end
|
|
841
|
+
lines << ""
|
|
842
|
+
|
|
843
|
+
# Messages
|
|
844
|
+
messages.each do |msg|
|
|
845
|
+
case msg[:role]
|
|
846
|
+
when :user
|
|
847
|
+
from = msg[:from_po] || "human"
|
|
848
|
+
lines << "#{indent}**#{from}:**"
|
|
849
|
+
lines << ""
|
|
850
|
+
lines << "#{indent}#{msg[:content]}" if msg[:content]
|
|
851
|
+
lines << ""
|
|
852
|
+
when :assistant
|
|
853
|
+
lines << "#{indent}**#{po_name}:**"
|
|
854
|
+
lines << ""
|
|
855
|
+
if msg[:content]
|
|
856
|
+
msg[:content].each_line { |l| lines << "#{indent}#{l.rstrip}" }
|
|
857
|
+
lines << ""
|
|
858
|
+
end
|
|
859
|
+
if msg[:tool_calls]
|
|
860
|
+
msg[:tool_calls].each do |tc|
|
|
861
|
+
tc_name = tc[:name] || tc["name"]
|
|
862
|
+
tc_args = tc[:arguments] || tc["arguments"] || {}
|
|
863
|
+
lines << "#{indent}<details>"
|
|
864
|
+
lines << "#{indent}<summary>Tool call: <code>#{tc_name}</code></summary>"
|
|
865
|
+
lines << ""
|
|
866
|
+
lines << "#{indent}```json"
|
|
867
|
+
JSON.pretty_generate(tc_args).each_line { |l| lines << "#{indent}#{l.rstrip}" }
|
|
868
|
+
lines << "#{indent}```"
|
|
869
|
+
lines << "#{indent}</details>"
|
|
870
|
+
lines << ""
|
|
871
|
+
|
|
872
|
+
# Render delegation sub-thread inline if this tool call targets a PO
|
|
873
|
+
if delegation_children[tc_name]
|
|
874
|
+
child_node = delegation_children[tc_name].shift
|
|
875
|
+
if child_node
|
|
876
|
+
render_thread_node(child_node, lines, depth: depth + 1)
|
|
877
|
+
end
|
|
878
|
+
end
|
|
879
|
+
end
|
|
880
|
+
end
|
|
881
|
+
when :tool
|
|
882
|
+
results = msg[:tool_results] || msg[:results] || []
|
|
883
|
+
results.each do |r|
|
|
884
|
+
r_name = r[:name] || r["name"] || "tool"
|
|
885
|
+
r_content = r[:content] || r["content"] || ""
|
|
886
|
+
lines << "#{indent}<details>"
|
|
887
|
+
lines << "#{indent}<summary>Result from <code>#{r_name}</code></summary>"
|
|
888
|
+
lines << ""
|
|
889
|
+
lines << "#{indent}```"
|
|
890
|
+
if r_content.to_s.length > TOOL_RESULT_TRUNCATE_LIMIT
|
|
891
|
+
display = r_content.to_s[0, TOOL_RESULT_TRUNCATE_LIMIT] + "\n... (truncated)"
|
|
892
|
+
else
|
|
893
|
+
display = r_content.to_s
|
|
894
|
+
end
|
|
895
|
+
display.each_line { |l| lines << "#{indent}#{l.rstrip}" }
|
|
896
|
+
lines << "#{indent}```"
|
|
897
|
+
lines << "#{indent}</details>"
|
|
898
|
+
lines << ""
|
|
899
|
+
end
|
|
900
|
+
end
|
|
901
|
+
end
|
|
902
|
+
|
|
903
|
+
# Render any remaining children that weren't matched to a tool call
|
|
904
|
+
# (e.g., fork threads, or delegations we couldn't match by name)
|
|
905
|
+
remaining = delegation_children.values.flatten + other_children
|
|
906
|
+
remaining.each do |child|
|
|
907
|
+
render_thread_node(child, lines, depth: depth + 1)
|
|
908
|
+
end
|
|
909
|
+
end
|
|
910
|
+
|
|
911
|
+
def serialize_tree_for_export(node)
|
|
912
|
+
session = node[:session]
|
|
913
|
+
messages = get_messages(session[:id])
|
|
914
|
+
|
|
915
|
+
{
|
|
916
|
+
session: {
|
|
917
|
+
id: session[:id],
|
|
918
|
+
po_name: session[:po_name],
|
|
919
|
+
name: session[:name],
|
|
920
|
+
thread_type: session[:thread_type],
|
|
921
|
+
parent_po: session[:parent_po],
|
|
922
|
+
created_at: session[:created_at]&.iso8601
|
|
923
|
+
},
|
|
924
|
+
messages: messages.map { |m|
|
|
925
|
+
{
|
|
926
|
+
role: m[:role].to_s,
|
|
927
|
+
content: m[:content],
|
|
928
|
+
from_po: m[:from_po],
|
|
929
|
+
tool_calls: m[:tool_calls],
|
|
930
|
+
tool_results: m[:tool_results],
|
|
931
|
+
usage: m[:usage],
|
|
932
|
+
created_at: m[:created_at]&.iso8601
|
|
933
|
+
}
|
|
934
|
+
},
|
|
935
|
+
children: (node[:children] || []).map { |c| serialize_tree_for_export(c) }
|
|
936
|
+
}
|
|
937
|
+
end
|
|
938
|
+
|
|
658
939
|
def setup_schema
|
|
659
940
|
# Check if we need to create/migrate
|
|
660
941
|
version = get_schema_version
|
|
@@ -708,6 +989,7 @@ module PromptObjects
|
|
|
708
989
|
from_po TEXT,
|
|
709
990
|
tool_calls TEXT,
|
|
710
991
|
tool_results TEXT,
|
|
992
|
+
usage TEXT,
|
|
711
993
|
created_at TEXT NOT NULL
|
|
712
994
|
);
|
|
713
995
|
|
|
@@ -733,6 +1015,21 @@ module PromptObjects
|
|
|
733
1015
|
INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content);
|
|
734
1016
|
INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
|
|
735
1017
|
END;
|
|
1018
|
+
|
|
1019
|
+
-- Event log for message bus persistence (v5)
|
|
1020
|
+
CREATE TABLE IF NOT EXISTS events (
|
|
1021
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1022
|
+
session_id TEXT,
|
|
1023
|
+
timestamp TEXT NOT NULL,
|
|
1024
|
+
from_name TEXT NOT NULL,
|
|
1025
|
+
to_name TEXT NOT NULL,
|
|
1026
|
+
message TEXT NOT NULL,
|
|
1027
|
+
summary TEXT,
|
|
1028
|
+
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
|
1029
|
+
);
|
|
1030
|
+
|
|
1031
|
+
CREATE INDEX IF NOT EXISTS idx_events_session ON events(session_id);
|
|
1032
|
+
CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp);
|
|
736
1033
|
SQL
|
|
737
1034
|
end
|
|
738
1035
|
|
|
@@ -786,6 +1083,90 @@ module PromptObjects
|
|
|
786
1083
|
CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id);
|
|
787
1084
|
SQL
|
|
788
1085
|
end
|
|
1086
|
+
|
|
1087
|
+
if from_version < 5
|
|
1088
|
+
# Add event log table for message bus persistence
|
|
1089
|
+
@db.execute_batch(<<~SQL)
|
|
1090
|
+
CREATE TABLE IF NOT EXISTS events (
|
|
1091
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1092
|
+
session_id TEXT,
|
|
1093
|
+
timestamp TEXT NOT NULL,
|
|
1094
|
+
from_name TEXT NOT NULL,
|
|
1095
|
+
to_name TEXT NOT NULL,
|
|
1096
|
+
message TEXT NOT NULL,
|
|
1097
|
+
summary TEXT,
|
|
1098
|
+
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
|
1099
|
+
);
|
|
1100
|
+
|
|
1101
|
+
CREATE INDEX IF NOT EXISTS idx_events_session ON events(session_id);
|
|
1102
|
+
CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp);
|
|
1103
|
+
SQL
|
|
1104
|
+
end
|
|
1105
|
+
|
|
1106
|
+
if from_version < 6
|
|
1107
|
+
# Add usage column for token tracking
|
|
1108
|
+
@db.execute("ALTER TABLE messages ADD COLUMN usage TEXT")
|
|
1109
|
+
end
|
|
1110
|
+
end
|
|
1111
|
+
|
|
1112
|
+
def empty_usage
|
|
1113
|
+
{ input_tokens: 0, output_tokens: 0, total_tokens: 0, estimated_cost_usd: 0.0, calls: 0, by_model: {} }
|
|
1114
|
+
end
|
|
1115
|
+
|
|
1116
|
+
def aggregate_usage_rows(rows)
|
|
1117
|
+
totals = empty_usage
|
|
1118
|
+
|
|
1119
|
+
rows.each do |row|
|
|
1120
|
+
usage = JSON.parse(row["usage"], symbolize_names: true)
|
|
1121
|
+
input = usage[:input_tokens] || 0
|
|
1122
|
+
output = usage[:output_tokens] || 0
|
|
1123
|
+
model = usage[:model] || "unknown"
|
|
1124
|
+
|
|
1125
|
+
totals[:input_tokens] += input
|
|
1126
|
+
totals[:output_tokens] += output
|
|
1127
|
+
totals[:total_tokens] += input + output
|
|
1128
|
+
totals[:estimated_cost_usd] += LLM::Pricing.calculate(model: model, input_tokens: input, output_tokens: output)
|
|
1129
|
+
totals[:calls] += 1
|
|
1130
|
+
|
|
1131
|
+
# Breakdown by model
|
|
1132
|
+
totals[:by_model][model] ||= { input_tokens: 0, output_tokens: 0, estimated_cost_usd: 0.0, calls: 0 }
|
|
1133
|
+
totals[:by_model][model][:input_tokens] += input
|
|
1134
|
+
totals[:by_model][model][:output_tokens] += output
|
|
1135
|
+
totals[:by_model][model][:estimated_cost_usd] += LLM::Pricing.calculate(model: model, input_tokens: input, output_tokens: output)
|
|
1136
|
+
totals[:by_model][model][:calls] += 1
|
|
1137
|
+
end
|
|
1138
|
+
|
|
1139
|
+
totals
|
|
1140
|
+
end
|
|
1141
|
+
|
|
1142
|
+
def collect_tree_usage(node)
|
|
1143
|
+
# Get usage for this node's session
|
|
1144
|
+
session_rows = @db.execute(<<~SQL, [node[:session][:id]])
|
|
1145
|
+
SELECT usage FROM messages WHERE session_id = ? AND usage IS NOT NULL
|
|
1146
|
+
SQL
|
|
1147
|
+
|
|
1148
|
+
totals = aggregate_usage_rows(session_rows)
|
|
1149
|
+
|
|
1150
|
+
# Recurse into children
|
|
1151
|
+
(node[:children] || []).each do |child|
|
|
1152
|
+
child_usage = collect_tree_usage(child)
|
|
1153
|
+
totals[:input_tokens] += child_usage[:input_tokens]
|
|
1154
|
+
totals[:output_tokens] += child_usage[:output_tokens]
|
|
1155
|
+
totals[:total_tokens] += child_usage[:total_tokens]
|
|
1156
|
+
totals[:estimated_cost_usd] += child_usage[:estimated_cost_usd]
|
|
1157
|
+
totals[:calls] += child_usage[:calls]
|
|
1158
|
+
|
|
1159
|
+
# Merge by_model
|
|
1160
|
+
child_usage[:by_model].each do |model, data|
|
|
1161
|
+
totals[:by_model][model] ||= { input_tokens: 0, output_tokens: 0, estimated_cost_usd: 0.0, calls: 0 }
|
|
1162
|
+
totals[:by_model][model][:input_tokens] += data[:input_tokens]
|
|
1163
|
+
totals[:by_model][model][:output_tokens] += data[:output_tokens]
|
|
1164
|
+
totals[:by_model][model][:estimated_cost_usd] += data[:estimated_cost_usd]
|
|
1165
|
+
totals[:by_model][model][:calls] += data[:calls]
|
|
1166
|
+
end
|
|
1167
|
+
end
|
|
1168
|
+
|
|
1169
|
+
totals
|
|
789
1170
|
end
|
|
790
1171
|
|
|
791
1172
|
def parse_session_row(row, include_count: false)
|
|
@@ -809,6 +1190,19 @@ module PromptObjects
|
|
|
809
1190
|
result
|
|
810
1191
|
end
|
|
811
1192
|
|
|
1193
|
+
def parse_event_row(row)
|
|
1194
|
+
{
|
|
1195
|
+
id: row["id"],
|
|
1196
|
+
session_id: row["session_id"],
|
|
1197
|
+
timestamp: row["timestamp"] ? Time.parse(row["timestamp"]) : nil,
|
|
1198
|
+
from: row["from_name"],
|
|
1199
|
+
to: row["to_name"],
|
|
1200
|
+
message: row["message"],
|
|
1201
|
+
summary: row["summary"],
|
|
1202
|
+
created_at: row["created_at"] ? Time.parse(row["created_at"]) : nil
|
|
1203
|
+
}
|
|
1204
|
+
end
|
|
1205
|
+
|
|
812
1206
|
def parse_message_row(row)
|
|
813
1207
|
{
|
|
814
1208
|
id: row["id"],
|
|
@@ -818,6 +1212,7 @@ module PromptObjects
|
|
|
818
1212
|
from_po: row["from_po"],
|
|
819
1213
|
tool_calls: row["tool_calls"] ? JSON.parse(row["tool_calls"], symbolize_names: true) : nil,
|
|
820
1214
|
tool_results: row["tool_results"] ? JSON.parse(row["tool_results"], symbolize_names: true) : nil,
|
|
1215
|
+
usage: row["usage"] ? JSON.parse(row["usage"], symbolize_names: true) : nil,
|
|
821
1216
|
created_at: row["created_at"] ? Time.parse(row["created_at"]) : nil
|
|
822
1217
|
}
|
|
823
1218
|
end
|
data/lib/prompt_objects.rb
CHANGED
|
@@ -25,6 +25,7 @@ require_relative "prompt_objects/llm/openai_adapter"
|
|
|
25
25
|
require_relative "prompt_objects/llm/anthropic_adapter"
|
|
26
26
|
require_relative "prompt_objects/llm/gemini_adapter"
|
|
27
27
|
require_relative "prompt_objects/llm/factory"
|
|
28
|
+
require_relative "prompt_objects/llm/pricing"
|
|
28
29
|
require_relative "prompt_objects/prompt_object"
|
|
29
30
|
|
|
30
31
|
# Environment modules (must be loaded before environment.rb, which uses them)
|
data/prompt_objects.gemspec
CHANGED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name: arc-agi-1
|
|
2
|
+
description: ARC-AGI-1 challenge solving environment with grid primitives
|
|
3
|
+
icon: "\U0001F9E9"
|
|
4
|
+
color: "#F59E0B"
|
|
5
|
+
|
|
6
|
+
objects:
|
|
7
|
+
- solver
|
|
8
|
+
- observer
|
|
9
|
+
- verifier
|
|
10
|
+
- data_manager
|
|
11
|
+
|
|
12
|
+
primitives:
|
|
13
|
+
- load_arc_task
|
|
14
|
+
- render_grid
|
|
15
|
+
- grid_diff
|
|
16
|
+
- grid_info
|
|
17
|
+
- find_objects
|
|
18
|
+
- grid_transform
|
|
19
|
+
- test_solution
|
|
20
|
+
- check_arc_data
|
|
21
|
+
|
|
22
|
+
default_po: solver
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: data_manager
|
|
3
|
+
description: Manages the ARC-AGI-1 dataset — checks availability, lists tasks, reads task files
|
|
4
|
+
capabilities:
|
|
5
|
+
- check_arc_data
|
|
6
|
+
- list_files
|
|
7
|
+
- read_file
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Data Manager
|
|
11
|
+
|
|
12
|
+
## Identity
|
|
13
|
+
|
|
14
|
+
You manage the ARC-AGI-1 dataset. You know where the data lives, can check if it's been downloaded, and help the user or other POs get set up.
|
|
15
|
+
|
|
16
|
+
## Data Location
|
|
17
|
+
|
|
18
|
+
The ARC-AGI dataset is expected at: `~/.prompt_objects/data/arc-agi-1/`
|
|
19
|
+
|
|
20
|
+
- Training tasks: `~/.prompt_objects/data/arc-agi-1/data/training/`
|
|
21
|
+
- Evaluation tasks: `~/.prompt_objects/data/arc-agi-1/data/evaluation/`
|
|
22
|
+
- Tasks are JSON files named by 8-character hex IDs (e.g., `007bbfb7.json`)
|
|
23
|
+
|
|
24
|
+
## Behavior
|
|
25
|
+
|
|
26
|
+
**When asked about the dataset:**
|
|
27
|
+
1. Use `check_arc_data` to see if the data exists
|
|
28
|
+
2. If missing, provide the git clone command and use `ask_human` to confirm before suggesting they run it
|
|
29
|
+
3. If present, report the path and number of available tasks
|
|
30
|
+
|
|
31
|
+
**When asked to list tasks:**
|
|
32
|
+
- Use `list_files` on the training/ and evaluation/ directories
|
|
33
|
+
- Report count and sample filenames
|
|
34
|
+
|
|
35
|
+
**When asked about a specific task:**
|
|
36
|
+
- Use `read_file` to load the raw JSON
|
|
37
|
+
- Report the number of training pairs and test inputs
|
|
38
|
+
- Summarize grid dimensions for each pair
|
|
39
|
+
|
|
40
|
+
**When the solver delegates data loading to you:**
|
|
41
|
+
- Check that data exists first
|
|
42
|
+
- Return the file path so the solver can use `load_arc_task` directly
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: observer
|
|
3
|
+
description: Deep grid observation specialist — produces exhaustive structured analysis of ARC grid pairs
|
|
4
|
+
capabilities:
|
|
5
|
+
- render_grid
|
|
6
|
+
- grid_info
|
|
7
|
+
- grid_diff
|
|
8
|
+
- find_objects
|
|
9
|
+
- grid_transform
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# Observer
|
|
13
|
+
|
|
14
|
+
## Identity
|
|
15
|
+
|
|
16
|
+
You are an observation specialist for ARC-AGI grid puzzles. Your job is to look at input/output grid pairs and describe *everything* you see — objects, patterns, spatial relationships, color changes, symmetry, dimensional changes. You are exhaustive and precise. You never skip details, because the detail you skip is always the one that matters.
|
|
17
|
+
|
|
18
|
+
## How You Work
|
|
19
|
+
|
|
20
|
+
When given grid pairs to analyze, you produce a structured observation report. You use your tools — don't try to analyze from descriptions alone. Render the grids, run grid_info, find the objects, diff the pairs.
|
|
21
|
+
|
|
22
|
+
## Observation Framework
|
|
23
|
+
|
|
24
|
+
For each training pair, analyze and report on ALL of these dimensions:
|
|
25
|
+
|
|
26
|
+
### 1. Dimensions
|
|
27
|
+
- Input size vs output size
|
|
28
|
+
- Are they the same? If different, what's the relationship? (multiple, subset, transposed)
|
|
29
|
+
- Does the size change relate to something in the input? (number of objects, a specific color count)
|
|
30
|
+
|
|
31
|
+
### 2. Color Census
|
|
32
|
+
- Which colors appear in input? In output?
|
|
33
|
+
- Are any colors added that weren't in the input?
|
|
34
|
+
- Are any colors removed?
|
|
35
|
+
- Do color frequencies change? How?
|
|
36
|
+
- Is there a color that appears in the output but not input (or vice versa)?
|
|
37
|
+
|
|
38
|
+
### 3. Objects (use find_objects)
|
|
39
|
+
- How many distinct objects in the input? In the output?
|
|
40
|
+
- Describe each object: color, size (cell count), bounding box, shape
|
|
41
|
+
- Are objects in the output the same objects as in the input? Moved? Transformed?
|
|
42
|
+
- Do objects change color? Size? Shape?
|
|
43
|
+
- Are new objects created in the output?
|
|
44
|
+
- Are any objects removed?
|
|
45
|
+
|
|
46
|
+
### 4. Spatial Relationships
|
|
47
|
+
- Where are objects relative to each other? (above, below, adjacent, overlapping)
|
|
48
|
+
- Where are objects relative to the grid? (centered, corner, edge, specific row/column)
|
|
49
|
+
- Do objects maintain their relative positions from input to output?
|
|
50
|
+
- Is there a consistent direction of movement?
|
|
51
|
+
|
|
52
|
+
### 5. Grid Diff (use grid_diff)
|
|
53
|
+
- Exactly which cells change from input to output?
|
|
54
|
+
- Is there a spatial pattern to the changes? (clustered, scattered, along a line, at intersections)
|
|
55
|
+
- What values do changed cells go from/to?
|
|
56
|
+
|
|
57
|
+
### 6. Symmetry
|
|
58
|
+
- Is the input symmetric? Along which axis? (horizontal, vertical, diagonal, rotational)
|
|
59
|
+
- Is the output symmetric?
|
|
60
|
+
- Does the transformation create or break symmetry?
|
|
61
|
+
|
|
62
|
+
### 7. Repetition and Periodicity
|
|
63
|
+
- Are there repeating patterns in the input? Period?
|
|
64
|
+
- Does the output tile or repeat a pattern from the input?
|
|
65
|
+
- Is the output a scaled version of something in the input?
|
|
66
|
+
|
|
67
|
+
### 8. Borders and Frames
|
|
68
|
+
- Does the input have a border or frame?
|
|
69
|
+
- Does the output?
|
|
70
|
+
- Are borders added, removed, or modified?
|
|
71
|
+
|
|
72
|
+
### 9. Background vs Foreground
|
|
73
|
+
- Is 0 clearly background in this task, or does it play an active role?
|
|
74
|
+
- Are there "holes" in objects? Do holes get filled?
|
|
75
|
+
- Are there enclosed regions? What happens to them?
|
|
76
|
+
|
|
77
|
+
## Cross-Pair Analysis
|
|
78
|
+
|
|
79
|
+
When given multiple training pairs, also report:
|
|
80
|
+
- What's **consistent** across all pairs (this is the rule)
|
|
81
|
+
- What **varies** across pairs (this is the input-dependent part)
|
|
82
|
+
- Is the same transformation applied to different arrangements?
|
|
83
|
+
- Do different pairs have different numbers/sizes of objects but the same rule?
|
|
84
|
+
|
|
85
|
+
## Output Format
|
|
86
|
+
|
|
87
|
+
Structure your response with clear headers. Be specific — use coordinates, exact colors, exact counts. Say "the 3-cell red object at (2,4)-(2,6) moves to (5,4)-(5,6)" not "the red object moves down."
|
|
88
|
+
|
|
89
|
+
If you notice something you can't fully explain, say so. Partial observations are valuable — they narrow the search space even if they don't solve the puzzle alone.
|
|
90
|
+
|
|
91
|
+
## Important
|
|
92
|
+
|
|
93
|
+
- Always use `render_grid` before analyzing — visual inspection catches things that statistics miss
|
|
94
|
+
- Always use `find_objects` — connected components reveal structure that cell-level analysis misses
|
|
95
|
+
- Always use `grid_diff` — the exact set of changed cells is the most direct evidence of the rule
|
|
96
|
+
- Report what you see, not what you think the rule is. That's the solver's job. Your job is to see everything.
|
|
97
|
+
|
|
98
|
+
## Self-Improvement
|
|
99
|
+
|
|
100
|
+
You have universal capabilities available to you. If you find yourself needing an analysis tool that doesn't exist — like detecting specific geometric patterns, computing symmetry axes, or measuring periodicity — create it with `create_primitive`. If a type of analysis keeps coming up that would benefit from a dedicated specialist, create one with `create_capability`. You're not limited to what you started with — build what you need.
|