prompt_objects 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +80 -0
  3. data/Gemfile.lock +1 -1
  4. data/README.md +2 -2
  5. data/exe/prompt_objects +548 -1
  6. data/frontend/src/App.tsx +11 -3
  7. data/frontend/src/components/ContextMenu.tsx +67 -0
  8. data/frontend/src/components/MessageBus.tsx +4 -3
  9. data/frontend/src/components/ModelSelector.tsx +5 -1
  10. data/frontend/src/components/ThreadsSidebar.tsx +46 -2
  11. data/frontend/src/components/UsagePanel.tsx +105 -0
  12. data/frontend/src/hooks/useWebSocket.ts +53 -0
  13. data/frontend/src/store/index.ts +10 -0
  14. data/frontend/src/types/index.ts +4 -1
  15. data/lib/prompt_objects/cli.rb +1 -0
  16. data/lib/prompt_objects/connectors/mcp.rb +1 -0
  17. data/lib/prompt_objects/environment.rb +24 -1
  18. data/lib/prompt_objects/llm/anthropic_adapter.rb +15 -1
  19. data/lib/prompt_objects/llm/factory.rb +93 -6
  20. data/lib/prompt_objects/llm/gemini_adapter.rb +13 -1
  21. data/lib/prompt_objects/llm/openai_adapter.rb +21 -4
  22. data/lib/prompt_objects/llm/pricing.rb +49 -0
  23. data/lib/prompt_objects/llm/response.rb +3 -2
  24. data/lib/prompt_objects/mcp/server.rb +1 -0
  25. data/lib/prompt_objects/message_bus.rb +27 -8
  26. data/lib/prompt_objects/prompt_object.rb +6 -4
  27. data/lib/prompt_objects/server/api/routes.rb +186 -29
  28. data/lib/prompt_objects/server/public/assets/index-Bkme6COu.css +1 -0
  29. data/lib/prompt_objects/server/public/assets/index-CQ7lVDF_.js +77 -0
  30. data/lib/prompt_objects/server/public/index.html +2 -2
  31. data/lib/prompt_objects/server/websocket_handler.rb +93 -9
  32. data/lib/prompt_objects/server.rb +54 -0
  33. data/lib/prompt_objects/session/store.rb +399 -4
  34. data/lib/prompt_objects.rb +1 -0
  35. data/prompt_objects.gemspec +1 -1
  36. data/templates/arc-agi-1/manifest.yml +22 -0
  37. data/templates/arc-agi-1/objects/data_manager.md +42 -0
  38. data/templates/arc-agi-1/objects/observer.md +100 -0
  39. data/templates/arc-agi-1/objects/solver.md +118 -0
  40. data/templates/arc-agi-1/objects/verifier.md +79 -0
  41. data/templates/arc-agi-1/primitives/check_arc_data.rb +53 -0
  42. data/templates/arc-agi-1/primitives/find_objects.rb +72 -0
  43. data/templates/arc-agi-1/primitives/grid_diff.rb +70 -0
  44. data/templates/arc-agi-1/primitives/grid_info.rb +42 -0
  45. data/templates/arc-agi-1/primitives/grid_transform.rb +50 -0
  46. data/templates/arc-agi-1/primitives/load_arc_task.rb +68 -0
  47. data/templates/arc-agi-1/primitives/render_grid.rb +78 -0
  48. data/templates/arc-agi-1/primitives/test_solution.rb +131 -0
  49. data/tools/thread-explorer.html +1043 -0
  50. metadata +21 -3
  51. data/lib/prompt_objects/server/public/assets/index-CeNJvqLG.js +0 -77
  52. data/lib/prompt_objects/server/public/assets/index-Vx4-uMOU.css +0 -1
@@ -9,7 +9,7 @@ module PromptObjects
9
9
  # SQLite-based session storage for conversation history.
10
10
  # Each environment has its own sessions.db file (gitignored for privacy).
11
11
  class Store
12
- SCHEMA_VERSION = 4
12
+ SCHEMA_VERSION = 6
13
13
 
14
14
  # Thread types for conversation branching
15
15
  THREAD_TYPES = %w[root continuation delegation fork].freeze
@@ -427,7 +427,7 @@ module PromptObjects
427
427
  # @param tool_results [Array, nil] Tool results data
428
428
  # @param source [String, nil] Source interface that added this message
429
429
  # @return [Integer] Message ID
430
- def add_message(session_id:, role:, content: nil, from_po: nil, tool_calls: nil, tool_results: nil, source: nil)
430
+ def add_message(session_id:, role:, content: nil, from_po: nil, tool_calls: nil, tool_results: nil, usage: nil, source: nil)
431
431
  now = Time.now.utc.iso8601
432
432
 
433
433
  params = [
@@ -437,12 +437,13 @@ module PromptObjects
437
437
  from_po,
438
438
  tool_calls&.to_json,
439
439
  tool_results&.to_json,
440
+ usage&.to_json,
440
441
  now
441
442
  ]
442
443
 
443
444
  @db.execute(<<~SQL, params)
444
- INSERT INTO messages (session_id, role, content, from_po, tool_calls, tool_results, created_at)
445
- VALUES (?, ?, ?, ?, ?, ?, ?)
445
+ INSERT INTO messages (session_id, role, content, from_po, tool_calls, tool_results, usage, created_at)
446
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
446
447
  SQL
447
448
 
448
449
  # Update session's updated_at and optionally last_message_source
@@ -499,6 +500,124 @@ module PromptObjects
499
500
  row["count"]
500
501
  end
501
502
 
503
+ # --- Events (Message Bus Persistence) ---
504
+
505
+ # Add an event from the message bus.
506
+ # @param entry [Hash] Bus entry with :timestamp, :from, :to, :message, :summary (and optionally :session_id, used when no session_id: argument is given)
507
+ # @param session_id [String, nil] Associated session ID
508
+ # @return [Integer] Event ID
509
+ def add_event(entry, session_id: nil)
510
+ message_text = case entry[:message]
511
+ when Hash then entry[:message].to_json
512
+ when String then entry[:message]
513
+ else entry[:message].to_s
514
+ end
515
+
516
+ params = [
517
+ session_id || entry[:session_id],
518
+ entry[:timestamp].iso8601,
519
+ entry[:from],
520
+ entry[:to],
521
+ message_text,
522
+ entry[:summary]
523
+ ]
524
+
525
+ @db.execute(<<~SQL, params)
526
+ INSERT INTO events (session_id, timestamp, from_name, to_name, message, summary)
527
+ VALUES (?, ?, ?, ?, ?, ?)
528
+ SQL
529
+
530
+ @db.last_insert_row_id
531
+ end
532
+
533
+ # Get events for a session.
534
+ # @param session_id [String] Session ID
535
+ # @return [Array<Hash>]
536
+ def get_events(session_id:)
537
+ rows = @db.execute(<<~SQL, [session_id])
538
+ SELECT * FROM events WHERE session_id = ? ORDER BY id ASC
539
+ SQL
540
+
541
+ rows.map { |row| parse_event_row(row) }
542
+ end
543
+
544
+ # Get events since a timestamp.
545
+ # @param timestamp [String] ISO8601 timestamp
546
+ # @param limit [Integer] Maximum events to return
547
+ # @return [Array<Hash>]
548
+ def get_events_since(timestamp, limit: 500)
549
+ rows = @db.execute(<<~SQL, [timestamp, limit])
550
+ SELECT * FROM events WHERE timestamp > ? ORDER BY id ASC LIMIT ?
551
+ SQL
552
+
553
+ rows.map { |row| parse_event_row(row) }
554
+ end
555
+
556
+ # Get events between two timestamps.
557
+ # @param start_time [String] ISO8601 start timestamp
558
+ # @param end_time [String] ISO8601 end timestamp
559
+ # @return [Array<Hash>]
560
+ def get_events_between(start_time, end_time)
561
+ rows = @db.execute(<<~SQL, [start_time, end_time])
562
+ SELECT * FROM events WHERE timestamp BETWEEN ? AND ? ORDER BY id ASC
563
+ SQL
564
+
565
+ rows.map { |row| parse_event_row(row) }
566
+ end
567
+
568
+ # Get recent events.
569
+ # @param count [Integer] Number of events
570
+ # @return [Array<Hash>]
571
+ def get_recent_events(count = 50)
572
+ rows = @db.execute(<<~SQL, [count])
573
+ SELECT * FROM events ORDER BY id DESC LIMIT ?
574
+ SQL
575
+
576
+ rows.map { |row| parse_event_row(row) }.reverse
577
+ end
578
+
579
+ # Search events by message content.
580
+ # @param query [String] Search text
581
+ # @param limit [Integer] Maximum results
582
+ # @return [Array<Hash>]
583
+ def search_events(query, limit: 100)
584
+ rows = @db.execute(<<~SQL, ["%#{query}%", limit])
585
+ SELECT * FROM events WHERE message LIKE ? ORDER BY id DESC LIMIT ?
586
+ SQL
587
+
588
+ rows.map { |row| parse_event_row(row) }
589
+ end
590
+
591
+ # Get total event count.
592
+ # @return [Integer]
593
+ def total_events
594
+ row = @db.get_first_row("SELECT COUNT(*) as count FROM events")
595
+ row["count"]
596
+ end
597
+
598
+ # --- Usage Aggregation ---
599
+
600
+ # Get total token usage for a session.
601
+ # @param session_id [String] Session ID
602
+ # @return [Hash] Aggregated usage data
603
+ def session_usage(session_id)
604
+ rows = @db.execute(<<~SQL, [session_id])
605
+ SELECT usage FROM messages WHERE session_id = ? AND usage IS NOT NULL
606
+ SQL
607
+
608
+ aggregate_usage_rows(rows)
609
+ end
610
+
611
+ # Get usage for a full thread tree (session + all descendants).
612
+ # @param session_id [String] Root session ID
613
+ # @return [Hash] Aggregated usage across the tree
614
+ def thread_tree_usage(session_id)
615
+ tree = get_thread_tree(session_id)
616
+ return empty_usage unless tree
617
+
618
+ collect_tree_usage(tree)
619
+ end
620
+
502
621
  # --- Export ---
503
622
 
504
623
  # Export a session to JSON format.
@@ -615,6 +734,38 @@ module PromptObjects
615
734
  end
616
735
  end
617
736
 
737
+ # Export a full thread tree as a single markdown document.
738
+ # Follows all delegation sub-threads recursively.
739
+ # @param session_id [String] Root session ID
740
+ # @return [String, nil] Markdown content
741
+ def export_thread_tree_markdown(session_id)
742
+ tree = get_thread_tree(session_id)
743
+ return nil unless tree
744
+
745
+ lines = []
746
+ lines << "# Thread Export"
747
+ lines << ""
748
+ lines << "- **Root PO**: #{tree[:session][:po_name]}"
749
+ lines << "- **Started**: #{tree[:session][:created_at]&.strftime('%Y-%m-%d %H:%M')}"
750
+ lines << "- **Exported**: #{Time.now.strftime('%Y-%m-%d %H:%M')}"
751
+ lines << ""
752
+ lines << "---"
753
+ lines << ""
754
+
755
+ render_thread_node(tree, lines, depth: 0)
756
+ lines.join("\n")
757
+ end
758
+
759
+ # Export a full thread tree as structured JSON.
760
+ # @param session_id [String] Root session ID
761
+ # @return [Hash, nil] Tree data
762
+ def export_thread_tree_json(session_id)
763
+ tree = get_thread_tree(session_id)
764
+ return nil unless tree
765
+
766
+ serialize_tree_for_export(tree)
767
+ end
768
+
618
769
  # --- Import ---
619
770
 
620
771
  # Import a session from JSON data.
@@ -655,6 +806,136 @@ module PromptObjects
655
806
 
656
807
  private
657
808
 
809
+ TOOL_RESULT_TRUNCATE_LIMIT = 10_000
810
+
811
+ def render_thread_node(node, lines, depth:)
812
+ session = node[:session]
813
+ messages = get_messages(session[:id])
814
+ indent = " " * depth
815
+ po_name = session[:po_name]
816
+ children = node[:children] || []
817
+
818
+ # Build a lookup: tool_call_name → child delegation node
819
+ # so we can render delegations inline where the tool call happened
820
+ delegation_children = {}
821
+ other_children = []
822
+ children.each do |child|
823
+ child_po = child[:session][:po_name]
824
+ if child[:session][:thread_type] == "delegation"
825
+ delegation_children[child_po] ||= []
826
+ delegation_children[child_po] << child
827
+ else
828
+ other_children << child
829
+ end
830
+ end
831
+
832
+ # Thread header
833
+ if depth == 0
834
+ lines << "## #{po_name}"
835
+ else
836
+ type_label = session[:thread_type] == "delegation" ? "Delegation" : (session[:thread_type] || "thread").capitalize
837
+ lines << ""
838
+ lines << "#{indent}### #{type_label} → #{po_name}"
839
+ lines << "#{indent}*Created by #{session[:parent_po]}*" if session[:parent_po]
840
+ end
841
+ lines << ""
842
+
843
+ # Messages
844
+ messages.each do |msg|
845
+ case msg[:role]
846
+ when :user
847
+ from = msg[:from_po] || "human"
848
+ lines << "#{indent}**#{from}:**"
849
+ lines << ""
850
+ lines << "#{indent}#{msg[:content]}" if msg[:content]
851
+ lines << ""
852
+ when :assistant
853
+ lines << "#{indent}**#{po_name}:**"
854
+ lines << ""
855
+ if msg[:content]
856
+ msg[:content].each_line { |l| lines << "#{indent}#{l.rstrip}" }
857
+ lines << ""
858
+ end
859
+ if msg[:tool_calls]
860
+ msg[:tool_calls].each do |tc|
861
+ tc_name = tc[:name] || tc["name"]
862
+ tc_args = tc[:arguments] || tc["arguments"] || {}
863
+ lines << "#{indent}<details>"
864
+ lines << "#{indent}<summary>Tool call: <code>#{tc_name}</code></summary>"
865
+ lines << ""
866
+ lines << "#{indent}```json"
867
+ JSON.pretty_generate(tc_args).each_line { |l| lines << "#{indent}#{l.rstrip}" }
868
+ lines << "#{indent}```"
869
+ lines << "#{indent}</details>"
870
+ lines << ""
871
+
872
+ # Render delegation sub-thread inline if this tool call targets a PO
873
+ if delegation_children[tc_name]
874
+ child_node = delegation_children[tc_name].shift
875
+ if child_node
876
+ render_thread_node(child_node, lines, depth: depth + 1)
877
+ end
878
+ end
879
+ end
880
+ end
881
+ when :tool
882
+ results = msg[:tool_results] || msg[:results] || []
883
+ results.each do |r|
884
+ r_name = r[:name] || r["name"] || "tool"
885
+ r_content = r[:content] || r["content"] || ""
886
+ lines << "#{indent}<details>"
887
+ lines << "#{indent}<summary>Result from <code>#{r_name}</code></summary>"
888
+ lines << ""
889
+ lines << "#{indent}```"
890
+ if r_content.to_s.length > TOOL_RESULT_TRUNCATE_LIMIT
891
+ display = r_content.to_s[0, TOOL_RESULT_TRUNCATE_LIMIT] + "\n... (truncated)"
892
+ else
893
+ display = r_content.to_s
894
+ end
895
+ display.each_line { |l| lines << "#{indent}#{l.rstrip}" }
896
+ lines << "#{indent}```"
897
+ lines << "#{indent}</details>"
898
+ lines << ""
899
+ end
900
+ end
901
+ end
902
+
903
+ # Render any remaining children that weren't matched to a tool call
904
+ # (e.g., fork threads, or delegations we couldn't match by name)
905
+ remaining = delegation_children.values.flatten + other_children
906
+ remaining.each do |child|
907
+ render_thread_node(child, lines, depth: depth + 1)
908
+ end
909
+ end
910
+
911
+ def serialize_tree_for_export(node)
912
+ session = node[:session]
913
+ messages = get_messages(session[:id])
914
+
915
+ {
916
+ session: {
917
+ id: session[:id],
918
+ po_name: session[:po_name],
919
+ name: session[:name],
920
+ thread_type: session[:thread_type],
921
+ parent_po: session[:parent_po],
922
+ created_at: session[:created_at]&.iso8601
923
+ },
924
+ messages: messages.map { |m|
925
+ {
926
+ role: m[:role].to_s,
927
+ content: m[:content],
928
+ from_po: m[:from_po],
929
+ tool_calls: m[:tool_calls],
930
+ tool_results: m[:tool_results],
931
+ usage: m[:usage],
932
+ created_at: m[:created_at]&.iso8601
933
+ }
934
+ },
935
+ children: (node[:children] || []).map { |c| serialize_tree_for_export(c) }
936
+ }
937
+ end
938
+
658
939
  def setup_schema
659
940
  # Check if we need to create/migrate
660
941
  version = get_schema_version
@@ -708,6 +989,7 @@ module PromptObjects
708
989
  from_po TEXT,
709
990
  tool_calls TEXT,
710
991
  tool_results TEXT,
992
+ usage TEXT,
711
993
  created_at TEXT NOT NULL
712
994
  );
713
995
 
@@ -733,6 +1015,21 @@ module PromptObjects
733
1015
  INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content);
734
1016
  INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
735
1017
  END;
1018
+
1019
+ -- Event log for message bus persistence (v5)
1020
+ CREATE TABLE IF NOT EXISTS events (
1021
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
1022
+ session_id TEXT,
1023
+ timestamp TEXT NOT NULL,
1024
+ from_name TEXT NOT NULL,
1025
+ to_name TEXT NOT NULL,
1026
+ message TEXT NOT NULL,
1027
+ summary TEXT,
1028
+ created_at TEXT DEFAULT CURRENT_TIMESTAMP
1029
+ );
1030
+
1031
+ CREATE INDEX IF NOT EXISTS idx_events_session ON events(session_id);
1032
+ CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp);
736
1033
  SQL
737
1034
  end
738
1035
 
@@ -786,6 +1083,90 @@ module PromptObjects
786
1083
  CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id);
787
1084
  SQL
788
1085
  end
1086
+
1087
+ if from_version < 5
1088
+ # Add event log table for message bus persistence
1089
+ @db.execute_batch(<<~SQL)
1090
+ CREATE TABLE IF NOT EXISTS events (
1091
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
1092
+ session_id TEXT,
1093
+ timestamp TEXT NOT NULL,
1094
+ from_name TEXT NOT NULL,
1095
+ to_name TEXT NOT NULL,
1096
+ message TEXT NOT NULL,
1097
+ summary TEXT,
1098
+ created_at TEXT DEFAULT CURRENT_TIMESTAMP
1099
+ );
1100
+
1101
+ CREATE INDEX IF NOT EXISTS idx_events_session ON events(session_id);
1102
+ CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp);
1103
+ SQL
1104
+ end
1105
+
1106
+ if from_version < 6
1107
+ # Add usage column for token tracking
1108
+ @db.execute("ALTER TABLE messages ADD COLUMN usage TEXT")
1109
+ end
1110
+ end
1111
+
1112
+ def empty_usage
1113
+ { input_tokens: 0, output_tokens: 0, total_tokens: 0, estimated_cost_usd: 0.0, calls: 0, by_model: {} }
1114
+ end
1115
+
1116
+ def aggregate_usage_rows(rows)
1117
+ totals = empty_usage
1118
+
1119
+ rows.each do |row|
1120
+ usage = JSON.parse(row["usage"], symbolize_names: true)
1121
+ input = usage[:input_tokens] || 0
1122
+ output = usage[:output_tokens] || 0
1123
+ model = usage[:model] || "unknown"
1124
+
1125
+ totals[:input_tokens] += input
1126
+ totals[:output_tokens] += output
1127
+ totals[:total_tokens] += input + output
1128
+ totals[:estimated_cost_usd] += LLM::Pricing.calculate(model: model, input_tokens: input, output_tokens: output)
1129
+ totals[:calls] += 1
1130
+
1131
+ # Breakdown by model
1132
+ totals[:by_model][model] ||= { input_tokens: 0, output_tokens: 0, estimated_cost_usd: 0.0, calls: 0 }
1133
+ totals[:by_model][model][:input_tokens] += input
1134
+ totals[:by_model][model][:output_tokens] += output
1135
+ totals[:by_model][model][:estimated_cost_usd] += LLM::Pricing.calculate(model: model, input_tokens: input, output_tokens: output)
1136
+ totals[:by_model][model][:calls] += 1
1137
+ end
1138
+
1139
+ totals
1140
+ end
1141
+
1142
+ def collect_tree_usage(node)
1143
+ # Get usage for this node's session
1144
+ session_rows = @db.execute(<<~SQL, [node[:session][:id]])
1145
+ SELECT usage FROM messages WHERE session_id = ? AND usage IS NOT NULL
1146
+ SQL
1147
+
1148
+ totals = aggregate_usage_rows(session_rows)
1149
+
1150
+ # Recurse into children
1151
+ (node[:children] || []).each do |child|
1152
+ child_usage = collect_tree_usage(child)
1153
+ totals[:input_tokens] += child_usage[:input_tokens]
1154
+ totals[:output_tokens] += child_usage[:output_tokens]
1155
+ totals[:total_tokens] += child_usage[:total_tokens]
1156
+ totals[:estimated_cost_usd] += child_usage[:estimated_cost_usd]
1157
+ totals[:calls] += child_usage[:calls]
1158
+
1159
+ # Merge by_model
1160
+ child_usage[:by_model].each do |model, data|
1161
+ totals[:by_model][model] ||= { input_tokens: 0, output_tokens: 0, estimated_cost_usd: 0.0, calls: 0 }
1162
+ totals[:by_model][model][:input_tokens] += data[:input_tokens]
1163
+ totals[:by_model][model][:output_tokens] += data[:output_tokens]
1164
+ totals[:by_model][model][:estimated_cost_usd] += data[:estimated_cost_usd]
1165
+ totals[:by_model][model][:calls] += data[:calls]
1166
+ end
1167
+ end
1168
+
1169
+ totals
789
1170
  end
790
1171
 
791
1172
  def parse_session_row(row, include_count: false)
@@ -809,6 +1190,19 @@ module PromptObjects
809
1190
  result
810
1191
  end
811
1192
 
1193
+ def parse_event_row(row)
1194
+ {
1195
+ id: row["id"],
1196
+ session_id: row["session_id"],
1197
+ timestamp: row["timestamp"] ? Time.parse(row["timestamp"]) : nil,
1198
+ from: row["from_name"],
1199
+ to: row["to_name"],
1200
+ message: row["message"],
1201
+ summary: row["summary"],
1202
+ created_at: row["created_at"] ? Time.parse(row["created_at"]) : nil
1203
+ }
1204
+ end
1205
+
812
1206
  def parse_message_row(row)
813
1207
  {
814
1208
  id: row["id"],
@@ -818,6 +1212,7 @@ module PromptObjects
818
1212
  from_po: row["from_po"],
819
1213
  tool_calls: row["tool_calls"] ? JSON.parse(row["tool_calls"], symbolize_names: true) : nil,
820
1214
  tool_results: row["tool_results"] ? JSON.parse(row["tool_results"], symbolize_names: true) : nil,
1215
+ usage: row["usage"] ? JSON.parse(row["usage"], symbolize_names: true) : nil,
821
1216
  created_at: row["created_at"] ? Time.parse(row["created_at"]) : nil
822
1217
  }
823
1218
  end
@@ -25,6 +25,7 @@ require_relative "prompt_objects/llm/openai_adapter"
25
25
  require_relative "prompt_objects/llm/anthropic_adapter"
26
26
  require_relative "prompt_objects/llm/gemini_adapter"
27
27
  require_relative "prompt_objects/llm/factory"
28
+ require_relative "prompt_objects/llm/pricing"
28
29
  require_relative "prompt_objects/prompt_object"
29
30
 
30
31
  # Environment module (must be loaded before environment.rb which uses them)
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "prompt_objects"
5
- spec.version = "0.2.0"
5
+ spec.version = "0.3.1"
6
6
  spec.authors = ["Scott Werner"]
7
7
  spec.email = ["scott@sublayer.com"]
8
8
 
@@ -0,0 +1,22 @@
1
+ name: arc-agi-1
2
+ description: ARC-AGI-1 challenge solving environment with grid primitives
3
+ icon: "\U0001F9E9"
4
+ color: "#F59E0B"
5
+
6
+ objects:
7
+ - solver
8
+ - observer
9
+ - verifier
10
+ - data_manager
11
+
12
+ primitives:
13
+ - load_arc_task
14
+ - render_grid
15
+ - grid_diff
16
+ - grid_info
17
+ - find_objects
18
+ - grid_transform
19
+ - test_solution
20
+ - check_arc_data
21
+
22
+ default_po: solver
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: data_manager
3
+ description: Manages the ARC-AGI-1 dataset — checks availability, lists tasks, reads task files
4
+ capabilities:
5
+ - check_arc_data
6
+ - list_files
7
+ - read_file
8
+ ---
9
+
10
+ # Data Manager
11
+
12
+ ## Identity
13
+
14
+ You manage the ARC-AGI-1 dataset. You know where the data lives, can check if it's been downloaded, and help the user or other POs get set up.
15
+
16
+ ## Data Location
17
+
18
+ The ARC-AGI dataset is expected at: `~/.prompt_objects/data/arc-agi-1/`
19
+
20
+ - Training tasks: `~/.prompt_objects/data/arc-agi-1/data/training/`
21
+ - Evaluation tasks: `~/.prompt_objects/data/arc-agi-1/data/evaluation/`
22
+ - Tasks are JSON files named by 8-character hex IDs (e.g., `007bbfb7.json`)
23
+
24
+ ## Behavior
25
+
26
+ **When asked about the dataset:**
27
+ 1. Use `check_arc_data` to see if the data exists
28
+ 2. If missing, provide the git clone command and use `ask_human` to confirm before suggesting they run it
29
+ 3. If present, report the path and number of available tasks
30
+
31
+ **When asked to list tasks:**
32
+ - Use `list_files` on the training/ and evaluation/ directories
33
+ - Report count and sample filenames
34
+
35
+ **When asked about a specific task:**
36
+ - Use `read_file` to load the raw JSON
37
+ - Report the number of training pairs and test inputs
38
+ - Summarize grid dimensions for each pair
39
+
40
+ **When the solver delegates data loading to you:**
41
+ - Check that data exists first
42
+ - Return the file path so the solver can use `load_arc_task` directly
@@ -0,0 +1,100 @@
1
+ ---
2
+ name: observer
3
+ description: Deep grid observation specialist — produces exhaustive structured analysis of ARC grid pairs
4
+ capabilities:
5
+ - render_grid
6
+ - grid_info
7
+ - grid_diff
8
+ - find_objects
9
+ - grid_transform
10
+ ---
11
+
12
+ # Observer
13
+
14
+ ## Identity
15
+
16
+ You are an observation specialist for ARC-AGI grid puzzles. Your job is to look at input/output grid pairs and describe *everything* you see — objects, patterns, spatial relationships, color changes, symmetry, dimensional changes. You are exhaustive and precise. You never skip details because the detail you skip is always the one that matters.
17
+
18
+ ## How You Work
19
+
20
+ When given grid pairs to analyze, you produce a structured observation report. You use your tools — don't try to analyze from descriptions alone. Render the grids, run grid_info, find the objects, diff the pairs.
21
+
22
+ ## Observation Framework
23
+
24
+ For each training pair, analyze and report on ALL of these dimensions:
25
+
26
+ ### 1. Dimensions
27
+ - Input size vs output size
28
+ - Are they the same? If different, what's the relationship? (multiple, subset, transposed)
29
+ - Does the size change relate to something in the input? (number of objects, a specific color count)
30
+
31
+ ### 2. Color Census
32
+ - Which colors appear in input? In output?
33
+ - Are any colors added that weren't in the input?
34
+ - Are any colors removed?
35
+ - Do color frequencies change? How?
36
+ - Is there a color that appears in the output but not input (or vice versa)?
37
+
38
+ ### 3. Objects (use find_objects)
39
+ - How many distinct objects in the input? In the output?
40
+ - Describe each object: color, size (cell count), bounding box, shape
41
+ - Are objects in the output the same objects as in the input? Moved? Transformed?
42
+ - Do objects change color? Size? Shape?
43
+ - Are new objects created in the output?
44
+ - Are any objects removed?
45
+
46
+ ### 4. Spatial Relationships
47
+ - Where are objects relative to each other? (above, below, adjacent, overlapping)
48
+ - Where are objects relative to the grid? (centered, corner, edge, specific row/column)
49
+ - Do objects maintain their relative positions from input to output?
50
+ - Is there a consistent direction of movement?
51
+
52
+ ### 5. Grid Diff (use grid_diff)
53
+ - Exactly which cells change from input to output?
54
+ - Is there a spatial pattern to the changes? (clustered, scattered, along a line, at intersections)
55
+ - What values do changed cells go from/to?
56
+
57
+ ### 6. Symmetry
58
+ - Is the input symmetric? Along which axis? (horizontal, vertical, diagonal, rotational)
59
+ - Is the output symmetric?
60
+ - Does the transformation create or break symmetry?
61
+
62
+ ### 7. Repetition and Periodicity
63
+ - Are there repeating patterns in the input? Period?
64
+ - Does the output tile or repeat a pattern from the input?
65
+ - Is the output a scaled version of something in the input?
66
+
67
+ ### 8. Borders and Frames
68
+ - Does the input have a border or frame?
69
+ - Does the output?
70
+ - Are borders added, removed, or modified?
71
+
72
+ ### 9. Background vs Foreground
73
+ - Is 0 clearly background in this task, or does it play an active role?
74
+ - Are there "holes" in objects? Do holes get filled?
75
+ - Are there enclosed regions? What happens to them?
76
+
77
+ ## Cross-Pair Analysis
78
+
79
+ When given multiple training pairs, also report:
80
+ - What's **consistent** across all pairs (this is the rule)
81
+ - What **varies** across pairs (this is the input-dependent part)
82
+ - Is the same transformation applied to different arrangements?
83
+ - Do different pairs have different numbers/sizes of objects but the same rule?
84
+
85
+ ## Output Format
86
+
87
+ Structure your response with clear headers. Be specific — use coordinates, exact colors, exact counts. Say "the 3-cell red object at (2,4)-(2,6) moves to (5,4)-(5,6)" not "the red object moves down."
88
+
89
+ If you notice something you can't fully explain, say so. Partial observations are valuable — they narrow the search space even if they don't solve the puzzle alone.
90
+
91
+ ## Important
92
+
93
+ - Always use `render_grid` before analyzing — visual inspection catches things that statistics miss
94
+ - Always use `find_objects` — connected components reveal structure that cell-level analysis misses
95
+ - Always use `grid_diff` — the exact set of changed cells is the most direct evidence of the rule
96
+ - Report what you see, not what you think the rule is. That's the solver's job. Your job is to see everything.
97
+
98
+ ## Self-Improvement
99
+
100
+ You have universal capabilities available to you. If you find yourself needing an analysis tool that doesn't exist — like detecting specific geometric patterns, computing symmetry axes, or measuring periodicity — create it with `create_primitive`. If a type of analysis keeps coming up that would benefit from a dedicated specialist, create one with `create_capability`. You're not limited to what you started with — build what you need.