rubyn-code 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +120 -3
  3. data/db/migrations/014_multi_agent_upgrade.rb +79 -0
  4. data/lib/rubyn_code/agent/conversation.rb +89 -3
  5. data/lib/rubyn_code/agent/llm_caller.rb +2 -2
  6. data/lib/rubyn_code/agent/loop.rb +49 -9
  7. data/lib/rubyn_code/agent/system_prompt_builder.rb +37 -2
  8. data/lib/rubyn_code/agent/tool_processor.rb +3 -1
  9. data/lib/rubyn_code/auth/oauth.rb +1 -1
  10. data/lib/rubyn_code/auth/token_store.rb +49 -4
  11. data/lib/rubyn_code/checkpoint/hook.rb +26 -0
  12. data/lib/rubyn_code/checkpoint/manager.rb +109 -0
  13. data/lib/rubyn_code/chisel/debt.rb +65 -0
  14. data/lib/rubyn_code/chisel/inspection.rb +93 -0
  15. data/lib/rubyn_code/chisel.rb +127 -0
  16. data/lib/rubyn_code/cli/commands/agents.rb +31 -0
  17. data/lib/rubyn_code/cli/commands/chisel.rb +52 -0
  18. data/lib/rubyn_code/cli/commands/chisel_audit.rb +19 -0
  19. data/lib/rubyn_code/cli/commands/chisel_debt.rb +28 -0
  20. data/lib/rubyn_code/cli/commands/chisel_gain.rb +30 -0
  21. data/lib/rubyn_code/cli/commands/chisel_review.rb +19 -0
  22. data/lib/rubyn_code/cli/commands/command_template.rb +50 -0
  23. data/lib/rubyn_code/cli/commands/context.rb +3 -1
  24. data/lib/rubyn_code/cli/commands/custom_command.rb +42 -0
  25. data/lib/rubyn_code/cli/commands/custom_loader.rb +69 -0
  26. data/lib/rubyn_code/cli/commands/goal.rb +87 -0
  27. data/lib/rubyn_code/cli/commands/learning.rb +62 -0
  28. data/lib/rubyn_code/cli/commands/loop.rb +58 -0
  29. data/lib/rubyn_code/cli/commands/mcp.rb +18 -5
  30. data/lib/rubyn_code/cli/commands/megaplan.rb +1 -1
  31. data/lib/rubyn_code/cli/commands/registry.rb +14 -9
  32. data/lib/rubyn_code/cli/commands/rewind.rb +65 -0
  33. data/lib/rubyn_code/cli/first_run.rb +1 -1
  34. data/lib/rubyn_code/cli/loop_runner.rb +98 -0
  35. data/lib/rubyn_code/cli/mention_expander.rb +92 -0
  36. data/lib/rubyn_code/cli/renderer.rb +3 -2
  37. data/lib/rubyn_code/cli/repl.rb +37 -14
  38. data/lib/rubyn_code/cli/repl_commands.rb +76 -2
  39. data/lib/rubyn_code/cli/repl_setup.rb +9 -1
  40. data/lib/rubyn_code/cli/stream_formatter.rb +3 -2
  41. data/lib/rubyn_code/cli/version_check.rb +10 -3
  42. data/lib/rubyn_code/config/defaults.rb +13 -1
  43. data/lib/rubyn_code/config/schema.json +4 -0
  44. data/lib/rubyn_code/config/settings.rb +17 -2
  45. data/lib/rubyn_code/context/manager.rb +29 -12
  46. data/lib/rubyn_code/debug.rb +11 -5
  47. data/lib/rubyn_code/goal/evaluator.rb +95 -0
  48. data/lib/rubyn_code/hooks/event_map.rb +56 -0
  49. data/lib/rubyn_code/hooks/external_dispatcher.rb +199 -0
  50. data/lib/rubyn_code/hooks/goal_hook.rb +88 -0
  51. data/lib/rubyn_code/hooks/response.rb +83 -0
  52. data/lib/rubyn_code/hooks/runner.rb +61 -3
  53. data/lib/rubyn_code/hooks/settings_json_loader.rb +109 -0
  54. data/lib/rubyn_code/hooks/subprocess_executor.rb +116 -0
  55. data/lib/rubyn_code/ide/handlers/plan_interview_answer_handler.rb +13 -13
  56. data/lib/rubyn_code/ide/handlers/plan_interview_cancel_handler.rb +1 -1
  57. data/lib/rubyn_code/ide/handlers/plan_interview_start_handler.rb +10 -10
  58. data/lib/rubyn_code/ide/handlers/plan_propose_handler.rb +1 -1
  59. data/lib/rubyn_code/ide/handlers/prompt_handler.rb +9 -1
  60. data/lib/rubyn_code/ide/handlers/recover_ci_handler.rb +27 -16
  61. data/lib/rubyn_code/ide/handlers/session_resume_handler.rb +1 -1
  62. data/lib/rubyn_code/index/codebase_index.rb +39 -1
  63. data/lib/rubyn_code/learning/porter.rb +129 -0
  64. data/lib/rubyn_code/llm/adapters/anthropic.rb +65 -16
  65. data/lib/rubyn_code/llm/adapters/openai.rb +1 -1
  66. data/lib/rubyn_code/llm/adapters/prompt_caching.rb +5 -1
  67. data/lib/rubyn_code/llm/adapters/token_caching.rb +54 -0
  68. data/lib/rubyn_code/llm/model_router.rb +2 -2
  69. data/lib/rubyn_code/mcp/client.rb +59 -0
  70. data/lib/rubyn_code/mcp/server_extras_bridge.rb +110 -0
  71. data/lib/rubyn_code/mcp/sse_transport.rb +2 -1
  72. data/lib/rubyn_code/mcp/tool_bridge.rb +16 -14
  73. data/lib/rubyn_code/megaplan/ci_recovery.rb +3 -3
  74. data/lib/rubyn_code/megaplan/interview_session.rb +8 -3
  75. data/lib/rubyn_code/megaplan/plan_proposer.rb +3 -3
  76. data/lib/rubyn_code/memory/search.rb +9 -5
  77. data/lib/rubyn_code/memory/session_persistence.rb +159 -21
  78. data/lib/rubyn_code/observability/cost_calculator.rb +3 -1
  79. data/lib/rubyn_code/output/diff_renderer.rb +62 -7
  80. data/lib/rubyn_code/skills/auto_suggest.rb +70 -2
  81. data/lib/rubyn_code/skills/registry_client.rb +4 -3
  82. data/lib/rubyn_code/sub_agents/agent_type.rb +17 -0
  83. data/lib/rubyn_code/sub_agents/catalog.rb +124 -0
  84. data/lib/rubyn_code/teams/agent_registry.rb +120 -0
  85. data/lib/rubyn_code/teams/mailbox.rb +99 -10
  86. data/lib/rubyn_code/teams/manager.rb +83 -5
  87. data/lib/rubyn_code/teams/teammate.rb +5 -1
  88. data/lib/rubyn_code/tools/ask_user.rb +15 -1
  89. data/lib/rubyn_code/tools/executor.rb +5 -3
  90. data/lib/rubyn_code/tools/spawn_agent.rb +47 -62
  91. data/lib/rubyn_code/tools/spawn_teammate.rb +7 -2
  92. data/lib/rubyn_code/tools/web_fetch.rb +1 -1
  93. data/lib/rubyn_code/tools/web_search.rb +4 -1
  94. data/lib/rubyn_code/version.rb +1 -1
  95. data/lib/rubyn_code.rb +45 -2
  96. data/skills/rubyn_self_test.md +322 -14
  97. data/skills/self_test/chisel_smoke.rb +84 -0
  98. data/skills/self_test/fixtures/chisel_sample.rb +64 -0
  99. metadata +37 -1
@@ -63,30 +63,113 @@ Score: 18/22 (82%) — 4 failures
63
63
  #### File Cache
64
64
  - Read `lib/rubyn_code/version.rb` twice. PASS if both reads succeed (cache should serve the second).
65
65
 
66
- #### Output Compressor — Head/Tail Strategy
67
- - Run `bash` with `seq 1 5000` (generates 5,000 lines — well over the bash threshold of 4,000 chars). PASS if the result contains "lines omitted" or is significantly shorter than 5,000 lines. This proves the head_tail compressor is working.
66
+ #### Output Compressor — All Strategies (direct)
68
67
 
69
- #### Output Compressor Spec Summary Strategy
70
- - Run `bash` with `cd <project_root> && bundle exec rspec spec/rubyn_code/tools/base_spec.rb --format documentation 2>&1`. This produces multi-line RSpec output. PASS if the result you receive is shorter than the full verbose output — specifically check if passing specs got compressed to a summary line like "N examples, 0 failures" instead of listing every example.
68
+ > **Why this is a direct call, not a tool observation.** Earlier versions of this
69
+ > test ran `seq 1 5000`, a big `grep`, etc. through the agent's own tools and
70
+ > hoped the compressor would visibly truncate the result. That is unreliable:
71
+ > whether a given tool invocation is routed through the compressor gate (and at
72
+ > what threshold) depends on the execution path, so the agent often received
73
+ > already-handled output and scored a false FAIL even though the compressor was
74
+ > fine. Instead, drive `OutputCompressor#compress(tool_name, raw_output)`
75
+ > **directly** with inputs crafted to exceed each strategy's threshold, and
76
+ > assert on the marker in the returned string. This is deterministic and matches
77
+ > how the unit specs exercise it.
71
78
 
72
- #### Output Compressor Grep Top Matches
73
- - Run `grep` searching for `def ` across all of `lib/`. This will match hundreds of method definitions. PASS if the result contains "matches omitted" or shows only a subset of results (the compressor limits to top N matches).
79
+ - **All strategies**: `bash` with the script below. PASS for each strategy whose line says `PASS`. Report the final `COMPRESSION: N/5 strategies active` line in the scorecard.
74
80
 
75
- #### Output Compressor — Glob Tree Collapse
76
- - Run `glob` for `**/*.rb` across the entire project. With 170+ files this should exceed the glob threshold. PASS if the result shows directory summaries like `app/models/ (N files)` instead of listing every individual file path, OR if the result is significantly shorter than listing all 170+ paths individually.
81
+ ```bash
82
+ bundle exec ruby -e '
83
+ require_relative "lib/rubyn_code"
84
+ c = RubynCode::Tools::OutputCompressor
85
+
86
+ results = {}
87
+
88
+ # head_tail (bash, 1000-token threshold): >10 lines, well over 4000 chars
89
+ big = (1..5000).map { |i| "line #{i}" }.join("\n")
90
+ results["head_tail"] = c.new.compress("bash", big).include?("lines omitted")
91
+
92
+ # spec_summary (run_specs, 500-token threshold): verbose passing output
93
+ # collapses to just the "N examples, 0 failures" summary line
94
+ spec_out = (Array.new(200) { |i| " passing example #{i} runs and returns ok value" }.join("\n")) +
95
+ "\n\n42 examples, 0 failures\n"
96
+ results["spec_summary"] = (c.new.compress("run_specs", spec_out).strip == "42 examples, 0 failures")
97
+
98
+ # top_matches (grep, 1000-token threshold): keeps top N, marks the rest
99
+ grep_out = (1..500).map { |i| "lib/file#{i}.rb:#{i}: def method_number_#{i}(arg)" }.join("\n")
100
+ results["top_matches"] = c.new.compress("grep", grep_out).include?("matches omitted")
101
+
102
+ # tree (glob, 500-token threshold): collapses paths to "dir/ (N files)"
103
+ glob_out = (1..500).map { |i| "lib/rubyn_code/subdir#{i % 25}/some_file_name_#{i}.rb" }.join("\n")
104
+ results["tree"] = c.new.compress("glob", glob_out).include?("files)")
105
+
106
+ # relevant_hunks (git_diff, 2000-token threshold): keeps headers, truncates bodies
107
+ hunk = ->(f) { "diff --git a/#{f} b/#{f}\nindex 000..111 100644\n--- a/#{f}\n+++ b/#{f}\n" +
108
+ (Array.new(100) { |i| "+ added source line number #{i}" }.join("\n")) + "\n" }
109
+ diff_out = (1..10).map { |i| hunk.call("file#{i}.rb") }.join
110
+ results["relevant_hunks"] = c.new.compress("git_diff", diff_out).include?("lines in this file omitted")
77
111
 
78
- #### Output Compressor Diff Strategy
79
- - Run `bash` with `cd <project_root> && git log --oneline -1 --format=%H | xargs git diff HEAD~5..` (diff of last 5 commits). If the diff is large enough, the compressor should keep headers but truncate bodies. PASS if result contains diff headers. SKIP if diff is small enough to pass through uncompressed.
112
+ results.each { |k, v| puts "STRATEGY #{k}: #{v ? "PASS" : "FAIL"}" }
113
+ puts "COMPRESSION: #{results.values.count(true)}/5 strategies active"
114
+ '
115
+ ```
80
116
 
81
- #### Compression Stats
82
- - After running the above tests, note whether any output you received contained truncation markers like "lines omitted", "matches omitted", or "files)". Count how many of the 5 compression strategies actually triggered. Report: "N/5 compression strategies verified active".
117
+ Each strategy is scored independently (5 line items). A healthy build prints `COMPRESSION: 5/5 strategies active`.
83
118
 
84
119
  ### 7. Skills System
85
120
  - **load_skill**: Load any available skill (e.g., `classes`). PASS if content is returned.
86
121
 
87
122
  ### 8. Memory System
88
- - **memory_write**: Write a test memory: `category: "test", content: "self-test at #{Time.now}"`. PASS if no error.
89
- - **memory_search**: Search for `self-test`. PASS if the memory we just wrote is found.
123
+
124
+ > **Use the real `Memory::Store` / `Memory::Search` API.** Both are constructed
125
+ > as `.new(db, project_path:)` — the `project_path:` keyword is **required**, and
126
+ > `db` must respond to `execute` / `query` / `transaction` (a raw
127
+ > `SQLite3::Database` alone does **not** provide `query`, which `Search` needs).
128
+ > The script below wraps an in-memory SQLite DB to satisfy that interface, exactly
129
+ > as the specs' `setup_test_db` helper does. Writing the memory creates its own
130
+ > table via `Store#ensure_tables`, so no migrations are needed.
131
+
132
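+ - **Round-trip**: `bash` with the script below. PASS if the final line begins with `MEMORY: PASS` (the script appends a short parenthetical summary after it). A final line beginning with `MEMORY: PARTIAL` or `MEMORY: FAIL` is not a PASS.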
133
+
134
+ ```bash
135
+ bundle exec ruby -e '
136
+ require_relative "lib/rubyn_code"
137
+ require "sqlite3"
138
+
139
+ # Minimal stand-in for RubynCode::DB::Connection (execute/query/transaction).
140
+ class SelfTestDB
141
+ def initialize(raw) = @raw = raw
142
+ def execute(sql, params = []) = @raw.execute(sql, params)
143
+ def query(sql, params = []) = @raw.execute(sql, params)
144
+ def transaction(&b) = @raw.transaction(&b)
145
+ end
146
+
147
+ raw = SQLite3::Database.new(":memory:")
148
+ raw.results_as_hash = true
149
+ db = SelfTestDB.new(raw)
150
+
151
+ project = "/self-test"
152
+ store = RubynCode::Memory::Store.new(db, project_path: project)
153
+ search = RubynCode::Memory::Search.new(db, project_path: project)
154
+
155
+ token = "selftesttoken-marker-xyz"
156
+ store.write(content: "self-test memory #{token}")
157
+
158
+ found = search.search(token).any? { |r| r.content.include?(token) }
159
+ recent_ok = search.recent(limit: 5).any? { |r| r.content.include?(token) }
160
+
161
+ if found && recent_ok
162
+ puts "MEMORY: PASS (write + search + recent all round-trip)"
163
+ elsif found
164
+ puts "MEMORY: PARTIAL (search works, recent did not return it)"
165
+ else
166
+ puts "MEMORY: FAIL (write succeeded but search did not return it)"
167
+ end
168
+ '
169
+ ```
170
+
171
+ The script writes a memory with a unique token, then confirms both
172
+ `Search#search` (LIKE query) and `Search#recent` return it.
90
173
 
91
174
  ### 9. Configuration
92
175
  - **bash**: Run `cat ~/.rubyn-code/config.yml`. PASS if file exists and contains `provider:`.
@@ -190,6 +273,231 @@ End-to-end exercise of the autoload pipeline against the real registry at `rubyn
190
273
  ```
191
274
  before the next response (the `📥` line appears only if the pack wasn't already installed). Do **not** count this as PASS/FAIL — just mention it in the scorecard so the user can verify the renderer side themselves.
192
275
 
276
+ ### 16. Teams System — Multi-Agent
277
+
278
+ Run the following inline Ruby script with `bash`. It exercises the teammate manager, mailbox (including structured messaging), and agent registry in a single SQLite-backed round-trip. PASS if the final line is `ALL PASS`.
279
+
280
+ ```bash
281
+ bundle exec ruby -e '
282
+ require_relative "lib/rubyn_code"
283
+ require "sqlite3"
284
+
285
+ db = SQLite3::Database.new(":memory:")
286
+ db.results_as_hash = true
287
+
288
+ mailbox = RubynCode::Teams::Mailbox.new(db)
289
+ manager = RubynCode::Teams::Manager.new(db, mailbox: mailbox)
290
+ registry = RubynCode::Teams::AgentRegistry.new(manager: manager, mailbox: mailbox)
291
+
292
+ # 1. Spawn root + child teammates
293
+ root = manager.spawn(name: "lead", role: "coordinator")
294
+ child = manager.spawn(name: "coder", role: "developer", parent_agent_id: root.id)
295
+ raise "spawn failed" unless root.root? && !child.root?
296
+ puts "STEP spawn: PASS"
297
+
298
+ # 2. Parent-child tracking
299
+ kids = manager.children_of(root.id)
300
+ raise "children_of broken" unless kids.size == 1 && kids.first.name == "coder"
301
+ raise "roots broken" unless manager.roots.size == 1
302
+ tree = manager.agent_tree(root.id)
303
+ raise "tree broken" unless tree[:children].size == 1
304
+ puts "STEP lineage: PASS"
305
+
306
+ # 3. Structured messaging with correlation_id
307
+ corr_id = mailbox.send_structured(
308
+ from: "lead", to: "coder", type: "task",
309
+ data: { action: "write_tests", files: ["user.rb"] },
310
+ content: "Write tests for user.rb"
311
+ )
312
+ raise "send_structured returned nil" if corr_id.nil?
313
+
314
+ msgs = mailbox.read_inbox("coder")
315
+ raise "inbox empty" if msgs.empty?
316
+ msg = msgs.first
317
+ raise "missing data" unless msg[:data].is_a?(Hash) && msg[:data][:action] == "write_tests"
318
+ raise "missing correlation_id" unless msg[:correlation_id].is_a?(String)
319
+ puts "STEP structured_msg: PASS"
320
+
321
+ # 4. Correlation chain
322
+ mailbox.send(
323
+ from: "coder", to: "lead", content: "Done",
324
+ message_type: "result", correlation_id: msg[:correlation_id],
325
+ data: { status: "ok", tests: 5 }
326
+ )
327
+ chain = mailbox.find_by_correlation_id(msg[:correlation_id])
328
+ raise "correlation chain broken (#{chain.size})" unless chain.size == 2
329
+ puts "STEP correlation: PASS"
330
+
331
+ # 5. Agent discovery
332
+ manager.update_status("coder", "active")
333
+ snap = registry.snapshot
334
+ raise "snapshot broken" unless snap.size == 2
335
+ actives = registry.active
336
+ raise "active filter broken" unless actives.size == 1 && actives.first[:name] == "coder"
337
+ forest = registry.forest
338
+ raise "forest broken" unless forest.size == 1 && forest.first[:children].size == 1
339
+ lineage = registry.lineage(child.id)
340
+ raise "lineage broken" unless lineage.size == 1 && lineage.first.name == "lead"
341
+ report = registry.status_report
342
+ raise "status_report broken" unless report.include?("lead") && report.include?("coder")
343
+ puts "STEP discovery: PASS"
344
+
345
+ # 6. Cleanup + unread_count
346
+ raise "unread wrong" unless mailbox.unread_count("lead") == 1
347
+ mailbox.read_inbox("lead")
348
+ raise "read didnt clear" unless mailbox.unread_count("lead") == 0
349
+ manager.remove("coder")
350
+ manager.remove("lead")
351
+ raise "cleanup failed" unless manager.list.empty?
352
+ puts "STEP cleanup: PASS"
353
+
354
+ puts "ALL PASS"
355
+ '
356
+ ```
357
+
358
+ The script tests:
359
+ 1. **Spawn** — root and child teammates with parent tracking
360
+ 2. **Lineage** — `children_of`, `roots`, `agent_tree`
361
+ 3. **Structured messaging** — `send_structured` with typed data payloads
362
+ 4. **Correlation chains** — request/response pairing via `correlation_id`
363
+ 5. **Agent discovery** — `snapshot`, `active`, `forest`, `lineage`, `status_report`
364
+ 6. **Cleanup** — `unread_count`, `read_inbox`, `remove`
365
+
366
+ PASS criteria: all 6 `STEP` lines say PASS and the final line is `ALL PASS`.
367
+
368
+ ### 17. Recent Additions — Claude Code / Codex Parity
369
+
370
+ Each feature below ships as its own PR; a check FAILs cleanly if that PR has
371
+ not yet merged into the branch under test. Run the grep/spec checks — they are
372
+ fast and need no API calls.
373
+
374
+ #### 17a. `/goal` — work until a goal is met
375
+ - **grep**: `class GoalHook` in `lib/rubyn_code/hooks/goal_hook.rb`. PASS if found.
376
+ - **grep**: `:stop` in `lib/rubyn_code/hooks/runner.rb`. PASS if found (stop-hook gating wired).
377
+ - **run_specs**: `bundle exec rspec spec/rubyn_code/hooks/goal_hook_spec.rb spec/rubyn_code/cli/commands/goal_spec.rb --format progress`. PASS if `0 failures`.
378
+
379
+ #### 17b. `/loop` — repeat a prompt/command on an interval
380
+ - **grep**: `class LoopRunner` in `lib/rubyn_code/cli/loop_runner.rb`. PASS if found.
381
+ - **run_specs**: `bundle exec rspec spec/rubyn_code/cli/loop_runner_spec.rb spec/rubyn_code/cli/commands/loop_spec.rb --format progress`. PASS if `0 failures`.
382
+ - **bash** (behavior): `bundle exec ruby -Ilib -rrubyn_code -e 'puts RubynCode::CLI::LoopRunner.parse_interval("5m")'`. PASS if output is `300`.
383
+
384
+ #### 17c. `AGENTS.md` project instructions
385
+ - **grep**: `AGENTS.md` in `lib/rubyn_code/agent/system_prompt_builder.rb`. PASS if found.
386
+ - **run_specs**: `bundle exec rspec spec/rubyn_code/agent/system_prompt_builder_spec.rb --format progress`. PASS if `0 failures`.
387
+
388
+ #### 17d. `@`-file mentions
389
+ - **grep**: `class MentionExpander` in `lib/rubyn_code/cli/mention_expander.rb`. PASS if found.
390
+ - **run_specs**: `bundle exec rspec spec/rubyn_code/cli/mention_expander_spec.rb --format progress`. PASS if `0 failures`.
391
+
392
+ #### 17e. User-defined slash commands
393
+ - **grep**: `module CustomLoader` in `lib/rubyn_code/cli/commands/custom_loader.rb`. PASS if found.
394
+ - **run_specs**: `bundle exec rspec spec/rubyn_code/cli/commands/custom_loader_spec.rb spec/rubyn_code/cli/commands/command_template_spec.rb --format progress`. PASS if `0 failures`.
395
+
396
+ #### 17f. Custom sub-agents + `/agents`
397
+ - **grep**: `class Catalog` in `lib/rubyn_code/sub_agents/catalog.rb`. PASS if found.
398
+ - **run_specs**: `bundle exec rspec spec/rubyn_code/sub_agents/catalog_spec.rb spec/rubyn_code/tools/spawn_agent_spec.rb --format progress`. PASS if `0 failures` (spawn_agent must still pass after the refactor).
399
+
400
+ #### 17g. MCP resources & prompts
401
+ - **grep**: `def supports_resources?` in `lib/rubyn_code/mcp/client.rb`. PASS if found.
402
+ - **run_specs**: `bundle exec rspec spec/rubyn_code/mcp/client_spec.rb spec/rubyn_code/mcp/tool_bridge_spec.rb --format progress`. PASS if `0 failures`.
403
+
404
+ #### 17h. `/rewind` — checkpoint & restore
405
+ - **grep**: `class Manager` in `lib/rubyn_code/checkpoint/manager.rb`. PASS if found.
406
+ - **run_specs**: `bundle exec rspec spec/rubyn_code/checkpoint --format progress`. PASS if `0 failures`.
407
+
408
+ #### 17i. Learning export/import
409
+ - **grep**: `module Porter` in `lib/rubyn_code/learning/porter.rb`. PASS if found.
410
+ - **run_specs**: `bundle exec rspec spec/rubyn_code/learning/porter_spec.rb --format progress`. PASS if `0 failures`.
411
+ - **bash** (round-trip): the script below exports instincts to a temp file and re-imports them into a fresh in-memory DB. PASS if the final line is `LEARNING ROUNDTRIP: PASS`.
412
+
413
+ ```bash
414
+ bundle exec ruby -Ilib -rrubyn_code -rsqlite3 -rtmpdir -e '
415
+ def db_with_instincts
416
+ raw = SQLite3::Database.new(":memory:"); raw.results_as_hash = true
417
+ raw.execute(File.read("db/migrations/010_create_instincts.sql").split(";").first + ";")
418
+ wrap = Object.new
419
+ wrap.define_singleton_method(:execute) { |sql, p = []| raw.execute(sql, p) }
420
+ wrap.define_singleton_method(:query) { |sql, p = []| raw.execute(sql, p) }
421
+ wrap
422
+ end
423
+ src = db_with_instincts
424
+ src.execute("INSERT INTO instincts (id,project_path,pattern,context_tags,confidence,decay_rate,times_applied,times_helpful,created_at,updated_at) VALUES (?,?,?,?,?,?,?,?,?,?)",
425
+ ["x","/p","prefer guard clauses","[]",0.8,0.05,1,1,"2026-01-01T00:00:00Z","2026-01-01T00:00:00Z"])
426
+ Dir.mktmpdir do |d|
427
+ f = File.join(d, "l.json")
428
+ RubynCode::Learning::Porter.export(db: src, path: f)
429
+ dst = db_with_instincts
430
+ res = RubynCode::Learning::Porter.import(db: dst, path: f)
431
+ ok = res[:imported] == 1 && dst.query("SELECT COUNT(*) AS n FROM instincts").first["n"] == 1
432
+ puts(ok ? "LEARNING ROUNDTRIP: PASS" : "LEARNING ROUNDTRIP: FAIL #{res.inspect}")
433
+ end
434
+ '
435
+ ```
436
+
437
+ #### 17j. Command registry integrity (all new commands load + register)
438
+ - **bash**: the script below builds a fresh command registry, registers the five new command classes, and asserts that every new slash command resolves by name (`known?`). PASS if the final line is `COMMANDS: PASS`.
439
+
440
+ ```bash
441
+ bundle exec ruby -Ilib -rrubyn_code -e '
442
+ reg = RubynCode::CLI::Commands::Registry.new
443
+ [RubynCode::CLI::Commands::Goal, RubynCode::CLI::Commands::Loop,
444
+ RubynCode::CLI::Commands::Agents, RubynCode::CLI::Commands::Rewind,
445
+ RubynCode::CLI::Commands::Learning].each { |c| reg.register(c) }
446
+ want = %w[/goal /loop /agents /rewind /learning]
447
+ missing = want.reject { |n| reg.known?(n) }
448
+ puts(missing.empty? ? "COMMANDS: PASS" : "COMMANDS: FAIL missing #{missing.inspect}")
449
+ '
450
+ ```
451
+
452
+ ### 18. Chisel — Minimal-Code Enforcement (opt-in)
453
+
454
+ Chisel is rubyn-code's "write the minimum that works" layer. It is **off by
455
+ default** and only changes the agent once a user turns it on (`/chisel full` or
456
+ `chisel_mode` in config). These checks prove the engine resolves modes, injects
457
+ its ruleset only when enabled, never chisels away the safety floor, and that the
458
+ debt scanner, inspection prompts, and all five slash commands are wired up — all
459
+ deterministic, no API calls.
460
+
461
+ The deterministic target is a committed, deliberately over-engineered fixture,
462
+ `skills/self_test/fixtures/chisel_sample.rb`. Chisel scans it and must return the
463
+ **same three `chisel:` markers every time** (and ignore the two decoys). That is
464
+ what makes this check repeatable rather than a one-off tmpdir.
465
+
466
+ - **grep** (prompt integration): `append_chisel_ruleset` in `lib/rubyn_code/agent/system_prompt_builder.rb`. PASS if found (confirms the ruleset reaches the system prompt).
467
+ - **run_specs**: `bundle exec rspec spec/rubyn_code/chisel_spec.rb spec/rubyn_code/chisel spec/rubyn_code/cli/commands/chisel_spec.rb spec/rubyn_code/cli/commands/chisel_review_spec.rb spec/rubyn_code/cli/commands/chisel_audit_spec.rb spec/rubyn_code/cli/commands/chisel_debt_spec.rb spec/rubyn_code/cli/commands/chisel_gain_spec.rb --format progress`. PASS if output contains `0 failures`. (Includes `self_test_fixture_spec.rb`, which guards the fixture's exact scan result.)
468
+ - **Smoke run against the fixture**: `bash` runs the committed runner — no inline script to keep in sync:
469
+
470
+ ```bash
471
+ bundle exec ruby skills/self_test/chisel_smoke.rb
472
+ ```
473
+
474
+ It scores four areas, each on its own line, then prints an overall `CHISEL:` verdict and exits non-zero on any failure:
475
+
476
+ ```
477
+ CHISEL debt: PASS
478
+ CHISEL engine: PASS
479
+ CHISEL inspection: PASS
480
+ CHISEL commands: PASS
481
+ CHISEL: PASS
482
+ ```
483
+
484
+ - **debt** — scanning the fixture returns exactly its three planted markers
485
+ (file/line/note), with the string-literal and trailing-comment decoys ignored.
486
+ - **engine** — `off` injects nothing; `lite`/`full`/`ultra` layer the right
487
+ addenda and always keep the safety floor; a garbage mode never crashes or
488
+ leaks through. Driven via `RUBYN_CHISEL_MODE`, independent of this machine's
489
+ `chisel_mode` config.
490
+ - **inspection** — `:diff` and `:repo` prompts assemble a String carrying the
491
+ ladder + safety floor; an unknown scope raises instead of emitting junk.
492
+ - **commands** — all five (`/chisel`, `/chisel-review`, `/chisel-audit`,
493
+ `/chisel-debt`, `/chisel-gain`) register and resolve.
494
+
495
+ Score each `CHISEL <area>` line independently (4 line items). PASS criteria:
496
+ all four areas PASS and the final line is `CHISEL: PASS`.
497
+
498
+ You can also point Chisel at the fixture by hand to see the consistent result
499
+ directly: `bundle exec ruby -Ilib -rrubyn_code -e 'RubynCode::Chisel::Debt.scan("skills/self_test/fixtures").each { |i| puts "#{i.file}:#{i.line} — #{i.note}" }'`.
500
+
193
501
  ## Scoring
194
502
 
195
503
  Count total PASS results out of total tests run. Report the percentage.
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Chisel smoke test — runs rubyn-code's Chisel layer against a committed,
4
+ # deliberately over-engineered fixture and asserts a CONSISTENT result every
5
+ # time. Deterministic and offline (no LLM): the debt scanner, mode resolution,
6
+ # inspection-prompt assembly, and command registration are all pure.
7
+ #
8
+ # $ bundle exec ruby skills/self_test/chisel_smoke.rb
9
+ #
10
+ # Prints one `CHISEL <area>: PASS/FAIL` line per area, a final `CHISEL: PASS`
11
+ # (or `FAIL`), and exits non-zero if anything failed — so CI and the
12
+ # /skill self-test scorecard can both consume it.
13
+
14
+ require_relative '../../lib/rubyn_code'
15
+
16
+ C = RubynCode::Chisel
17
+ FIXTURE_DIR = File.expand_path('fixtures', __dir__)
18
+
19
+ # The exact, repeatable output the scanner must produce for the fixture. If you
20
+ # edit skills/self_test/fixtures/chisel_sample.rb, update this table to match.
21
+ EXPECTED_DEBT = [
22
+ { file: 'chisel_sample.rb', line: 18, note: 'collapse this factory into a single build method' },
23
+ { file: 'chisel_sample.rb', line: 39, note: 'replace this class with Array#sum at the single call site' },
24
+ { file: 'chisel_sample.rb', line: 52, note: 'inline DEFAULTS[:retries] since there is only one reader' }
25
+ ].freeze
26
+
27
+ results = {}
28
+
29
+ # 1. Debt scanner — the consistent-result core. Scan only the fixture dir so
30
+ # the outcome never depends on the rest of the tree.
31
+ scanned = RubynCode::Chisel::Debt.scan(FIXTURE_DIR)
32
+ actual = scanned.map { |i| { file: i.file, line: i.line, note: i.note } }
33
+ results['debt'] = (actual == EXPECTED_DEBT)
34
+ warn(" debt mismatch — expected #{EXPECTED_DEBT.inspect}, got #{actual.inspect}") unless results['debt']
35
+
36
+ # 2. Engine — off injects nothing; lite/full/ultra layer the right addenda and
37
+ # ALWAYS keep the safety floor; a garbage mode never crashes or leaks through.
38
+ # Driven via RUBYN_CHISEL_MODE so it ignores this machine's chisel_mode config.
39
+ ENV['RUBYN_CHISEL_MODE'] = 'off'
40
+ off_ok = !C.enabled? && C.mode == 'off' && C.prompt_section.empty?
41
+
42
+ ENV['RUBYN_CHISEL_MODE'] = 'lite'
43
+ lite = C.prompt_section
44
+ lite_ok = C.enabled? && lite.include?(C::LADDER) && lite.include?(C::SAFETY_FLOOR) && !lite.include?(C::FULL_ADDENDUM)
45
+
46
+ ENV['RUBYN_CHISEL_MODE'] = 'full'
47
+ full = C.prompt_section
48
+ full_ok = full.include?(C::FULL_ADDENDUM) && full.include?(C::SAFETY_FLOOR) && !full.include?(C::ULTRA_ADDENDUM)
49
+
50
+ ENV['RUBYN_CHISEL_MODE'] = 'ultra'
51
+ ultra = C.prompt_section
52
+ ultra_ok = ultra.include?(C::ULTRA_ADDENDUM) && ultra.include?(C::SAFETY_FLOOR)
53
+
54
+ ENV['RUBYN_CHISEL_MODE'] = 'definitely-not-a-mode'
55
+ typo_ok = C::MODES.include?(C.mode) && C.mode != 'definitely-not-a-mode'
56
+ ENV.delete('RUBYN_CHISEL_MODE')
57
+ results['engine'] = off_ok && lite_ok && full_ok && ultra_ok && typo_ok
58
+
59
+ # 3. Inspection — both scopes assemble a String carrying the ladder + safety
60
+ # floor and naming the fixture; an unknown scope raises instead of emitting junk.
61
+ insp = RubynCode::Chisel::Inspection
62
+ diff_p = insp.prompt(scope: :diff, target: 'main')
63
+ repo_p = insp.prompt(scope: :repo, target: FIXTURE_DIR)
64
+ raised = begin
65
+ insp.prompt(scope: :bogus)
66
+ false
67
+ rescue ArgumentError
68
+ true
69
+ end
70
+ results['inspection'] = diff_p.is_a?(String) && diff_p.include?(C::LADDER) &&
71
+ diff_p.include?(C::SAFETY_FLOOR) &&
72
+ repo_p.include?(C::LADDER) && repo_p.include?(FIXTURE_DIR) && raised
73
+
74
+ # 4. Command registry — all five Chisel commands register and resolve by name.
75
+ reg = RubynCode::CLI::Commands::Registry.new
76
+ [RubynCode::CLI::Commands::Chisel, RubynCode::CLI::Commands::ChiselReview,
77
+ RubynCode::CLI::Commands::ChiselAudit, RubynCode::CLI::Commands::ChiselDebt,
78
+ RubynCode::CLI::Commands::ChiselGain].each { |cmd| reg.register(cmd) }
79
+ results['commands'] = %w[/chisel /chisel-review /chisel-audit /chisel-debt /chisel-gain].all? { |n| reg.known?(n) }
80
+
81
+ results.each { |area, ok| puts "CHISEL #{area}: #{ok ? 'PASS' : 'FAIL'}" }
82
+ all_ok = results.values.all?
83
+ puts(all_ok ? 'CHISEL: PASS' : 'CHISEL: FAIL')
84
+ exit(all_ok ? 0 : 1)
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Chisel self-test fixture — a DELIBERATELY over-engineered Ruby file.
4
+ #
5
+ # rubyn-code points Chisel at this file to get consistent, repeatable results:
6
+ # - `Chisel::Debt.scan` on this directory must harvest EXACTLY the three
7
+ # own-line `chisel:` markers below — and none of the decoys at the bottom.
8
+ # - `/chisel-review` and `/chisel-audit` have real over-engineering to flag.
9
+ #
10
+ # Do NOT "clean this up" — the smells and the markers are the point. The smoke
11
+ # test that asserts on this file lives in skills/self_test/chisel_smoke.rb and
12
+ # spec/rubyn_code/chisel/self_test_fixture_spec.rb. If you change a marker, the
13
+ # line/note it sits on, or add/remove one, update those two in lock-step.
14
+ module ChiselFixture
15
+ # An abstract factory with exactly one product shape — classic premature
16
+ # abstraction. A plain method (or just calling the class) would do.
17
+ class GreeterFactory
18
+ # chisel: collapse this factory into a single build method
19
+ def self.create(kind)
20
+ case kind
21
+ when :formal then FormalGreeter.new
22
+ when :casual then CasualGreeter.new
23
+ end
24
+ end
25
+ end
26
+
27
+ class FormalGreeter
28
+ def greet(name) = "Good day, #{name}."
29
+ end
30
+
31
+ class CasualGreeter
32
+ def greet(name) = "hey #{name}"
33
+ end
34
+
35
+ # A stateful wrapper that adds nothing over Array#sum.
36
+ class Accumulator
37
+ def initialize = (@total = 0)
38
+
39
+ # chisel: replace this class with Array#sum at the single call site
40
+ def add(amount)
41
+ @total += amount
42
+ self
43
+ end
44
+
45
+ def total = @total
46
+ end
47
+
48
+ # Single-reader config indirection.
49
+ DEFAULTS = { retries: 3 }.freeze
50
+
51
+ def self.retries
52
+ # chisel: inline DEFAULTS[:retries] since there is only one reader
53
+ DEFAULTS.fetch(:retries)
54
+ end
55
+
56
+ # --- decoys: these MUST NOT be harvested as debt markers ---
57
+
58
+ def self.decoy
59
+ # The next line has "chisel:" inside a string AND as a trailing comment;
60
+ # neither is an own-line marker, so the scanner must ignore both.
61
+ label = 'see # chisel: this is data, not a marker' # chisel: trailing, ignored
62
+ label
63
+ end
64
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyn-code
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - fadedmaturity
@@ -184,6 +184,7 @@ files:
184
184
  - db/migrations/011_fix_mailbox_messages_columns.rb
185
185
  - db/migrations/012_expand_mailbox_message_types.rb
186
186
  - db/migrations/013_add_failed_status_to_tasks.rb
187
+ - db/migrations/014_multi_agent_upgrade.rb
187
188
  - exe/rubyn-code
188
189
  - lib/rubyn_code.rb
189
190
  - lib/rubyn_code/agent/RUBYN.md
@@ -213,21 +214,38 @@ files:
213
214
  - lib/rubyn_code/background/job.rb
214
215
  - lib/rubyn_code/background/notifier.rb
215
216
  - lib/rubyn_code/background/worker.rb
217
+ - lib/rubyn_code/checkpoint/hook.rb
218
+ - lib/rubyn_code/checkpoint/manager.rb
219
+ - lib/rubyn_code/chisel.rb
220
+ - lib/rubyn_code/chisel/debt.rb
221
+ - lib/rubyn_code/chisel/inspection.rb
216
222
  - lib/rubyn_code/cli/RUBYN.md
217
223
  - lib/rubyn_code/cli/app.rb
218
224
  - lib/rubyn_code/cli/commands/RUBYN.md
225
+ - lib/rubyn_code/cli/commands/agents.rb
219
226
  - lib/rubyn_code/cli/commands/base.rb
220
227
  - lib/rubyn_code/cli/commands/budget.rb
228
+ - lib/rubyn_code/cli/commands/chisel.rb
229
+ - lib/rubyn_code/cli/commands/chisel_audit.rb
230
+ - lib/rubyn_code/cli/commands/chisel_debt.rb
231
+ - lib/rubyn_code/cli/commands/chisel_gain.rb
232
+ - lib/rubyn_code/cli/commands/chisel_review.rb
221
233
  - lib/rubyn_code/cli/commands/clear.rb
234
+ - lib/rubyn_code/cli/commands/command_template.rb
222
235
  - lib/rubyn_code/cli/commands/compact.rb
223
236
  - lib/rubyn_code/cli/commands/context.rb
224
237
  - lib/rubyn_code/cli/commands/context_info.rb
225
238
  - lib/rubyn_code/cli/commands/cost.rb
239
+ - lib/rubyn_code/cli/commands/custom_command.rb
240
+ - lib/rubyn_code/cli/commands/custom_loader.rb
226
241
  - lib/rubyn_code/cli/commands/diff.rb
227
242
  - lib/rubyn_code/cli/commands/doctor.rb
243
+ - lib/rubyn_code/cli/commands/goal.rb
228
244
  - lib/rubyn_code/cli/commands/help.rb
229
245
  - lib/rubyn_code/cli/commands/install_skills.rb
246
+ - lib/rubyn_code/cli/commands/learning.rb
230
247
  - lib/rubyn_code/cli/commands/list_skills.rb
248
+ - lib/rubyn_code/cli/commands/loop.rb
231
249
  - lib/rubyn_code/cli/commands/mcp.rb
232
250
  - lib/rubyn_code/cli/commands/megaplan.rb
233
251
  - lib/rubyn_code/cli/commands/model.rb
@@ -239,6 +257,7 @@ files:
239
257
  - lib/rubyn_code/cli/commands/remove_skills.rb
240
258
  - lib/rubyn_code/cli/commands/resume.rb
241
259
  - lib/rubyn_code/cli/commands/review.rb
260
+ - lib/rubyn_code/cli/commands/rewind.rb
242
261
  - lib/rubyn_code/cli/commands/skill.rb
243
262
  - lib/rubyn_code/cli/commands/skills.rb
244
263
  - lib/rubyn_code/cli/commands/spawn.rb
@@ -249,6 +268,8 @@ files:
249
268
  - lib/rubyn_code/cli/daemon_runner.rb
250
269
  - lib/rubyn_code/cli/first_run.rb
251
270
  - lib/rubyn_code/cli/input_handler.rb
271
+ - lib/rubyn_code/cli/loop_runner.rb
272
+ - lib/rubyn_code/cli/mention_expander.rb
252
273
  - lib/rubyn_code/cli/renderer.rb
253
274
  - lib/rubyn_code/cli/repl.rb
254
275
  - lib/rubyn_code/cli/repl_commands.rb
@@ -280,10 +301,17 @@ files:
280
301
  - lib/rubyn_code/db/migrator.rb
281
302
  - lib/rubyn_code/db/schema.rb
282
303
  - lib/rubyn_code/debug.rb
304
+ - lib/rubyn_code/goal/evaluator.rb
283
305
  - lib/rubyn_code/hooks/RUBYN.md
284
306
  - lib/rubyn_code/hooks/built_in.rb
307
+ - lib/rubyn_code/hooks/event_map.rb
308
+ - lib/rubyn_code/hooks/external_dispatcher.rb
309
+ - lib/rubyn_code/hooks/goal_hook.rb
285
310
  - lib/rubyn_code/hooks/registry.rb
311
+ - lib/rubyn_code/hooks/response.rb
286
312
  - lib/rubyn_code/hooks/runner.rb
313
+ - lib/rubyn_code/hooks/settings_json_loader.rb
314
+ - lib/rubyn_code/hooks/subprocess_executor.rb
287
315
  - lib/rubyn_code/hooks/user_hooks.rb
288
316
  - lib/rubyn_code/ide/adapters/tool_output.rb
289
317
  - lib/rubyn_code/ide/client.rb
@@ -314,6 +342,7 @@ files:
314
342
  - lib/rubyn_code/learning/extractor.rb
315
343
  - lib/rubyn_code/learning/injector.rb
316
344
  - lib/rubyn_code/learning/instinct.rb
345
+ - lib/rubyn_code/learning/porter.rb
317
346
  - lib/rubyn_code/learning/shortcut.rb
318
347
  - lib/rubyn_code/llm/RUBYN.md
319
348
  - lib/rubyn_code/llm/adapters/anthropic.rb
@@ -326,6 +355,7 @@ files:
326
355
  - lib/rubyn_code/llm/adapters/openai_message_translator.rb
327
356
  - lib/rubyn_code/llm/adapters/openai_streaming.rb
328
357
  - lib/rubyn_code/llm/adapters/prompt_caching.rb
358
+ - lib/rubyn_code/llm/adapters/token_caching.rb
329
359
  - lib/rubyn_code/llm/client.rb
330
360
  - lib/rubyn_code/llm/message_builder.rb
331
361
  - lib/rubyn_code/llm/model_router.rb
@@ -333,6 +363,7 @@ files:
333
363
  - lib/rubyn_code/mcp/RUBYN.md
334
364
  - lib/rubyn_code/mcp/client.rb
335
365
  - lib/rubyn_code/mcp/config.rb
366
+ - lib/rubyn_code/mcp/server_extras_bridge.rb
336
367
  - lib/rubyn_code/mcp/sse_transport.rb
337
368
  - lib/rubyn_code/mcp/stdio_transport.rb
338
369
  - lib/rubyn_code/mcp/tool_bridge.rb
@@ -379,6 +410,8 @@ files:
379
410
  - lib/rubyn_code/skills/registry_client.rb
380
411
  - lib/rubyn_code/skills/ttl_manager.rb
381
412
  - lib/rubyn_code/sub_agents/RUBYN.md
413
+ - lib/rubyn_code/sub_agents/agent_type.rb
414
+ - lib/rubyn_code/sub_agents/catalog.rb
382
415
  - lib/rubyn_code/sub_agents/runner.rb
383
416
  - lib/rubyn_code/sub_agents/summarizer.rb
384
417
  - lib/rubyn_code/tasks/RUBYN.md
@@ -386,6 +419,7 @@ files:
386
419
  - lib/rubyn_code/tasks/manager.rb
387
420
  - lib/rubyn_code/tasks/models.rb
388
421
  - lib/rubyn_code/teams/RUBYN.md
422
+ - lib/rubyn_code/teams/agent_registry.rb
389
423
  - lib/rubyn_code/teams/mailbox.rb
390
424
  - lib/rubyn_code/teams/manager.rb
391
425
  - lib/rubyn_code/teams/teammate.rb
@@ -535,6 +569,8 @@ files:
535
569
  - skills/ruby_project/rake_tasks.md
536
570
  - skills/ruby_project/structure.md
537
571
  - skills/rubyn_self_test.md
572
+ - skills/self_test/chisel_smoke.rb
573
+ - skills/self_test/fixtures/chisel_sample.rb
538
574
  - skills/sinatra/application_structure.md
539
575
  - skills/sinatra/middleware_and_deployment.md
540
576
  - skills/sinatra/testing.md