ultimate-pi 0.12.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/.agents/skills/ccc/SKILL.md +136 -0
  2. package/.agents/skills/ccc/references/management.md +110 -0
  3. package/.agents/skills/ccc/references/settings.md +126 -0
  4. package/.agents/skills/harness-orchestration/SKILL.md +4 -4
  5. package/.pi/PACKAGING.md +1 -0
  6. package/.pi/SYSTEM.md +21 -20
  7. package/.pi/agents/harness/planning/scout-graphify.md +2 -0
  8. package/.pi/agents/harness/planning/scout-semantic.md +13 -6
  9. package/.pi/extensions/harness-run-context.ts +5 -0
  10. package/.pi/extensions/harness-subagents.ts +16 -5
  11. package/.pi/extensions/lib/harness-cocoindex-refresh.ts +49 -0
  12. package/.pi/extensions/lib/harness-subagent-policy.ts +5 -1
  13. package/.pi/extensions/lib/harness-subagents-bridge.ts +9 -63
  14. package/.pi/harness/docs/adrs/0033-parent-orchestrated-planning.md +1 -1
  15. package/.pi/prompts/harness-plan.md +10 -5
  16. package/.pi/prompts/harness-setup.md +15 -11
  17. package/.pi/scripts/README.md +1 -0
  18. package/.pi/scripts/harness-cli-verify.sh +24 -14
  19. package/.pi/scripts/harness-cocoindex-bootstrap.sh +182 -0
  20. package/.pi/scripts/harness-verify.mjs +10 -0
  21. package/.pi/skills/ast-grep/SKILL.md +2 -2
  22. package/CHANGELOG.md +16 -0
  23. package/THIRD_PARTY_NOTICES.md +7 -0
  24. package/package.json +3 -2
  25. package/vendor/pi-subagents/LICENSE +21 -0
  26. package/vendor/pi-subagents/UPSTREAM_PIN.md +11 -0
  27. package/vendor/pi-subagents/src/agents.ts +357 -0
  28. package/vendor/pi-subagents/src/subagents.ts +1463 -0
  29. package/.agents/skills/ck-search/SKILL.md +0 -99
  30. package/.agents/skills/obsidian-bases/SKILL.md +0 -299
  31. package/.agents/skills/obsidian-markdown/SKILL.md +0 -237
@@ -0,0 +1,136 @@
1
+ ---
2
+ name: ccc
3
+ description: "This skill should be used when code search, file/directory summary lookup, or concept-guide lookup is needed (whether explicitly requested or as part of completing a task), when indexing the codebase after changes, or when the user asks about ccc, cocoindex-code, or the codebase index. Trigger phrases include 'search the codebase', 'find code related to', 'describe this file', 'read the concept guide', 'update the index', 'ccc', 'cocoindex-code'."
4
+ ---
5
+
6
+ # ccc - Semantic Code Search & Indexing
7
+
8
+ `ccc` is the CLI for CocoIndex Code, providing semantic search over the current codebase and index management.
9
+
10
+ ## Ownership
11
+
12
+ The agent owns the `ccc` lifecycle for the current project — initialization, indexing, and searching. Do not ask the user to perform these steps; handle them automatically.
13
+
14
+ - **Initialization**: If `ccc search` or `ccc index` fails with an initialization error (e.g., "Not in an initialized project directory"), run `ccc init` from the project root directory, then `ccc index` to build the index, then retry the original command.
15
+ - **Index freshness**: Keep the index up to date by running `ccc index` (or `ccc search --refresh`) when the index may be stale — e.g., at the start of a session, or after making significant code changes (new files, refactors, renamed modules). There is no need to re-index between consecutive searches if no code was changed in between.
16
+ - **Installation**: If `ccc` itself is not found (command not found), refer to [management.md](references/management.md) for installation instructions and inform the user.
17
+
18
+ ## Searching the Codebase
19
+
20
+ To perform a semantic search:
21
+
22
+ ```bash
23
+ ccc search <query terms>
24
+ ```
25
+
26
+ The query should describe the concept, functionality, or behavior to find, not exact code syntax. For example:
27
+
28
+ ```bash
29
+ ccc search database connection pooling
30
+ ccc search user authentication flow
31
+ ccc search error handling retry logic
32
+ ```
33
+
34
+ ### Filtering Results
35
+
36
+ - **By language** (`--lang`, repeatable): restrict results to specific languages.
37
+
38
+ ```bash
39
+ ccc search --lang python --lang markdown database schema
40
+ ```
41
+
42
+ - **By path** (`--path`): restrict results to a glob pattern relative to project root. If omitted, defaults to the current working directory (only results under that subdirectory are returned).
43
+
44
+ ```bash
45
+ ccc search --path 'src/api/*' request validation
46
+ ```
47
+
48
+ ### Pagination
49
+
50
+ Results default to the first page. To retrieve additional results:
51
+
52
+ ```bash
53
+ ccc search --offset 5 --limit 5 database schema
54
+ ```
55
+
56
+ If all returned results look relevant, use `--offset` to fetch the next page — there are likely more useful matches beyond the first page.
57
+
58
+ ### Working with Search Results
59
+
60
+ Search results include file paths and line ranges. To explore a result in more detail:
61
+
62
+ - Use the editor's built-in file reading capabilities (e.g., the `Read` tool) to load the matched file and read lines around the returned range for full context.
63
+ - When working in a terminal without a file-reading tool, use `sed -n '<start>,<end>p' <file>` to extract a specific line range.
64
+
65
+ ### Following Hints in Search Output
66
+
67
+ Search results are a mixed ranking of code chunks, per-file/dir summaries, and (when configured) curated concept guides — all scored against the same query. Two kinds of hit come with a follow-up command embedded in the output:
68
+
69
+ - `[summary]` — a file or directory summary. Read with `ccc describe <path>`.
70
+ - `[guide]` — a curated concept guide. Read with `ccc guide <slug>`.
71
+
72
+ When a hit carries one of these tags, follow the hint: the synthesised text is usually a faster read than chasing through individual files. Conversely, do **not** run `ccc describe .` or `ccc guide` proactively as a triage step — let search rank what's relevant and act on what it returns.
73
+
74
+ ## Describing Files and Directories
75
+
76
+ Per-file and per-directory summaries (when configured for the project) condense each file's public API, contracts, and role into a short markdown block. They are typically faster to consult than reading the source.
77
+
78
+ ```bash
79
+ ccc describe src/auth/session.py # one file
80
+ ccc describe src/auth/ # directory: summary + children tree
81
+ ccc describe . # project root overview
82
+ ```
83
+
84
+ Use `describe` when you already know the path you want; let `ccc search` find paths for you when you don't.
85
+
86
+ ## Concept Guides
87
+
88
+ Some projects configure cross-cutting concept guides in `.cocoindex_code/guides.yml` — synthesised markdown documents for architectural topics that span many files (e.g. memoization, plugin-SDK boundary, channel routing). Each guide names canonical files, end-to-end flow, and contracts/invariants.
89
+
90
+ ```bash
91
+ ccc guide # list available guides + descriptions
92
+ ccc guide <slug> # print one guide
93
+ ```
94
+
95
+ Discovery is search-driven: a relevant guide will surface in `ccc search` results tagged `[guide]` with a `ccc guide <slug>` hint. Run `ccc guide` (no args) only when first orienting in an unfamiliar codebase or when the user explicitly asks for the guide list — not as a routine first step.
96
+
97
+ ### Authoring `guides.yml` Interactively
98
+
99
+ When the user wants to add or improve concept guides, collaborate on the slug list rather than dumping a finished YAML. Good guide candidates are **named subsystems the codebase obviously has** — cross-cutting lifecycles, registration/dispatch protocols, end-to-end data paths. Single-file or symbol-specific topics do not warrant a guide; per-file summaries already cover those.
100
+
101
+ Recommended flow:
102
+
103
+ 1. **Survey the codebase.** Use `ccc describe .` and a few likely subdirectory summaries to enumerate the project's subsystems and inter-edge boundaries.
104
+ 2. **Propose candidates.** Suggest 5–10 slugs with one-line descriptions, framed to name the canonical starting file or directory for each topic. Show them to the user as a list.
105
+ 3. **Iterate.** Ask which to keep, drop, rename, or merge. Surface non-obvious dependencies (`deps:`) so a higher-level guide can cite a lower-level one rather than restate it. Cycles are rejected at load time.
106
+ 4. **Write the YAML.** Add the agreed entries to `.cocoindex_code/guides.yml` (creating the file if absent). Confirm `defaults.enabled: true` and that the project's summary feature is enabled — guides require summaries.
107
+ 5. **Generate.** Run `ccc index` to drive the per-guide agent loop and produce `<slug>.md` files under `.cocoindex_code/guides/`. Re-run after editing descriptions to refresh.
108
+
109
+ Schema:
110
+
111
+ ```yaml
112
+ defaults:
113
+ enabled: true # disables all guides when false
114
+ model: openai/gpt-5.4-nano # falls back to summary.model when omitted
115
+ session_budget: 200
116
+ max_logical_depth: 3
117
+ max_turns_per_session: 18
118
+
119
+ guides:
120
+ - slug: memoization # [a-z0-9][a-z0-9-]*
121
+ description: |
122
+ What this guide covers, framed for the reader.
123
+ Name the canonical starting files (e.g. "start with src/cache.py").
124
+ deps: [other-slug] # optional; must not cycle
125
+ max_turns_per_session: 28 # optional per-entry overrides
126
+ ```
127
+
128
+ A multi-line description is fine and often clearer than one terse sentence — the description seeds the guide-generation agent's question, so concrete file/directory anchors pay off.
129
+
130
+ ## Settings
131
+
132
+ To view or edit embedding model configuration, include/exclude patterns, or language overrides, see [settings.md](references/settings.md).
133
+
134
+ ## Management & Troubleshooting
135
+
136
+ For installation, initialization, daemon management, troubleshooting, and cleanup commands, see [management.md](references/management.md).
@@ -0,0 +1,110 @@
1
+ # ccc Management
2
+
3
+ ## Installation
4
+
5
+ Install CocoIndex Code via pipx. Two install styles:
6
+
7
+ ```bash
8
+ pipx install 'cocoindex-code[full]' # batteries included (local embeddings via sentence-transformers)
9
+ pipx install cocoindex-code # slim (LiteLLM-only; requires a cloud embedding provider + API key)
10
+ ```
11
+
12
+ The `[full]` extra pulls in `sentence-transformers` so the first-run default (local embeddings, no API key) works out of the box. The slim install is for environments where you don't want the torch/transformers deps and plan to use a LiteLLM-supported cloud provider instead.
13
+
14
+ To upgrade to the latest version:
15
+
16
+ ```bash
17
+ pipx upgrade cocoindex-code
18
+ ```
19
+
20
+ After installation, the `ccc` command is available globally.
21
+
22
+ ## Project Initialization
23
+
24
+ Run `ccc init` from the root directory of the project you want to index:
25
+
26
+ ```bash
27
+ ccc init
28
+ ```
29
+
30
+ **First run (global settings don't exist yet)** — `ccc init` prompts interactively for the embedding provider (sentence-transformers / litellm) and model, then runs a one-off test embed via the daemon to confirm the model works. Accept the defaults for the sentence-transformers path, or pick litellm and enter a model identifier.
31
+
32
+ **Subsequent runs** (global settings already exist) — prompts are skipped; only project settings and `.gitignore` are set up.
33
+
34
+ To skip the interactive prompts on the first run (e.g. in a script or container), pass `--litellm-model MODEL`:
35
+
36
+ ```bash
37
+ ccc init --litellm-model openai/text-embedding-3-small
38
+ ```
39
+
40
+ This is also the only way to pick a LiteLLM model when stdin isn't a TTY and you've done a slim install.
41
+
42
+ `ccc init` creates:
43
+ - `~/.cocoindex_code/global_settings.yml` (user-level, embedding config + env vars).
44
+ - `.cocoindex_code/settings.yml` (project-level, include/exclude patterns).
45
+
46
+ If `.git` exists in the directory, `.cocoindex_code/` is automatically added to `.gitignore`.
47
+
48
+ Use `-f` to skip the confirmation prompt if `ccc init` detects a potential parent project root.
49
+
50
+ After initialization, edit the settings files if needed (see [settings.md](settings.md) for format details), then run `ccc index` to build the initial index. If the model test printed `[FAIL]` during `init`, edit `global_settings.yml` (and optionally add API keys under the commented `envs:` block) and verify with `ccc doctor` before indexing.
51
+
52
+ ## Troubleshooting
53
+
54
+ ### Diagnostics
55
+
56
+ Run `ccc doctor` to check system health end-to-end:
57
+
58
+ ```bash
59
+ ccc doctor
60
+ ```
61
+
62
+ This checks global settings, daemon status, embedding model (runs a test embedding), and — if run from within a project — file matching (walks files using the same logic as the indexer) and index status. Results stream incrementally. The output always ends by pointing to `daemon.log` for further investigation.
63
+
64
+ ### Checking Project Status
65
+
66
+ To view the current project's index status:
67
+
68
+ ```bash
69
+ ccc status
70
+ ```
71
+
72
+ This shows whether indexing is ongoing and index statistics.
73
+
74
+ ### Daemon Management
75
+
76
+ The daemon starts automatically on first use. To check its status:
77
+
78
+ ```bash
79
+ ccc daemon status
80
+ ```
81
+
82
+ This shows whether the daemon is running, its version, uptime, and loaded projects.
83
+
84
+ To restart the daemon (useful if it gets into a bad state):
85
+
86
+ ```bash
87
+ ccc daemon restart
88
+ ```
89
+
90
+ To stop the daemon:
91
+
92
+ ```bash
93
+ ccc daemon stop
94
+ ```
95
+
96
+ ## Cleanup
97
+
98
+ To reset a project's index (removes databases, keeps settings):
99
+
100
+ ```bash
101
+ ccc reset
102
+ ```
103
+
104
+ To fully remove all CocoIndex Code data for a project (including settings):
105
+
106
+ ```bash
107
+ ccc reset --all
108
+ ```
109
+
110
+ Both commands prompt for confirmation. Use `-f` to skip.
@@ -0,0 +1,126 @@
1
+ # ccc Settings
2
+
3
+ Configuration lives in two YAML files, both created automatically by `ccc init`.
4
+
5
+ ## User-Level Settings (`~/.cocoindex_code/global_settings.yml`)
6
+
7
+ Shared across all projects. Controls the embedding model and extra environment variables for the daemon.
8
+
9
+ ```yaml
10
+ embedding:
11
+ provider: sentence-transformers # or "litellm" (default when provider is omitted)
12
+ model: Snowflake/snowflake-arctic-embed-xs
13
+ device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
14
+ min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
15
+
16
+ envs: # extra environment variables for the daemon
17
+ OPENAI_API_KEY: your-key # only needed if not already in the shell environment
18
+ ```
19
+
20
+ ### Fields
21
+
22
+ | Field | Description |
23
+ |-------|-------------|
24
+ | `embedding.provider` | `sentence-transformers` for local models, `litellm` (or omit) for cloud/remote models |
25
+ | `embedding.model` | Model identifier — format depends on provider (see examples below) |
26
+ | `embedding.device` | Optional. `cpu`, `cuda`, or `mps`. Auto-detected if omitted. Only relevant for `sentence-transformers`. |
27
+ | `embedding.min_interval_ms` | Optional. Minimum delay between LiteLLM embedding requests in milliseconds. Defaults to `5` for LiteLLM and is ignored by `sentence-transformers`. Set explicitly to override the default. |
28
+ | `envs` | Key-value map of environment variables injected into the daemon. Use for API keys not already in the shell environment. |
29
+
30
+ ### Embedding Model Examples
31
+
32
+ **Local (sentence-transformers, no API key needed):**
33
+
34
+ ```yaml
35
+ embedding:
36
+ provider: sentence-transformers
37
+ model: Snowflake/snowflake-arctic-embed-xs # default, lightweight
38
+ ```
39
+
40
+ ```yaml
41
+ embedding:
42
+ provider: sentence-transformers
43
+ model: nomic-ai/CodeRankEmbed # better code retrieval, needs GPU (~1 GB VRAM)
44
+ ```
45
+
46
+ **Ollama (local):**
47
+
48
+ ```yaml
49
+ embedding:
50
+ model: ollama/nomic-embed-text
51
+ ```
52
+
53
+ **OpenAI:**
54
+
55
+ ```yaml
56
+ embedding:
57
+ model: text-embedding-3-small
58
+ min_interval_ms: 300
59
+ envs:
60
+ OPENAI_API_KEY: your-api-key
61
+ ```
62
+
63
+ **Gemini:**
64
+
65
+ ```yaml
66
+ embedding:
67
+ model: gemini/gemini-embedding-001
68
+ envs:
69
+ GEMINI_API_KEY: your-api-key
70
+ ```
71
+
72
+ **Voyage (code-optimized):**
73
+
74
+ ```yaml
75
+ embedding:
76
+ model: voyage/voyage-code-3
77
+ envs:
78
+ VOYAGE_API_KEY: your-api-key
79
+ ```
80
+
81
+ For the full list of supported cloud providers and model identifiers, see [LiteLLM Embedding Models](https://docs.litellm.ai/docs/embedding/supported_embedding).
82
+
83
+ ### Important
84
+
85
+ Switching embedding models changes vector dimensions — you must re-index after changing the model:
86
+
87
+ ```bash
88
+ ccc reset && ccc index
89
+ ```
90
+
91
+ ## Project-Level Settings (`<project>/.cocoindex_code/settings.yml`)
92
+
93
+ Per-project. Controls which files to index. Created by `ccc init`, which also adds `.cocoindex_code/` to `.gitignore` when the project directory contains `.git`.
94
+
95
+ ```yaml
96
+ include_patterns:
97
+ - "**/*.py"
98
+ - "**/*.js"
99
+ - "**/*.ts"
100
+ # ... (sensible defaults for 28+ file types)
101
+
102
+ exclude_patterns:
103
+ - "**/.*" # hidden directories
104
+ - "**/__pycache__"
105
+ - "**/node_modules"
106
+ - "**/dist"
107
+ # ...
108
+
109
+ language_overrides:
110
+ - ext: inc # treat .inc files as PHP
111
+ lang: php
112
+ ```
113
+
114
+ ### Fields
115
+
116
+ | Field | Description |
117
+ |-------|-------------|
118
+ | `include_patterns` | Glob patterns for files to index. Defaults cover common languages (Python, JS/TS, Rust, Go, Java, C/C++, C#, SQL, Shell, Markdown, PHP, Lua, etc.). |
119
+ | `exclude_patterns` | Glob patterns for files/directories to skip. Defaults exclude hidden dirs, `node_modules`, `dist`, `__pycache__`, `vendor`, etc. |
120
+ | `language_overrides` | List of `{ext, lang}` pairs to override language detection for specific file extensions. |
121
+
122
+ ### Editing Tips
123
+
124
+ - To index additional file types, append glob patterns to `include_patterns` (e.g. `"**/*.proto"`).
125
+ - To exclude a directory, append to `exclude_patterns` (e.g. `"**/generated"`).
126
+ - After editing, run `ccc index` to re-index with the new settings.
@@ -36,7 +36,7 @@ LIMIT 30
36
36
  1. **Parallel `tasks`** — one `subagent({ tasks: [...] })` for scouts, decompose+hypothesis, or review fan-in; subprocesses run in parallel upstream.
37
37
  2. **Blocking calls** — each `subagent` returns when the subprocess exits; no `get_subagent_result` polling.
38
38
  3. **Compact handoffs** — pass scout/decompose JSON only; never paste full subprocess message logs into the next spawn.
39
- 4. **Spawn caps** — bridge enforces **8** active + **12** total harness spawns per session (`PI_SUBAGENT_TIMEOUT_MS` / per-task `timeoutMs` for backstop).
39
+ 4. **Spawn caps** — bridge enforces **8** active + **12** total harness spawns per session. Do **not** pass `timeoutMs` unless the user wants a cap — subprocesses wait for natural exit (`PI_SUBAGENT_TIMEOUT_MS` optional env backstop only).
40
40
 
41
41
  ## Command → agent
42
42
 
@@ -71,9 +71,9 @@ Spawn `harness/evaluator` / `harness/adversary` via `subagent` in the **same** p
71
71
  {
72
72
  "agentScope": "both",
73
73
  "tasks": [
74
- { "agent": "harness/planning/scout-graphify", "task": "…", "timeoutMs": 90000 },
75
- { "agent": "harness/planning/scout-structure", "task": "…", "timeoutMs": 90000 },
76
- { "agent": "harness/planning/scout-semantic", "task": "…", "timeoutMs": 90000 }
74
+ { "agent": "harness/planning/scout-graphify", "task": "…" },
75
+ { "agent": "harness/planning/scout-structure", "task": "…" },
76
+ { "agent": "harness/planning/scout-semantic", "task": "…" }
77
77
  ]
78
78
  }
79
79
  ```
package/.pi/PACKAGING.md CHANGED
@@ -24,6 +24,7 @@ We use an explicit allowlist (not the whole `.pi/` tree) so dev-only artifacts n
24
24
  - Ship `.pi/settings.example.json`, not `.pi/settings.json` (dev checkout uses `".."` local package)
25
25
  - Include **`vendor/pi-model-router/`** ([`pi-model-router`](https://github.com/yeliu84/pi-model-router), MIT) — see repo [`THIRD_PARTY_NOTICES.md`](../THIRD_PARTY_NOTICES.md); refresh with `npm run vendor:sync-router`
26
26
  - Include **`vendor/pi-vcc/`** ([`pi-vcc`](https://github.com/sting8k/pi-vcc), MIT; inspired by [lllyasviel/VCC](https://github.com/lllyasviel/VCC)) — loaded via `.pi/extensions/ultimate-pi-vcc.ts`; refresh with `npm run vendor:sync-vcc`
27
+ - Include **`vendor/pi-subagents/`** (vendored from [narumiruna/pi-extensions](https://github.com/narumiruna/pi-extensions) `pi-subagents`) — loaded via `.pi/extensions/harness-subagents.ts`; refresh with `npm run vendor:sync-subagents`
27
28
 
28
29
  ## Settings
29
30
 
package/.pi/SYSTEM.md CHANGED
@@ -81,41 +81,42 @@ edges at build time. Use these to answer call-graph questions without external t
81
81
  - **How does `Auth` reach `Database`?** → `graphify path "Auth" "Database"` (shortest call chain)
82
82
  - **Trace a dependency chain deep** → `graphify query "how does X depend on Y" --dfs`
83
83
 
84
- **Semantic code search via graphify:**
85
- Graphify already indexes the entire codebase as a knowledge graph. Use graphify
86
- for conceptual code search before falling back to `ck`:
87
- - **Find code by meaning** → `graphify query "where is authentication logic"`
88
- - **Find related concepts** → `graphify query "what connects to error handling"`
84
+ **Semantic code search (two lanes):**
85
+ - **Architecture / relationships** — graphify (`query`, `explain`, `path`, `GRAPH_REPORT.md`)
86
+ - **Implementation by meaning** — CocoIndex Code (`ccc search --limit N "concept"`)
87
+
88
+ Examples:
89
+ - **Find code by meaning** → `ccc search --limit 10 "authentication session validation"`
90
+ - **Who calls X / cross-module path** → `graphify explain "X"` or `graphify path "A" "B"`
89
91
  - **Cross-file surprises** → `graphify query "what unexpected connections exist"`
90
92
 
91
93
  **Order of operations for codebase exploration:**
92
94
  1. Read `graphify-out/GRAPH_REPORT.md` (god nodes, surprises, suggested questions)
93
- 2. Run `graphify query` for domain-specific questions, call traces, and semantic search
94
- 3. Use `graphify explain "Concept"` for caller/callee/dependency deep dives
95
- 4. Use `sg -p 'pattern'` for structural code search, then `ck --hybrid` only if graph and ast-grep don't surface it
96
- 5. Read individual files last — the graph already told you what matters
95
+ 2. Run `graphify query` / `explain` / `path` for architecture and call graphs
96
+ 3. Use `sg -p 'pattern'` for structural code search
97
+ 4. Use `ccc search --limit N` for conceptual implementation chunks when graphify/sg are insufficient
98
+ 5. Read individual files last — scouts and graph already narrowed the set
99
+
100
+ **Indexing:** The harness runs an incremental `ccc index` before subagent spawns. Agents use `ccc search` only; run `ccc index` on parent turns, at session start or after large edits. Never use `ccc search --refresh` in scouts. See `/skill:ccc` for the full CLI reference.
97
101
 
98
102
  ### Fallback Search (when graph doesn't cover it)
99
103
 
100
- > [!note] Graphify handles semantic search and call graphs
101
- > Graphify already provides semantic code search and call-graph tracing. Use
102
- > `graphify query`, `graphify explain`, and `graphify path` as your primary
103
- > code exploration tools. Only fall back to `sg`/`ck`/`find` when the graph
104
- > doesn't have the answer (e.g., not yet indexed, or you need exact raw text).
104
+ > [!note] Graphify + ccc split responsibilities
105
+ > Graphify owns call graphs and cross-module relationships. `ccc` owns AST-aware
106
+ > semantic chunks. Only fall back to `find`/`grep` for exact literals or non-code files.
105
107
 
106
108
  | Tool | When | Command |
107
109
  |------|------|---------|
108
- | `sg -p` | **Primary code search** — AST-aware structural pattern matching | `sg -p 'pattern' --lang typescript` |
110
+ | `sg -p` | **Structural code search** — AST pattern matching | `sg -p 'pattern' --lang typescript` |
109
111
  | `sg scan` | Rule-based code scanning (use project rules in `sgconfig.yml`) | `sg scan` |
110
- | `ck --hybrid` | Lexical + semantic fusion search (fallback after ast-grep) | `ck --hybrid "query" .` |
111
- | `ck --sem` | Purely conceptual searches (fallback after ast-grep) | `ck --sem "concept" src/` |
112
+ | `ccc search` | **Semantic chunks** — implementation by meaning | `ccc search --limit 10 "query"` |
112
113
  | `find` | File discovery by name/glob only | `find . -name "*.ts"` |
113
114
  | `grep` | **Last resort** — exact literal string matching in non-code files only | `grep -F "exact string"` |
114
115
 
115
- - **Always prefer ast-grep (`sg`) over grep for code search.** ast-grep understands code structure via tree-sitter — it matches patterns, not strings. Use it for: finding function calls, class definitions, import statements, variable usage, and any structural code query.
116
+ - **Always prefer ast-grep (`sg`) over grep for code search.** ast-grep understands code structure via tree-sitter — it matches patterns, not strings.
116
117
  - Never use grep for code search. grep is only for: log files, non-code text files, exact byte-level matching when AST patterns can't work.
117
- - Always use `--limit N` on ck to cap output and save context.
118
- - Graphify is primary. ast-grep is secondary. ck/find are fallbacks. grep is last resort.
118
+ - Always use `--limit N` on `ccc search` to cap output and save context.
119
+ - Graphify is primary for architecture. ast-grep is secondary for structure. ccc is semantic implementation search. grep is last resort.
119
120
  - Do NOT install or use grepai/seagoat/mgrep for call-graph traces or semantic
120
121
  search — graphify (call graphs) and `ccc` (semantic search) already cover both.
121
122
 
@@ -15,6 +15,8 @@ Explore the codebase via graphify for the task in `HarnessSpawnContext`. You do
15
15
 
16
16
  Findings should feed **constraints, prior art, and tensions** for the decompose agent (existing patterns, god nodes, surprising connections).
17
17
 
18
+ **Lane contract:** you own **relationships and architecture** (`graphify query`, `explain`, `path`). `scout-semantic` owns implementation-by-meaning via `ccc search` — do not duplicate semantic chunk search here.
19
+
18
20
  ## Spawn context
19
21
 
20
22
  Read `HarnessSpawnContext` in the spawn prompt (`task_summary`, `mode`, `plan_packet_path`, `risk_level`, `quick`). For `mode: revise`, read the existing plan at `plan_packet_path` first and focus findings on what changed or is at risk.
@@ -1,5 +1,5 @@
1
1
  ---
2
- description: Plan-phase scout — ck semantic code search (read-only).
2
+ description: Plan-phase scout — CocoIndex semantic code search (read-only).
3
3
  tools: read, bash, ls
4
4
  disallowed_tools: write, edit, ask_user, approve_plan, create_plan, subagent, grep, find
5
5
  extensions: false
@@ -11,7 +11,9 @@ You are the **Harness planning scout (semantic lane)**.
11
11
 
12
12
  ## Mission
13
13
 
14
- Find conceptually related code via ck semantic search for the task in `HarnessSpawnContext`. You do **not** build the PlanPacket or mutate files.
14
+ Find conceptually related **implementation** via CocoIndex (`ccc search`) for the task in `HarnessSpawnContext`. You do **not** build the PlanPacket or mutate files.
15
+
16
+ **Lane contract:** `scout-graphify` owns relationships, callers, and communities. You own **meaning** — functions, classes, and chunks that implement the task.
15
17
 
16
18
  ## Spawn context
17
19
 
@@ -19,13 +21,18 @@ Read `HarnessSpawnContext` in the spawn prompt. For `mode: revise`, bias searche
19
21
 
20
22
  ## Process
21
23
 
22
- 1. Use `ck search` or `ck query` (or project-documented ck CLI) with task-focused queries.
23
- 2. If ck is unavailable, set `status: partial` and document in `findings`.
24
- 3. **Stop early** top **5** most relevant paths only.
24
+ 1. Run **2–3** task-focused queries: `ccc search "<query>" --limit 5` (add `--path` when spawn context names a directory).
25
+ 2. The harness runs incremental `ccc index` before scouts spawn — **do not** run `ccc index`, `ccc init`, or `ccc search --refresh`.
26
+ 3. If `ccc` is missing or the index is empty, set `status: partial` and document the gap in `findings`.
27
+ 4. **Stop early** — top **5** most relevant paths only.
25
28
 
26
29
  ## Bash guardrails
27
30
 
28
- Read-only only: no installs, index rebuilds that mutate disk, or redirects.
31
+ Read-only only: no installs, indexing, daemon control, or redirects.
32
+
33
+ **Allowed:** `ccc search`, `ccc status`, `ls`, `head`, `cat`, `sed -n` (read slices).
34
+
35
+ **Forbidden:** `ccc index`, `ccc init`, `ccc reset`, `ccc daemon`, `ccc search --refresh`, package installs.
29
36
 
30
37
  ## Output limits
31
38
 
@@ -55,6 +55,10 @@ import {
55
55
  parseStructuredDocument,
56
56
  writeYamlFile,
57
57
  } from "../lib/harness-yaml.js";
58
+ import { claimExtensionLoad } from "./lib/extension-load-guard.js";
59
+
60
+ // @ts-expect-error pi extensions run as ESM
61
+ const MODULE_URL = import.meta.url;
58
62
 
59
63
  interface SessionEntryLike {
60
64
  type?: string;
@@ -191,6 +195,7 @@ function needsClarificationFollowUp(ctx: HarnessRunContext | null): boolean {
191
195
  }
192
196
 
193
197
  export default function harnessRunContext(pi: ExtensionAPI) {
198
+ if (!claimExtensionLoad("harness-run-context", MODULE_URL)) return;
194
199
  let activeCtx: HarnessRunContext | null = null;
195
200
 
196
201
  pi.on("session_start", async (_event, ctx) => {
@@ -1,14 +1,25 @@
1
1
  /**
2
2
  * harness-subagents — vendored pi-subagents with ultimate-pi discovery and policy gates.
3
+ *
4
+ * Dynamic-imports the bridge only after claimExtensionLoad so a stale global npm
5
+ * install (missing vendor/pi-subagents) does not crash local development in this repo.
3
6
  */
4
7
 
8
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
5
9
  import { claimExtensionLoad } from "./lib/extension-load-guard.js";
6
- import { getHarnessPackageRoot } from "./lib/harness-paths.js";
7
- import { createHarnessSubagentsExtension } from "./lib/harness-subagents-bridge.js";
8
10
 
9
11
  // @ts-expect-error pi extensions run as ESM
10
12
  const MODULE_URL = import.meta.url;
11
13
 
12
- export default claimExtensionLoad("harness-subagents", MODULE_URL)
13
- ? createHarnessSubagentsExtension(getHarnessPackageRoot(MODULE_URL))
14
- : () => {};
14
+ async function loadHarnessSubagents(): Promise<(pi: ExtensionAPI) => void> {
15
+ if (!claimExtensionLoad("harness-subagents", MODULE_URL)) {
16
+ return () => {};
17
+ }
18
+ const { getHarnessPackageRoot } = await import("./lib/harness-paths.js");
19
+ const { createHarnessSubagentsExtension } = await import(
20
+ "./lib/harness-subagents-bridge.js"
21
+ );
22
+ return createHarnessSubagentsExtension(getHarnessPackageRoot(MODULE_URL));
23
+ }
24
+
25
+ export default await loadHarnessSubagents();
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Incremental CocoIndex refresh before harness subagent batches (plan/execute).
3
+ * Agents use `ccc search` only; harness owns `ccc index`.
4
+ */
5
+
6
+ import { spawnSync } from "node:child_process";
7
+ import { existsSync } from "node:fs";
8
+ import { join } from "node:path";
9
+
10
+ const DEFAULT_TIMEOUT_MS = 120_000;
11
+
12
+ export function refreshHarnessCocoindexIndex(cwd: string): string | undefined {
13
+ if (process.env.HARNESS_COCOINDEX_REFRESH === "0") {
14
+ return undefined;
15
+ }
16
+ const settingsPath = join(cwd, ".cocoindex_code", "settings.yml");
17
+ if (!existsSync(settingsPath)) {
18
+ return undefined;
19
+ }
20
+
21
+ const timeoutMs = Number(
22
+ process.env.HARNESS_COCOINDEX_REFRESH_TIMEOUT_MS ?? DEFAULT_TIMEOUT_MS,
23
+ );
24
+ const result = spawnSync("ccc", ["index"], {
25
+ cwd,
26
+ encoding: "utf8",
27
+ timeout: Number.isFinite(timeoutMs) ? timeoutMs : DEFAULT_TIMEOUT_MS,
28
+ stdio: "pipe",
29
+ });
30
+
31
+ if (result.error) {
32
+ const msg = `harness-cocoindex: ccc index failed (${result.error.message})`;
33
+ if (process.env.HARNESS_COCOINDEX_REFRESH_STRICT === "1") {
34
+ return msg;
35
+ }
36
+ return `${msg} — continuing`;
37
+ }
38
+
39
+ if (result.status !== 0) {
40
+ const stderr = (result.stderr ?? "").trim().slice(0, 500);
41
+ const msg = `harness-cocoindex: ccc index exited ${result.status ?? "?"}${stderr ? `: ${stderr}` : ""}`;
42
+ if (process.env.HARNESS_COCOINDEX_REFRESH_STRICT === "1") {
43
+ return msg;
44
+ }
45
+ return `${msg} — continuing`;
46
+ }
47
+
48
+ return undefined;
49
+ }
@@ -24,9 +24,13 @@ const PLANNING_BASH_DENY_PATTERNS = [
24
24
  /\bgraphify\s+update\b/i,
25
25
  /\bgraphify\s+extract\b/i,
26
26
  /\bgraphify\s+install\b/i,
27
+ /\bccc\s+(index|init|reset|daemon)\b/i,
28
+ /\bccc\s+search\b.*--refresh/i,
27
29
  /\bpip\s+install\b/i,
28
30
  /\buv\s+tool\s+install\b/i,
29
31
  /\bnpm\s+install\b/i,
32
+ /\bnpm\s+install\b.*cocoindex/i,
33
+ /\buv\s+tool\s+install\b.*cocoindex/i,
30
34
  ];
31
35
 
32
36
  const BASH_MUTATION_PATTERNS = [
@@ -141,7 +145,7 @@ export function evaluateHarnessSubagentToolCall(
141
145
  return {
142
146
  action: "block",
143
147
  reason:
144
- "harness-subagent-policy: planning scouts may use read-only graphify/sg/ck commands only.",
148
+ "harness-subagent-policy: planning scouts may use read-only graphify/sg/ccc commands only.",
145
149
  };
146
150
  }
147
151
  }