@pi-unipi/cocoindex 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +93 -0
- package/bridge.ts +774 -0
- package/commands.ts +175 -0
- package/index.ts +55 -0
- package/installer.ts +397 -0
- package/package.json +42 -0
- package/skills/cocoindex/SKILL.md +88 -0
- package/tools.ts +131 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: cocoindex
|
|
3
|
+
description: "CocoIndex content indexing and search — use when you need to search indexed project content or trigger re-indexing"
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# CocoIndex Skill
|
|
7
|
+
|
|
8
|
+
## When to Use CocoIndex
|
|
9
|
+
|
|
10
|
+
Use CocoIndex tools and commands when you need to:
|
|
11
|
+
|
|
12
|
+
1. **Search indexed project content** — use `cocoindex_search` instead of `content_search` or `ctx_search`
|
|
13
|
+
2. **Trigger re-indexing** — use `/unipi:cocoindex-update` after significant code changes
|
|
14
|
+
3. **Initialize indexing** — use `/unipi:cocoindex-init` for new projects
|
|
15
|
+
|
|
16
|
+
## Key Differences from Old FTS5 Content Store
|
|
17
|
+
|
|
18
|
+
| Aspect | Old (FTS5) | New (CocoIndex) |
|
|
19
|
+
|--------|-----------|-----------------|
|
|
20
|
+
| Chunking | Heading/paragraph split | AST-aware code, recursive text |
|
|
21
|
+
| Search | BM25 + trigram fuzzy | Semantic vector + full-text |
|
|
22
|
+
| Incremental | Full re-index every time | Delta-only (changed files) |
|
|
23
|
+
| Scale | Small corpus | Multi-GiB, parallel |
|
|
24
|
+
| Store | SQLite FTS5 | LanceDB |
|
|
25
|
+
|
|
26
|
+
## Tools
|
|
27
|
+
|
|
28
|
+
### `cocoindex_search`
|
|
29
|
+
Search indexed content. It uses semantic vector search when the LanceDB table has vectors, LanceDB full-text search when an inverted index exists, and a lexical fallback for older text-only indexes.
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
cocoindex_search({ query: "how authentication works", limit: 10 })
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Returns results with `title`, `content`, `source`, `rank`, `contentType`, and `matchLayer`.
|
|
36
|
+
|
|
37
|
+
### `cocoindex_status`
|
|
38
|
+
Check indexing status — CLI availability, pipeline configured, target store, indexed state, doc count, and last run.
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
cocoindex_status({})
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Commands
|
|
45
|
+
|
|
46
|
+
| Command | Description |
|
|
47
|
+
|---------|-------------|
|
|
48
|
+
| `/unipi:cocoindex-update` | Run `cocoindex update` on the current project |
|
|
49
|
+
| `/unipi:cocoindex-status` | Show indexing status |
|
|
50
|
+
| `/unipi:cocoindex-init` | Scaffold a default pipeline |
|
|
51
|
+
| `/unipi:cocoindex-search <query>` | Search the indexed codebase semantically |
|
|
52
|
+
| `/unipi:cocoindex-settings` | Show configuration |
|
|
53
|
+
|
|
54
|
+
## Prerequisites
|
|
55
|
+
|
|
56
|
+
1. **Python 3.10+** with `cocoindex` installed:
|
|
57
|
+
```bash
|
|
58
|
+
pip install cocoindex
|
|
59
|
+
pip install 'cocoindex[lancedb]'
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
2. **Pipeline initialized** — run `/unipi:cocoindex-init` once per project
|
|
63
|
+
|
|
64
|
+
3. **Embedding config** — CocoIndex reuses the memory package's embedding settings:
|
|
65
|
+
- API key from `~/.unipi/memory/config.json`
|
|
66
|
+
- Same model and dimensions for cross-system search
|
|
67
|
+
|
|
68
|
+
## Architecture
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
Project files → LocalFile source → SplitRecursively → EmbedText → LanceDB
|
|
72
|
+
↓
|
|
73
|
+
Agent query ──────────────────────────────→ vector search → ranked results
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
- Pipeline defined in `.unipi/cocoindex/main.py` (auto-generated, customizable)
|
|
77
|
+
- Data stored in `.unipi/cocoindex/.lancedb/`
|
|
78
|
+
- Search queries LanceDB directly via Node.js SDK (no CLI round-trip)
|
|
79
|
+
|
|
80
|
+
## Troubleshooting
|
|
81
|
+
|
|
82
|
+
| Issue | Fix |
|
|
83
|
+
|-------|-----|
|
|
84
|
+
| "CLI not found" | Run `/unipi:cocoindex-init` for guided install, or `pip install cocoindex` manually |
|
|
85
|
+
| "Pipeline not initialized" | `/unipi:cocoindex-init` |
|
|
86
|
+
| "Search unavailable" | `npm install @lancedb/lancedb` |
|
|
87
|
+
| "No results" | Run `/unipi:cocoindex-update` first |
|
|
88
|
+
| Embedding errors | Check `~/.unipi/memory/config.json` API key |
|
package/tools.ts
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* tools.ts — CocoIndex tool registration
|
|
3
|
+
*
|
|
4
|
+
* Exposes cocoindex operations as Pi agent tools:
|
|
5
|
+
* - cocoindex_search: Search indexed content
|
|
6
|
+
* - cocoindex_status: Show indexing status
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { Type } from "@sinclair/typebox";
|
|
10
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
11
|
+
import { COCOINDEX_PACKAGE_SPEC, COCOINDEX_TOOLS } from "@pi-unipi/core";
|
|
12
|
+
import * as bridge from "./bridge.js";
|
|
13
|
+
import type { CocoindexDeps } from "./bridge.js";
|
|
14
|
+
|
|
15
|
+
// ─────────────────────────────────────────────────────────
|
|
16
|
+
// Schemas
|
|
17
|
+
// ─────────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
const SearchParams = Type.Object({
|
|
20
|
+
query: Type.String({ description: "Search query against indexed content" }),
|
|
21
|
+
limit: Type.Optional(Type.Number({ description: "Max results (default 10)", minimum: 1 })),
|
|
22
|
+
offset: Type.Optional(Type.Number({ description: "Pagination offset", minimum: 0 })),
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
const StatusParams = Type.Object({});
|
|
26
|
+
|
|
27
|
+
// ─────────────────────────────────────────────────────────
|
|
28
|
+
// Helpers
|
|
29
|
+
// ─────────────────────────────────────────────────────────
|
|
30
|
+
|
|
31
|
+
function textResult(text: string, details?: Record<string, unknown>): any {
|
|
32
|
+
return {
|
|
33
|
+
content: [{ type: "text", text }],
|
|
34
|
+
details,
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function jsonResult(data: unknown, label?: string): any {
|
|
39
|
+
const text = label ? `${label}:\n${JSON.stringify(data, null, 2)}` : JSON.stringify(data, null, 2);
|
|
40
|
+
return {
|
|
41
|
+
content: [{ type: "text", text }],
|
|
42
|
+
details: data as Record<string, unknown>,
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// ─────────────────────────────────────────────────────────
|
|
47
|
+
// Registration
|
|
48
|
+
// ─────────────────────────────────────────────────────────
|
|
49
|
+
|
|
50
|
+
export function registerCocoindexTools(pi: ExtensionAPI, deps: CocoindexDeps): void {
|
|
51
|
+
// cocoindex_search — search indexed content via LanceDB
|
|
52
|
+
pi.registerTool({
|
|
53
|
+
name: COCOINDEX_TOOLS.SEARCH,
|
|
54
|
+
label: "CocoIndex Search",
|
|
55
|
+
description:
|
|
56
|
+
"Search indexed content using semantic vector search when available, with full-text/lexical fallbacks. " +
|
|
57
|
+
"Diagnostic/search only: this tool never installs CocoIndex. " +
|
|
58
|
+
"Use /unipi:cocoindex-init to set up/install, then /unipi:cocoindex-update to index.",
|
|
59
|
+
parameters: SearchParams,
|
|
60
|
+
async execute(_toolCallId: string, params: any): Promise<any> {
|
|
61
|
+
try {
|
|
62
|
+
const available = await bridge.isAvailable();
|
|
63
|
+
if (!available) {
|
|
64
|
+
return textResult(
|
|
65
|
+
"Search Unavailable: CocoIndex CLI is not installed. " +
|
|
66
|
+
"Run /unipi:cocoindex-init for guided install, or manually run " +
|
|
67
|
+
`uv tool install '${COCOINDEX_PACKAGE_SPEC}'.`,
|
|
68
|
+
{ cliAvailable: false },
|
|
69
|
+
);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
const results = await bridge.search(deps.projectDir, params.query, {
|
|
73
|
+
limit: params.limit,
|
|
74
|
+
offset: params.offset,
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
if (results.length === 0) {
|
|
78
|
+
return textResult(
|
|
79
|
+
`No results for "${params.query}" in the current CocoIndex data. ` +
|
|
80
|
+
"If this seems wrong, run /unipi:cocoindex-status to confirm the pipeline has data, then /unipi:cocoindex-update to refresh it.",
|
|
81
|
+
);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
const lines = results.map(
|
|
85
|
+
(r, i) =>
|
|
86
|
+
`[${i + 1}] ${r.title} (${r.matchLayer}, rank: ${r.rank.toFixed(3)})\n${r.content.slice(0, 300)}`,
|
|
87
|
+
);
|
|
88
|
+
return textResult(
|
|
89
|
+
`Found ${results.length} results for "${params.query}":\n\n${lines.join("\n\n")}`,
|
|
90
|
+
{ results } as unknown as Record<string, unknown>,
|
|
91
|
+
);
|
|
92
|
+
} catch (err) {
|
|
93
|
+
return textResult(`CocoIndex search error: ${err}`, { error: true });
|
|
94
|
+
}
|
|
95
|
+
},
|
|
96
|
+
} as any);
|
|
97
|
+
|
|
98
|
+
// cocoindex_status — show indexing status
|
|
99
|
+
pi.registerTool({
|
|
100
|
+
name: COCOINDEX_TOOLS.STATUS,
|
|
101
|
+
label: "CocoIndex Status",
|
|
102
|
+
description: "Check CocoIndex indexing status. Diagnostic only; use commands for interactive install/update.",
|
|
103
|
+
parameters: StatusParams,
|
|
104
|
+
async execute(): Promise<any> {
|
|
105
|
+
try {
|
|
106
|
+
const info = await bridge.status(deps.projectDir);
|
|
107
|
+
const lines = [
|
|
108
|
+
`📦 CocoIndex Status`,
|
|
109
|
+
`CLI available: ${info.cliAvailable ? "✅" : "❌"}`,
|
|
110
|
+
`Pipeline configured: ${info.pipelineConfigured ? "✅" : "❌"}`,
|
|
111
|
+
`Target store: ${info.targetStore}`,
|
|
112
|
+
`Indexed: ${info.indexed ? "✅" : "— (no data)"}`,
|
|
113
|
+
`Doc count: ${info.docCount}`,
|
|
114
|
+
`Last run: ${info.lastRun ?? "never"}`,
|
|
115
|
+
];
|
|
116
|
+
if (!info.cliAvailable) {
|
|
117
|
+
lines.push(
|
|
118
|
+
"",
|
|
119
|
+
"Install guidance:",
|
|
120
|
+
" • Run /unipi:cocoindex-init for guided install.",
|
|
121
|
+
` • Manual: uv tool install '${COCOINDEX_PACKAGE_SPEC}'`,
|
|
122
|
+
" • If uv is missing and mise is available: mise use -g uv@latest",
|
|
123
|
+
);
|
|
124
|
+
}
|
|
125
|
+
return textResult(lines.join("\n"), info as unknown as Record<string, unknown>);
|
|
126
|
+
} catch (err) {
|
|
127
|
+
return textResult(`CocoIndex status error: ${err}`, { error: true });
|
|
128
|
+
}
|
|
129
|
+
},
|
|
130
|
+
} as any);
|
|
131
|
+
}
|