@copilotkit/pathfinder 1.1.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -1
- package/README.md +61 -249
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +60 -0
- package/dist/cli.js.map +1 -0
- package/dist/db/client.d.ts.map +1 -1
- package/dist/db/client.js +3 -1
- package/dist/db/client.js.map +1 -1
- package/dist/db/queries.d.ts +14 -3
- package/dist/db/queries.d.ts.map +1 -1
- package/dist/db/queries.js +46 -45
- package/dist/db/queries.js.map +1 -1
- package/dist/db/schema.d.ts +5 -0
- package/dist/db/schema.d.ts.map +1 -1
- package/dist/db/schema.js +11 -0
- package/dist/db/schema.js.map +1 -1
- package/dist/index.js +2 -362
- package/dist/index.js.map +1 -1
- package/dist/indexing/chunking/html.d.ts +7 -0
- package/dist/indexing/chunking/html.d.ts.map +1 -0
- package/dist/indexing/chunking/html.js +356 -0
- package/dist/indexing/chunking/html.js.map +1 -0
- package/dist/indexing/chunking/index.js +2 -0
- package/dist/indexing/chunking/index.js.map +1 -1
- package/dist/indexing/orchestrator.d.ts +1 -0
- package/dist/indexing/orchestrator.d.ts.map +1 -1
- package/dist/indexing/orchestrator.js +27 -2
- package/dist/indexing/orchestrator.js.map +1 -1
- package/dist/indexing/source-indexer.d.ts.map +1 -1
- package/dist/indexing/source-indexer.js +1 -0
- package/dist/indexing/source-indexer.js.map +1 -1
- package/dist/ip-limiter.d.ts +11 -0
- package/dist/ip-limiter.d.ts.map +1 -0
- package/dist/ip-limiter.js +40 -0
- package/dist/ip-limiter.js.map +1 -0
- package/dist/llms-txt.d.ts +11 -0
- package/dist/llms-txt.d.ts.map +1 -0
- package/dist/llms-txt.js +43 -0
- package/dist/llms-txt.js.map +1 -0
- package/dist/mcp/server.d.ts +3 -1
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/mcp/server.js +5 -1
- package/dist/mcp/server.js.map +1 -1
- package/dist/mcp/tools/bash.d.ts +8 -0
- package/dist/mcp/tools/bash.d.ts.map +1 -1
- package/dist/mcp/tools/bash.js +59 -0
- package/dist/mcp/tools/bash.js.map +1 -1
- package/dist/mcp/tools/search.d.ts.map +1 -1
- package/dist/mcp/tools/search.js +11 -3
- package/dist/mcp/tools/search.js.map +1 -1
- package/dist/server.d.ts +6 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +492 -0
- package/dist/server.js.map +1 -0
- package/dist/skill-md.d.ts +3 -0
- package/dist/skill-md.d.ts.map +1 -0
- package/dist/skill-md.js +75 -0
- package/dist/skill-md.js.map +1 -0
- package/dist/types.d.ts +56 -18
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +7 -3
- package/dist/types.js.map +1 -1
- package/dist/workspace.d.ts +13 -0
- package/dist/workspace.d.ts.map +1 -0
- package/dist/workspace.js +118 -0
- package/dist/workspace.js.map +1 -0
- package/package.json +14 -2
- package/pathfinder-docs.yaml +54 -0
- package/pathfinder.example.yaml +48 -0
- package/.superpowers/brainstorm/47098-1775507869/content/homepage-mockup.html +0 -324
- package/.superpowers/brainstorm/47098-1775507869/state/server-stopped +0 -1
- package/.superpowers/brainstorm/47098-1775507869/state/server.log +0 -13
- package/.superpowers/brainstorm/47098-1775507869/state/server.pid +0 -1
- package/.superpowers/brainstorm/82141-1775511032/content/migration-v2.html +0 -340
- package/.superpowers/brainstorm/82141-1775511032/content/migration.html +0 -340
- package/.superpowers/brainstorm/82141-1775511032/state/server-stopped +0 -1
- package/.superpowers/brainstorm/82141-1775511032/state/server.log +0 -4
- package/.superpowers/brainstorm/82141-1775511032/state/server.pid +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,4 +1,22 @@
|
|
|
1
|
-
#
|
|
1
|
+
# @copilotkit/pathfinder
|
|
2
|
+
|
|
3
|
+
## 1.4.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- Add HTML source type for indexing static HTML documentation sites (cheerio-based parser)
|
|
8
|
+
- Content container auto-detection (main, article, [role="main"], .content, #content)
|
|
9
|
+
- Heading-boundary chunking with headingPath tracking (h1-h3)
|
|
10
|
+
- Code block preservation, list formatting, table formatting in HTML extraction
|
|
11
|
+
- Add pathfinder-docs.yaml for dogfooding Pathfinder on its own documentation
|
|
12
|
+
|
|
13
|
+
### Patch Changes
|
|
14
|
+
|
|
15
|
+
- Generalize smoke test script for any Pathfinder instance
|
|
16
|
+
- Add mobile hamburger nav menu to all docs pages
|
|
17
|
+
- Simplify README to match aimock style, add npm metadata (repository, homepage, keywords)
|
|
18
|
+
- Fix Dockerfile to copy pathfinder.yaml for production deploy
|
|
19
|
+
- Fix schema migration: remove version index from generateSchema (was failing on existing databases)
|
|
2
20
|
|
|
3
21
|
## 1.1.0
|
|
4
22
|
|
package/README.md
CHANGED
|
@@ -1,284 +1,96 @@
|
|
|
1
|
-
#
|
|
1
|
+
# pathfinder [](https://www.npmjs.com/package/@copilotkit/pathfinder)
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
## How It Works
|
|
6
|
-
|
|
7
|
-
Pathfinder indexes your GitHub repositories — documentation (Markdown/MDX) and source code — into a PostgreSQL vector database using OpenAI embeddings. It exposes configurable search tools via the [Model Context Protocol (MCP)](https://modelcontextprotocol.io), so AI agents like Claude Code can search your docs and code semantically.
|
|
3
|
+
Agentic docs retrieval for AI agents — semantic search and filesystem exploration over your documentation and code via MCP. One config file, one command, works with any AI coding agent.
|
|
8
4
|
|
|
9
5
|
## Quick Start
|
|
10
6
|
|
|
11
|
-
1. **Clone and configure:**
|
|
12
|
-
```bash
|
|
13
|
-
git clone https://github.com/CopilotKit/pathfinder.git
|
|
14
|
-
cd pathfinder
|
|
15
|
-
cp pathfinder.example.yaml pathfinder.yaml # edit for your project
|
|
16
|
-
cp .env.example .env # add your OPENAI_API_KEY
|
|
17
|
-
```
|
|
18
|
-
|
|
19
|
-
2. **Start the server:**
|
|
20
|
-
```bash
|
|
21
|
-
docker compose up
|
|
22
|
-
```
|
|
23
|
-
|
|
24
|
-
3. **Seed the index:**
|
|
25
|
-
```bash
|
|
26
|
-
docker compose exec app npx tsx scripts/seed-index.ts
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
4. **Connect your AI agent:**
|
|
30
|
-
```json
|
|
31
|
-
{
|
|
32
|
-
"mcpServers": {
|
|
33
|
-
"my-docs": { "url": "http://localhost:3001/mcp" }
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
## Configuration
|
|
39
|
-
|
|
40
|
-
All configuration lives in `pathfinder.yaml`. See [pathfinder.example.yaml](pathfinder.example.yaml) for a minimal starting point.
|
|
41
|
-
|
|
42
|
-
### Sources
|
|
43
|
-
|
|
44
|
-
Each source defines what to index:
|
|
45
|
-
|
|
46
|
-
```yaml
|
|
47
|
-
sources:
|
|
48
|
-
- name: docs
|
|
49
|
-
type: markdown # Built-in: markdown, code, raw-text
|
|
50
|
-
repo: https://github.com/your-org/your-repo.git
|
|
51
|
-
path: docs/
|
|
52
|
-
base_url: https://docs.your-project.com/
|
|
53
|
-
url_derivation:
|
|
54
|
-
strip_prefix: "docs/"
|
|
55
|
-
strip_suffix: ".md"
|
|
56
|
-
file_patterns: ["**/*.md"]
|
|
57
|
-
chunk:
|
|
58
|
-
target_tokens: 600
|
|
59
|
-
overlap_tokens: 50
|
|
60
|
-
|
|
61
|
-
- name: code
|
|
62
|
-
type: code
|
|
63
|
-
repo: https://github.com/your-org/your-repo.git
|
|
64
|
-
path: "."
|
|
65
|
-
file_patterns: ["**/*.ts", "**/*.py"]
|
|
66
|
-
exclude_patterns: ["**/test/**", "**/*.test.*"]
|
|
67
|
-
chunk:
|
|
68
|
-
target_lines: 80
|
|
69
|
-
overlap_lines: 10
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
### Tools
|
|
73
|
-
|
|
74
|
-
Each tool maps to a source and defines the MCP tool interface:
|
|
75
|
-
|
|
76
|
-
```yaml
|
|
77
|
-
tools:
|
|
78
|
-
- name: search-docs
|
|
79
|
-
description: "Search documentation for relevant information."
|
|
80
|
-
source: docs
|
|
81
|
-
default_limit: 5
|
|
82
|
-
max_limit: 20
|
|
83
|
-
result_format: docs
|
|
84
|
-
```
|
|
85
|
-
|
|
86
|
-
### Collect Tools
|
|
87
|
-
|
|
88
|
-
Collect tools let agents write structured data back to the server. Unlike search tools, they don't query anything — they validate the agent's input against a YAML-defined schema and store it as JSON in the database. Use them to gather signal from agents without writing any code.
|
|
89
|
-
|
|
90
|
-
The first built-in use case is search feedback: agents report whether search results were helpful, what they tried, and what went wrong. This surfaces broken or misleading documentation quickly. But collect tools are generic — you can define any schema for any use case (e.g., broken link reporting, feature requests, error logging).
|
|
91
|
-
|
|
92
|
-
```yaml
|
|
93
|
-
tools:
|
|
94
|
-
- name: submit-feedback
|
|
95
|
-
type: collect
|
|
96
|
-
description: "Submit feedback on whether search results were helpful."
|
|
97
|
-
response: "Feedback recorded. Thank you."
|
|
98
|
-
schema:
|
|
99
|
-
tool_name:
|
|
100
|
-
type: string
|
|
101
|
-
description: "Which search tool was used"
|
|
102
|
-
required: true
|
|
103
|
-
rating:
|
|
104
|
-
type: enum
|
|
105
|
-
values: ["helpful", "not_helpful"]
|
|
106
|
-
description: "Whether the results were helpful"
|
|
107
|
-
required: true
|
|
108
|
-
comment:
|
|
109
|
-
type: string
|
|
110
|
-
description: "What worked or didn't work"
|
|
111
|
-
required: true
|
|
112
|
-
```
|
|
113
|
-
|
|
114
|
-
Each field in `schema` supports `type` (`string`, `number`, or `enum`), an optional `description` (shown to the agent), `required` (defaults to false), and `values` (required for `enum` fields). The validated input is written as JSONB to the `collected_data` table along with the tool name and a timestamp.
|
|
115
|
-
|
|
116
|
-
### Bash Tool Options
|
|
117
|
-
|
|
118
|
-
Bash tools expose source files as a read-only virtual filesystem that agents can explore with standard commands (`find`, `grep`, `cat`, `ls`, `head`). Several options control behavior:
|
|
119
|
-
|
|
120
|
-
```yaml
|
|
121
|
-
tools:
|
|
122
|
-
- name: explore-docs
|
|
123
|
-
type: bash
|
|
124
|
-
description: "Explore documentation files"
|
|
125
|
-
sources: [docs]
|
|
126
|
-
bash:
|
|
127
|
-
session_state: true # Persistent CWD across commands (default: false)
|
|
128
|
-
grep_strategy: hybrid # memory | vector | hybrid — enables qmd semantic search (default: memory, no qmd)
|
|
129
|
-
virtual_files: true # Auto-generate INDEX.md, SEARCH_TIPS.md (default: false)
|
|
130
|
-
```
|
|
131
|
-
|
|
132
|
-
- **session_state**: When enabled, `cd` persists across commands within a session. Agents can run `cd /docs` in one tool call and then `ls` or `cat file.md` in the next without repeating the path.
|
|
133
|
-
- **grep_strategy**: Controls whether the `qmd` semantic search command is available. `memory` uses pure in-memory regex only (no `qmd`). `vector` or `hybrid` enable the `qmd` command, which performs semantic search via embeddings plus text `ILIKE`. The `vector` and `hybrid` modes require an `embedding` config block.
|
|
134
|
-
- **virtual_files**: Auto-generates `/INDEX.md` (file listing with descriptions) and `/SEARCH_TIPS.md` (usage guidance) at the root of the virtual filesystem.
|
|
135
|
-
|
|
136
|
-
Agents can also run the `related` command inside bash tools to find semantically similar files across all mounted sources:
|
|
137
|
-
|
|
138
7
|
```bash
|
|
139
|
-
|
|
8
|
+
npx @copilotkit/pathfinder init
|
|
9
|
+
npx @copilotkit/pathfinder serve
|
|
140
10
|
```
|
|
141
11
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
When `grep_strategy` is set to `vector` or `hybrid`, agents can use the `qmd` command for semantic search:
|
|
12
|
+
Or with Docker:
|
|
145
13
|
|
|
146
14
|
```bash
|
|
147
|
-
|
|
15
|
+
docker pull ghcr.io/copilotkit/pathfinder
|
|
16
|
+
docker run -v ./pathfinder.yaml:/app/pathfinder.yaml \
|
|
17
|
+
-v ./docs:/app/docs -p 3001:3001 \
|
|
18
|
+
ghcr.io/copilotkit/pathfinder
|
|
148
19
|
```
|
|
149
20
|
|
|
150
|
-
|
|
21
|
+
Then connect your AI agent:
|
|
151
22
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
23
|
+
```json
|
|
24
|
+
{
|
|
25
|
+
"mcpServers": {
|
|
26
|
+
"my-docs": { "url": "http://localhost:3001/mcp" }
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
```
|
|
155
30
|
|
|
156
|
-
|
|
157
|
-
|------|----------|-----------|
|
|
158
|
-
| `markdown` | .md, .mdx files | Headings (h2->h3->paragraph->line), preserves code blocks |
|
|
159
|
-
| `code` | Source code files | Blank line boundaries, respects block comments/strings |
|
|
160
|
-
| `raw-text` | Plain text, logs | Paragraph boundaries (double newline) |
|
|
31
|
+
## Try It — Pathfinder on Its Own Docs
|
|
161
32
|
|
|
162
|
-
|
|
33
|
+
This documentation is indexed by a live Pathfinder instance. Connect your agent to try it:
|
|
163
34
|
|
|
164
|
-
```
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
model: text-embedding-3-small
|
|
168
|
-
dimensions: 1536
|
|
35
|
+
```bash
|
|
36
|
+
# Claude Code
|
|
37
|
+
claude mcp add pathfinder-docs --transport http https://mcp.pathfinder.copilotkit.dev/mcp
|
|
169
38
|
```
|
|
170
39
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
40
|
+
```json
|
|
41
|
+
// Claude Desktop / Cursor / any MCP client
|
|
42
|
+
{
|
|
43
|
+
"mcpServers": {
|
|
44
|
+
"pathfinder-docs": {
|
|
45
|
+
"url": "https://mcp.pathfinder.copilotkit.dev/mcp"
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
}
|
|
178
49
|
```
|
|
179
50
|
|
|
180
|
-
##
|
|
181
|
-
|
|
182
|
-
The simplest way to run in production:
|
|
183
|
-
|
|
184
|
-
1. **Configure:**
|
|
185
|
-
```bash
|
|
186
|
-
cp pathfinder.example.yaml pathfinder.yaml # edit for your project
|
|
187
|
-
```
|
|
188
|
-
|
|
189
|
-
2. **Set environment variables** in `.env`:
|
|
190
|
-
```
|
|
191
|
-
OPENAI_API_KEY=sk-...
|
|
192
|
-
POSTGRES_PASSWORD=your-secure-password
|
|
193
|
-
GITHUB_WEBHOOK_SECRET=your-webhook-secret
|
|
194
|
-
```
|
|
195
|
-
|
|
196
|
-
3. **Deploy:**
|
|
197
|
-
```bash
|
|
198
|
-
docker compose -f docker-compose.prod.yaml up -d
|
|
199
|
-
```
|
|
200
|
-
|
|
201
|
-
4. **Verify:**
|
|
202
|
-
```bash
|
|
203
|
-
curl http://localhost:3001/health | python3 -m json.tool
|
|
204
|
-
```
|
|
51
|
+
## What It Does
|
|
205
52
|
|
|
206
|
-
|
|
53
|
+
Pathfinder indexes your GitHub repos — docs (Markdown, MDX, HTML) and source code — into a PostgreSQL vector database using OpenAI embeddings. It serves configurable search and filesystem exploration tools via [MCP](https://modelcontextprotocol.io), so AI agents can search your docs semantically and browse files with bash commands.
|
|
207
54
|
|
|
208
|
-
|
|
55
|
+
| Tool Type | What It Does | Example |
|
|
56
|
+
|-----------|-------------|---------|
|
|
57
|
+
| **Search** | Semantic search over indexed content | `search-docs("how to authenticate")` |
|
|
58
|
+
| **Bash** | Virtual filesystem with find, grep, cat, ls | `explore-docs("cat /docs/quickstart.mdx")` |
|
|
59
|
+
| **Collect** | Structured data collection from agents | `submit-feedback(rating: "helpful")` |
|
|
209
60
|
|
|
210
|
-
|
|
61
|
+
## Features
|
|
211
62
|
|
|
212
|
-
|
|
63
|
+
- **[Semantic Search](https://pathfinder.copilotkit.dev/search)** — pgvector RAG with configurable chunk sizes, overlap, and score thresholds
|
|
64
|
+
- **[Filesystem Exploration](https://pathfinder.copilotkit.dev/search)** — QuickJS WASM sandbox with session state, `qmd` semantic grep, `related` files
|
|
65
|
+
- **[4 Source Types](https://pathfinder.copilotkit.dev/config)** — Markdown, code, raw-text, HTML — with pluggable chunker registry
|
|
66
|
+
- **[Config-Driven](https://pathfinder.copilotkit.dev/config)** — Everything in one `pathfinder.yaml`: sources, tools, embedding, indexing, webhooks
|
|
67
|
+
- **[Client Setup](https://pathfinder.copilotkit.dev/clients)** — Claude Desktop, Claude Code, Cursor, Codex, VS Code, any Streamable HTTP client
|
|
68
|
+
- **[Docker + Railway](https://pathfinder.copilotkit.dev/deploy)** — Container image, docker-compose, Railway one-click
|
|
69
|
+
- **[Auto-Generated Endpoints](https://pathfinder.copilotkit.dev/usage)** — `/llms.txt`, `/llms-full.txt`, `/.well-known/skills/default/skill.md`
|
|
70
|
+
- **[Webhook Reindexing](https://pathfinder.copilotkit.dev/deploy)** — GitHub push triggers incremental reindex
|
|
71
|
+
- **[IP Rate Limiting](https://pathfinder.copilotkit.dev/config)** — Per-IP session caps and configurable TTL
|
|
213
72
|
|
|
214
|
-
|
|
215
|
-
```yaml
|
|
216
|
-
webhook:
|
|
217
|
-
repo_sources:
|
|
218
|
-
"your-org/your-repo": [docs, code]
|
|
219
|
-
path_triggers:
|
|
220
|
-
docs: ["docs/"]
|
|
221
|
-
code: []
|
|
222
|
-
```
|
|
223
|
-
|
|
224
|
-
2. Configure the webhook on GitHub:
|
|
225
|
-
- URL: `https://your-server/webhooks/github`
|
|
226
|
-
- Secret: same as `GITHUB_WEBHOOK_SECRET`
|
|
227
|
-
- Events: Just `push`
|
|
228
|
-
|
|
229
|
-
## Deploying to Railway
|
|
230
|
-
|
|
231
|
-
1. **Run setup:**
|
|
232
|
-
```bash
|
|
233
|
-
./scripts/setup.sh # install deps, build Docker images
|
|
234
|
-
./scripts/deploy.sh # create Railway project, set vars, deploy
|
|
235
|
-
```
|
|
236
|
-
|
|
237
|
-
2. **Set custom domain** in Railway dashboard
|
|
238
|
-
|
|
239
|
-
3. **Configure webhooks:**
|
|
240
|
-
```bash
|
|
241
|
-
./scripts/setup-webhooks.sh
|
|
242
|
-
```
|
|
243
|
-
|
|
244
|
-
See [OPERATIONS.md](OPERATIONS.md) for the full operations runbook.
|
|
245
|
-
|
|
246
|
-
## Development
|
|
73
|
+
## CLI
|
|
247
74
|
|
|
248
75
|
```bash
|
|
249
|
-
#
|
|
250
|
-
|
|
76
|
+
# Scaffold config
|
|
77
|
+
npx @copilotkit/pathfinder init
|
|
251
78
|
|
|
252
|
-
# Start
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
# Seed index
|
|
256
|
-
docker compose exec app npx tsx scripts/seed-index.ts
|
|
257
|
-
|
|
258
|
-
# Run unit tests
|
|
259
|
-
npm test
|
|
79
|
+
# Start server (uses PGlite if no DATABASE_URL)
|
|
80
|
+
npx @copilotkit/pathfinder serve
|
|
260
81
|
|
|
261
|
-
#
|
|
262
|
-
docker compose
|
|
82
|
+
# Docker with Postgres
|
|
83
|
+
docker compose up
|
|
84
|
+
```
|
|
263
85
|
|
|
264
|
-
|
|
265
|
-
npx tsx scripts/integration-test.ts
|
|
86
|
+
## Switching from Mintlify?
|
|
266
87
|
|
|
267
|
-
|
|
268
|
-
npx tsx scripts/test-path-filter.ts
|
|
269
|
-
```
|
|
88
|
+
Step-by-step migration guide: **[Migrate from Mintlify](https://pathfinder.copilotkit.dev/migrate-from-mintlify)**
|
|
270
89
|
|
|
271
|
-
##
|
|
90
|
+
## Documentation
|
|
272
91
|
|
|
273
|
-
|
|
274
|
-
|----------|----------|-------------|
|
|
275
|
-
| `OPENAI_API_KEY` | Yes | OpenAI API key for embeddings |
|
|
276
|
-
| `DATABASE_URL` | Yes | PostgreSQL connection string |
|
|
277
|
-
| `GITHUB_WEBHOOK_SECRET` | No | HMAC secret for webhook verification |
|
|
278
|
-
| `GITHUB_TOKEN` | No | GitHub token for private repos |
|
|
279
|
-
| `PATHFINDER_CONFIG` | No | Path to config file (default: `./pathfinder.yaml`) |
|
|
280
|
-
| `PORT` | No | Server port (default: `3001`) |
|
|
92
|
+
**[https://pathfinder.copilotkit.dev](https://pathfinder.copilotkit.dev)**
|
|
281
93
|
|
|
282
94
|
## License
|
|
283
95
|
|
|
284
|
-
MIT
|
|
96
|
+
MIT
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import { fileURLToPath } from "url";
|
|
6
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
7
|
+
const program = new Command();
|
|
8
|
+
program
|
|
9
|
+
.name("pathfinder")
|
|
10
|
+
.description("Agentic docs retrieval for AI agents")
|
|
11
|
+
.version("1.1.0");
|
|
12
|
+
program
|
|
13
|
+
.command("init")
|
|
14
|
+
.description("Scaffold a new Pathfinder project in the current directory")
|
|
15
|
+
.action(async () => {
|
|
16
|
+
const cwd = process.cwd();
|
|
17
|
+
const yamlDest = path.join(cwd, "pathfinder.yaml");
|
|
18
|
+
if (fs.existsSync(yamlDest)) {
|
|
19
|
+
console.log("pathfinder.yaml already exists, skipping.");
|
|
20
|
+
}
|
|
21
|
+
else {
|
|
22
|
+
const templatePath = path.join(__dirname, "..", "pathfinder.example.yaml");
|
|
23
|
+
if (!fs.existsSync(templatePath)) {
|
|
24
|
+
console.error("Could not find pathfinder.example.yaml template.");
|
|
25
|
+
process.exit(1);
|
|
26
|
+
}
|
|
27
|
+
fs.copyFileSync(templatePath, yamlDest);
|
|
28
|
+
console.log("Created pathfinder.yaml");
|
|
29
|
+
}
|
|
30
|
+
const envDest = path.join(cwd, ".env");
|
|
31
|
+
if (fs.existsSync(envDest)) {
|
|
32
|
+
console.log(".env already exists, skipping.");
|
|
33
|
+
}
|
|
34
|
+
else {
|
|
35
|
+
const envTemplatePath = path.join(__dirname, "..", ".env.example");
|
|
36
|
+
if (fs.existsSync(envTemplatePath)) {
|
|
37
|
+
fs.copyFileSync(envTemplatePath, envDest);
|
|
38
|
+
console.log("Created .env from template");
|
|
39
|
+
}
|
|
40
|
+
else {
|
|
41
|
+
console.log("No .env.example found, skipping .env creation.");
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
console.log("\nEdit pathfinder.yaml to configure your docs sources.");
|
|
45
|
+
console.log("Then run: pathfinder serve");
|
|
46
|
+
});
|
|
47
|
+
program
|
|
48
|
+
.command("serve")
|
|
49
|
+
.description("Start the Pathfinder MCP server")
|
|
50
|
+
.option("-p, --port <port>", "Port to listen on", parseInt)
|
|
51
|
+
.option("-c, --config <path>", "Path to pathfinder.yaml")
|
|
52
|
+
.action(async (opts) => {
|
|
53
|
+
const { startServer } = await import("./server.js");
|
|
54
|
+
await startServer({
|
|
55
|
+
port: opts.port,
|
|
56
|
+
configPath: opts.config,
|
|
57
|
+
});
|
|
58
|
+
});
|
|
59
|
+
program.parse();
|
|
60
|
+
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AAEpC,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACF,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,sCAAsC,CAAC;KACnD,OAAO,CAAC,OAAO,CAAC,CAAC;AAEtB,OAAO;KACF,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,4DAA4D,CAAC;KACzE,MAAM,CAAC,KAAK,IAAI,EAAE;IACf,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAE1B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;IACnD,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;IAC7D,CAAC;SAAM,CAAC;QACJ,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,yBAAyB,CAAC,CAAC;QAC3E,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;YAClE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;QACD,EAAE,CAAC,YAAY,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;QACxC,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;IAC3C,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACvC,IAAI,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;IAClD,CAAC;SAAM,CAAC;QACJ,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,cAAc,CAAC,CAAC;QACnE,IAAI,EAAE,CAAC,UAAU,CAAC,eAAe,CAAC,EAAE,CAAC;YACjC,EAAE,CAAC,YAAY,CAAC,eAAe,EAAE,OAAO,CAAC,CAAC;YAC1C,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;QAC9C,CAAC;aAAM,CAAC;YACJ,OAAO,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;QAClE,CAAC;IACL,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,wDAAwD,CAAC,CAAC;IACtE,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;AAC9C,CAAC,CAAC,CAAC;AAEP,OAAO;KACF,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,iCAAiC,CAAC;KAC9C,MAAM,CAAC,mBAAmB,EAAE,mBAAmB,EAAE,QAAQ,CAAC;KAC1D,MAAM,CAAC,qBAAqB,EAAE,yBAAyB,CAAC;KACxD,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;IACnB,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;IACpD,MAAM,WAAW,CAAC;QACd,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,UAAU,EAAE,IAAI,CAAC,MAAM;KAC1B,CAAC,CAAC;AACP,CAAC,CAAC,CAAC;AAEP,OAAO,CAAC,KAAK,EAAE,CAAC"}
|
package/dist/db/client.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/db/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AAOpB;;;;;GAKG;AACH,wBAAgB,OAAO,IAAI,EAAE,CAAC,IAAI,CAsBjC;
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/db/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AAOpB;;;;;GAKG;AACH,wBAAgB,OAAO,IAAI,EAAE,CAAC,IAAI,CAsBjC;AAyDD;;;;;;;GAOG;AACH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CA0CtD;AAED;;GAEG;AACH,wBAAsB,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC,CAM/C"}
|
package/dist/db/client.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import pg from "pg";
|
|
2
2
|
import pgvector from "pgvector/pg";
|
|
3
|
-
import { generateSchema, generateMigration } from "./schema.js";
|
|
3
|
+
import { generateSchema, generateMigration, generatePostSchemaMigration } from "./schema.js";
|
|
4
4
|
import { getConfig, getServerConfig } from "../config.js";
|
|
5
5
|
let pool = null;
|
|
6
6
|
/**
|
|
@@ -48,6 +48,7 @@ async function initializePGlite() {
|
|
|
48
48
|
try {
|
|
49
49
|
await db.exec(generateMigration());
|
|
50
50
|
await db.exec(generateSchema(dimensions));
|
|
51
|
+
await db.exec(generatePostSchemaMigration());
|
|
51
52
|
await db.exec('COMMIT');
|
|
52
53
|
}
|
|
53
54
|
catch (err) {
|
|
@@ -111,6 +112,7 @@ export async function initializeSchema() {
|
|
|
111
112
|
await migrationClient.query('BEGIN');
|
|
112
113
|
await migrationClient.query(generateMigration());
|
|
113
114
|
await migrationClient.query(generateSchema(dimensions));
|
|
115
|
+
await migrationClient.query(generatePostSchemaMigration());
|
|
114
116
|
await migrationClient.query('COMMIT');
|
|
115
117
|
}
|
|
116
118
|
catch (err) {
|
package/dist/db/client.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/db/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,QAAQ,MAAM,aAAa,CAAC;AACnC,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/db/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,QAAQ,MAAM,aAAa,CAAC;AACnC,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,2BAA2B,EAAE,MAAM,aAAa,CAAC;AAC7F,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE1D,IAAI,IAAI,GAAmB,IAAI,CAAC;AAEhC;;;;;GAKG;AACH,MAAM,UAAU,OAAO;IACnB,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IAEtB,MAAM,WAAW,GAAG,SAAS,EAAE,CAAC,WAAW,CAAC;IAE5C,IAAI,CAAC,WAAW,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CACX,uDAAuD,CAC1D,CAAC;IACN,CAAC;IAED,IAAI,WAAW,CAAC,WAAW,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CACX,6DAA6D,CAChE,CAAC;IACN,CAAC;IAED,IAAI,GAAG,IAAI,EAAE,CAAC,IAAI,CAAC;QACf,gBAAgB,EAAE,WAAW;KAChC,CAAC,CAAC;IAEH,OAAO,IAAI,CAAC;AAChB,CAAC;AAED,SAAS,WAAW,CAAC,GAAuB;IACxC,OAAO,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACnC,OAAO,GAAG,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;AAC3C,CAAC;AAED,KAAK,UAAU,gBAAgB;IAC3B,MAAM,WAAW,GAAG,SAAS,EAAE,CAAC,WAAW,CAAC;IAC5C,IAAI,CAAC,WAAW,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;IAC1E,CAAC;IACD,MAAM,OAAO,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;IAChD,MAAM,UAAU,GAAG,eAAe,EAAE,CAAC,SAAS,EAAE,UAAU,CAAC;IAC3D,IAAI,CAAC,UAAU;QAAE,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;IAExG,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;IACxD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;IAE/D,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,EAAE,OAAO,EAAE,UAAU,EAAE,EAAE,MAAM,EAAE,EAAE,CAAC,CAAC;IAC3D,MAAM,EAAE,CAAC,SAAS,CAAC;IAEnB,6DAA6D;IAC7D,MAAM,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACvB,IAAI,CAAC;QACD,MAAM,EAAE,CAAC,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC;QACnC,MAAM,EAAE,CAAC,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC,CAAC;QAC1C,MAAM,EAAE,CAAC,IAAI,CAAC,2BAA2B,EAAE,CAAC,CAAC;QAC7C,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC5B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACX,IAAI,CAAC;YACD,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC9B,CAAC;QAAC,MAAM,CAAC;YACL,kDAAkD;QACtD,CAAC;QACD,MAAM,GAAG,CAAC;IACd,CAAC;IAED,8CAA8C;IAC9C,wFAAwF;IACxF,iFAAiF;IACjF,iFAAiF;IACjF,MAAM,OAAO,GAAG;QACZ,KAAK,EAAE,CAAC,IAAY,EAAE,MAAkB,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC;QACnE,OAAO,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;YAClB,KAAK,EAAE,CAAC,IAAY,EAAE,MAAkB,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC;YACnE,OAAO,EAAE,GAAG,EAAE,GAAE,CAAC;SACpB,CAAC;QACF,GAAG,EAAE,KAAK,IAAI,EAAE,CAAC,EAAE,CAAC,KAAK,EAAE;KAC9B,CAAC;IAEF,IAAI,GAAG,OAA6B,CAAC;AACzC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB;IAClC,MAAM,WAAW,GAAG,SAAS,EAAE,CAAC,WAAW,CAAC;IAE5C,IAAI,CAAC,WAAW,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,6DAA6D,CAAC,CAAC;IACnF,CAAC;IAED,IAAI,WAAW,CAAC,WAAW,CAAC,EAAE,CAAC;QAC3B,MAAM,gBAAgB,EAAE,CAAC;QACzB,OAAO;IACX,CAAC;IAED,MAAM,CAAC,GAAG,OAAO,EAAE,CAAC;IAEpB,MAAM,UAAU,GAAG,eAAe,EAAE,CAAC,SAAS,EAAE,UAAU,CAAC;IAC3D,IAAI,CAAC,UAAU;QAAE,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;IAExG,8DAA8D;IAC9D,MAAM,WAAW,GAAG,MAAM,CAAC,CAAC,OAAO,EAAE,CAAC;IACtC,IAAI,CAAC;QACD,iFAAiF;QACjF,yEAAyE;QACzE,MAAM,WAAW,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;QACjE,MAAM,QAAQ,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC;IAC7C,CAAC;YAAS,CAAC;QACP,WAAW,CAAC,OAAO,EAAE,CAAC;IAC1B,CAAC;IAED,6CAA6C;IAC7C,MAAM,eAAe,GAAG,MAAM,CAAC,CAAC,OAAO,EAAE,CAAC;IAC1C,IAAI,CAAC;QACD,MAAM,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,eAAe,CAAC,KAAK,CAAC,iBAAiB,EAAE,CAAC,CAAC;QACjD,MAAM,eAAe,CAAC,KAAK,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC,CAAC;QACxD,MAAM,eAAe,CAAC,KAAK,CAAC,2BAA2B,EAAE,CAAC,CAAC;QAC3D,MAAM,eAAe,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC1C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACX,MAAM,eAAe,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QACxC,MAAM,GAAG,CAAC;IACd,CAAC;YAAS,CAAC;QACP,eAAe,CAAC,OAAO,EAAE,CAAC;IAC9B,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS;IAC3B,IAAI,IAAI,EAAE,CAAC;QACP,MAAM,CAAC,GAAG,IAAI,CAAC;QACf,IAAI,GAAG,IAAI,CAAC;QACZ,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC;IAClB,CAAC;AACL,CAAC"}
|
package/dist/db/queries.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import type { Chunk, ChunkResult, IndexState } from "../types.js";
|
|
2
2
|
/**
|
|
3
3
|
* Cosine similarity search on the unified chunks table.
|
|
4
|
-
* Optionally filtered by source_name. Returns results ordered
|
|
5
|
-
* (highest first).
|
|
4
|
+
* Optionally filtered by source_name and/or version. Returns results ordered
|
|
5
|
+
* by similarity (highest first).
|
|
6
6
|
*/
|
|
7
|
-
export declare function searchChunks(embedding: number[], limit: number, sourceName?: string): Promise<ChunkResult[]>;
|
|
7
|
+
export declare function searchChunks(embedding: number[], limit: number, sourceName?: string, version?: string): Promise<ChunkResult[]>;
|
|
8
8
|
/**
|
|
9
9
|
* Text search (ILIKE) on the unified chunks table.
|
|
10
10
|
* Optionally filtered by source_name. Returns results ordered by id.
|
|
@@ -44,6 +44,17 @@ export interface IndexStats {
|
|
|
44
44
|
indexedRepos: number;
|
|
45
45
|
indexStates: IndexState[];
|
|
46
46
|
}
|
|
47
|
+
/**
|
|
48
|
+
* Fetch all chunks (without embeddings) for llms.txt generation.
|
|
49
|
+
* Ordered by source_name, file_path, chunk_index for deterministic output.
|
|
50
|
+
*/
|
|
51
|
+
export declare function getAllChunksForLlms(): Promise<{
|
|
52
|
+
source_name: string;
|
|
53
|
+
file_path: string;
|
|
54
|
+
title: string | null;
|
|
55
|
+
content: string;
|
|
56
|
+
chunk_index: number;
|
|
57
|
+
}[]>;
|
|
47
58
|
/**
|
|
48
59
|
* Get aggregate statistics for the health endpoint.
|
|
49
60
|
*/
|
package/dist/db/queries.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"queries.d.ts","sourceRoot":"","sources":["../../src/db/queries.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,KAAK,EAAE,WAAW,EAAE,UAAU,EAAe,MAAM,aAAa,CAAC;AAM/E;;;;GAIG;AACH,wBAAsB,YAAY,CAC9B,SAAS,EAAE,MAAM,EAAE,EACnB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"queries.d.ts","sourceRoot":"","sources":["../../src/db/queries.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,KAAK,EAAE,WAAW,EAAE,UAAU,EAAe,MAAM,aAAa,CAAC;AAM/E;;;;GAIG;AACH,wBAAsB,YAAY,CAC9B,SAAS,EAAE,MAAM,EAAE,EACnB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE,MAAM,GACjB,OAAO,CAAC,WAAW,EAAE,CAAC,CAsDxB;AAED;;;GAGG;AACH,wBAAsB,gBAAgB,CAClC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,WAAW,EAAE,CAAC,CA2BxB;AAMD;;;GAGG;AACH,wBAAsB,YAAY,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAyDjE;AAMD;;GAEG;AACH,wBAAsB,kBAAkB,CACpC,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,MAAM,GACjB,OAAO,CAAC,IAAI,CAAC,CAMf;AAED;;GAEG;AACH,wBAAsB,oBAAoB,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAG5E;AAMD;;GAEG;AACH,wBAAsB,aAAa,CAC/B,UAAU,EAAE,MAAM,EAClB,SAAS,EAAE,MAAM,GAClB,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CAmB5B;AAED;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAqBvE;AAMD;;GAEG;AACH,wBAAsB,mBAAmB,CACrC,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC9B,OAAO,CAAC,IAAI,CAAC,CAMf;AAMD,MAAM,WAAW,UAAU;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,KAAK,CAAC;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACxD,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,UAAU,EAAE,CAAC;CAC7B;AAED;;;GAGG;AACH,wBAAsB,mBAAmB,IAAI,OAAO,CAAC;IAAE,WAAW,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAA;CAAE,EAAE,CAAC,CAM7J;AAED;;GAEG;AACH,wBAAsB,aAAa,IAAI,OAAO,CAAC,UAAU,CAAC,CA8BzD"}
|
package/dist/db/queries.js
CHANGED
|
@@ -5,54 +5,44 @@ import { getPool } from "./client.js";
|
|
|
5
5
|
// ---------------------------------------------------------------------------
|
|
6
6
|
/**
|
|
7
7
|
* Cosine similarity search on the unified chunks table.
|
|
8
|
-
* Optionally filtered by source_name. Returns results ordered
|
|
9
|
-
* (highest first).
|
|
8
|
+
* Optionally filtered by source_name and/or version. Returns results ordered
|
|
9
|
+
* by similarity (highest first).
|
|
10
10
|
*/
|
|
11
|
-
export async function searchChunks(embedding, limit, sourceName) {
|
|
11
|
+
export async function searchChunks(embedding, limit, sourceName, version) {
|
|
12
12
|
const pool = getPool();
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
const conditions = [];
|
|
14
|
+
const params = [pgvector.toSql(embedding)];
|
|
15
|
+
let paramIdx = 2;
|
|
15
16
|
if (sourceName) {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
id,
|
|
19
|
-
source_name,
|
|
20
|
-
source_url,
|
|
21
|
-
title,
|
|
22
|
-
content,
|
|
23
|
-
repo_url,
|
|
24
|
-
file_path,
|
|
25
|
-
start_line,
|
|
26
|
-
end_line,
|
|
27
|
-
language,
|
|
28
|
-
1 - (embedding <=> $1) AS similarity
|
|
29
|
-
FROM chunks
|
|
30
|
-
WHERE source_name = $2
|
|
31
|
-
ORDER BY embedding <=> $1
|
|
32
|
-
LIMIT $3
|
|
33
|
-
`;
|
|
34
|
-
params = [pgvector.toSql(embedding), sourceName, limit];
|
|
17
|
+
conditions.push(`source_name = $${paramIdx++}`);
|
|
18
|
+
params.push(sourceName);
|
|
35
19
|
}
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
id,
|
|
40
|
-
source_name,
|
|
41
|
-
source_url,
|
|
42
|
-
title,
|
|
43
|
-
content,
|
|
44
|
-
repo_url,
|
|
45
|
-
file_path,
|
|
46
|
-
start_line,
|
|
47
|
-
end_line,
|
|
48
|
-
language,
|
|
49
|
-
1 - (embedding <=> $1) AS similarity
|
|
50
|
-
FROM chunks
|
|
51
|
-
ORDER BY embedding <=> $1
|
|
52
|
-
LIMIT $2
|
|
53
|
-
`;
|
|
54
|
-
params = [pgvector.toSql(embedding), limit];
|
|
20
|
+
if (version) {
|
|
21
|
+
conditions.push(`version = $${paramIdx++}`);
|
|
22
|
+
params.push(version);
|
|
55
23
|
}
|
|
24
|
+
const whereClause = conditions.length > 0
|
|
25
|
+
? `WHERE ${conditions.join(' AND ')}`
|
|
26
|
+
: '';
|
|
27
|
+
const sql = `
|
|
28
|
+
SELECT
|
|
29
|
+
id,
|
|
30
|
+
source_name,
|
|
31
|
+
source_url,
|
|
32
|
+
title,
|
|
33
|
+
content,
|
|
34
|
+
repo_url,
|
|
35
|
+
file_path,
|
|
36
|
+
start_line,
|
|
37
|
+
end_line,
|
|
38
|
+
language,
|
|
39
|
+
1 - (embedding <=> $1) AS similarity
|
|
40
|
+
FROM chunks
|
|
41
|
+
${whereClause}
|
|
42
|
+
ORDER BY embedding <=> $1
|
|
43
|
+
LIMIT $${paramIdx}
|
|
44
|
+
`;
|
|
45
|
+
params.push(limit);
|
|
56
46
|
const { rows } = await pool.query(sql, params);
|
|
57
47
|
return rows.map((r) => ({
|
|
58
48
|
id: r.id,
|
|
@@ -119,9 +109,9 @@ export async function upsertChunks(chunks) {
|
|
|
119
109
|
INSERT INTO chunks
|
|
120
110
|
(source_name, source_url, title, content, embedding, repo_url,
|
|
121
111
|
file_path, start_line, end_line, language, chunk_index,
|
|
122
|
-
metadata, commit_sha, indexed_at)
|
|
112
|
+
metadata, commit_sha, version, indexed_at)
|
|
123
113
|
VALUES
|
|
124
|
-
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, NOW())
|
|
114
|
+
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, NOW())
|
|
125
115
|
ON CONFLICT (source_name, file_path, chunk_index) DO UPDATE SET
|
|
126
116
|
source_url = EXCLUDED.source_url,
|
|
127
117
|
title = EXCLUDED.title,
|
|
@@ -133,6 +123,7 @@ export async function upsertChunks(chunks) {
|
|
|
133
123
|
language = EXCLUDED.language,
|
|
134
124
|
metadata = EXCLUDED.metadata,
|
|
135
125
|
commit_sha = EXCLUDED.commit_sha,
|
|
126
|
+
version = EXCLUDED.version,
|
|
136
127
|
indexed_at = NOW()
|
|
137
128
|
`;
|
|
138
129
|
for (const chunk of chunks) {
|
|
@@ -150,6 +141,7 @@ export async function upsertChunks(chunks) {
|
|
|
150
141
|
chunk.chunk_index,
|
|
151
142
|
JSON.stringify(chunk.metadata ?? {}),
|
|
152
143
|
chunk.commit_sha ?? null,
|
|
144
|
+
chunk.version ?? null,
|
|
153
145
|
]);
|
|
154
146
|
}
|
|
155
147
|
await client.query("COMMIT");
|
|
@@ -240,6 +232,15 @@ export async function insertCollectedData(toolName, data) {
|
|
|
240
232
|
const pool = getPool();
|
|
241
233
|
await pool.query("INSERT INTO collected_data (tool_name, data) VALUES ($1, $2)", [toolName, JSON.stringify(data)]);
|
|
242
234
|
}
|
|
235
|
+
/**
|
|
236
|
+
* Fetch all chunks (without embeddings) for llms.txt generation.
|
|
237
|
+
* Ordered by source_name, file_path, chunk_index for deterministic output.
|
|
238
|
+
*/
|
|
239
|
+
export async function getAllChunksForLlms() {
|
|
240
|
+
const pool = getPool();
|
|
241
|
+
const result = await pool.query('SELECT source_name, file_path, title, content, chunk_index FROM chunks ORDER BY source_name, file_path, chunk_index');
|
|
242
|
+
return result.rows;
|
|
243
|
+
}
|
|
243
244
|
/**
|
|
244
245
|
* Get aggregate statistics for the health endpoint.
|
|
245
246
|
*/
|