diffdoc 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -3
- package/dist/commands/query.js +27 -0
- package/dist/index.js +17 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -36,6 +36,7 @@ Package scripts can call the installed binary:
|
|
|
36
36
|
"scripts": {
|
|
37
37
|
"diffdoc:summarize": "diffdoc summarize",
|
|
38
38
|
"diffdoc:embed": "diffdoc embed",
|
|
39
|
+
"diffdoc:search": "diffdoc search",
|
|
39
40
|
"diffdoc:query": "diffdoc query"
|
|
40
41
|
}
|
|
41
42
|
}
|
|
@@ -76,6 +77,29 @@ Example config with all supported keys:
|
|
|
76
77
|
|
|
77
78
|
Supported environment fallbacks use the uppercase names for the same settings, including `AI_PROVIDER`, `DIFFDOC_BASE_DIR`, `LOCAL_LLM_ENDPOINT`, `LOCAL_EMBED_ENDPOINT`, `LOCAL_CHAT_MODEL`, `LOCAL_EMBED_MODEL`, `CLOUD_LLM_ENDPOINT`, `CLOUD_CHAT_MODEL`, `CLOUD_EMBED_MODEL`, and `OPENAI_API_KEY`.
|
|
78
79
|
|
|
80
|
+
## Manifest-First Design
|
|
81
|
+
|
|
82
|
+
DiffDoc separates summarization from embedding. The `summarize` command writes all generated file summaries to `manifest.json` under `baseDir`, usually `./.diffdoc/manifest.json`.
|
|
83
|
+
|
|
84
|
+
The manifest is plain JSON and contains one entry per tracked file:
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
{
|
|
88
|
+
"lastSyncedCommit": "string-hash",
|
|
89
|
+
"files": {
|
|
90
|
+
"src/example.ts": {
|
|
91
|
+
"hash": "md5-string",
|
|
92
|
+
"summaryText": "Plain-English explanation text here.",
|
|
93
|
+
"rawCodeSnapshot": "Full code text here..."
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Because the summaries are stored independently, users do not have to embed immediately. They can review, archive, transform, or embed the manifest later using their preferred vectorization model and storage solution.
|
|
100
|
+
|
|
101
|
+
DiffDoc includes `diffdoc embed` as a built-in convenience path for creating a local Vectra index, but the manifest can also be consumed by other tools such as custom OpenAI-compatible embedding pipelines, hosted vector databases, local search systems, or internal documentation workflows.
|
|
102
|
+
|
|
79
103
|
## Commands
|
|
80
104
|
|
|
81
105
|
Summarize a repository into `./.diffdoc/manifest.json`:
|
|
@@ -96,13 +120,25 @@ Embed the manifest into a local Vectra index at `./.diffdoc/vectra`:
|
|
|
96
120
|
diffdoc embed
|
|
97
121
|
```
|
|
98
122
|
|
|
99
|
-
|
|
123
|
+
Search the local Vectra index and print raw matches:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
diffdoc search "How does this project process changed files?"
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Include retrieved code snapshots in search results:
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
diffdoc search "How does embedding work?" --top 3 --code
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Ask a question and have the configured chat model answer using retrieved embedded context:
|
|
100
136
|
|
|
101
137
|
```bash
|
|
102
138
|
diffdoc query "How does this project process changed files?"
|
|
103
139
|
```
|
|
104
140
|
|
|
105
|
-
Include retrieved code snapshots after the answer:
|
|
141
|
+
Include retrieved code snapshots after the generated answer:
|
|
106
142
|
|
|
107
143
|
```bash
|
|
108
144
|
diffdoc query "How does embedding work?" --top 3 --code
|
|
@@ -133,6 +169,7 @@ Typical usage is:
|
|
|
133
169
|
```bash
|
|
134
170
|
diffdoc summarize --path . --mode all
|
|
135
171
|
diffdoc embed
|
|
172
|
+
diffdoc search "What files explain the summarization flow?"
|
|
136
173
|
diffdoc query "What business behavior does this repository implement?"
|
|
137
174
|
```
|
|
138
175
|
|
|
@@ -146,8 +183,10 @@ diffdoc embed
|
|
|
146
183
|
## Notes
|
|
147
184
|
|
|
148
185
|
- Node.js `>=22` is required because Vectra requires it.
|
|
149
|
-
- `.diffdoc
|
|
186
|
+
- This repository ignores `.diffdoc/vectra` and `.diffdocrc`; add similar entries to your project's `.gitignore` if you do not want generated indexes or local config committed. The manifest at `.diffdoc/manifest.json` is not ignored by this repository.
|
|
187
|
+
- Commit `.diffdoc/manifest.json` when using delta workflows. Delta summarization reads the previous manifest state to decide which changed files need fresh summaries.
|
|
150
188
|
- `summarize` requires a configured chat model.
|
|
151
189
|
- `embed` requires a configured embedding model.
|
|
190
|
+
- `search` requires a configured embedding model and returns raw retrieval results without calling the chat model.
|
|
152
191
|
- `query` requires both a configured chat model and embedding model.
|
|
153
192
|
- For code-oriented embedding models such as `nomic-embed-code`, DiffDoc prefixes query embeddings with `Represent this query for searching relevant code:`.
|
package/dist/commands/query.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.runQuery = runQuery;
|
|
4
|
+
exports.runSearch = runSearch;
|
|
4
5
|
const vectra_1 = require("vectra");
|
|
5
6
|
const llm_1 = require("../utils/llm");
|
|
6
7
|
const embed_1 = require("./embed");
|
|
@@ -67,3 +68,29 @@ async function runQuery(message, options, config) {
|
|
|
67
68
|
}
|
|
68
69
|
}
|
|
69
70
|
}
|
|
71
|
+
async function runSearch(message, options, config) {
|
|
72
|
+
const topK = parseTopK(options.top);
|
|
73
|
+
const indexPath = (0, embed_1.getVectraIndexPath)(config);
|
|
74
|
+
const index = new vectra_1.LocalIndex(indexPath);
|
|
75
|
+
if (!await index.isIndexCreated()) {
|
|
76
|
+
throw new Error(`No Vectra index found at ${indexPath}. Run "diffdoc embed" first.`);
|
|
77
|
+
}
|
|
78
|
+
const [queryVector] = await (0, llm_1.generateEmbeddings)([`${CODE_QUERY_PREFIX}${message}`], config.embeddings);
|
|
79
|
+
const results = await index.queryItems(queryVector, message, topK);
|
|
80
|
+
if (results.length === 0) {
|
|
81
|
+
console.log("No matching embedded summaries found.");
|
|
82
|
+
return;
|
|
83
|
+
}
|
|
84
|
+
for (const [indexPosition, result] of results.entries()) {
|
|
85
|
+
const metadata = result.item.metadata;
|
|
86
|
+
console.log(`\n#${indexPosition + 1} ${metadata.filePath}`);
|
|
87
|
+
console.log(`Score: ${result.score.toFixed(4)}`);
|
|
88
|
+
console.log(`Hash: ${metadata.hash}`);
|
|
89
|
+
console.log("Summary:");
|
|
90
|
+
console.log(trimForDisplay(metadata.summaryText, 1200));
|
|
91
|
+
if (options.code) {
|
|
92
|
+
console.log("Code Snapshot:");
|
|
93
|
+
console.log(trimForDisplay(metadata.rawCodeSnapshot, 2000));
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
package/dist/index.js
CHANGED
|
@@ -70,7 +70,7 @@ addChatOptions(addBaseOptions(program
|
|
|
70
70
|
});
|
|
71
71
|
addEmbeddingOptions(addChatOptions(addBaseOptions(program
|
|
72
72
|
.command("query"))))
|
|
73
|
-
.description("
|
|
73
|
+
.description("Answer a question using retrieved local Vectra context")
|
|
74
74
|
.argument("<message...>", "question or search text")
|
|
75
75
|
.option("--top <count>", "number of matches to return", "5")
|
|
76
76
|
.option("--code", "include code snapshots in results", false)
|
|
@@ -84,6 +84,22 @@ addEmbeddingOptions(addChatOptions(addBaseOptions(program
|
|
|
84
84
|
process.exit(1);
|
|
85
85
|
}
|
|
86
86
|
});
|
|
87
|
+
addCloudEndpointAndKeyOptions(addEmbeddingOptions(addBaseOptions(program
|
|
88
|
+
.command("search"))))
|
|
89
|
+
.description("Search the local Vectra index and print raw matches")
|
|
90
|
+
.argument("<message...>", "search text")
|
|
91
|
+
.option("--top <count>", "number of matches to return", "5")
|
|
92
|
+
.option("--code", "include code snapshots in results", false)
|
|
93
|
+
.action(async (messageParts, options) => {
|
|
94
|
+
try {
|
|
95
|
+
const config = (0, config_1.buildRuntimeConfig)(options, { embeddings: true });
|
|
96
|
+
await (0, query_1.runSearch)(messageParts.join(" "), options, config);
|
|
97
|
+
}
|
|
98
|
+
catch (error) {
|
|
99
|
+
console.error(error instanceof Error ? error.message : error);
|
|
100
|
+
process.exit(1);
|
|
101
|
+
}
|
|
102
|
+
});
|
|
87
103
|
addCloudEndpointAndKeyOptions(addEmbeddingOptions(addBaseOptions(program
|
|
88
104
|
.command("embed"))))
|
|
89
105
|
.description("Embed manifest summaries into a local Vectra index")
|