diffdoc 0.1.0 → 0.1.1

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,6 +36,7 @@ Package scripts can call the installed binary:
36
36
  "scripts": {
37
37
  "diffdoc:summarize": "diffdoc summarize",
38
38
  "diffdoc:embed": "diffdoc embed",
39
+ "diffdoc:search": "diffdoc search",
39
40
  "diffdoc:query": "diffdoc query"
40
41
  }
41
42
  }
@@ -76,6 +77,29 @@ Example config with all supported keys:
76
77
 
77
78
  Supported environment fallbacks use the uppercase names for the same settings, including `AI_PROVIDER`, `DIFFDOC_BASE_DIR`, `LOCAL_LLM_ENDPOINT`, `LOCAL_EMBED_ENDPOINT`, `LOCAL_CHAT_MODEL`, `LOCAL_EMBED_MODEL`, `CLOUD_LLM_ENDPOINT`, `CLOUD_CHAT_MODEL`, `CLOUD_EMBED_MODEL`, and `OPENAI_API_KEY`.
78
79
 
80
+ ## Manifest-First Design
81
+
82
+ DiffDoc separates summarization from embedding. The `summarize` command writes all generated file summaries to `manifest.json` under `baseDir`, usually `./.diffdoc/manifest.json`.
83
+
84
+ The manifest is plain JSON and contains one entry per tracked file:
85
+
86
+ ```json
87
+ {
88
+ "lastSyncedCommit": "string-hash",
89
+ "files": {
90
+ "src/example.ts": {
91
+ "hash": "md5-string",
92
+ "summaryText": "Plain-English explanation text here.",
93
+ "rawCodeSnapshot": "Full code text here..."
94
+ }
95
+ }
96
+ }
97
+ ```
98
+
99
+ Because the summaries are stored independently, users do not have to embed immediately. They can review, archive, transform, or embed the manifest later using their preferred vectorization model and storage solution.
100
+
101
+ DiffDoc includes `diffdoc embed` as a built-in convenience path for creating a local Vectra index, but the manifest can also be consumed by other tools such as custom OpenAI-compatible embedding pipelines, hosted vector databases, local search systems, or internal documentation workflows.
102
+
79
103
  ## Commands
80
104
 
81
105
  Summarize a repository into `./.diffdoc/manifest.json`:
@@ -96,13 +120,25 @@ Embed the manifest into a local Vectra index at `./.diffdoc/vectra`:
96
120
  diffdoc embed
97
121
  ```
98
122
 
99
- Ask a question using retrieved embedded context:
123
+ Search the local Vectra index and print raw matches:
124
+
125
+ ```bash
126
+ diffdoc search "How does this project process changed files?"
127
+ ```
128
+
129
+ Include retrieved code snapshots in search results:
130
+
131
+ ```bash
132
+ diffdoc search "How does embedding work?" --top 3 --code
133
+ ```
134
+
135
+ Ask a question and have the configured chat model answer using retrieved embedded context:
100
136
 
101
137
  ```bash
102
138
  diffdoc query "How does this project process changed files?"
103
139
  ```
104
140
 
105
- Include retrieved code snapshots after the answer:
141
+ Include retrieved code snapshots after the generated answer:
106
142
 
107
143
  ```bash
108
144
  diffdoc query "How does embedding work?" --top 3 --code
@@ -133,6 +169,7 @@ Typical usage is:
133
169
  ```bash
134
170
  diffdoc summarize --path . --mode all
135
171
  diffdoc embed
172
+ diffdoc search "What files explain the summarization flow?"
136
173
  diffdoc query "What business behavior does this repository implement?"
137
174
  ```
138
175
 
@@ -146,8 +183,10 @@ diffdoc embed
146
183
  ## Notes
147
184
 
148
185
  - Node.js `>=22` is required because Vectra requires it.
149
- - `.diffdoc/` and `.diffdocrc` are ignored by git by default.
186
+ - This repository ignores `.diffdoc/vectra` and `.diffdocrc`; add similar entries to your project's `.gitignore` if you do not want generated indexes or local config committed. The manifest at `.diffdoc/manifest.json` is not ignored by this repository.
187
+ - Commit `.diffdoc/manifest.json` when using delta workflows. Delta summarization reads the previous manifest state to decide which changed files need fresh summaries.
150
188
  - `summarize` requires a configured chat model.
151
189
  - `embed` requires a configured embedding model.
190
+ - `search` requires a configured embedding model and returns raw retrieval results without calling the chat model.
152
191
  - `query` requires both a configured chat model and embedding model.
153
192
  - For code-oriented embedding models such as `nomic-embed-code`, DiffDoc prefixes query embeddings with `Represent this query for searching relevant code:`.
@@ -1,6 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.runQuery = runQuery;
4
+ exports.runSearch = runSearch;
4
5
  const vectra_1 = require("vectra");
5
6
  const llm_1 = require("../utils/llm");
6
7
  const embed_1 = require("./embed");
@@ -67,3 +68,29 @@ async function runQuery(message, options, config) {
67
68
  }
68
69
  }
69
70
  }
71
+ async function runSearch(message, options, config) {
72
+ const topK = parseTopK(options.top);
73
+ const indexPath = (0, embed_1.getVectraIndexPath)(config);
74
+ const index = new vectra_1.LocalIndex(indexPath);
75
+ if (!await index.isIndexCreated()) {
76
+ throw new Error(`No Vectra index found at ${indexPath}. Run "diffdoc embed" first.`);
77
+ }
78
+ const [queryVector] = await (0, llm_1.generateEmbeddings)([`${CODE_QUERY_PREFIX}${message}`], config.embeddings);
79
+ const results = await index.queryItems(queryVector, message, topK);
80
+ if (results.length === 0) {
81
+ console.log("No matching embedded summaries found.");
82
+ return;
83
+ }
84
+ for (const [indexPosition, result] of results.entries()) {
85
+ const metadata = result.item.metadata;
86
+ console.log(`\n#${indexPosition + 1} ${metadata.filePath}`);
87
+ console.log(`Score: ${result.score.toFixed(4)}`);
88
+ console.log(`Hash: ${metadata.hash}`);
89
+ console.log("Summary:");
90
+ console.log(trimForDisplay(metadata.summaryText, 1200));
91
+ if (options.code) {
92
+ console.log("Code Snapshot:");
93
+ console.log(trimForDisplay(metadata.rawCodeSnapshot, 2000));
94
+ }
95
+ }
96
+ }
package/dist/index.js CHANGED
@@ -70,7 +70,7 @@ addChatOptions(addBaseOptions(program
70
70
  });
71
71
  addEmbeddingOptions(addChatOptions(addBaseOptions(program
72
72
  .command("query"))))
73
- .description("Search the local Vectra index with a natural-language question")
73
+ .description("Answer a question using retrieved local Vectra context")
74
74
  .argument("<message...>", "question or search text")
75
75
  .option("--top <count>", "number of matches to return", "5")
76
76
  .option("--code", "include code snapshots in results", false)
@@ -84,6 +84,22 @@ addEmbeddingOptions(addChatOptions(addBaseOptions(program
84
84
  process.exit(1);
85
85
  }
86
86
  });
87
+ addCloudEndpointAndKeyOptions(addEmbeddingOptions(addBaseOptions(program
88
+ .command("search"))))
89
+ .description("Search the local Vectra index and print raw matches")
90
+ .argument("<message...>", "search text")
91
+ .option("--top <count>", "number of matches to return", "5")
92
+ .option("--code", "include code snapshots in results", false)
93
+ .action(async (messageParts, options) => {
94
+ try {
95
+ const config = (0, config_1.buildRuntimeConfig)(options, { embeddings: true });
96
+ await (0, query_1.runSearch)(messageParts.join(" "), options, config);
97
+ }
98
+ catch (error) {
99
+ console.error(error instanceof Error ? error.message : error);
100
+ process.exit(1);
101
+ }
102
+ });
87
103
  addCloudEndpointAndKeyOptions(addEmbeddingOptions(addBaseOptions(program
88
104
  .command("embed"))))
89
105
  .description("Embed manifest summaries into a local Vectra index")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "diffdoc",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "Translate repository code shifts into plain-English business context",
5
5
  "license": "MIT",
6
6
  "author": "Christopher Sullivan",