diffdoc 0.4.2 → 0.4.3
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +209 -158
- package/dist/commands/summarize.js +20 -16
- package/dist/index.js +1 -1
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -1,277 +1,323 @@
|
|
|
1
1
|
# DiffDoc
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Your codebase already knows how the product works. DiffDoc turns that implementation into a living, portable knowledgebase that humans and agents can search, question, and reuse.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
It generates plain-English summaries from source files, records them in a manifest-first artifact model, and keeps the resulting context close to the repository. Use it to give developers, agents, reviewers, and stakeholders implementation-grounded answers without asking them to read every file first.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
## Guiding Principles
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
- The codebase is the source of truth. Requirements documents, tickets, wikis, and tribal knowledge can drift, but product behavior is ultimately defined by the code that ships.
|
|
10
|
+
- Summaries should describe implemented behavior, not imagined intent. DiffDoc focuses on what the current files do so product questions are answered from the implementation first.
|
|
11
|
+
- The knowledgebase should evolve with the product. When files change, DiffDoc refreshes affected summaries and manifest entries so generated context does not become a stale snapshot.
|
|
12
|
+
- The manifest is the durable contract. DiffDoc is intentionally manifest-first: the manifest is the source of truth for generated summaries, and downstream tools should be able to consume the manifest and summary assets without depending on DiffDoc's built-in embedding workflow.
|
|
13
|
+
- Retrieval is optional infrastructure. The built-in `embed` command, local Vectra index, `search`, `query`, and MCP server are convenience features for teams that want an end-to-end local workflow, but consumers should be free to use their own embedding provider, vector store, search system, or documentation pipeline.
|
|
14
|
+
- Useful context should serve humans and agents. The generated knowledgebase is intended for product questions, onboarding, code review, agent workflows, audits, and long-term maintenance.
|
|
10
15
|
|
|
11
|
-
|
|
16
|
+
## Requirements
|
|
12
17
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
```
|
|
18
|
+
- Node.js `>=22`
|
|
19
|
+
- An OpenAI-compatible chat model for `summarize` and `query`
|
|
20
|
+
- An OpenAI-compatible embedding model for `embed`, `search`, and `query`
|
|
21
|
+
- A local model server such as Ollama, LM Studio, or vLLM, or a cloud OpenAI-compatible endpoint
|
|
18
22
|
|
|
19
|
-
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
Run DiffDoc without adding it to your project:
|
|
20
26
|
|
|
21
27
|
```bash
|
|
22
28
|
npx diffdoc --help
|
|
23
29
|
```
|
|
24
30
|
|
|
25
|
-
|
|
31
|
+
Install it as a project dev dependency:
|
|
26
32
|
|
|
27
33
|
```bash
|
|
28
34
|
npm install --save-dev diffdoc
|
|
29
|
-
npx diffdoc --help
|
|
30
35
|
```
|
|
31
36
|
|
|
32
|
-
|
|
37
|
+
Recommended package scripts:
|
|
33
38
|
|
|
34
39
|
```json
|
|
35
40
|
{
|
|
36
41
|
"scripts": {
|
|
37
42
|
"diffdoc:init": "diffdoc init",
|
|
38
43
|
"diffdoc:summarize": "diffdoc summarize",
|
|
39
|
-
"diffdoc:status": "diffdoc status",
|
|
40
44
|
"diffdoc:embed": "diffdoc embed",
|
|
41
45
|
"diffdoc:search": "diffdoc search",
|
|
42
46
|
"diffdoc:query": "diffdoc query",
|
|
47
|
+
"diffdoc:status": "diffdoc status",
|
|
43
48
|
"diffdoc:mcp": "diffdoc-mcp"
|
|
44
49
|
}
|
|
45
50
|
}
|
|
46
51
|
```
|
|
47
52
|
|
|
48
|
-
##
|
|
49
|
-
|
|
50
|
-
DiffDoc accepts runtime flags on each command. It also loads a JSON `.diffdocrc` file from the current working directory when present, or from a custom path with `--config <path>`.
|
|
53
|
+
## Quick Start
|
|
51
54
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
1. CLI flags
|
|
55
|
-
2. `.diffdocrc`
|
|
56
|
-
3. Environment variable fallbacks
|
|
57
|
-
|
|
58
|
-
Create a local config from the example:
|
|
55
|
+
Initialize DiffDoc in your repository:
|
|
59
56
|
|
|
60
57
|
```bash
|
|
61
|
-
|
|
58
|
+
npx diffdoc init
|
|
62
59
|
```
|
|
63
60
|
|
|
64
|
-
|
|
61
|
+
For a non-interactive setup using defaults:
|
|
65
62
|
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
"baseDir": "./.diffdoc",
|
|
69
|
-
"aiProvider": "local",
|
|
70
|
-
"localLlmEndpoint": "http://localhost:11434/v1",
|
|
71
|
-
"localEmbedEndpoint": "http://localhost:11434/v1/embeddings",
|
|
72
|
-
"localChatModel": "qwen2.5-coder:7b",
|
|
73
|
-
"localEmbedModel": "nomic-embed-code",
|
|
74
|
-
"cloudLlmEndpoint": "https://api.openai.com/v1",
|
|
75
|
-
"cloudChatModel": "gpt-4o-mini",
|
|
76
|
-
"cloudEmbedModel": "text-embedding-3-small",
|
|
77
|
-
"openaiApiKey": "",
|
|
78
|
-
"includeGlobs": [],
|
|
79
|
-
"excludeGlobs": [],
|
|
80
|
-
"ignoreFile": ".diffdocignore"
|
|
81
|
-
}
|
|
63
|
+
```bash
|
|
64
|
+
npx diffdoc init --yes
|
|
82
65
|
```
|
|
83
66
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
## Manifest-First Design
|
|
67
|
+
Create summaries:
|
|
87
68
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
The manifest is plain JSON and contains one entry per tracked file:
|
|
91
|
-
|
|
92
|
-
```json
|
|
93
|
-
{
|
|
94
|
-
"schemaVersion": 2,
|
|
95
|
-
"lastSyncedCommit": "string-hash",
|
|
96
|
-
"files": {
|
|
97
|
-
"src/example.ts": "md5-string"
|
|
98
|
-
}
|
|
99
|
-
}
|
|
69
|
+
```bash
|
|
70
|
+
npx diffdoc summarize --path . --mode all
|
|
100
71
|
```
|
|
101
72
|
|
|
102
|
-
|
|
73
|
+
Build the local search index:
|
|
103
74
|
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
"schemaVersion": 1,
|
|
107
|
-
"content_hash": "md5-string",
|
|
108
|
-
"summary": "Plain-English explanation text here.",
|
|
109
|
-
"raw_code_snapshot": "Optional code text when --include-code-snapshot is enabled"
|
|
110
|
-
}
|
|
75
|
+
```bash
|
|
76
|
+
npx diffdoc embed
|
|
111
77
|
```
|
|
112
78
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
DiffDoc includes `diffdoc embed` as a built-in convenience path for creating a local Vectra index, but the manifest can also be consumed by other tools such as custom OpenAI-compatible embedding pipelines, hosted vector databases, local search systems, or internal documentation workflows.
|
|
116
|
-
|
|
117
|
-
## Commands
|
|
118
|
-
|
|
119
|
-
Initialize DiffDoc configuration for a repository:
|
|
79
|
+
Search raw matches:
|
|
120
80
|
|
|
121
81
|
```bash
|
|
122
|
-
diffdoc
|
|
82
|
+
npx diffdoc search "How does authentication work?"
|
|
123
83
|
```
|
|
124
84
|
|
|
125
|
-
|
|
85
|
+
Ask a question using retrieved project context:
|
|
126
86
|
|
|
127
87
|
```bash
|
|
128
|
-
diffdoc
|
|
88
|
+
npx diffdoc query "What business behavior does this repository implement?"
|
|
129
89
|
```
|
|
130
90
|
|
|
131
|
-
|
|
91
|
+
After the first full run, refresh changed files with delta mode:
|
|
132
92
|
|
|
133
93
|
```bash
|
|
134
|
-
diffdoc
|
|
94
|
+
npx diffdoc summarize --path . --mode delta
|
|
95
|
+
npx diffdoc embed
|
|
135
96
|
```
|
|
136
97
|
|
|
137
|
-
|
|
98
|
+
## What Init Creates
|
|
138
99
|
|
|
139
|
-
|
|
100
|
+
`diffdoc init` creates or updates repository-local setup files:
|
|
140
101
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
102
|
+
- `.diffdocrc`: local DiffDoc configuration
|
|
103
|
+
- `.diffdocignore`: gitignore-style file selection rules for summarization
|
|
104
|
+
- `.gitignore`: entries for local/generated DiffDoc files when needed
|
|
144
105
|
|
|
145
|
-
|
|
106
|
+
It does not summarize or embed anything. Run `summarize` and `embed` after initialization.
|
|
146
107
|
|
|
147
|
-
|
|
148
|
-
diffdoc summarize --path . --mode delta
|
|
149
|
-
```
|
|
108
|
+
## Configuration
|
|
150
109
|
|
|
151
|
-
|
|
110
|
+
DiffDoc reads settings in this order:
|
|
152
111
|
|
|
153
|
-
|
|
154
|
-
|
|
112
|
+
1. CLI flags
|
|
113
|
+
2. `.diffdocrc` or the file passed with `--config <path>`
|
|
114
|
+
3. Environment variables
|
|
115
|
+
4. Built-in defaults
|
|
116
|
+
|
|
117
|
+
Example `.diffdocrc` for local models:
|
|
118
|
+
|
|
119
|
+
```json
|
|
120
|
+
{
|
|
121
|
+
"baseDir": "./.diffdoc",
|
|
122
|
+
"aiProvider": "local",
|
|
123
|
+
"localLlmEndpoint": "http://localhost:11434/v1",
|
|
124
|
+
"localEmbedEndpoint": "http://localhost:11434/v1/embeddings",
|
|
125
|
+
"localChatModel": "qwen2.5-coder:7b",
|
|
126
|
+
"localEmbedModel": "nomic-embed-code",
|
|
127
|
+
"embedBatchSize": 25,
|
|
128
|
+
"includeGlobs": [],
|
|
129
|
+
"excludeGlobs": [],
|
|
130
|
+
"ignoreFile": ".diffdocignore"
|
|
131
|
+
}
|
|
155
132
|
```
|
|
156
133
|
|
|
157
|
-
|
|
134
|
+
Example `.diffdocrc` for a cloud OpenAI-compatible endpoint:
|
|
158
135
|
|
|
159
|
-
```
|
|
160
|
-
|
|
136
|
+
```json
|
|
137
|
+
{
|
|
138
|
+
"baseDir": "./.diffdoc",
|
|
139
|
+
"aiProvider": "cloud",
|
|
140
|
+
"cloudLlmEndpoint": "https://api.openai.com/v1",
|
|
141
|
+
"cloudChatModel": "gpt-4o-mini",
|
|
142
|
+
"cloudEmbedModel": "text-embedding-3-small",
|
|
143
|
+
"embedBatchSize": 25,
|
|
144
|
+
"includeGlobs": [],
|
|
145
|
+
"excludeGlobs": [],
|
|
146
|
+
"ignoreFile": ".diffdocignore"
|
|
147
|
+
}
|
|
161
148
|
```
|
|
162
149
|
|
|
163
|
-
|
|
150
|
+
Set `OPENAI_API_KEY` for cloud providers instead of committing API keys:
|
|
164
151
|
|
|
165
152
|
```bash
|
|
166
|
-
diffdoc summarize --path . --mode
|
|
153
|
+
OPENAI_API_KEY="..." npx diffdoc summarize --path . --mode all
|
|
167
154
|
```
|
|
168
155
|
|
|
169
|
-
|
|
156
|
+
Supported environment variables:
|
|
170
157
|
|
|
171
|
-
```
|
|
172
|
-
|
|
158
|
+
```text
|
|
159
|
+
AI_PROVIDER
|
|
160
|
+
DIFFDOC_BASE_DIR
|
|
161
|
+
DIFFDOC_EMBED_BATCH_SIZE
|
|
162
|
+
DIFFDOC_INCLUDE_GLOBS
|
|
163
|
+
DIFFDOC_EXCLUDE_GLOBS
|
|
164
|
+
DIFFDOC_IGNORE_FILE
|
|
165
|
+
LOCAL_LLM_ENDPOINT
|
|
166
|
+
LOCAL_CHAT_MODEL
|
|
167
|
+
LOCAL_EMBED_ENDPOINT
|
|
168
|
+
LOCAL_EMBED_MODEL
|
|
169
|
+
CLOUD_LLM_ENDPOINT
|
|
170
|
+
CLOUD_CHAT_MODEL
|
|
171
|
+
CLOUD_EMBED_MODEL
|
|
172
|
+
OPENAI_API_KEY
|
|
173
173
|
```
|
|
174
174
|
|
|
175
|
-
|
|
175
|
+
## File Selection
|
|
176
176
|
|
|
177
|
-
|
|
178
|
-
|
|
177
|
+
`.diffdocignore` uses `.gitignore`-style syntax. This is the main way to keep generated files, dependencies, secrets, binaries, and local artifacts out of summaries.
|
|
178
|
+
|
|
179
|
+
Example `.diffdocignore`:
|
|
180
|
+
|
|
181
|
+
```gitignore
|
|
182
|
+
.git/
|
|
183
|
+
.diffdoc/
|
|
184
|
+
node_modules/
|
|
185
|
+
dist/
|
|
186
|
+
coverage/
|
|
187
|
+
.env
|
|
188
|
+
*.log
|
|
179
189
|
```
|
|
180
190
|
|
|
181
|
-
|
|
191
|
+
Precedence is intentionally conservative:
|
|
182
192
|
|
|
183
|
-
|
|
184
|
-
|
|
193
|
+
1. `.diffdocignore` skips files first
|
|
194
|
+
2. `excludeGlobs` skip files second
|
|
195
|
+
3. `includeGlobs` narrow whatever remains
|
|
196
|
+
|
|
197
|
+
An included file is still skipped if it matches `.diffdocignore` or `excludeGlobs`.
|
|
198
|
+
|
|
199
|
+
Use include and exclude filters from config:
|
|
200
|
+
|
|
201
|
+
```json
|
|
202
|
+
{
|
|
203
|
+
"includeGlobs": ["src/**/*.ts"],
|
|
204
|
+
"excludeGlobs": ["**/*.test.ts"]
|
|
205
|
+
}
|
|
185
206
|
```
|
|
186
207
|
|
|
187
|
-
|
|
208
|
+
Or pass them at runtime:
|
|
188
209
|
|
|
189
210
|
```bash
|
|
190
|
-
diffdoc
|
|
211
|
+
npx diffdoc summarize --path . --mode all --include-glob "src/**/*.ts" --exclude-glob "**/*.test.ts"
|
|
191
212
|
```
|
|
192
213
|
|
|
193
|
-
|
|
214
|
+
## Commands
|
|
215
|
+
|
|
216
|
+
Initialize setup files:
|
|
194
217
|
|
|
195
218
|
```bash
|
|
196
|
-
diffdoc
|
|
219
|
+
npx diffdoc init
|
|
220
|
+
npx diffdoc init --yes
|
|
221
|
+
npx diffdoc init --provider cloud --force
|
|
197
222
|
```
|
|
198
223
|
|
|
199
|
-
|
|
224
|
+
Summarize files into `.diffdoc/manifest.json` and `.diffdoc/summaries/*.json`:
|
|
200
225
|
|
|
201
226
|
```bash
|
|
202
|
-
diffdoc
|
|
227
|
+
npx diffdoc summarize --path . --mode all
|
|
228
|
+
npx diffdoc summarize --path . --mode delta
|
|
229
|
+
npx diffdoc summarize --path . --mode delta --json
|
|
203
230
|
```
|
|
204
231
|
|
|
205
|
-
|
|
232
|
+
Store raw code snapshots in summary assets when you want retrieved results to include source text:
|
|
206
233
|
|
|
207
234
|
```bash
|
|
208
|
-
diffdoc
|
|
235
|
+
npx diffdoc summarize --path . --mode all --include-code-snapshot
|
|
209
236
|
```
|
|
210
237
|
|
|
211
|
-
|
|
238
|
+
Check manifest and index freshness:
|
|
212
239
|
|
|
213
240
|
```bash
|
|
214
|
-
diffdoc
|
|
241
|
+
npx diffdoc status
|
|
242
|
+
npx diffdoc status --json
|
|
215
243
|
```
|
|
216
244
|
|
|
217
|
-
|
|
245
|
+
Embed summaries into the local Vectra index:
|
|
218
246
|
|
|
219
247
|
```bash
|
|
220
|
-
diffdoc
|
|
248
|
+
npx diffdoc embed
|
|
249
|
+
npx diffdoc embed --rebuild
|
|
250
|
+
npx diffdoc embed --embed-batch-size 20
|
|
221
251
|
```
|
|
222
252
|
|
|
223
|
-
|
|
253
|
+
Search indexed summaries:
|
|
224
254
|
|
|
225
255
|
```bash
|
|
226
|
-
diffdoc
|
|
256
|
+
npx diffdoc search "How does this project process changed files?"
|
|
257
|
+
npx diffdoc search "How does embedding work?" --top 3 --code
|
|
227
258
|
```
|
|
228
259
|
|
|
229
|
-
|
|
260
|
+
Ask questions with retrieval-augmented answers:
|
|
230
261
|
|
|
231
262
|
```bash
|
|
232
|
-
diffdoc query "How does
|
|
263
|
+
npx diffdoc query "How does this project process changed files?"
|
|
264
|
+
npx diffdoc query "How does embedding work?" --top 3 --code
|
|
233
265
|
```
|
|
234
266
|
|
|
235
|
-
|
|
267
|
+
Use a custom config or artifact directory:
|
|
236
268
|
|
|
237
269
|
```bash
|
|
238
|
-
diffdoc
|
|
270
|
+
npx diffdoc query "How does embedding work?" --config ./config/diffdoc.local.json
|
|
271
|
+
npx diffdoc embed --config ./.diffdocrc --base-dir ./tmp-diffdoc
|
|
239
272
|
```
|
|
240
273
|
|
|
241
|
-
##
|
|
274
|
+
## Artifacts
|
|
242
275
|
|
|
243
|
-
|
|
276
|
+
DiffDoc keeps generated project context under `baseDir`, which defaults to `./.diffdoc`:
|
|
244
277
|
|
|
245
|
-
```
|
|
246
|
-
diffdoc
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
278
|
+
```text
|
|
279
|
+
.diffdoc/
|
|
280
|
+
manifest.json
|
|
281
|
+
summaries/
|
|
282
|
+
<content-hash>.json
|
|
283
|
+
vectra/
|
|
250
284
|
```
|
|
251
285
|
|
|
252
|
-
|
|
286
|
+
The manifest maps repository-relative file paths to content hashes:
|
|
253
287
|
|
|
254
|
-
```
|
|
255
|
-
|
|
256
|
-
|
|
288
|
+
```json
|
|
289
|
+
{
|
|
290
|
+
"schemaVersion": 2,
|
|
291
|
+
"lastSyncedCommit": "string-hash",
|
|
292
|
+
"files": {
|
|
293
|
+
"src/example.ts": "md5-string"
|
|
294
|
+
}
|
|
295
|
+
}
|
|
257
296
|
```
|
|
258
297
|
|
|
259
|
-
|
|
298
|
+
Each summary asset is portable JSON:
|
|
260
299
|
|
|
261
|
-
|
|
300
|
+
```json
|
|
301
|
+
{
|
|
302
|
+
"schemaVersion": 1,
|
|
303
|
+
"content_hash": "md5-string",
|
|
304
|
+
"summary": "Plain-English explanation text here.",
|
|
305
|
+
"raw_code_snapshot": "Optional code text when --include-code-snapshot is enabled"
|
|
306
|
+
}
|
|
307
|
+
```
|
|
262
308
|
|
|
263
|
-
|
|
309
|
+
Commit `.diffdoc/manifest.json` and `.diffdoc/summaries/*.json` if you want summaries shared across machines or CI runs. Keep `.diffdoc/vectra/` local unless you have a specific reason to commit the generated vector index.
|
|
264
310
|
|
|
265
|
-
|
|
311
|
+
The manifest and summary assets are the stable handoff point for consumers. The local Vectra index produced by `diffdoc embed` is optional and can be replaced by any embedding model and storage backend that fits your environment.
|
|
266
312
|
|
|
267
313
|
## MCP Server
|
|
268
314
|
|
|
269
|
-
DiffDoc
|
|
315
|
+
DiffDoc ships an MCP stdio server as `diffdoc-mcp`. Run `summarize` and `embed` before using it so the MCP tools have a local index to query.
|
|
270
316
|
|
|
271
|
-
Run
|
|
317
|
+
Run the server manually:
|
|
272
318
|
|
|
273
319
|
```bash
|
|
274
|
-
diffdoc-mcp --config ./.diffdocrc
|
|
320
|
+
npx diffdoc-mcp --config ./.diffdocrc
|
|
275
321
|
```
|
|
276
322
|
|
|
277
323
|
Example MCP client configuration:
|
|
@@ -287,29 +333,34 @@ Example MCP client configuration:
|
|
|
287
333
|
}
|
|
288
334
|
```
|
|
289
335
|
|
|
290
|
-
If DiffDoc is installed as a project dev dependency, the same `npx diffdoc-mcp` command will resolve the local package binary.
|
|
291
|
-
|
|
292
336
|
Available MCP tools:
|
|
293
337
|
|
|
294
|
-
- `diffdoc_search`:
|
|
295
|
-
- `diffdoc_answer`:
|
|
296
|
-
- `diffdoc_index_stats`:
|
|
338
|
+
- `diffdoc_search`: search the local index and return matching files, summaries, scores, hashes, and optional code snapshots
|
|
339
|
+
- `diffdoc_answer`: retrieve relevant context and ask the configured chat model to answer a question
|
|
340
|
+
- `diffdoc_index_stats`: return index path, existence status, and indexed item count
|
|
341
|
+
|
|
342
|
+
## CI
|
|
343
|
+
|
|
344
|
+
For CI, prefer environment variables or a generated config file instead of committing local credentials.
|
|
297
345
|
|
|
298
|
-
|
|
346
|
+
Typical CI flow:
|
|
347
|
+
|
|
348
|
+
```bash
|
|
349
|
+
npm ci
|
|
350
|
+
npx diffdoc summarize --path . --mode delta --json
|
|
351
|
+
npx diffdoc embed
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
Use `summarize --json` and `status --json` when a workflow needs machine-readable output.
|
|
355
|
+
|
|
356
|
+
Commit the manifest and summary assets from CI if you want DiffDoc state to advance with the branch. Ignore `.diffdoc/vectra/` unless your workflow intentionally persists the local index.
|
|
299
357
|
|
|
300
358
|
## Notes
|
|
301
359
|
|
|
302
|
-
- Node.js `>=22` is required because Vectra requires it.
|
|
303
|
-
- This repository ignores `.diffdoc/vectra` and `.diffdocrc`; add similar entries to your project's `.gitignore` if you do not want generated indexes or local config committed. The manifest at `.diffdoc/manifest.json` is not ignored by this repository.
|
|
304
|
-
- Summary assets are written to `.diffdoc/summaries/*.json`.
|
|
305
|
-
- Manifest schema is currently `schemaVersion: 2`; older manifest shapes are not auto-migrated.
|
|
306
|
-
- Commit `.diffdoc/manifest.json` when using delta workflows. Delta summarization reads the previous manifest state to decide which changed files need fresh summaries.
|
|
307
360
|
- `summarize` requires a configured chat model.
|
|
308
|
-
- `
|
|
309
|
-
- `
|
|
310
|
-
- `status` does not require
|
|
311
|
-
-
|
|
312
|
-
-
|
|
313
|
-
- `search` requires a configured embedding model and returns raw retrieval results without calling the chat model.
|
|
314
|
-
- `query` requires both a configured chat model and embedding model.
|
|
361
|
+
- `embed` and `search` require a configured embedding model.
|
|
362
|
+
- `query` requires both chat and embedding configuration.
|
|
363
|
+
- `status` does not require chat or embedding configuration.
|
|
364
|
+
- Delta summarization uses Git changes plus the existing manifest state.
|
|
365
|
+
- Manifest schema is currently `schemaVersion: 2`; older manifest shapes are not auto-migrated.
|
|
315
366
|
- For code-oriented embedding models such as `nomic-embed-code`, DiffDoc prefixes query embeddings with `Represent this query for searching relevant code:`.
|
package/dist/commands/summarize.js
CHANGED
|
@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
6
6
|
exports.runSummarize = runSummarize;
|
|
7
7
|
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
8
8
|
const node_path_1 = __importDefault(require("node:path"));
|
|
9
|
+
const ignore_1 = __importDefault(require("ignore"));
|
|
9
10
|
const artifacts_1 = require("../types/artifacts");
|
|
10
11
|
const git_1 = require("../utils/git");
|
|
11
12
|
const hashing_1 = require("../utils/hashing");
|
|
@@ -55,18 +56,21 @@ function compileGlobs(patterns) {
|
|
|
55
56
|
function matchesAny(filePath, patterns) {
|
|
56
57
|
return patterns.some((pattern) => pattern.test(filePath));
|
|
57
58
|
}
|
|
58
|
-
function shouldIncludeFile(filePath, includeGlobs, excludeGlobs,
|
|
59
|
-
if (
|
|
59
|
+
function shouldIncludeFile(filePath, includeGlobs, excludeGlobs, ignoreMatcher) {
|
|
60
|
+
if (ignoreMatcher.ignores(filePath)) {
|
|
60
61
|
return false;
|
|
61
62
|
}
|
|
62
63
|
if (excludeGlobs.length > 0 && matchesAny(filePath, excludeGlobs)) {
|
|
63
64
|
return false;
|
|
64
65
|
}
|
|
65
|
-
if (
|
|
66
|
+
if (includeGlobs.length > 0 && !matchesAny(filePath, includeGlobs)) {
|
|
66
67
|
return false;
|
|
67
68
|
}
|
|
68
69
|
return true;
|
|
69
70
|
}
|
|
71
|
+
function isIgnoredDirectory(dirPath, ignoreMatcher) {
|
|
72
|
+
return ignoreMatcher.ignores(dirPath) || ignoreMatcher.ignores(`${dirPath}/`);
|
|
73
|
+
}
|
|
70
74
|
async function fileExists(filePath) {
|
|
71
75
|
try {
|
|
72
76
|
await promises_1.default.access(filePath);
|
|
@@ -119,38 +123,38 @@ async function readManifest(manifestPath) {
|
|
|
119
123
|
throw error;
|
|
120
124
|
}
|
|
121
125
|
}
|
|
122
|
-
async function
|
|
126
|
+
async function readIgnoreMatcher(repoPath, ignoreFilePath) {
|
|
127
|
+
const matcher = (0, ignore_1.default)();
|
|
123
128
|
const absolutePath = node_path_1.default.isAbsolute(ignoreFilePath)
|
|
124
129
|
? ignoreFilePath
|
|
125
130
|
: node_path_1.default.resolve(repoPath, ignoreFilePath);
|
|
126
131
|
try {
|
|
127
132
|
const raw = await promises_1.default.readFile(absolutePath, "utf8");
|
|
128
|
-
return raw
|
|
129
|
-
.split(/\r?\n/)
|
|
130
|
-
.map((line) => line.trim())
|
|
131
|
-
.filter((line) => line.length > 0 && !line.startsWith("#"))
|
|
132
|
-
.map(normalizeGlobPattern);
|
|
133
|
+
return matcher.add(raw);
|
|
133
134
|
}
|
|
134
135
|
catch (error) {
|
|
135
136
|
const nodeError = error;
|
|
136
137
|
if (nodeError.code === "ENOENT") {
|
|
137
|
-
return
|
|
138
|
+
return matcher;
|
|
138
139
|
}
|
|
139
140
|
throw error;
|
|
140
141
|
}
|
|
141
142
|
}
|
|
142
|
-
async function walkCodeFiles(rootPath, includeGlobs, excludeGlobs,
|
|
143
|
+
async function walkCodeFiles(rootPath, includeGlobs, excludeGlobs, ignoreMatcher, currentPath = rootPath) {
|
|
143
144
|
const entries = await promises_1.default.readdir(currentPath, { withFileTypes: true });
|
|
144
145
|
const files = [];
|
|
145
146
|
for (const entry of entries) {
|
|
146
147
|
const entryPath = node_path_1.default.join(currentPath, entry.name);
|
|
147
148
|
if (entry.isDirectory()) {
|
|
148
|
-
|
|
149
|
+
const relativePath = normalizeRelativePath(node_path_1.default.relative(rootPath, entryPath));
|
|
150
|
+
if (!isIgnoredDirectory(relativePath, ignoreMatcher)) {
|
|
151
|
+
files.push(...await walkCodeFiles(rootPath, includeGlobs, excludeGlobs, ignoreMatcher, entryPath));
|
|
152
|
+
}
|
|
149
153
|
continue;
|
|
150
154
|
}
|
|
151
155
|
if (entry.isFile()) {
|
|
152
156
|
const relativePath = normalizeRelativePath(node_path_1.default.relative(rootPath, entryPath));
|
|
153
|
-
if (shouldIncludeFile(relativePath, includeGlobs, excludeGlobs,
|
|
157
|
+
if (shouldIncludeFile(relativePath, includeGlobs, excludeGlobs, ignoreMatcher)) {
|
|
154
158
|
files.push(relativePath);
|
|
155
159
|
}
|
|
156
160
|
}
|
|
@@ -261,7 +265,7 @@ async function runSummarize(options, config) {
|
|
|
261
265
|
? options.excludeGlobs.map(normalizeGlobPattern)
|
|
262
266
|
: config.summarize.excludeGlobs.map(normalizeGlobPattern));
|
|
263
267
|
const ignoreFile = options.ignoreFile || config.summarize.ignoreFile;
|
|
264
|
-
const
|
|
268
|
+
const ignoreMatcher = await readIgnoreMatcher(repoPath, ignoreFile);
|
|
265
269
|
const totals = { scanned: 0, skipped: 0, updated: 0, failed: 0, pruned: 0 };
|
|
266
270
|
const failures = [];
|
|
267
271
|
const isJson = options.json;
|
|
@@ -277,7 +281,7 @@ async function runSummarize(options, config) {
|
|
|
277
281
|
manifest.files = {};
|
|
278
282
|
refs.clear();
|
|
279
283
|
await writeManifest(manifestPath, manifest);
|
|
280
|
-
const files = await walkCodeFiles(repoPath, includePatterns, excludePatterns,
|
|
284
|
+
const files = await walkCodeFiles(repoPath, includePatterns, excludePatterns, ignoreMatcher);
|
|
281
285
|
const totalFiles = files.length;
|
|
282
286
|
if (!isJson) {
|
|
283
287
|
console.log(`Candidates: ${totalFiles}`);
|
|
@@ -331,7 +335,7 @@ async function runSummarize(options, config) {
|
|
|
331
335
|
const filePath = deltas.modifiedOrAdded[i];
|
|
332
336
|
totals.scanned += 1;
|
|
333
337
|
try {
|
|
334
|
-
if (!shouldIncludeFile(filePath, includePatterns, excludePatterns,
|
|
338
|
+
if (!shouldIncludeFile(filePath, includePatterns, excludePatterns, ignoreMatcher)) {
|
|
335
339
|
const removed = await removeManifestPath(filePath, manifest, manifestPath, summaryDir, refs);
|
|
336
340
|
if (removed) {
|
|
337
341
|
totals.pruned += 1;
|
package/dist/index.js
CHANGED
|
@@ -42,7 +42,7 @@ function addCloudEndpointAndKeyOptions(command) {
|
|
|
42
42
|
program
|
|
43
43
|
.name("diffdoc")
|
|
44
44
|
.description("Translate repository code shifts into plain-English business context")
|
|
45
|
-
.version("0.4.2");
|
|
45
|
+
.version("0.4.3");
|
|
46
46
|
program
|
|
47
47
|
.command("init")
|
|
48
48
|
.description("Initialize DiffDoc configuration for this repository")
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "diffdoc",
|
|
3
|
-
"version": "0.4.2",
|
|
3
|
+
"version": "0.4.3",
|
|
4
4
|
"description": "Translate repository code shifts into plain-English business context",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Christopher Sullivan",
|
|
@@ -36,6 +36,7 @@
|
|
|
36
36
|
"dependencies": {
|
|
37
37
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
38
38
|
"commander": "^12.0.0",
|
|
39
|
+
"ignore": "^7.0.5",
|
|
39
40
|
"openai": "^4.28.0",
|
|
40
41
|
"simple-git": "^3.24.0",
|
|
41
42
|
"vectra": "^0.14.0",
|