deepdoc 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. deepdoc-0.1.0/PKG-INFO +23 -0
  2. deepdoc-0.1.0/README.md +668 -0
  3. deepdoc-0.1.0/deepdoc/__init__.py +3 -0
  4. deepdoc-0.1.0/deepdoc/__main__.py +9 -0
  5. deepdoc-0.1.0/deepdoc/_legacy_types.py +48 -0
  6. deepdoc-0.1.0/deepdoc/benchmark_v2.py +124 -0
  7. deepdoc-0.1.0/deepdoc/chatbot/__init__.py +12 -0
  8. deepdoc-0.1.0/deepdoc/chatbot/chunker.py +508 -0
  9. deepdoc-0.1.0/deepdoc/chatbot/docs_summary.py +114 -0
  10. deepdoc-0.1.0/deepdoc/chatbot/indexer.py +142 -0
  11. deepdoc-0.1.0/deepdoc/chatbot/persistence.py +148 -0
  12. deepdoc-0.1.0/deepdoc/chatbot/providers.py +97 -0
  13. deepdoc-0.1.0/deepdoc/chatbot/scaffold.py +121 -0
  14. deepdoc-0.1.0/deepdoc/chatbot/service.py +342 -0
  15. deepdoc-0.1.0/deepdoc/chatbot/settings.py +164 -0
  16. deepdoc-0.1.0/deepdoc/chatbot/types.py +50 -0
  17. deepdoc-0.1.0/deepdoc/cli.py +888 -0
  18. deepdoc-0.1.0/deepdoc/config.py +228 -0
  19. deepdoc-0.1.0/deepdoc/generator_v2.py +2220 -0
  20. deepdoc-0.1.0/deepdoc/llm/__init__.py +5 -0
  21. deepdoc-0.1.0/deepdoc/llm/client.py +84 -0
  22. deepdoc-0.1.0/deepdoc/llm/litellm_compat.py +32 -0
  23. deepdoc-0.1.0/deepdoc/manifest.py +63 -0
  24. deepdoc-0.1.0/deepdoc/openapi.py +211 -0
  25. deepdoc-0.1.0/deepdoc/parser/__init__.py +6 -0
  26. deepdoc-0.1.0/deepdoc/parser/api_detector.py +21 -0
  27. deepdoc-0.1.0/deepdoc/parser/base.py +73 -0
  28. deepdoc-0.1.0/deepdoc/parser/go_parser.py +481 -0
  29. deepdoc-0.1.0/deepdoc/parser/js_ts_parser.py +649 -0
  30. deepdoc-0.1.0/deepdoc/parser/php_parser.py +555 -0
  31. deepdoc-0.1.0/deepdoc/parser/python_parser.py +382 -0
  32. deepdoc-0.1.0/deepdoc/parser/registry.py +48 -0
  33. deepdoc-0.1.0/deepdoc/parser/routes/__init__.py +16 -0
  34. deepdoc-0.1.0/deepdoc/parser/routes/base.py +57 -0
  35. deepdoc-0.1.0/deepdoc/parser/routes/common.py +129 -0
  36. deepdoc-0.1.0/deepdoc/parser/routes/detector.py +30 -0
  37. deepdoc-0.1.0/deepdoc/parser/routes/django.py +441 -0
  38. deepdoc-0.1.0/deepdoc/parser/routes/express.py +121 -0
  39. deepdoc-0.1.0/deepdoc/parser/routes/falcon.py +131 -0
  40. deepdoc-0.1.0/deepdoc/parser/routes/fastapi.py +65 -0
  41. deepdoc-0.1.0/deepdoc/parser/routes/fastify.py +122 -0
  42. deepdoc-0.1.0/deepdoc/parser/routes/flask.py +55 -0
  43. deepdoc-0.1.0/deepdoc/parser/routes/go.py +91 -0
  44. deepdoc-0.1.0/deepdoc/parser/routes/js_shared.py +180 -0
  45. deepdoc-0.1.0/deepdoc/parser/routes/laravel.py +139 -0
  46. deepdoc-0.1.0/deepdoc/parser/routes/nestjs.py +58 -0
  47. deepdoc-0.1.0/deepdoc/parser/routes/python_shared.py +29 -0
  48. deepdoc-0.1.0/deepdoc/parser/routes/registry.py +44 -0
  49. deepdoc-0.1.0/deepdoc/parser/routes/repo_resolver.py +984 -0
  50. deepdoc-0.1.0/deepdoc/parser/vue_parser.py +491 -0
  51. deepdoc-0.1.0/deepdoc/persistence_v2.py +698 -0
  52. deepdoc-0.1.0/deepdoc/pipeline_v2.py +1345 -0
  53. deepdoc-0.1.0/deepdoc/planner_v2.py +2917 -0
  54. deepdoc-0.1.0/deepdoc/prompts_v2.py +1280 -0
  55. deepdoc-0.1.0/deepdoc/scan_v2.py +1121 -0
  56. deepdoc-0.1.0/deepdoc/site/__init__.py +1 -0
  57. deepdoc-0.1.0/deepdoc/site/fumadocs_builder_v2.py +1326 -0
  58. deepdoc-0.1.0/deepdoc/smart_update_v2.py +901 -0
  59. deepdoc-0.1.0/deepdoc/updater_v2.py +330 -0
  60. deepdoc-0.1.0/deepdoc.egg-info/PKG-INFO +23 -0
  61. deepdoc-0.1.0/deepdoc.egg-info/SOURCES.txt +82 -0
  62. deepdoc-0.1.0/deepdoc.egg-info/dependency_links.txt +1 -0
  63. deepdoc-0.1.0/deepdoc.egg-info/entry_points.txt +2 -0
  64. deepdoc-0.1.0/deepdoc.egg-info/requires.txt +19 -0
  65. deepdoc-0.1.0/deepdoc.egg-info/top_level.txt +1 -0
  66. deepdoc-0.1.0/pyproject.toml +44 -0
  67. deepdoc-0.1.0/setup.cfg +4 -0
  68. deepdoc-0.1.0/tests/test_chatbot_config.py +110 -0
  69. deepdoc-0.1.0/tests/test_chatbot_index.py +106 -0
  70. deepdoc-0.1.0/tests/test_chatbot_query.py +328 -0
  71. deepdoc-0.1.0/tests/test_chatbot_scaffold.py +70 -0
  72. deepdoc-0.1.0/tests/test_classify.py +140 -0
  73. deepdoc-0.1.0/tests/test_cli_generate.py +159 -0
  74. deepdoc-0.1.0/tests/test_cli_serve.py +44 -0
  75. deepdoc-0.1.0/tests/test_framework_fixtures.py +64 -0
  76. deepdoc-0.1.0/tests/test_framework_support.py +673 -0
  77. deepdoc-0.1.0/tests/test_fumadocs_builder.py +379 -0
  78. deepdoc-0.1.0/tests/test_generation_evidence.py +202 -0
  79. deepdoc-0.1.0/tests/test_litellm_compat.py +38 -0
  80. deepdoc-0.1.0/tests/test_planner_granularity.py +638 -0
  81. deepdoc-0.1.0/tests/test_route_registry.py +69 -0
  82. deepdoc-0.1.0/tests/test_smart_update.py +214 -0
  83. deepdoc-0.1.0/tests/test_stale.py +118 -0
  84. deepdoc-0.1.0/tests/test_state.py +180 -0
deepdoc-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,23 @@
1
+ Metadata-Version: 2.4
2
+ Name: deepdoc
3
+ Version: 0.1.0
4
+ Summary: Auto-generate beautiful docs from any codebase
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: click>=8.1
7
+ Requires-Dist: litellm>=1.40
8
+ Requires-Dist: gitpython>=3.1
9
+ Requires-Dist: tree-sitter>=0.22
10
+ Requires-Dist: tree-sitter-python>=0.22
11
+ Requires-Dist: tree-sitter-javascript>=0.22
12
+ Requires-Dist: tree-sitter-typescript>=0.22
13
+ Requires-Dist: tree-sitter-go>=0.22
14
+ Requires-Dist: tree-sitter-php>=0.22
15
+ Requires-Dist: pyyaml>=6.0
16
+ Requires-Dist: rich>=13.0
17
+ Requires-Dist: jinja2>=3.1
18
+ Provides-Extra: chatbot
19
+ Requires-Dist: numpy>=1.26; extra == "chatbot"
20
+ Requires-Dist: faiss-cpu>=1.8.0; extra == "chatbot"
21
+ Requires-Dist: fastapi>=0.115; extra == "chatbot"
22
+ Requires-Dist: uvicorn>=0.30; extra == "chatbot"
23
+ Requires-Dist: httpx>=0.27; extra == "chatbot"
@@ -0,0 +1,668 @@
1
+ # DeepDoc
2
+
3
+ Auto-generate deep engineering documentation from real codebases using AI.
4
+
5
+ DeepDoc scans your repo, builds a bucket-based documentation plan, generates rich MDX pages with Mermaid diagrams, and builds a local-first Fumadocs site with Orama search.
6
+
7
+ ---
8
+
9
+ ## Features
10
+
11
+ - **Bucket-Based Documentation Architecture** — Docs are planned as system, feature, endpoint, endpoint reference, integration, and database buckets instead of noisy one-file-per-page output.
12
+ - **Five-Phase Pipeline** — Scan, plan, generate, API reference staging, build. Planning and generation are separated so large repos and large files are handled more cleanly.
13
+ - **Multi-Step AI Planner** — The planner classifies the repo, proposes buckets, then assigns files, symbols, artifacts, and dependencies into the final doc structure.
14
+ - **Giant-File Handling** — Large files are decomposed into feature-aligned clusters so giant controllers or service files can feed multiple doc pages.
15
+ - **Endpoint-Family + Per-Endpoint Docs** — High-level endpoint family pages are AI-planned, and individual `endpoint_ref` pages are derived from scan data and generated separately.
16
+ - **Integration Discovery** — Third-party systems like payment gateways, delivery providers, warehouse systems, and webhook integrations can be grouped into integration docs.
17
+ - **Incremental Updates** — `deepdoc update` uses persisted plan and ledger data to regenerate only stale or structurally affected docs.
18
+ - **Full Refresh and Clean Rebuild Modes** — `generate --force` fully refreshes DeepDoc-managed docs and removes stale generated pages; `generate --clean --yes` wipes output and rebuilds from scratch.
19
+ - **Safe Existing-Docs Behavior** — Plain `generate` refuses to run over an existing DeepDoc-managed docs set and will not silently mix into a non-DeepDoc `docs/` folder.
20
+ - **Multi-Language Support** — JavaScript/TypeScript, Python, Go, PHP/Laravel, and Vue single-file components, with tree-sitter AST parsing and regex fallback.
21
+ - **Configurable LLM** — Works with Anthropic, OpenAI, Azure OpenAI, Ollama, and other LiteLLM-compatible providers.
22
+ - **Mermaid Diagrams** — Generated pages can include architecture, flow, and request-sequence diagrams.
23
+ - **OpenAPI-Aware API Docs** — Auto-detects OpenAPI/Swagger specs and stages canonical interactive `/api/*` pages in the generated site.
24
+ - **Local-First Fumadocs Site** — Generates a `site/` Next.js app with Fumadocs UI, Mermaid rendering, and built-in Orama search.
25
+ - **Static Export** — `deepdoc deploy` exports a static site to `site/out/` for any static host.
26
+
27
+ ---
28
+
29
+ ## Installation
30
+
31
+ ### From source (recommended during development)
32
+
33
+ ```bash
34
+ git clone <your-repo-url>
35
+ cd deepdoc
36
+ pip install -e .
37
+ ```
38
+
39
+ If the full install is slow due to tree-sitter compilation, install core deps first:
40
+
41
+ ```bash
42
+ pip install click litellm gitpython rich pyyaml jinja2
43
+ pip install -e . --no-deps
44
+ ```
45
+
46
+ ### Verify installation
47
+
48
+ ```bash
49
+ deepdoc --version
50
+ deepdoc --help
51
+ python -m deepdoc --help
52
+ ```
53
+
54
+ ---
55
+
56
+ ## Quick Start
57
+
58
+ ```bash
59
+ # 1. Go to your project
60
+ cd /path/to/your-project
61
+
62
+ # 2. Initialize DeepDoc
63
+ deepdoc init
64
+
65
+ # 3. Set your API key
66
+ export ANTHROPIC_API_KEY=sk-ant-...
67
+
68
+ # 4. Generate docs
69
+ deepdoc generate
70
+
71
+ # 5. Preview locally
72
+ deepdoc serve
73
+ # → Open http://localhost:3000
74
+ ```
75
+
76
+ ---
77
+
78
+ ## Commands
79
+
80
+ Every command supports `--help`, including nested config commands:
81
+
82
+ ```bash
83
+ deepdoc --help
84
+ deepdoc generate --help
85
+ deepdoc config --help
86
+ deepdoc config set --help
87
+ ```
88
+
89
+ ### `deepdoc init`
90
+
91
+ Initializes DeepDoc in the current directory by creating a `.deepdoc.yaml` config file.
92
+
93
+ ```bash
94
+ deepdoc init
95
+ deepdoc init --provider openai --model gpt-4o
96
+ deepdoc init --provider ollama --model ollama/llama3.2
97
+ deepdoc init --provider azure --model azure/gpt-4o
98
+ deepdoc init --output-dir documentation
99
+ ```
100
+
101
+ **Options:**
102
+
103
+ | Flag | Default | Description |
104
+ |------|---------|-------------|
105
+ | `--name` | directory name | Project name |
106
+ | `--description` | empty | Short project description |
107
+ | `--provider` | `anthropic` | LLM provider: `anthropic`, `openai`, `ollama`, `azure` |
108
+ | `--model` | provider default | Model name |
109
+ | `--output-dir` | `docs` | Where generated docs are written |
110
+
111
+ ### `deepdoc generate`
112
+
113
+ Full documentation generation. This is the first-run or explicit full-refresh command.
114
+
115
+ ```bash
116
+ deepdoc generate
117
+ deepdoc generate --force # Full refresh of DeepDoc-managed docs
118
+ deepdoc generate --clean --yes # Wipe output + state and rebuild from scratch
119
+ deepdoc generate --deploy # Generate + export the static site
120
+ deepdoc generate --batch-size 3 # Smaller batches for rate-limited APIs
121
+ deepdoc generate --include "src/**" --include "lib/**"
122
+ deepdoc generate --exclude "tests/**"
123
+ ```
124
+
125
+ **Current behavior:**
126
+
127
+ - `deepdoc generate`
128
+ - intended for the first run
129
+ - refuses to run if DeepDoc docs/state already exist
130
+ - refuses to write into a non-DeepDoc `docs/` folder unless you explicitly clean it
131
+ - `deepdoc generate --force`
132
+ - re-runs the full pipeline
133
+ - regenerates all DeepDoc-managed pages even if they are not stale
134
+ - removes stale generated pages that no longer belong in the new plan
135
+ - preserves non-DeepDoc files
136
+ - `deepdoc generate --clean --yes`
137
+ - deletes the output dir and DeepDoc state
138
+ - rebuilds everything from scratch
139
+
140
+ **What happens under the hood (5-phase pipeline):**
141
+
142
+ 1. **Phase 1: Scan** — Walk the repo, parse supported languages, detect endpoints, config/setup artifacts, integration signals, and OpenAPI specs.
143
+ 2. **Phase 2: Plan** — Run the multi-step bucket planner. It classifies the repo, proposes bucket candidates, and assigns files/symbols/artifacts to the final doc structure.
144
+ 3. **Phase 3: Generate** — Generate bucket pages in batches with parallel workers. High-level buckets are AI-planned; per-endpoint reference pages are derived from scan data and generated individually.
145
+ 4. **Phase 4: API Ref** — Stage OpenAPI assets for the generated Fumadocs `/api/*` pages when a spec exists.
146
+ 5. **Phase 5: Build** — Write the generated `site/` Fumadocs scaffold, page tree, search route, and static assets from the generated plan.
147
+
148
+ **Options:**
149
+
150
+ | Flag | Default | Description |
151
+ |------|---------|-------------|
152
+ | `--force` | off | Full refresh of DeepDoc-managed docs and cleanup of stale generated pages |
153
+ | `--clean` | off | Delete output dir and DeepDoc state, then regenerate from scratch |
154
+ | `--yes` | off | Skip destructive confirmation for `--clean` |
155
+ | `--include` | all files | Glob patterns to include (can be repeated) |
156
+ | `--exclude` | see config | Additional glob patterns to exclude |
157
+ | `--deploy` | off | Build and export the static site after generation |
158
+ | `--batch-size` | 10 | Pages per batch before pausing (helps with rate limits) |
159
+
160
+ ### `deepdoc update`
161
+
162
+ Incrementally update docs when source files change. This is the normal command after the first successful `generate`.
163
+
164
+ ```bash
165
+ deepdoc update # Normal ongoing refresh
166
+ deepdoc update --since HEAD~3 # Changes in last 3 commits
167
+ deepdoc update --since main # All changes since branching from main
168
+ deepdoc update --replan # Force a full replan
169
+ deepdoc update --deploy # Update + deploy
170
+ ```
171
+
172
+ **How it works:**
173
+
174
+ 1. Loads the saved plan and generation ledger from `.deepdoc/`.
175
+ 2. Detects changed, new, and deleted files.
176
+ 3. Chooses a strategy automatically:
177
+ - incremental update
178
+ - targeted replan
179
+ - full replan
180
+ 4. Regenerates only the affected bucket pages when safe.
181
+ 5. Rebuilds site config and nav afterward.
182
+
183
+ If git is unavailable, it falls back to hash-based staleness detection.
184
+
185
+ **Options:**
186
+
187
+ | Flag | Default | Description |
188
+ |------|---------|-------------|
189
+ | `--since` | `HEAD~1` | Git ref to diff against |
190
+ | `--replan` | off | Force a full replan even if the change set looks incremental |
191
+ | `--deploy` | off | Deploy after updating |
192
+
193
+ ### `deepdoc status`
194
+
195
+ Show how much documentation has been generated and whether any buckets are stale.
196
+
197
+ ```bash
198
+ deepdoc status
199
+ ```
200
+
201
+ This is useful after `generate` or `update` when you want a quick health check without opening the site.
202
+
203
+ ### `deepdoc serve`
204
+
205
+ Preview the generated docs locally with live reload using the generated Fumadocs app in `site/`.
206
+
207
+ ```bash
208
+ deepdoc serve
209
+ deepdoc serve --port 8001
210
+ ```
211
+
212
+ Requires Node.js >= 18 to be installed. Site dependencies are auto-installed into `site/node_modules/` on first run.
213
+
214
+ ### `deepdoc deploy`
215
+
216
+ Build and export the generated Fumadocs site.
217
+
218
+ ```bash
219
+ deepdoc deploy
220
+ ```
221
+
222
+ This runs `next build` inside `site/` and writes the static export to `site/out/`. You can deploy that directory to Vercel, Netlify, GitHub Pages, Cloudflare Pages, or any static host.
223
+
224
+ ### `deepdoc config`
225
+
226
+ View or update config values without editing YAML manually.
227
+
228
+ ```bash
229
+ deepdoc config show # Print all config
230
+ deepdoc config set llm.provider openai # Switch provider
231
+ deepdoc config set llm.model gpt-4o # Switch model
232
+ deepdoc config set llm.temperature 0.3 # Adjust creativity
233
+ deepdoc config set output_dir documentation # Change output dir
234
+ deepdoc config set llm.api_key_env AZURE_API_KEY # Change API key env var
235
+ ```
236
+
237
+ ---
238
+
239
+ ## LLM Provider Setup
240
+
241
+ DeepDoc uses [LiteLLM](https://github.com/BerriAI/litellm) under the hood, which means it supports 100+ LLM providers. Here are the most common setups:
242
+
243
+ ### Anthropic (Claude) — Default
244
+
245
+ ```bash
246
+ deepdoc init --provider anthropic
247
+ export ANTHROPIC_API_KEY=sk-ant-api03-...
248
+ deepdoc generate
249
+ ```
250
+
251
+ Models: `claude-3-5-sonnet-20241022`, `claude-3-opus-20240229`, `claude-3-haiku-20240307`
252
+
253
+ ### OpenAI (GPT)
254
+
255
+ ```bash
256
+ deepdoc init --provider openai --model gpt-4o
257
+ export OPENAI_API_KEY=sk-...
258
+ deepdoc generate
259
+ ```
260
+
261
+ Models: `gpt-4.1`, `gpt-4.1-mini`, `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`
262
+
263
+ ### Azure OpenAI
264
+
265
+ Azure requires a few more environment variables because deployments have custom names and endpoints.
266
+
267
+ ```bash
268
+ # 1. Initialize with Azure
269
+ deepdoc init --provider azure --model azure/<your-deployment-name>
270
+
271
+ # 2. Set required environment variables
272
+ export AZURE_API_KEY=your-azure-api-key
273
+ export AZURE_API_BASE=https://<your-resource-name>.openai.azure.com
274
+ export AZURE_API_VERSION=2024-02-01
275
+
276
+ # 3. Update config to point to your deployment
277
+ deepdoc config set llm.model azure/<your-deployment-name>
278
+ deepdoc config set llm.base_url https://<your-resource-name>.openai.azure.com
279
+
280
+ # 4. Generate
281
+ deepdoc generate
282
+ ```
283
+
284
+ **Where to find these values in Azure Portal:**
285
+
286
+ 1. Go to [Azure Portal](https://portal.azure.com) → Azure OpenAI resource.
287
+ 2. Click **Keys and Endpoint** in the sidebar → copy **Key 1** (that's your `AZURE_API_KEY`) and the **Endpoint** (that's your `AZURE_API_BASE`).
288
+ 3. Go to **Model deployments** → **Manage Deployments** → note your deployment name (e.g., `gpt-4o-deployment`). Use this as `azure/gpt-4o-deployment` in the model field.
289
+ 4. API version: Use `2024-02-01` or the latest GA version shown in Azure docs.
290
+
291
+ **Example `.deepdoc.yaml` for Azure:**
292
+
293
+ ```yaml
294
+ project_name: my-project
295
+ output_dir: docs
296
+ llm:
297
+ provider: azure
298
+ model: azure/gpt-4o-deploy # "azure/" prefix + your deployment name
299
+ api_key_env: AZURE_API_KEY
300
+ base_url: https://mycompany.openai.azure.com
301
+ max_tokens: 4096
302
+ temperature: 0.2
303
+ ```
304
+
305
+ **Azure AD / Managed Identity (token-based auth):**
306
+
307
+ If you use Azure AD instead of API keys, set these instead:
308
+
309
+ ```bash
310
+ export AZURE_AD_TOKEN=your-ad-token
311
+ export AZURE_API_BASE=https://<your-resource-name>.openai.azure.com
312
+ export AZURE_API_VERSION=2024-02-01
313
+ ```
314
+
315
+ LiteLLM picks up `AZURE_AD_TOKEN` automatically when `AZURE_API_KEY` is not set.
316
+
317
+ ### Ollama (Local / Free)
318
+
319
+ No API key needed. Just make sure Ollama is running locally.
320
+
321
+ ```bash
322
+ # 1. Install and start Ollama (https://ollama.com)
323
+ ollama pull llama3.2
324
+
325
+ # 2. Initialize
326
+ deepdoc init --provider ollama --model ollama/llama3.2
327
+
328
+ # 3. Generate (no API key needed)
329
+ deepdoc generate
330
+ ```
331
+
332
+ Other Ollama models: `ollama/codellama`, `ollama/mistral`, `ollama/mixtral`
333
+
334
+ ### Any LiteLLM Provider
335
+
336
+ DeepDoc passes the model string directly to LiteLLM, so you can use any provider LiteLLM supports by using the correct prefix:
337
+
338
+ ```bash
339
+ # Groq
340
+ deepdoc config set llm.model groq/llama3-70b-8192
341
+ export GROQ_API_KEY=...
342
+
343
+ # Together AI
344
+ deepdoc config set llm.model together_ai/meta-llama/Llama-3-70b-chat-hf
345
+ export TOGETHER_API_KEY=...
346
+
347
+ # AWS Bedrock
348
+ deepdoc config set llm.model bedrock/anthropic.claude-3-sonnet-20240229-v1:0
349
+ # (uses AWS credentials from environment)
350
+ ```
351
+
352
+ See [LiteLLM providers](https://docs.litellm.ai/docs/providers) for the full list.
353
+
354
+ ---
355
+
356
+ ## Configuration
357
+
358
+ The `.deepdoc.yaml` file in your repo root controls everything:
359
+
360
+ ```yaml
361
+ project_name: my-app
362
+ description: "A web application for managing tasks"
363
+ output_dir: docs
364
+ site_dir: site
365
+
366
+ llm:
367
+ provider: anthropic
368
+ model: claude-3-5-sonnet-20241022
369
+ api_key_env: ANTHROPIC_API_KEY
370
+ base_url: null # Set for Ollama/custom endpoints
371
+ max_tokens: null # null = no cap (recommended); set a number to limit output
372
+ temperature: 0.2
373
+
374
+ languages:
375
+ - python
376
+ - javascript
377
+ - typescript
378
+ - go
379
+ - php
380
+
381
+ include: [] # Empty = include everything
382
+ exclude:
383
+ - node_modules
384
+ - .git
385
+ - __pycache__
386
+ - "*.pyc"
387
+ - vendor
388
+ - dist
389
+ - build
390
+ - .env
391
+ - "*.lock"
392
+ - "*.sum"
393
+
394
+ generation_mode: feature_buckets
395
+
396
+ # Generation tuning
397
+ max_pages: 0 # 0 = no cap; set a number to limit total pages
398
+ giant_file_lines: 2000 # Files above this get LLM-based feature clustering
399
+ source_context_budget: 200000 # Raw-source char budget before DeepDoc switches overflow files to compressed evidence cards
400
+ integration_detection: auto # "auto" | "off"
401
+
402
+ # Page type toggles
403
+ include_endpoint_pages: true # Generate endpoint documentation
404
+ include_integration_pages: true # Generate integration documentation
405
+
406
+ # Parallelism — tune for your LLM provider's rate limits
407
+ max_parallel_workers: 6 # Concurrent LLM calls (increase for Azure PTU)
408
+ batch_size: 10 # Pages per batch before rate-limit pause
409
+
410
+ github_pages:
411
+ enabled: false
412
+ branch: gh-pages
413
+ remote: origin
414
+
415
+ site:
416
+ repo_url: "" # e.g., https://github.com/you/your-repo
417
+ favicon: ""
418
+ logo: ""
419
+ ```
420
+
421
+ ### Configuration Reference
422
+
423
+ | Key | Default | Description |
424
+ |-----|---------|-------------|
425
+ | `project_name` | directory name | Project name used in site title |
426
+ | `description` | `""` | Short project description |
427
+ | `output_dir` | `docs` | Where generated MDX pages are written |
428
+ | `site_dir` | `site` | Where the generated Fumadocs (Next.js) site app is written; the static export lands in its `out/` subdirectory |
429
+ | **LLM** | | |
430
+ | `llm.provider` | `anthropic` | `anthropic`, `openai`, `azure`, `ollama`, or any LiteLLM alias |
431
+ | `llm.model` | `claude-3-5-sonnet-20241022` | Model name (use provider prefix for non-Anthropic, e.g. `azure/gpt-4.1`) |
432
+ | `llm.api_key_env` | `ANTHROPIC_API_KEY` | Environment variable that holds the API key |
433
+ | `llm.base_url` | `null` | Custom endpoint URL (required for Ollama, optional for Azure) |
434
+ | `llm.max_tokens` | `null` | Max output tokens per LLM call. `null` = no cap (recommended). Set explicitly if your provider requires it (e.g. some Azure deployments). Typical values: `4096` for shorter pages, `8192`–`16384` for detailed docs |
435
+ | `llm.temperature` | `0.2` | LLM sampling temperature |
436
+ | **Generation** | | |
437
+ | `generation_mode` | `feature_buckets` | Documentation generation mode |
438
+ | `max_pages` | `0` | Max pages to generate. `0` = no cap |
439
+ | `giant_file_lines` | `2000` | Files above this line count get LLM-based feature clustering |
440
+ | `source_context_budget` | `200000` | Raw-source char budget per page before overflow files are represented as compressed evidence cards |
441
+ | `integration_detection` | `auto` | Detect third-party integrations: `auto` or `off` |
442
+ | `include_endpoint_pages` | `true` | Generate endpoint documentation pages |
443
+ | `include_integration_pages` | `true` | Generate integration documentation pages |
444
+ | **Parallelism** | | |
445
+ | `max_parallel_workers` | `6` | Concurrent LLM calls. Increase for Azure PTU or high-TPM deployments |
446
+ | `batch_size` | `10` | Pages per batch before rate-limit pause |
447
+ | **File filters** | | |
448
+ | `languages` | `[python, javascript, typescript, go, php, vue]` | Languages to parse |
449
+ | `include` | `[]` | Glob patterns to include (empty = everything) |
450
+ | `exclude` | *(see config)* | Glob patterns to exclude (node_modules, .git, dist, etc.) |
451
+ | **GitHub Pages** | | |
452
+ | `github_pages.branch` | `gh-pages` | Branch for GitHub Pages deploy |
453
+ | `github_pages.remote` | `origin` | Git remote for deploy |
454
+ | **Site** | | |
455
+ | `site.repo_url` | `""` | Repo URL shown in the generated Fumadocs navigation |
456
+ | `site.favicon` | `""` | Path to favicon |
457
+ | `site.logo` | `""` | Path to logo |
458
+
459
+ ---
460
+
461
+ ## Supported Languages & Frameworks
462
+
463
+ **Parsing (tree-sitter AST + regex fallback):**
464
+
465
+ | Language | Extensions | Extracts |
466
+ |----------|-----------|----------|
467
+ | Python | `.py` | Functions, classes, decorators, imports |
468
+ | JavaScript | `.js`, `.jsx`, `.mjs`, `.cjs` | Functions, classes, arrow functions, imports |
469
+ | TypeScript | `.ts`, `.tsx` | Same as JS + interfaces, type aliases |
470
+ | Go | `.go` | Functions, methods, structs, interfaces |
471
+ | PHP | `.php` | Functions, classes, methods, namespaces |
472
+ | Vue | `.vue` | SFC script symbols, props/emits/slots, router/store usage |
473
+
474
+ **High-confidence framework support (fixture-backed):**
475
+
476
+ | Framework | Language | Proven patterns |
477
+ |-----------|----------|-----------------|
478
+ | FastAPI | Python | `@app.get()`, `@router.post()`, docstrings, `response_model` |
479
+ | Flask | Python | `@app.route()` with method expansion |
480
+ | Laravel | PHP | `Route::get()`, grouped prefixes, middleware, resource expansion |
481
+ | Django / DRF | Python | `path()`, `re_path()`, `@api_view`, `as_view()`, DRF routers, `@action` |
482
+ | Express | JS/TS | Mounted routers via `app.use()`, nested prefixes, chained `route()` calls |
483
+ | Fastify | JS/TS | Plugin `register(..., { prefix })`, shorthand methods, `route({ ... })`, schema hints |
484
+ | Vue | Vue SFC | Component detection, `defineProps`, `defineEmits`, `defineModel`, `defineSlots`, router/store signals |
485
+
486
+ **Supported but not headline-high-confidence yet:**
487
+
488
+ | Framework | Language | Current coverage |
489
+ |-----------|----------|------------------|
490
+ | NestJS | TS | `@Controller` + `@Get/@Post` decorators |
491
+ | Falcon | Python | `app.add_route()` + `on_get/on_post` responders |
492
+ | Gin / Echo / Fiber | Go | Common route helpers (`GET`, `POST`, `HandleFunc`) |
493
+ | Next.js / Nuxt | JS/TS | Repo-level framework detection and planning hints |
494
+
495
+ ---
496
+
497
+ ## Architecture
498
+
499
+ The current system is bucket-based.
500
+
501
+ **Planner bucket types:**
502
+
503
+ | Type | Purpose |
504
+ |------|---------|
505
+ | `system` | Architecture, setup, testing, deployment/ops, auth, shared middleware, observability |
506
+ | `feature` | Business workflows like checkout, refunds, order status, onboarding |
507
+ | `endpoint` | Endpoint-family or resource-level API docs |
508
+ | `endpoint_ref` | One generated page per concrete API endpoint |
509
+ | `integration` | Third-party systems like payment, warehouse, delivery, webhook providers |
510
+ | `database` | Cross-cutting database/schema/data-layer documentation |
511
+
512
+ **Five implemented phases:**
513
+
514
+ 1. **Repository scan/indexing**
515
+ - Parse supported source files
516
+ - Detect endpoints, config files, setup artifacts, OpenAPI specs
517
+ - Record file sizes, symbols, imports, and raw scan summaries
518
+ 2. **Multi-step planning**
519
+ - Classify repo artifacts
520
+ - Propose system/feature/endpoint/integration/database buckets
521
+ - Assign files, symbols, and artifacts into the final plan
522
+ 3. **Generation engine**
523
+ - Build evidence packs for buckets
524
+ - Generate pages in batches with parallel workers
525
+ - Create nested endpoint reference pages under endpoint families
526
+ - Validate output and degrade gracefully on failures
527
+ 4. **Persistence**
528
+ - Persist plan, file map, scan cache, and generation ledger in `.deepdoc/`
529
+ - Keep enough state for updates, staleness detection, and cleanup
530
+ 5. **Smart update**
531
+ - Choose incremental update vs targeted replan vs full replan
532
+ - Refresh only stale docs when safe
533
+ - Rebuild affected docs after structural repo changes
534
+
535
+ ---
536
+
537
+ ## Generated Files
538
+
539
+ After running `deepdoc generate`, you'll find:
540
+
541
+ ```
542
+ your-repo/
543
+ ├── .deepdoc.yaml # Config
544
+ ├── .deepdoc/ # Canonical persisted state
545
+ │ ├── plan.json # Bucket plan
546
+ │ ├── scan_cache.json # Lightweight scan snapshot
547
+ │ ├── ledger.json # Generated-page ledger
548
+ │ ├── file_map.json # file → bucket/page mapping
549
+ │ └── state.json # last synced commit + update status
550
+ ├── .deepdoc_manifest.json # Legacy source hash manifest
551
+ ├── .deepdoc_plan.json # Legacy compatibility plan file
552
+ ├── .deepdoc_file_map.json # Legacy compatibility file map
553
+ ├── docs/ # Generated MDX pages
554
+ │ ├── index.mdx
555
+ │ ├── architecture.mdx
556
+ │ ├── setup-and-configuration.mdx
557
+ │ ├── orders-api.mdx
558
+ │ ├── get-api-v1-orders.mdx
559
+ │ └── ...
560
+ └── site/ # Generated Fumadocs app
561
+ ├── app/
562
+ ├── components/
563
+ ├── lib/
564
+ ├── openapi/ # Staged OpenAPI assets (when a spec exists)
565
+ ├── public/
566
+ └── out/ # Static export after `deepdoc deploy`
567
+ ```
568
+
569
+ ---
570
+
571
+ ## GitHub Actions CI/CD
572
+
573
+ Automate doc updates on every push to main:
574
+
575
+ ```yaml
576
+ # .github/workflows/docs.yml
577
+ name: Update Documentation
578
+
579
+ on:
580
+ push:
581
+ branches: [main]
582
+
583
+ jobs:
584
+ update-docs:
585
+ runs-on: ubuntu-latest
586
+ permissions:
587
+ contents: write # Needed for gh-pages push
588
+
589
+ steps:
590
+ - uses: actions/checkout@v4
591
+ with:
592
+ fetch-depth: 0 # Full history needed for git diff
593
+
594
+ - uses: actions/setup-python@v5
595
+ with:
596
+ python-version: "3.11"
597
+
598
+ - uses: actions/setup-node@v4
599
+ with:
600
+ node-version: "20"
601
+
602
+ - name: Install dependencies
603
+ run: |
604
+ pip install ./deepdoc # or from PyPI if published
605
+
606
+ - name: Update and deploy docs
607
+ env:
608
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
609
+ run: |
610
+ git config user.name "github-actions[bot]"
611
+ git config user.email "github-actions[bot]@users.noreply.github.com"
612
+ deepdoc update --deploy
613
+ ```
614
+
615
+ Add your API key to repo Settings → Secrets → Actions → `ANTHROPIC_API_KEY`.
616
+
617
+ ---
618
+
619
+ ## Typical Workflow
620
+
621
+ **First time:**
622
+ ```bash
623
+ cd your-repo
624
+ deepdoc init --provider anthropic
625
+ export ANTHROPIC_API_KEY=sk-ant-...
626
+ deepdoc generate
627
+ deepdoc serve # Preview at localhost:3000
628
+ deepdoc deploy # Export a static site to site/out/
629
+ ```
630
+
631
+ **Every time you update code:**
632
+ ```bash
633
+ git add . && git commit -m "feat: new feature"
634
+ deepdoc update # Only regenerates affected pages
635
+ deepdoc deploy # Or use --deploy flag with update
636
+ ```
637
+
638
+ **Full refresh after planner / prompt / generator changes:**
639
+ ```bash
640
+ deepdoc generate --force
641
+ ```
642
+
643
+ **Wipe docs and rebuild from zero:**
644
+ ```bash
645
+ deepdoc generate --clean --yes
646
+ ```
647
+
648
+ **Switch LLM mid-project:**
649
+ ```bash
650
+ deepdoc config set llm.provider openai
651
+ deepdoc config set llm.model gpt-4o
652
+ export OPENAI_API_KEY=sk-...
653
+ deepdoc generate --force # Full regen with new model
654
+ ```
655
+
656
+ ---
657
+
658
+ ## Requirements
659
+
660
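+ - Python 3.10+ (plus Node.js >= 18 for `deepdoc serve` and `deepdoc deploy`, which run the generated Next.js/Fumadocs app)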
661
+ - Git (for `deepdoc update` and `deepdoc deploy`)
662
+ - An LLM API key (or Ollama running locally)
663
+
664
+ ---
665
+
666
+ ## License
667
+
668
+ MIT
@@ -0,0 +1,3 @@
1
+ """DeepDoc — Auto-generate beautiful docs from any codebase."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,9 @@
1
+ """Module entrypoint for `python -m deepdoc`."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .cli import main
6
+
7
+
8
+ if __name__ == "__main__":
9
+ main()