codedoc-ai 0.8.0__tar.gz → 0.9.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/CHANGELOG.md +250 -0
- {codedoc_ai-0.8.0/codedoc_ai.egg-info → codedoc_ai-0.9.1}/PKG-INFO +98 -11
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/README.md +97 -10
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/RUN_FLOW.md +27 -4
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/__init__.py +1 -1
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/agents/base_agent.py +11 -11
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/agents/dependency_agent.py +36 -36
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/agents/documentation_agent.py +10 -10
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/agents/orchestrator.py +48 -8
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/agents/structure_agent.py +10 -10
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/cli/cli.py +70 -24
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/core/graph.py +9 -2
- codedoc_ai-0.9.1/codedoc/core/loader.py +618 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/core/output.py +50 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/core/project_view.py +245 -14
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/core/safe_writer.py +29 -0
- codedoc_ai-0.9.1/codedoc/core/scanner.py +270 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/llm/factory.py +63 -29
- codedoc_ai-0.9.1/codedoc/llm/rate_limit_profile.py +192 -0
- codedoc_ai-0.9.1/codedoc/parser/generic_parser.py +241 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/pipeline.py +257 -45
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/utils/logger.py +12 -1
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1/codedoc_ai.egg-info}/PKG-INFO +98 -11
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc_ai.egg-info/SOURCES.txt +7 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/pyproject.toml +7 -1
- codedoc_ai-0.9.1/tests/conftest.py +30 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/test_080_features.py +6 -2
- codedoc_ai-0.9.1/tests/test_081_configurable_defaults.py +704 -0
- codedoc_ai-0.9.1/tests/test_081_lossless_md.py +1051 -0
- codedoc_ai-0.9.1/tests/test_081_placeholder.py +500 -0
- codedoc_ai-0.9.1/tests/test_081_rate_limit_profiles.py +848 -0
- codedoc_ai-0.9.1/tests/test_090_features.py +483 -0
- codedoc_ai-0.9.1/tests/test_graph.py +158 -0
- codedoc_ai-0.9.1/tests/test_parser.py +310 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/test_pipeline.py +293 -24
- codedoc_ai-0.9.1/tests/test_scanner.py +141 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/test_scenarios.py +52 -27
- codedoc_ai-0.8.0/codedoc/core/loader.py +0 -337
- codedoc_ai-0.8.0/codedoc/core/scanner.py +0 -192
- codedoc_ai-0.8.0/codedoc/parser/generic_parser.py +0 -88
- codedoc_ai-0.8.0/tests/test_graph.py +0 -83
- codedoc_ai-0.8.0/tests/test_parser.py +0 -96
- codedoc_ai-0.8.0/tests/test_scanner.py +0 -69
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/.env.example +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/CODE_OF_CONDUCT.md +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/CONTRIBUTING.md +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/LICENSE +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/MANIFEST.in +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/SECURITY.md +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/__main__.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/agents/__init__.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/bootstrap.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/cli/__init__.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/core/__init__.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/core/checkpoint.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/core/db.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/core/queue.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/llm/__init__.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/llm/api_provider.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/llm/base.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/llm/local_provider.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/parser/__init__.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/parser/factory.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/parser/python_parser.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/parser/react_parser.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/utils/__init__.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc/utils/errors.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc_ai.egg-info/dependency_links.txt +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc_ai.egg-info/entry_points.txt +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc_ai.egg-info/requires.txt +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/codedoc_ai.egg-info/top_level.txt +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/setup.cfg +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/__init__.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/flutter_app/app.dart +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/flutter_app/main.dart +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/java_app/Main.java +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/java_app/Service.java +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/python_app/main.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/python_app/models.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/python_app/utils.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/react_app/App.tsx +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/react_app/index.html +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/react_app/main.tsx +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/react_app/router.tsx +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/fixtures/react_sample.tsx +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/test_agents.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/test_llm_mock.py +0 -0
- {codedoc_ai-0.8.0 → codedoc_ai-0.9.1}/tests/test_queue.py +0 -0
|
@@ -1,5 +1,255 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.9.1 - 2026-06-08
|
|
4
|
+
|
|
5
|
+
### Bug-fix stabilization patch (first PyPI release)
|
|
6
|
+
|
|
7
|
+
Corrective-only patch. No new features or output-shape changes.
|
|
8
|
+
|
|
9
|
+
- **A1 — entry-reachability is no longer silent.** Previously, when an entry was given,
|
|
10
|
+
files not reachable from it were dropped without notice. `_select_files` now
|
|
11
|
+
logs a clear WARNING listing the excluded files, records `stats["entry_excluded"]`,
|
|
12
|
+
and the CLI prints an excluded-files line. (The structural selection fix is
|
|
13
|
+
tracked for a later minor; this patch only removes the silent failure.)
|
|
14
|
+
- **A2 — a wrong `--entry` no longer silently documents the whole repo.** An
|
|
15
|
+
explicitly specified entry that cannot be resolved, is not in the scanned set,
|
|
16
|
+
resolves outside the project root, or is given when **no** supported files are
|
|
17
|
+
scanned, now raises `ConfigError` instead of falling back to all files or
|
|
18
|
+
exiting successfully. Auto-detection with no entry still documents everything.
|
|
19
|
+
- **A3 — parser false imports fixed.** The Go parser no longer treats arbitrary
|
|
20
|
+
string literals (e.g. `fmt.Println("hi")`) as imports — only string-literal
|
|
21
|
+
paths in `import "..."` statements and `import ( ... )` blocks are read,
|
|
22
|
+
comments are ignored, and raw-string (backtick) paths are supported.
|
|
23
|
+
Interpreted literals use Go's byte-accurate escape semantics, including
|
|
24
|
+
multi-byte UTF-8 `\xNN` / octal sequences and Unicode escapes. The HTML parser
|
|
25
|
+
no longer treats CSS `<link href>` as a code import (`<script src>` and
|
|
26
|
+
JS imports are still recognised).
|
|
27
|
+
- **A4 — no stale/empty record substituted for a real one.** In the parallel
|
|
28
|
+
batch, a rate-limited file was treated as "already recorded" using state that
|
|
29
|
+
also included records **preloaded** from a prior run, so a *changed* file could
|
|
30
|
+
be restored from stale documentation instead of retried. `SafeWriter` now
|
|
31
|
+
tracks records written *this run* (`recorded_this_run()`); a changed,
|
|
32
|
+
rate-limited file is retried, and a file genuinely recorded this run recovers
|
|
33
|
+
its real record via `get_record()` (never an empty `{}`).
|
|
34
|
+
- **A5 — honest interrupt message.** Removed dead code; the Ctrl-C message is now
|
|
35
|
+
conditional ("…if the run reached file processing") so it never falsely claims
|
|
36
|
+
progress was saved when interrupted before any file was processed.
|
|
37
|
+
- **A6 — scanner is re-entrant.** The directory walker no longer stores state on
|
|
38
|
+
the function object; state lives on a per-scan `_Walker` instance.
|
|
39
|
+
- **Version identity.** `pyproject.toml`, `codedoc.__version__`, the CLI
|
|
40
|
+
`--version`, and the README all report `0.9.1`, and the automated test
|
|
41
|
+
(`test_version_identity_consistent`) enforces agreement across **all four**,
|
|
42
|
+
including the README "Current release" line.
|
|
43
|
+
- **Reliable tests.** `tests/conftest.py` redirects the temp root into the repo
|
|
44
|
+
(`.pyt_tmp`) so a locked system temp dir does not make the suite unrunnable.
|
|
45
|
+
(This addresses the observed locked-system-temp failure; it is not a guarantee
|
|
46
|
+
for every environment.)
|
|
47
|
+
|
|
48
|
+
## 0.9.0 - 2026-06-04
|
|
49
|
+
|
|
50
|
+
### Output preflight safety, clean INFO logs, extension list fix, configurable content truncation
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
#### G0 — Output Preflight Safety
|
|
55
|
+
|
|
56
|
+
Foreign output targets now fail immediately with a `ConfigError` before the
|
|
57
|
+
scanner runs, the provider initialises, or any LLM API call is made. Previously
|
|
58
|
+
a foreign file at the target path would only be detected inside
|
|
59
|
+
`write_project_outputs`, after all tokens had already been spent.
|
|
60
|
+
|
|
61
|
+
- **`codedoc/core/output.py`**: Added `preflight_output_targets()` which calls
|
|
62
|
+
`_check_file_ownership()` for all final public targets (JSON, MD, both) and a
|
|
63
|
+
new `_check_md_live_backup_ownership()` for the MD live-backup JSON sibling.
|
|
64
|
+
- **`codedoc/pipeline.py`**: Calls `preflight_output_targets()` immediately after
|
|
65
|
+
output spec resolution, before `scan_files()` and `create_provider()`.
|
|
66
|
+
- **`codedoc/core/loader.py`**: `_resolve_output_spec()` now only emits the
|
|
67
|
+
format-conflict warning when `--format` was explicitly passed by the user (not
|
|
68
|
+
when the default `"json"` value from DEFAULTS triggers a mismatch).
|
|
69
|
+
|
|
70
|
+
#### G1 — Clean Log Output
|
|
71
|
+
|
|
72
|
+
Third-party HTTP libraries (`httpx`, `httpcore`, `openai`, `anthropic`,
|
|
73
|
+
`google.auth`) are now silenced at WARNING level by default. At `--verbose` /
|
|
74
|
+
DEBUG the HTTP diagnostics are restored. Per-agent progress lines appear at INFO
|
|
75
|
+
so users can see what codedoc is doing at each step.
|
|
76
|
+
|
|
77
|
+
- **`codedoc/utils/logger.py`**: `_NOISY_LOGGERS` constant defines the list;
|
|
78
|
+
`_configure()` sets those loggers to WARNING; `set_level()` lowers them to
|
|
79
|
+
DEBUG when the root logger is set to DEBUG.
|
|
80
|
+
- **`codedoc/agents/orchestrator.py`**: Added timing via `time.monotonic()` and
|
|
81
|
+
INFO/WARNING log lines after each agent: `[FILE] path | structure ok 0.8s`,
|
|
82
|
+
`[FILE] path | dependencies ok 0.9s`, `[FILE] path | documentation ok 1.2s`.
|
|
83
|
+
Fallbacks emit WARNING with `"fallback"` in the message.
|
|
84
|
+
|
|
85
|
+
#### G5 — Extension List Consistency
|
|
86
|
+
|
|
87
|
+
`_candidate_variants()` in `graph.py` previously used a hardcoded 9-extension list that
|
|
88
|
+
was out of sync with `_KNOWN_EXTENSIONS` and `DEFAULTS["extension_language_map"]`.
|
|
89
|
+
Import resolution for Go, Kotlin, Swift, Rust, Ruby, and C-family files silently
|
|
90
|
+
produced no candidates.
|
|
91
|
+
|
|
92
|
+
- **`codedoc/core/graph.py`**: `_KNOWN_EXTENSIONS` expanded to all 19 extensions
|
|
93
|
+
in `DEFAULTS["extension_language_map"]`. `_candidate_variants()` now uses
|
|
94
|
+
`sorted(_KNOWN_EXTENSIONS)` instead of a separate hardcoded list. A comment
|
|
95
|
+
notes the sync requirement with `loader.py`.
|
|
96
|
+
|
|
97
|
+
#### G6 — Configurable Content Truncation
|
|
98
|
+
|
|
99
|
+
Previously, files above 12,000 characters were silently truncated with only a DEBUG-level log message.
|
|
100
|
+
Users saw degraded documentation for large files with no indication why.
|
|
101
|
+
|
|
102
|
+
- **`codedoc/core/loader.py`**: `max_content_chars` added to `DEFAULTS` (12000)
|
|
103
|
+
and `_ENV_KEY_MAP` (`CODEDOC_MAX_CONTENT_CHARS`). Validation requires an
|
|
104
|
+
integer ≥ 1000.
|
|
105
|
+
- **`codedoc/agents/base_agent.py`**: Removed module-level `_MAX_CONTENT_CHARS`
|
|
106
|
+
constant. `BaseAgent.__init__` now accepts `max_content_chars: int = 12000`.
|
|
107
|
+
`_truncate()` uses `self._max_content_chars` and logs at INFO with the file
|
|
108
|
+
path and original / truncated character counts.
|
|
109
|
+
- **`codedoc/agents/orchestrator.py`**: `Orchestrator.__init__` accepts
|
|
110
|
+
`max_content_chars: int = 12000` and forwards it to each agent.
|
|
111
|
+
- **`codedoc/pipeline.py`**: Passes `config.get("max_content_chars", 12000)` to
|
|
112
|
+
the `Orchestrator` constructor.
|
|
113
|
+
- All three agent subclasses pass `file_path` to `_truncate()` for accurate logs.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## 0.8.1 - 2026-06-02
|
|
118
|
+
|
|
119
|
+
### Lossless Markdown, placeholder sanitization, configurable defaults, provider-aware rate-limit backoff
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
#### Workstream A — Lossless Markdown View
|
|
124
|
+
|
|
125
|
+
Markdown output now embeds the complete public JSON view as a hidden base64
|
|
126
|
+
comment so `json_from_markdown()` (and incremental re-runs that read a `.md`
|
|
127
|
+
file) recover the full dependency catalog, per-file hashes, and all dependency
|
|
128
|
+
metadata without any information loss.
|
|
129
|
+
|
|
130
|
+
- **`codedoc/core/project_view.py`**:
|
|
131
|
+
- `markdown_from_view()` writes a `<!-- codedoc-ai-view-base64 ... -->` block
|
|
132
|
+
immediately after the legacy `<!-- codedoc-ai: ... -->` metadata comment.
|
|
133
|
+
The block is standard base64-encoded UTF-8 JSON, which avoids comment-safety
|
|
134
|
+
issues with raw `--` or `-->` sequences in generated text.
|
|
135
|
+
- `markdown_to_view()` now tries the embedded view first (fast, lossless path);
|
|
136
|
+
falls back to the existing visible Markdown parser for pre-0.8.1 files.
|
|
137
|
+
- New public helper `read_embedded_view(markdown)` decodes and validates the
|
|
138
|
+
embedded block; returns `None` on any failure so callers fall back safely.
|
|
139
|
+
- `read_codedoc_meta()` no longer raises `ConfigError` when `entry_file` is
|
|
140
|
+
`null`; a valid CodeDoc file with no entry point is now correctly identified
|
|
141
|
+
as owned rather than foreign.
|
|
142
|
+
- **`codedoc/pipeline.py`**:
|
|
143
|
+
- `_load_existing_file_docs_from_md()` preserves file hashes from the embedded
|
|
144
|
+
view when the lightweight metadata comment has no hash for a path.
|
|
145
|
+
- `_resolve_entry_and_docs()` no longer raises unconditionally when no existing
|
|
146
|
+
output is found; first runs without `--entry` now reach `detect_entry_file()`
|
|
147
|
+
for auto-detection instead of failing immediately.
|
|
148
|
+
|
|
149
|
+
#### Workstream B — Placeholder Usage Example Sanitization
|
|
150
|
+
|
|
151
|
+
LLM-generated usage examples that contain placeholder package names (e.g.
|
|
152
|
+
`import 'package:your_package/...'`) are now removed before any output is
|
|
153
|
+
written or cached.
|
|
154
|
+
|
|
155
|
+
- **`codedoc/core/project_view.py`**: `_clean_file()` calls the new
|
|
156
|
+
`_sanitize_usage_example()` helper, which checks against `_PLACEHOLDER_PATTERN`
|
|
157
|
+
(a compiled `re.IGNORECASE` regex with word-boundary guards). Covered
|
|
158
|
+
placeholders: `your_package_name`, `your_package`, `your_project`, `your_app`,
|
|
159
|
+
`example_package`, `my_package`, and Dart-style `package:example/`.
|
|
160
|
+
Sanitization is idempotent and applies to both freshly generated records and
|
|
161
|
+
cached/reused records loaded from prior output files.
|
|
162
|
+
|
|
163
|
+
#### Workstream C — Configurable Hardcoded Defaults
|
|
164
|
+
|
|
165
|
+
All previously hardcoded scanner and provider defaults are now driven by a
|
|
166
|
+
single source of truth in `DEFAULTS` (`loader.py`) and support `_add` / `_remove`
|
|
167
|
+
override keys.
|
|
168
|
+
|
|
169
|
+
- **`codedoc/core/loader.py`**:
|
|
170
|
+
- `DEFAULTS` gains eleven new keys: `skip_dirs_add`, `skip_dirs_remove`,
|
|
171
|
+
`extension_language_map` (full 18-entry map), `extension_language_map_add`,
|
|
172
|
+
`extension_language_map_remove`, `auto_entry_candidates`,
|
|
173
|
+
`auto_entry_candidates_add`, `auto_entry_candidates_remove`,
|
|
174
|
+
`provider_prefixes`, `provider_prefixes_add`, `provider_prefixes_remove`.
|
|
175
|
+
- Three resolver helpers implement the resolution order (replace → `_add` →
|
|
176
|
+
`_remove`): `_resolve_list_override`, `_resolve_dict_override`,
|
|
177
|
+
`_resolve_nested_list_dict_override`.
|
|
178
|
+
- `_apply_config_overrides()` is called after all config sources are merged;
|
|
179
|
+
it resolves all four configurable keys and derives `supported_extensions`
|
|
180
|
+
from the resolved `extension_language_map`.
|
|
181
|
+
- Backward-compat bridge: if `supported_extensions` was explicitly set to a
|
|
182
|
+
value different from the defaults, it is used as a filter on
|
|
183
|
+
`extension_language_map` so old configs continue to restrict scanning as
|
|
184
|
+
intended.
|
|
185
|
+
- **`codedoc/core/scanner.py`**:
|
|
186
|
+
- Hardcoded `SKIP_DIRS` and `EXTENSION_LANGUAGE_MAP` removed.
|
|
187
|
+
- `scan_files()` receives `extension_language_map` (primary) instead of
|
|
188
|
+
`supported_extensions`. A positional-list guard handles legacy callers
|
|
189
|
+
that pass a list as the second argument.
|
|
190
|
+
- `detect_entry_file()` receives the resolved `auto_entry_candidates` list;
|
|
191
|
+
falls back to a module-level default for direct callers.
|
|
192
|
+
- **`codedoc/pipeline.py`**: passes `extension_language_map` and
|
|
193
|
+
`auto_entry_candidates` to the scanner; always appends the output directory
|
|
194
|
+
name to the scan skip list (even when the user removed it via
|
|
195
|
+
`--remove-skip-dir`) to prevent codedoc from documenting its own output.
|
|
196
|
+
- **`codedoc/cli/cli.py`**: three new flags: `--skip-dirs DIR [...]`,
|
|
197
|
+
`--add-skip-dir DIR` (repeatable), `--remove-skip-dir DIR` (repeatable).
|
|
198
|
+
- **`codedoc/llm/factory.py`**: `create_provider()`, `_make_api()`,
|
|
199
|
+
`_resolve_api_provider()`, and `_provider_api_key()` all accept and use
|
|
200
|
+
`provider_prefixes` from config; module-level tuples kept as fallbacks.
|
|
201
|
+
|
|
202
|
+
#### Workstream D — Provider-Aware Rate-Limit Backoff
|
|
203
|
+
|
|
204
|
+
Parallel ladder step-downs now sleep between rungs using provider-aware
|
|
205
|
+
exponential backoff, with optional `Retry-After` hint parsing.
|
|
206
|
+
|
|
207
|
+
- **`codedoc/llm/rate_limit_profile.py`** *(new)*:
|
|
208
|
+
- `RateLimitProfile` dataclass — `provider`, `signals`, `min_backoff_s`,
|
|
209
|
+
`backoff_scale`.
|
|
210
|
+
- `PROVIDER_PROFILES` — preconfigured profiles for `openai`, `anthropic`,
|
|
211
|
+
`gemini`, and `default`.
|
|
212
|
+
- `get_rate_limit_profile(provider_name, config)` — returns the resolved
|
|
213
|
+
profile with `rate_limit_backoff_s`, `rate_limit_backoff_scale`,
|
|
214
|
+
`rate_limit_signals_add`, and `rate_limit_signals_remove` applied without
|
|
215
|
+
mutating module defaults.
|
|
216
|
+
- **`codedoc/pipeline.py`**:
|
|
217
|
+
- `_is_rate_limit_error(exc, profile=None)` — when a `profile` is supplied,
|
|
218
|
+
checks only `profile.signals`; falls back to `_RATE_LIMIT_SIGNALS` for
|
|
219
|
+
backward compatibility with callers without a profile.
|
|
220
|
+
- `_detect_limit_type(error_msg)` — classifies errors as `"tpm"`, `"rpm"`,
|
|
221
|
+
`"quota"`, `"overloaded"`, or `None`.
|
|
222
|
+
- `_process_descriptor_batch()` return type changed:
|
|
223
|
+
`retry_rate_limited` is now `list[tuple[dict, Exception]]` so the causing
|
|
224
|
+
exception is preserved for `Retry-After` parsing and error sampling.
|
|
225
|
+
- `_process_agent_files()`: fetches the provider profile, passes it to
|
|
226
|
+
`_process_descriptor_batch()`, and sleeps between rungs using:
|
|
227
|
+
- `min(Retry-After, retry_after_cap_s)` when a hint is present and
|
|
228
|
+
`respect_retry_after = True`,
|
|
229
|
+
- `min(min_backoff_s × backoff_scale ^ rung, retry_after_cap_s)` otherwise,
|
|
230
|
+
- no sleep when `rate_limit_backoff_s = 0`.
|
|
231
|
+
- Rate-limit warning dicts now include: `retry_after_s`, `sleep_s`,
|
|
232
|
+
`error_sample`, `limit_type`, `event_number`, `rung_index`.
|
|
233
|
+
- **`codedoc/core/loader.py`**: four new `DEFAULTS` keys:
|
|
234
|
+
`rate_limit_backoff_s`, `rate_limit_backoff_scale`, `rate_limit_signals_add`,
|
|
235
|
+
`rate_limit_signals_remove`.
|
|
236
|
+
- **`codedoc/cli/cli.py`**: compact rate-limit summary line printed only when
|
|
237
|
+
step-down events occurred; shows event count, providers, and total sleep time.
|
|
238
|
+
|
|
239
|
+
#### Version
|
|
240
|
+
|
|
241
|
+
- `codedoc/__init__.py`, `pyproject.toml`, `cli.py`: `0.8.0` → `0.8.1`.
|
|
242
|
+
|
|
243
|
+
#### Validation
|
|
244
|
+
|
|
245
|
+
- Added regression coverage for lossless Markdown regeneration, placeholder
|
|
246
|
+
sanitization, configurable defaults, provider-aware rate-limit backoff, and
|
|
247
|
+
rate-limit edge cases.
|
|
248
|
+
- Full test suite passes.
|
|
249
|
+
- Built sdist/wheel and verified release metadata with `twine check`.
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
3
253
|
## 0.8.0 - 2026-05-31
|
|
4
254
|
|
|
5
255
|
### Always-on live JSON crash backup, parallel crash-safety, rate-limit adaptive parallelism, error.log overhaul
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codedoc-ai
|
|
3
|
-
Version: 0.8.0
|
|
3
|
+
Version: 0.9.1
|
|
4
4
|
Summary: Generate structured, incremental documentation for any codebase using OpenAI, Anthropic, or Gemini
|
|
5
5
|
Author: Atharv Mannur
|
|
6
6
|
License-Expression: MIT
|
|
@@ -40,7 +40,7 @@ Dynamic: license-file
|
|
|
40
40
|
|
|
41
41
|
The tool scans source files, resolves project-local imports into a dependency graph, sends only files that need analysis to an LLM, and writes one combined, structured documentation artifact designed for both humans and AI. By default that artifact is JSON.
|
|
42
42
|
|
|
43
|
-
Current release: `0.8.0`.
|
|
43
|
+
Current release: `0.9.1`.
|
|
44
44
|
|
|
45
45
|
## What It Does
|
|
46
46
|
|
|
@@ -91,6 +91,7 @@ codedoc run
|
|
|
91
91
|
| Live JSON backup | always on (0.8.0 default) |
|
|
92
92
|
| Rate-limit adaptive | `true` |
|
|
93
93
|
| Max file size | `500 KB` |
|
|
94
|
+
| Max content chars | `12000` |
|
|
94
95
|
|
|
95
96
|
Because the default provider uses the OpenAI API, a user must supply an API key unless they select a different provider.
|
|
96
97
|
|
|
@@ -340,7 +341,47 @@ Create `codedoc.config.json` in the project being documented:
|
|
|
340
341
|
"parallel_ladder": null,
|
|
341
342
|
"respect_retry_after": true,
|
|
342
343
|
"retry_after_cap_s": 30,
|
|
344
|
+
"rate_limit_backoff_s": null,
|
|
345
|
+
"rate_limit_backoff_scale": null,
|
|
346
|
+
"rate_limit_signals_add": [],
|
|
347
|
+
"rate_limit_signals_remove": [],
|
|
343
348
|
"skip_dirs": ["myenv", ".venv", "venv", "env", "node_modules", "__pycache__", "codedoc"],
|
|
349
|
+
"skip_dirs_add": [],
|
|
350
|
+
"skip_dirs_remove": [],
|
|
351
|
+
"max_content_chars": 12000,
|
|
352
|
+
"extension_language_map": {
|
|
353
|
+
".py": "python",
|
|
354
|
+
".ts": "typescript",
|
|
355
|
+
".tsx": "tsx",
|
|
356
|
+
".js": "javascript",
|
|
357
|
+
".jsx": "jsx",
|
|
358
|
+
".dart": "dart",
|
|
359
|
+
".java": "java",
|
|
360
|
+
".cs": "csharp",
|
|
361
|
+
".html": "html",
|
|
362
|
+
".htm": "html",
|
|
363
|
+
".kt": "kotlin",
|
|
364
|
+
".swift": "swift",
|
|
365
|
+
".go": "go",
|
|
366
|
+
".rb": "ruby",
|
|
367
|
+
".rs": "rust",
|
|
368
|
+
".cpp": "cpp",
|
|
369
|
+
".c": "c",
|
|
370
|
+
".h": "c",
|
|
371
|
+
".hpp": "cpp"
|
|
372
|
+
},
|
|
373
|
+
"extension_language_map_add": {},
|
|
374
|
+
"extension_language_map_remove": [],
|
|
375
|
+
"auto_entry_candidates": ["index.html", "main.tsx", "main.ts", "main.js", "main.py", "main.dart", "Main.java", "Program.cs"],
|
|
376
|
+
"auto_entry_candidates_add": [],
|
|
377
|
+
"auto_entry_candidates_remove": [],
|
|
378
|
+
"provider_prefixes": {
|
|
379
|
+
"anthropic": ["claude"],
|
|
380
|
+
"gemini": ["gemini"],
|
|
381
|
+
"openai": ["gpt-", "o1", "o3", "text-"]
|
|
382
|
+
},
|
|
383
|
+
"provider_prefixes_add": {},
|
|
384
|
+
"provider_prefixes_remove": {},
|
|
344
385
|
"ignore_paths": ["/myenv", "services/generated"]
|
|
345
386
|
}
|
|
346
387
|
```
|
|
@@ -369,6 +410,21 @@ Parallelism settings:
|
|
|
369
410
|
| `file_retry_attempts` | Number of sequential retries for a failed file. Default: `1`. |
|
|
370
411
|
| `max_consecutive_failures` | Stops the run after repeated failures so provider/API problems are visible quickly. Default: `5`. |
|
|
371
412
|
|
|
413
|
+
Configurable defaults added in 0.8.1:
|
|
414
|
+
|
|
415
|
+
| Setting | Purpose |
|
|
416
|
+
| --- | --- |
|
|
417
|
+
| `skip_dirs`, `skip_dirs_add`, `skip_dirs_remove` | Replace, extend, or reduce directory names skipped anywhere in the tree. Use `--remove-skip-dir codedoc` to document this package source while codedoc still skips its output directory. |
|
|
418
|
+
| `extension_language_map`, `extension_language_map_add`, `extension_language_map_remove` | Control which extensions are scanned and what language label each gets. Any extension in the resolved map is supported. |
|
|
419
|
+
| `auto_entry_candidates`, `auto_entry_candidates_add`, `auto_entry_candidates_remove` | Control first-run entry auto-detection when `--entry` is omitted. |
|
|
420
|
+
| `provider_prefixes`, `provider_prefixes_add`, `provider_prefixes_remove` | Control model-name based provider auto-detection and matching API-key lookup. |
|
|
421
|
+
|
|
422
|
+
Configurable settings added in 0.9.0:
|
|
423
|
+
|
|
424
|
+
| Setting | Default | Purpose |
|
|
425
|
+
| --- | --- | --- |
|
|
426
|
+
| `max_content_chars` | `12000` | Maximum characters of file content sent to the LLM per file. Files longer than this are truncated and an INFO log line is emitted with the file path and character counts. Raise this for large-context providers (for example, to a value between `60000` and `100000`). Must be at least `1000`. |
|
|
427
|
+
|
|
372
428
|
## Environment Variables
|
|
373
429
|
|
|
374
430
|
Secrets should live in environment variables or a local `.env` file that is ignored by Git. Use [.env.example](.env.example) as the template.
|
|
@@ -393,6 +449,7 @@ Supported variables:
|
|
|
393
449
|
| `CODEDOC_MAX_CONSECUTIVE_FAILURES` | Consecutive failure threshold before stopping. |
|
|
394
450
|
| `LOG_LEVEL` | `INFO`, `DEBUG`, etc. |
|
|
395
451
|
| `CODEDOC_IGNORE_PATHS` | Semicolon-separated ignore paths. |
|
|
452
|
+
| `CODEDOC_MAX_CONTENT_CHARS` | Maximum characters of file content sent to the LLM. Equivalent to `max_content_chars` in config. |
|
|
396
453
|
|
|
397
454
|
Example `.env` for OpenAI:
|
|
398
455
|
|
|
@@ -634,7 +691,7 @@ with the final clean output.
|
|
|
634
691
|
and now has no effect — live backup is always on. Passing it prints a deprecation
|
|
635
692
|
notice. It will be removed in a future release.
|
|
636
693
|
|
|
637
|
-
### Adaptive rate-limit parallelism (0.8.0)
|
|
694
|
+
### Adaptive rate-limit parallelism (0.8.1)
|
|
638
695
|
|
|
639
696
|
When a provider signals 429 / rate-limit / quota-exceeded, codedoc automatically
|
|
640
697
|
steps down file-level concurrency instead of hammering the API:
|
|
@@ -656,9 +713,34 @@ Customize it in config:
|
|
|
656
713
|
}
|
|
657
714
|
```
|
|
658
715
|
|
|
659
|
-
Provider-specific rate-limit signals are recognised for OpenAI (`429`,
|
|
660
|
-
`
|
|
661
|
-
(`
|
|
716
|
+
Provider-specific rate-limit signals are recognised for OpenAI (`429`, `rate limit`,
|
|
717
|
+
`rate_limit`, `too many requests`, `tokens per min`, `tpm`, `quota`), Anthropic
|
|
718
|
+
(`529`, `overloaded`, `rate_limit`, `429`), and Gemini (`resource_exhausted`,
|
|
719
|
+
`quota`, `429`, `503`). Non-rate-limit errors never trigger a step-down.
|
|
720
|
+
|
|
721
|
+
In 0.8.1, codedoc sleeps between parallel step-down rungs using provider-aware
|
|
722
|
+
backoff. You can tune this in config:
|
|
723
|
+
|
|
724
|
+
```json
|
|
725
|
+
{
|
|
726
|
+
"rate_limit_backoff_s": null,
|
|
727
|
+
"rate_limit_backoff_scale": null,
|
|
728
|
+
"rate_limit_signals_add": ["capacity exceeded", "throttled"],
|
|
729
|
+
"rate_limit_signals_remove": ["503"]
|
|
730
|
+
}
|
|
731
|
+
```
|
|
732
|
+
|
|
733
|
+
Set `rate_limit_backoff_s` to `0` to disable computed inter-rung backoff.
|
|
734
|
+
`Retry-After` hints are still honored when `respect_retry_after` is true.
|
|
735
|
+
|
|
736
|
+
### Lossless Markdown regeneration (0.8.1)
|
|
737
|
+
|
|
738
|
+
Markdown output remains human-readable, but codedoc now embeds a hidden
|
|
739
|
+
base64-encoded public JSON view in a `<!-- codedoc-ai-view-base64 ... -->`
|
|
740
|
+
comment. This lets later Markdown-to-JSON conversion and incremental re-runs
|
|
741
|
+
recover dependency catalogs, per-file dependency metadata, links, and hashes
|
|
742
|
+
without another LLM call. Legacy Markdown without the embedded view still uses
|
|
743
|
+
the best-effort visible Markdown parser.
|
|
662
744
|
|
|
663
745
|
### Issue log (`error.log`)
|
|
664
746
|
|
|
@@ -676,11 +758,16 @@ Only hard file failures are surfaced there.
|
|
|
676
758
|
|
|
677
759
|
### Ownership guard
|
|
678
760
|
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
761
|
+
`codedoc` checks that any existing file at the target path was produced by
|
|
762
|
+
codedoc (a `_codedoc` metadata block in JSON, or a `<!-- codedoc-ai: -->` comment
|
|
763
|
+
in Markdown). If the file is foreign, malformed, or empty, the run stops with a
|
|
764
|
+
clear `ConfigError`. Choose a different `--output` directory or remove the
|
|
765
|
+
conflicting file to proceed.
|
|
766
|
+
|
|
767
|
+
**Preflight (0.9.0).** The ownership check now runs *before* any filesystem
|
|
768
|
+
changes, directory creation, scanning, or LLM calls. A foreign target that would
|
|
769
|
+
block the final write is caught immediately — no tokens are spent and no output
|
|
770
|
+
directory is created.
|
|
684
771
|
|
|
685
772
|
### More detail
|
|
686
773
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
The tool scans source files, resolves project-local imports into a dependency graph, sends only files that need analysis to an LLM, and writes one combined, structured documentation artifact designed for both humans and AI. By default that artifact is JSON.
|
|
6
6
|
|
|
7
|
-
Current release: `0.8.0`.
|
|
7
|
+
Current release: `0.9.1`.
|
|
8
8
|
|
|
9
9
|
## What It Does
|
|
10
10
|
|
|
@@ -55,6 +55,7 @@ codedoc run
|
|
|
55
55
|
| Live JSON backup | always on (0.8.0 default) |
|
|
56
56
|
| Rate-limit adaptive | `true` |
|
|
57
57
|
| Max file size | `500 KB` |
|
|
58
|
+
| Max content chars | `12000` |
|
|
58
59
|
|
|
59
60
|
Because the default provider uses the OpenAI API, a user must supply an API key unless they select a different provider.
|
|
60
61
|
|
|
@@ -304,7 +305,47 @@ Create `codedoc.config.json` in the project being documented:
|
|
|
304
305
|
"parallel_ladder": null,
|
|
305
306
|
"respect_retry_after": true,
|
|
306
307
|
"retry_after_cap_s": 30,
|
|
308
|
+
"rate_limit_backoff_s": null,
|
|
309
|
+
"rate_limit_backoff_scale": null,
|
|
310
|
+
"rate_limit_signals_add": [],
|
|
311
|
+
"rate_limit_signals_remove": [],
|
|
307
312
|
"skip_dirs": ["myenv", ".venv", "venv", "env", "node_modules", "__pycache__", "codedoc"],
|
|
313
|
+
"skip_dirs_add": [],
|
|
314
|
+
"skip_dirs_remove": [],
|
|
315
|
+
"max_content_chars": 12000,
|
|
316
|
+
"extension_language_map": {
|
|
317
|
+
".py": "python",
|
|
318
|
+
".ts": "typescript",
|
|
319
|
+
".tsx": "tsx",
|
|
320
|
+
".js": "javascript",
|
|
321
|
+
".jsx": "jsx",
|
|
322
|
+
".dart": "dart",
|
|
323
|
+
".java": "java",
|
|
324
|
+
".cs": "csharp",
|
|
325
|
+
".html": "html",
|
|
326
|
+
".htm": "html",
|
|
327
|
+
".kt": "kotlin",
|
|
328
|
+
".swift": "swift",
|
|
329
|
+
".go": "go",
|
|
330
|
+
".rb": "ruby",
|
|
331
|
+
".rs": "rust",
|
|
332
|
+
".cpp": "cpp",
|
|
333
|
+
".c": "c",
|
|
334
|
+
".h": "c",
|
|
335
|
+
".hpp": "cpp"
|
|
336
|
+
},
|
|
337
|
+
"extension_language_map_add": {},
|
|
338
|
+
"extension_language_map_remove": [],
|
|
339
|
+
"auto_entry_candidates": ["index.html", "main.tsx", "main.ts", "main.js", "main.py", "main.dart", "Main.java", "Program.cs"],
|
|
340
|
+
"auto_entry_candidates_add": [],
|
|
341
|
+
"auto_entry_candidates_remove": [],
|
|
342
|
+
"provider_prefixes": {
|
|
343
|
+
"anthropic": ["claude"],
|
|
344
|
+
"gemini": ["gemini"],
|
|
345
|
+
"openai": ["gpt-", "o1", "o3", "text-"]
|
|
346
|
+
},
|
|
347
|
+
"provider_prefixes_add": {},
|
|
348
|
+
"provider_prefixes_remove": {},
|
|
308
349
|
"ignore_paths": ["/myenv", "services/generated"]
|
|
309
350
|
}
|
|
310
351
|
```
|
|
@@ -333,6 +374,21 @@ Parallelism settings:
|
|
|
333
374
|
| `file_retry_attempts` | Number of sequential retries for a failed file. Default: `1`. |
|
|
334
375
|
| `max_consecutive_failures` | Stops the run after repeated failures so provider/API problems are visible quickly. Default: `5`. |
|
|
335
376
|
|
|
377
|
+
Configurable defaults added in 0.8.1:
|
|
378
|
+
|
|
379
|
+
| Setting | Purpose |
|
|
380
|
+
| --- | --- |
|
|
381
|
+
| `skip_dirs`, `skip_dirs_add`, `skip_dirs_remove` | Replace, extend, or reduce directory names skipped anywhere in the tree. Use `--remove-skip-dir codedoc` to document this package source while codedoc still skips its output directory. |
|
|
382
|
+
| `extension_language_map`, `extension_language_map_add`, `extension_language_map_remove` | Control which extensions are scanned and what language label each gets. Any extension in the resolved map is supported. |
|
|
383
|
+
| `auto_entry_candidates`, `auto_entry_candidates_add`, `auto_entry_candidates_remove` | Control first-run entry auto-detection when `--entry` is omitted. |
|
|
384
|
+
| `provider_prefixes`, `provider_prefixes_add`, `provider_prefixes_remove` | Control model-name-based provider auto-detection and matching API-key lookup. |
|
|
385
|
+
|
|
386
|
+
Configurable settings added in 0.9.0:
|
|
387
|
+
|
|
388
|
+
| Setting | Default | Purpose |
|
|
389
|
+
| --- | --- | --- |
|
|
390
|
+
| `max_content_chars` | `12000` | Maximum characters of file content sent to the LLM per file. Files longer than this are truncated and an INFO log line is emitted with the file path and character counts. Raise this for large-context providers (`60000`–`100000`). Must be at least `1000`. |
|
|
391
|
+
|
|
336
392
|
## Environment Variables
|
|
337
393
|
|
|
338
394
|
Secrets should live in environment variables or a local `.env` file that is ignored by Git. Use [.env.example](.env.example) as the template.
|
|
@@ -357,6 +413,7 @@ Supported variables:
|
|
|
357
413
|
| `CODEDOC_MAX_CONSECUTIVE_FAILURES` | Consecutive failure threshold before stopping. |
|
|
358
414
|
| `LOG_LEVEL` | `INFO`, `DEBUG`, etc. |
|
|
359
415
|
| `CODEDOC_IGNORE_PATHS` | Semicolon-separated ignore paths. |
|
|
416
|
+
| `CODEDOC_MAX_CONTENT_CHARS` | Maximum characters of file content sent to the LLM. Equivalent to `max_content_chars` in config. |
|
|
360
417
|
|
|
361
418
|
Example `.env` for OpenAI:
|
|
362
419
|
|
|
@@ -598,7 +655,7 @@ with the final clean output.
|
|
|
598
655
|
and now has no effect — live backup is always on. Passing it prints a deprecation
|
|
599
656
|
notice. It will be removed in a future release.
|
|
600
657
|
|
|
601
|
-
### Adaptive rate-limit parallelism (0.8.0)
|
|
658
|
+
### Adaptive rate-limit parallelism (0.8.1)
|
|
602
659
|
|
|
603
660
|
When a provider signals 429 / rate-limit / quota-exceeded, codedoc automatically
|
|
604
661
|
steps down file-level concurrency instead of hammering the API:
|
|
@@ -620,9 +677,34 @@ Customize it in config:
|
|
|
620
677
|
}
|
|
621
678
|
```
|
|
622
679
|
|
|
623
|
-
Provider-specific rate-limit signals are recognised for OpenAI (`429`,
|
|
624
|
-
`
|
|
625
|
-
(`
|
|
680
|
+
Provider-specific rate-limit signals are recognised for OpenAI (`429`, `rate limit`,
|
|
681
|
+
`rate_limit`, `too many requests`, `tokens per min`, `tpm`, `quota`), Anthropic
|
|
682
|
+
(`529`, `overloaded`, `rate_limit`, `429`), and Gemini (`resource_exhausted`,
|
|
683
|
+
`quota`, `429`, `503`). Non-rate-limit errors never trigger a step-down.
|
|
684
|
+
|
|
685
|
+
In 0.8.1, codedoc sleeps between parallel step-down rungs using provider-aware
|
|
686
|
+
backoff. You can tune this in config:
|
|
687
|
+
|
|
688
|
+
```json
|
|
689
|
+
{
|
|
690
|
+
"rate_limit_backoff_s": null,
|
|
691
|
+
"rate_limit_backoff_scale": null,
|
|
692
|
+
"rate_limit_signals_add": ["capacity exceeded", "throttled"],
|
|
693
|
+
"rate_limit_signals_remove": ["503"]
|
|
694
|
+
}
|
|
695
|
+
```
|
|
696
|
+
|
|
697
|
+
Set `rate_limit_backoff_s` to `0` to disable computed inter-rung backoff.
|
|
698
|
+
`Retry-After` hints are still honored when `respect_retry_after` is true.
|
|
699
|
+
|
|
700
|
+
### Lossless Markdown regeneration (0.8.1)
|
|
701
|
+
|
|
702
|
+
Markdown output remains human-readable, but codedoc now embeds a hidden
|
|
703
|
+
base64-encoded public JSON view in a `<!-- codedoc-ai-view-base64 ... -->`
|
|
704
|
+
comment. This lets later Markdown-to-JSON conversion and incremental re-runs
|
|
705
|
+
recover dependency catalogs, per-file dependency metadata, links, and hashes
|
|
706
|
+
without another LLM call. Legacy Markdown without the embedded view still uses
|
|
707
|
+
the best-effort visible Markdown parser.
|
|
626
708
|
|
|
627
709
|
### Issue log (`error.log`)
|
|
628
710
|
|
|
@@ -640,11 +722,16 @@ Only hard file failures are surfaced there.
|
|
|
640
722
|
|
|
641
723
|
### Ownership guard
|
|
642
724
|
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
725
|
+
`codedoc` checks that any existing file at the target path was produced by
|
|
726
|
+
codedoc (a `_codedoc` metadata block in JSON, or a `<!-- codedoc-ai: -->` comment
|
|
727
|
+
in Markdown). If the file is foreign, malformed, or empty, the run stops with a
|
|
728
|
+
clear `ConfigError`. Choose a different `--output` directory or remove the
|
|
729
|
+
conflicting file to proceed.
|
|
730
|
+
|
|
731
|
+
**Preflight (0.9.0).** The ownership check now runs *before* any filesystem
|
|
732
|
+
changes, directory creation, scanning, or LLM calls. A foreign target that would
|
|
733
|
+
block the final write is caught immediately — no tokens are spent and no output
|
|
734
|
+
directory is created.
|
|
648
735
|
|
|
649
736
|
### More detail
|
|
650
737
|
|
|
@@ -203,15 +203,38 @@ main thread, so a Ctrl-C or crash after a worker completes never discards that r
|
|
|
203
203
|
- **Ownership guard.** `codedoc` refuses to overwrite a file it did not create (no `_codedoc`
|
|
204
204
|
metadata block) — including the JSON backup sibling for named-MD runs.
|
|
205
205
|
|
|
206
|
-
**Rate-limit step-down (0.8.0):**
|
|
206
|
+
**Rate-limit step-down (0.8.1):**
|
|
207
207
|
When a rate-limit signal is detected during parallel processing, codedoc steps down the
|
|
208
|
-
file concurrency ladder
|
|
208
|
+
file concurrency ladder, sleeps using provider-aware exponential backoff, and prints a
|
|
209
|
+
notice to the terminal:
|
|
209
210
|
|
|
210
211
|
```
|
|
211
|
-
[
|
|
212
|
-
reduced to 2. Retrying 4 remaining file(s) at lower concurrency.
|
|
212
|
+
[anthropic] Rate limit detected - your configured max_parallel_files (5) has been
|
|
213
|
+
reduced to 2. Retrying 4 remaining file(s) at lower concurrency. Sleeping 10.0s before retry.
|
|
213
214
|
```
|
|
214
215
|
|
|
216
|
+
At the end of the run, a compact summary line is printed only when step-down events
|
|
217
|
+
occurred:
|
|
218
|
+
|
|
219
|
+
```
|
|
220
|
+
Rate limits: 1 step-down event(s) [anthropic], 10.0s total backoff. Details in error.log.
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Backoff behavior (provider defaults, all overridable via config):
|
|
224
|
+
|
|
225
|
+
| Provider | Signals | Min backoff | Scale |
|
|
226
|
+
|-----------|-------------------------------------------------------|------------:|------:|
|
|
227
|
+
| openai | 429, rate limit, tpm, quota, ... | 5 s | 1.5× |
|
|
228
|
+
| anthropic | 529, overloaded, rate_limit, 429 | 10 s | 2.0× |
|
|
229
|
+
| gemini | resource_exhausted, quota, 429, 503 | 8 s | 1.5× |
|
|
230
|
+
| default | (union of all above) | 5 s | 1.5× |
|
|
231
|
+
|
|
232
|
+
Config overrides:
|
|
233
|
+
- `rate_limit_backoff_s` — override min backoff globally (`0` disables sleep).
|
|
234
|
+
- `rate_limit_backoff_scale` — override exponential scale globally.
|
|
235
|
+
- `rate_limit_signals_add` — add extra signal strings (for custom gateways).
|
|
236
|
+
- `rate_limit_signals_remove` — remove signals from the resolved profile.
|
|
237
|
+
|
|
215
238
|
Recovered rate-limit events appear in `error.log` (located in the output directory,
|
|
216
239
|
not the project root) as warnings, and do not raise an alarm in the final output.
|
|
217
240
|
|