codedoc-ai 0.8.0__tar.gz → 0.9.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/CHANGELOG.md +271 -0
  2. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/MANIFEST.in +1 -0
  3. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/PKG-INFO +189 -22
  4. codedoc_ai-0.8.0/codedoc_ai.egg-info/PKG-INFO → codedoc_ai-0.9.2/README.md +186 -62
  5. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/RUN_FLOW.md +64 -8
  6. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/__init__.py +1 -1
  7. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/agents/base_agent.py +69 -17
  8. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/agents/dependency_agent.py +55 -41
  9. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/agents/documentation_agent.py +41 -18
  10. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/agents/orchestrator.py +78 -15
  11. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/agents/structure_agent.py +29 -15
  12. codedoc_ai-0.9.2/codedoc/cli/cli.py +480 -0
  13. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/core/graph.py +9 -2
  14. codedoc_ai-0.9.2/codedoc/core/loader.py +678 -0
  15. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/core/output.py +113 -14
  16. codedoc_ai-0.9.2/codedoc/core/planning.py +228 -0
  17. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/core/project_view.py +245 -14
  18. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/core/safe_writer.py +29 -0
  19. codedoc_ai-0.9.2/codedoc/core/scanner.py +270 -0
  20. codedoc_ai-0.9.2/codedoc/core/usage.py +96 -0
  21. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/llm/factory.py +74 -30
  22. codedoc_ai-0.9.2/codedoc/llm/rate_limit_profile.py +192 -0
  23. codedoc_ai-0.9.2/codedoc/parser/generic_parser.py +241 -0
  24. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/pipeline.py +520 -130
  25. codedoc_ai-0.9.2/codedoc/templates/github-actions-codedoc.yml +96 -0
  26. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/utils/errors.py +9 -0
  27. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/utils/logger.py +12 -1
  28. codedoc_ai-0.8.0/README.md → codedoc_ai-0.9.2/codedoc_ai.egg-info/PKG-INFO +223 -20
  29. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc_ai.egg-info/SOURCES.txt +11 -0
  30. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/pyproject.toml +49 -21
  31. codedoc_ai-0.9.2/tests/conftest.py +30 -0
  32. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/test_080_features.py +6 -2
  33. codedoc_ai-0.9.2/tests/test_081_configurable_defaults.py +704 -0
  34. codedoc_ai-0.9.2/tests/test_081_lossless_md.py +1051 -0
  35. codedoc_ai-0.9.2/tests/test_081_placeholder.py +500 -0
  36. codedoc_ai-0.9.2/tests/test_081_rate_limit_profiles.py +848 -0
  37. codedoc_ai-0.9.2/tests/test_090_features.py +487 -0
  38. codedoc_ai-0.9.2/tests/test_092_features.py +600 -0
  39. codedoc_ai-0.9.2/tests/test_graph.py +158 -0
  40. codedoc_ai-0.9.2/tests/test_parser.py +310 -0
  41. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/test_pipeline.py +293 -24
  42. codedoc_ai-0.9.2/tests/test_scanner.py +141 -0
  43. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/test_scenarios.py +52 -27
  44. codedoc_ai-0.8.0/codedoc/cli/cli.py +0 -265
  45. codedoc_ai-0.8.0/codedoc/core/loader.py +0 -337
  46. codedoc_ai-0.8.0/codedoc/core/scanner.py +0 -192
  47. codedoc_ai-0.8.0/codedoc/parser/generic_parser.py +0 -88
  48. codedoc_ai-0.8.0/tests/test_graph.py +0 -83
  49. codedoc_ai-0.8.0/tests/test_parser.py +0 -96
  50. codedoc_ai-0.8.0/tests/test_scanner.py +0 -69
  51. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/.env.example +0 -0
  52. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  53. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  54. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  55. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/CODE_OF_CONDUCT.md +0 -0
  56. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/CONTRIBUTING.md +0 -0
  57. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/LICENSE +0 -0
  58. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/SECURITY.md +0 -0
  59. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/__main__.py +0 -0
  60. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/agents/__init__.py +0 -0
  61. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/bootstrap.py +0 -0
  62. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/cli/__init__.py +0 -0
  63. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/core/__init__.py +0 -0
  64. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/core/checkpoint.py +0 -0
  65. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/core/db.py +0 -0
  66. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/core/queue.py +0 -0
  67. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/llm/__init__.py +0 -0
  68. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/llm/api_provider.py +0 -0
  69. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/llm/base.py +0 -0
  70. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/llm/local_provider.py +0 -0
  71. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/parser/__init__.py +0 -0
  72. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/parser/factory.py +0 -0
  73. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/parser/python_parser.py +0 -0
  74. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/parser/react_parser.py +0 -0
  75. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc/utils/__init__.py +0 -0
  76. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc_ai.egg-info/dependency_links.txt +0 -0
  77. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc_ai.egg-info/entry_points.txt +0 -0
  78. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc_ai.egg-info/requires.txt +0 -0
  79. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/codedoc_ai.egg-info/top_level.txt +0 -0
  80. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/setup.cfg +0 -0
  81. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/__init__.py +0 -0
  82. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/flutter_app/app.dart +0 -0
  83. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/flutter_app/main.dart +0 -0
  84. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/java_app/Main.java +0 -0
  85. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/java_app/Service.java +0 -0
  86. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/python_app/main.py +0 -0
  87. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/python_app/models.py +0 -0
  88. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/python_app/utils.py +0 -0
  89. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/react_app/App.tsx +0 -0
  90. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/react_app/index.html +0 -0
  91. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/react_app/main.tsx +0 -0
  92. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/react_app/router.tsx +0 -0
  93. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/fixtures/react_sample.tsx +0 -0
  94. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/test_agents.py +0 -0
  95. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/test_llm_mock.py +0 -0
  96. {codedoc_ai-0.8.0 → codedoc_ai-0.9.2}/tests/test_queue.py +0 -0
@@ -1,5 +1,276 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.9.2 - 2026-06-12
4
+
5
+ ### Safe planning and CI ergonomics
6
+
7
+ - Added a filesystem-read-only, provider-free `--dry-run` driven by the same
8
+ immutable routing plan as real execution.
9
+ - Added `--max-files`, repeatable `--force-files`, and `--allow-partial`, with
10
+ matching config and environment-variable support.
11
+ - Added stable CLI exit codes for success, file/output failures, setup errors,
12
+ and interrupts.
13
+ - Added approximate planned and actual LLM call/token reporting. Dry-run totals
14
+ are explicitly lower bounds and no monetary accuracy is claimed.
15
+ - Centralized per-file truncation so all three agents receive the same bounded
16
+ source string and only one warning is emitted.
17
+ - Added read-only ownership inspection and moved the paid-file cap ahead of
18
+ filesystem mutation, writer initialization, and provider creation.
19
+ - Added a packaged, manual-only GitHub Actions workflow with a dry-run, paid
20
+ cap, least-privilege permissions, and artifact upload.
21
+ - Kept `--safe-mode` accepted but hidden for backward compatibility.
22
+ - Added focused 0.9.2 regression coverage and synchronized release identity.
23
+
24
+ ## 0.9.1 - 2026-06-08
25
+
26
+ ### Bug-fix stabilization patch (first PyPI release)
27
+
28
+ Corrective-only patch. No new features or output-shape changes.
29
+
30
+ - **A1 — entry-reachability is no longer silent.** Previously, when an entry was given,
31
+ files not reachable from it were dropped without notice. `_select_files` now
32
+ logs a clear WARNING listing the excluded files, records `stats["entry_excluded"]`,
33
+ and the CLI prints an excluded-files line. (The structural selection fix is
34
+ tracked for a later minor; this patch only removes the silent failure.)
35
+ - **A2 — a wrong `--entry` no longer silently documents the whole repo.** An
36
+ explicitly specified entry that cannot be resolved, is not in the scanned set,
37
+ resolves outside the project root, or is given when **no** supported files are
38
+ scanned, now raises `ConfigError` instead of falling back to all files or
39
+ exiting successfully. Auto-detection with no entry still documents everything.
40
+ - **A3 — parser false imports fixed.** The Go parser no longer treats arbitrary
41
+ string literals (e.g. `fmt.Println("hi")`) as imports — only string-literal
42
+ paths in `import "..."` statements and `import ( ... )` blocks are read,
43
+ comments are ignored, and raw-string (backtick) paths are supported.
44
+ Interpreted literals use Go's byte-accurate escape semantics, including
45
+ multi-byte UTF-8 `\xNN` / octal sequences and Unicode escapes. The HTML parser
46
+ no longer treats CSS `<link href>` as a code import (kept `<script src>` and
47
+ JS imports).
48
+ - **A4 — no stale/empty record substituted for a real one.** Previously, in the parallel
49
+ batch, a rate-limited file was treated as "already recorded" using state that
50
+ also included records **preloaded** from a prior run, so a *changed* file could
51
+ be restored from stale documentation instead of retried. `SafeWriter` now
52
+ tracks records written *this run* (`recorded_this_run()`); a changed,
53
+ rate-limited file is retried, and a file genuinely recorded this run recovers
54
+ its real record via `get_record()` (never an empty `{}`).
55
+ - **A5 — honest interrupt message.** Removed dead code; the Ctrl-C message is now
56
+ conditional ("…if the run reached file processing") so it never falsely claims
57
+ progress was saved when interrupted before any file was processed.
58
+ - **A6 — scanner is re-entrant.** The directory walker no longer stores state on
59
+ the function object; state lives on a per-scan `_Walker` instance.
60
+ - **Version identity.** `pyproject.toml`, `codedoc.__version__`, the CLI
61
+ `--version`, and the README all report `0.9.1`, and the automated test
62
+ (`test_version_identity_consistent`) enforces agreement across **all four**,
63
+ including the README "Current release" line.
64
+ - **Reliable tests.** `tests/conftest.py` redirects the temp root into the repo
65
+ (`.pyt_tmp`) so a locked system temp dir does not make the suite unrunnable.
66
+ (This addresses the observed locked-system-temp failure; it is not a guarantee
67
+ for every environment.)
68
+
69
+ ## 0.9.0 - 2026-06-04
70
+
71
+ ### Output preflight safety, clean INFO logs, extension list fix, configurable content truncation
72
+
73
+ ---
74
+
75
+ #### G0 — Output Preflight Safety
76
+
77
+ Foreign output targets now fail immediately with a `ConfigError` before the
78
+ scanner runs, the provider initialises, or any LLM API call is made. Previously
79
+ a foreign file at the target path would only be detected inside
80
+ `write_project_outputs`, after all tokens had already been spent.
81
+
82
+ - **`codedoc/core/output.py`**: Added `preflight_output_targets()` which calls
83
+ `_check_file_ownership()` for all final public targets (JSON, MD, both) and a
84
+ new `_check_md_live_backup_ownership()` for the MD live-backup JSON sibling.
85
+ - **`codedoc/pipeline.py`**: Calls `preflight_output_targets()` immediately after
86
+ output spec resolution, before `scan_files()` and `create_provider()`.
87
+ - **`codedoc/core/loader.py`**: `_resolve_output_spec()` now only emits the
88
+ format-conflict warning when `--format` was explicitly passed by the user (not
89
+ when the default `"json"` value from DEFAULTS triggers a mismatch).
90
+
91
+ #### G1 — Clean Log Output
92
+
93
+ Third-party HTTP libraries (`httpx`, `httpcore`, `openai`, `anthropic`,
94
+ `google.auth`) are now silenced at WARNING level by default. At `--verbose` /
95
+ DEBUG the HTTP diagnostics are restored. Per-agent progress lines appear at INFO
96
+ so users can see what codedoc is doing at each step.
97
+
98
+ - **`codedoc/utils/logger.py`**: `_NOISY_LOGGERS` constant defines the list;
99
+ `_configure()` sets those loggers to WARNING; `set_level()` lowers them to
100
+ DEBUG when the root logger is set to DEBUG.
101
+ - **`codedoc/agents/orchestrator.py`**: Added timing via `time.monotonic()` and
102
+ INFO/WARNING log lines after each agent: `[FILE] path | structure ok 0.8s`,
103
+ `[FILE] path | dependencies ok 0.9s`, `[FILE] path | documentation ok 1.2s`.
104
+ Fallbacks emit WARNING with `"fallback"` in the message.
105
+
106
+ #### G5 — Extension List Consistency
107
+
108
+ `_candidate_variants()` in `graph.py` used a hardcoded 9-extension list that
109
+ was out of sync with `_KNOWN_EXTENSIONS` and `DEFAULTS["extension_language_map"]`.
110
+ Import resolution for Go, Kotlin, Swift, Rust, Ruby, and C-family files silently
111
+ produced no candidates.
112
+
113
+ - **`codedoc/core/graph.py`**: `_KNOWN_EXTENSIONS` expanded to all 19 extensions
114
+ in `DEFAULTS["extension_language_map"]`. `_candidate_variants()` now uses
115
+ `sorted(_KNOWN_EXTENSIONS)` instead of a separate hardcoded list. A comment
116
+ notes the sync requirement with `loader.py`.
117
+
118
+ #### G6 — Configurable Content Truncation
119
+
120
+ Previously, files above 12,000 characters were silently truncated with a DEBUG-only log.
121
+ Users saw degraded documentation for large files with no indication why.
122
+
123
+ - **`codedoc/core/loader.py`**: `max_content_chars` added to `DEFAULTS` (12000)
124
+ and `_ENV_KEY_MAP` (`CODEDOC_MAX_CONTENT_CHARS`). Validation requires a positive
125
+ integer ≥ 1000.
126
+ - **`codedoc/agents/base_agent.py`**: Removed module-level `_MAX_CONTENT_CHARS`
127
+ constant. `BaseAgent.__init__` now accepts `max_content_chars: int = 12000`.
128
+ `_truncate()` uses `self._max_content_chars` and logs at INFO with the file
129
+ path and original / truncated character counts.
130
+ - **`codedoc/agents/orchestrator.py`**: `Orchestrator.__init__` accepts
131
+ `max_content_chars: int = 12000` and forwards it to each agent.
132
+ - **`codedoc/pipeline.py`**: Passes `config.get("max_content_chars", 12000)` to
133
+ the `Orchestrator` constructor.
134
+ - All three agent subclasses pass `file_path` to `_truncate()` for accurate logs.
135
+
136
+ ---
137
+
138
+ ## 0.8.1 - 2026-06-02
139
+
140
+ ### Lossless Markdown, placeholder sanitization, configurable defaults, provider-aware rate-limit backoff
141
+
142
+ ---
143
+
144
+ #### Workstream A — Lossless Markdown View
145
+
146
+ Markdown output now embeds the complete public JSON view as a hidden base64
147
+ comment so `json_from_markdown()` (and incremental re-runs that read a `.md`
148
+ file) recover the full dependency catalog, per-file hashes, and all dependency
149
+ metadata without any information loss.
150
+
151
+ - **`codedoc/core/project_view.py`**:
152
+ - `markdown_from_view()` writes a `<!-- codedoc-ai-view-base64 ... -->` block
153
+ immediately after the legacy `<!-- codedoc-ai: ... -->` metadata comment.
154
+ The block is standard base64-encoded UTF-8 JSON, which avoids comment-safety
155
+ issues with raw `--` or `-->` sequences in generated text.
156
+ - `markdown_to_view()` now tries the embedded view first (fast, lossless path);
157
+ falls back to the existing visible Markdown parser for pre-0.8.1 files.
158
+ - New public helper `read_embedded_view(markdown)` decodes and validates the
159
+ embedded block; returns `None` on any failure so callers fall back safely.
160
+ - `read_codedoc_meta()` no longer raises `ConfigError` when `entry_file` is
161
+ `null`; a valid CodeDoc file with no entry point is now correctly identified
162
+ as owned rather than foreign.
163
+ - **`codedoc/pipeline.py`**:
164
+ - `_load_existing_file_docs_from_md()` preserves file hashes from the embedded
165
+ view when the lightweight metadata comment has no hash for a path.
166
+ - `_resolve_entry_and_docs()` no longer raises unconditionally when no existing
167
+ output is found; first runs without `--entry` now reach `detect_entry_file()`
168
+ for auto-detection instead of failing immediately.
169
+
170
+ #### Workstream B — Placeholder Usage Example Sanitization
171
+
172
+ LLM-generated usage examples that contain placeholder package names (e.g.
173
+ `import 'package:your_package/...'`) are now removed before any output is
174
+ written or cached.
175
+
176
+ - **`codedoc/core/project_view.py`**: `_clean_file()` calls the new
177
+ `_sanitize_usage_example()` helper, which checks against `_PLACEHOLDER_PATTERN`
178
+ (a compiled `re.IGNORECASE` regex with word-boundary guards). Covered
179
+ placeholders: `your_package_name`, `your_package`, `your_project`, `your_app`,
180
+ `example_package`, `my_package`, and Dart-style `package:example/`.
181
+ Sanitization is idempotent and applies to both freshly generated records and
182
+ cached/reused records loaded from prior output files.
183
+
184
+ #### Workstream C — Configurable Hardcoded Defaults
185
+
186
+ All previously hardcoded scanner and provider defaults are now driven by a
187
+ single source of truth in `DEFAULTS` (`loader.py`) and support `_add` / `_remove`
188
+ override keys.
189
+
190
+ - **`codedoc/core/loader.py`**:
191
+ - `DEFAULTS` gains eleven new keys: `skip_dirs_add`, `skip_dirs_remove`,
192
+ `extension_language_map` (full 18-entry map), `extension_language_map_add`,
193
+ `extension_language_map_remove`, `auto_entry_candidates`,
194
+ `auto_entry_candidates_add`, `auto_entry_candidates_remove`,
195
+ `provider_prefixes`, `provider_prefixes_add`, `provider_prefixes_remove`.
196
+ - Three resolver helpers implement the resolution order (replace → `_add` →
197
+ `_remove`): `_resolve_list_override`, `_resolve_dict_override`,
198
+ `_resolve_nested_list_dict_override`.
199
+ - `_apply_config_overrides()` is called after all config sources are merged;
200
+ it resolves all four configurable keys and derives `supported_extensions`
201
+ from the resolved `extension_language_map`.
202
+ - Backward-compat bridge: if `supported_extensions` was explicitly set to a
203
+ value different from the defaults, it is used as a filter on
204
+ `extension_language_map` so old configs continue to restrict scanning as
205
+ intended.
206
+ - **`codedoc/core/scanner.py`**:
207
+ - Hardcoded `SKIP_DIRS` and `EXTENSION_LANGUAGE_MAP` removed.
208
+ - `scan_files()` receives `extension_language_map` (primary) instead of
209
+ `supported_extensions`. A positional-list guard handles legacy callers
210
+ that pass a list as the second argument.
211
+ - `detect_entry_file()` receives the resolved `auto_entry_candidates` list;
212
+ falls back to a module-level default for direct callers.
213
+ - **`codedoc/pipeline.py`**: passes `extension_language_map` and
214
+ `auto_entry_candidates` to the scanner; always appends the output directory
215
+ name to the scan skip list (even when the user removed it via
216
+ `--remove-skip-dir`) to prevent codedoc from documenting its own output.
217
+ - **`codedoc/cli/cli.py`**: three new flags: `--skip-dirs DIR [...]`,
218
+ `--add-skip-dir DIR` (repeatable), `--remove-skip-dir DIR` (repeatable).
219
+ - **`codedoc/llm/factory.py`**: `create_provider()`, `_make_api()`,
220
+ `_resolve_api_provider()`, and `_provider_api_key()` all accept and use
221
+ `provider_prefixes` from config; module-level tuples kept as fallbacks.
222
+
223
+ #### Workstream D — Provider-Aware Rate-Limit Backoff
224
+
225
+ Parallel ladder step-downs now sleep between rungs using provider-aware
226
+ exponential backoff, with optional `Retry-After` hint parsing.
227
+
228
+ - **`codedoc/llm/rate_limit_profile.py`** *(new)*:
229
+ - `RateLimitProfile` dataclass — `provider`, `signals`, `min_backoff_s`,
230
+ `backoff_scale`.
231
+ - `PROVIDER_PROFILES` — preconfigured profiles for `openai`, `anthropic`,
232
+ `gemini`, and `default`.
233
+ - `get_rate_limit_profile(provider_name, config)` — returns the resolved
234
+ profile with `rate_limit_backoff_s`, `rate_limit_backoff_scale`,
235
+ `rate_limit_signals_add`, and `rate_limit_signals_remove` applied without
236
+ mutating module defaults.
237
+ - **`codedoc/pipeline.py`**:
238
+ - `_is_rate_limit_error(exc, profile=None)` — when a `profile` is supplied,
239
+ checks only `profile.signals`; falls back to `_RATE_LIMIT_SIGNALS` for
240
+ backward compatibility with callers without a profile.
241
+ - `_detect_limit_type(error_msg)` — classifies errors as `"tpm"`, `"rpm"`,
242
+ `"quota"`, `"overloaded"`, or `None`.
243
+ - `_process_descriptor_batch()` return type changed:
244
+ `retry_rate_limited` is now `list[tuple[dict, Exception]]` so the causing
245
+ exception is preserved for `Retry-After` parsing and error sampling.
246
+ - `_process_agent_files()`: fetches the provider profile, passes it to
247
+ `_process_descriptor_batch()`, and sleeps between rungs using:
248
+ - `min(Retry-After, retry_after_cap_s)` when a hint is present and
249
+ `respect_retry_after = True`,
250
+ - `min(min_backoff_s × backoff_scale ^ rung, retry_after_cap_s)` otherwise,
251
+ - no sleep when `rate_limit_backoff_s = 0`.
252
+ - Rate-limit warning dicts now include: `retry_after_s`, `sleep_s`,
253
+ `error_sample`, `limit_type`, `event_number`, `rung_index`.
254
+ - **`codedoc/core/loader.py`**: four new `DEFAULTS` keys:
255
+ `rate_limit_backoff_s`, `rate_limit_backoff_scale`, `rate_limit_signals_add`,
256
+ `rate_limit_signals_remove`.
257
+ - **`codedoc/cli/cli.py`**: compact rate-limit summary line printed only when
258
+ step-down events occurred; shows event count, providers, and total sleep time.
259
+
260
+ #### Version
261
+
262
+ - `codedoc/__init__.py`, `pyproject.toml`, `cli.py`: `0.8.0` → `0.8.1`.
263
+
264
+ #### Validation
265
+
266
+ - Added regression coverage for lossless Markdown regeneration, placeholder
267
+ sanitization, configurable defaults, provider-aware rate-limit backoff, and
268
+ rate-limit edge cases.
269
+ - Full test suite passes.
270
+ - Built sdist/wheel and verified release metadata with `twine check`.
271
+
272
+ ---
273
+
3
274
  ## 0.8.0 - 2026-05-31
4
275
 
5
276
  ### Always-on live JSON crash backup, parallel crash-safety, rate-limit adaptive parallelism, error.log overhaul
@@ -6,6 +6,7 @@ include CONTRIBUTING.md
6
6
  include SECURITY.md
7
7
  include CODE_OF_CONDUCT.md
8
8
  include .env.example
9
+ recursive-include codedoc/templates *.yml
9
10
  recursive-include tests *.py
10
11
  recursive-include tests/fixtures *
11
12
  recursive-include .github *.md
@@ -1,14 +1,20 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codedoc-ai
3
- Version: 0.8.0
3
+ Version: 0.9.2
4
4
  Summary: Generate structured, incremental documentation for any codebase using OpenAI, Anthropic, or Gemini
5
5
  Author: Atharv Mannur
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/atharvm416/codedoc-ai
8
+ Project-URL: PyPI, https://pypi.org/project/codedoc-ai/
9
+ Project-URL: Documentation, https://github.com/atharvm416/codedoc-ai#readme
10
+ Project-URL: Source, https://github.com/atharvm416/codedoc-ai
8
11
  Project-URL: Issues, https://github.com/atharvm416/codedoc-ai/issues
9
- Keywords: documentation,ai,llm,codebase,agents,codegen
12
+ Project-URL: Changelog, https://github.com/atharvm416/codedoc-ai/blob/main/CHANGELOG.md
13
+ Keywords: ai,anthropic,cli,code-analysis,codebase,developer-tools,documentation,gemini,llm,openai,python
10
14
  Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Environment :: Console
11
16
  Classifier: Intended Audience :: Developers
17
+ Classifier: Operating System :: OS Independent
12
18
  Classifier: Programming Language :: Python :: 3
13
19
  Classifier: Programming Language :: Python :: 3.9
14
20
  Classifier: Programming Language :: Python :: 3.10
@@ -16,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.11
16
22
  Classifier: Programming Language :: Python :: 3.12
17
23
  Classifier: Topic :: Software Development :: Documentation
18
24
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Classifier: Topic :: Utilities
19
26
  Requires-Python: >=3.9
20
27
  Description-Content-Type: text/markdown
21
28
  License-File: LICENSE
@@ -40,7 +47,7 @@ Dynamic: license-file
40
47
 
41
48
  The tool scans source files, resolves project-local imports into a dependency graph, sends only files that need analysis to an LLM, and writes one combined, structured documentation artifact designed for both humans and AI. By default that artifact is JSON.
42
49
 
43
- Current release: `0.8.0`.
50
+ Current release: `0.9.2`.
44
51
 
45
52
  ## What It Does
46
53
 
@@ -61,6 +68,9 @@ Current release: `0.8.0`.
61
68
  - Survives interruptions: writes a live JSON backup before any AI work starts, then updates it after every completed file. A Ctrl-C or crash always leaves a readable partial output file — no results are lost, and re-running the same command resumes automatically from where it stopped.
62
69
  - Adaptive rate-limit parallelism: when a provider signals 429 / rate-limit, file concurrency is stepped down (`5 → 2 → 1`) and a provider-specific warning is printed to the terminal. No manual intervention needed.
63
70
  - Refuses to overwrite any file it did not create (ownership guard), protecting your data from accidental output collisions.
71
+ - Provides a filesystem-read-only `--dry-run` with approximate lower-bound call and token estimates.
72
+ - Supports a pre-call `--max-files` cap and repeatable `--force-files` reprocessing.
73
+ - Reports stable CI-oriented exit codes and optional `--allow-partial` behavior.
64
74
  - Writes a clean, structured public project view to `codedoc/codedoc.json` by default, or Markdown when requested.
65
75
  - Public output includes project overview, file tree, folder map, dependency graph, dependency catalog, and flattened file summaries.
66
76
  - Converts public JSON to Markdown without another AI call.
@@ -91,6 +101,11 @@ codedoc run
91
101
  | Live JSON backup | always on (0.8.0 default) |
92
102
  | Rate-limit adaptive | `true` |
93
103
  | Max file size | `500 KB` |
104
+ | Max content chars | `12000` |
105
+ | Dry run | `false` |
106
+ | Maximum paid files | `0` (unlimited) |
107
+ | Forced files | `[]` |
108
+ | Allow partial output | `false` |
94
109
 
95
110
  Because the default provider uses the OpenAI API, a user must supply an API key unless they select a different provider.
96
111
 
@@ -205,7 +220,10 @@ Common commands:
205
220
  | `codedoc run --provider gemini --model gemini-2.5-flash` | Use Google Gemini. |
206
221
  | `codedoc run --provider anthropic --model claude-haiku-4-5-20251001` | Use Anthropic Claude. |
207
222
  | `codedoc run --ignore /myenv --ignore generated` | Ignore project paths. |
208
- | `codedoc run --safe-mode` | Deprecated (live backup is always on since 0.8.0). |
223
+ | `codedoc run --dry-run --max-files 25` | Inspect the plan without writes, provider creation, or API calls. |
224
+ | `codedoc run --max-files 25` | Stop before mutation or API calls if more than 25 files need LLM work. |
225
+ | `codedoc run --force-files src/a.py --force-files src/b.py` | Explicitly reprocess selected files. |
226
+ | `codedoc run --allow-partial` | Exit 0 for completed partial runs, with a prominent warning. |
209
227
  | `codedoc run --max-parallel-files 3` | Limit concurrent file processing. |
210
228
  | `codedoc .` | Legacy shorthand for documenting the current directory. |
211
229
  | `codedoc --version` | Print the installed version. |
@@ -340,7 +358,47 @@ Create `codedoc.config.json` in the project being documented:
340
358
  "parallel_ladder": null,
341
359
  "respect_retry_after": true,
342
360
  "retry_after_cap_s": 30,
361
+ "rate_limit_backoff_s": null,
362
+ "rate_limit_backoff_scale": null,
363
+ "rate_limit_signals_add": [],
364
+ "rate_limit_signals_remove": [],
343
365
  "skip_dirs": ["myenv", ".venv", "venv", "env", "node_modules", "__pycache__", "codedoc"],
366
+ "skip_dirs_add": [],
367
+ "skip_dirs_remove": [],
368
+ "max_content_chars": 12000,
369
+ "extension_language_map": {
370
+ ".py": "python",
371
+ ".ts": "typescript",
372
+ ".tsx": "tsx",
373
+ ".js": "javascript",
374
+ ".jsx": "jsx",
375
+ ".dart": "dart",
376
+ ".java": "java",
377
+ ".cs": "csharp",
378
+ ".html": "html",
379
+ ".htm": "html",
380
+ ".kt": "kotlin",
381
+ ".swift": "swift",
382
+ ".go": "go",
383
+ ".rb": "ruby",
384
+ ".rs": "rust",
385
+ ".cpp": "cpp",
386
+ ".c": "c",
387
+ ".h": "c",
388
+ ".hpp": "cpp"
389
+ },
390
+ "extension_language_map_add": {},
391
+ "extension_language_map_remove": [],
392
+ "auto_entry_candidates": ["index.html", "main.tsx", "main.ts", "main.js", "main.py", "main.dart", "Main.java", "Program.cs"],
393
+ "auto_entry_candidates_add": [],
394
+ "auto_entry_candidates_remove": [],
395
+ "provider_prefixes": {
396
+ "anthropic": ["claude"],
397
+ "gemini": ["gemini"],
398
+ "openai": ["gpt-", "o1", "o3", "text-"]
399
+ },
400
+ "provider_prefixes_add": {},
401
+ "provider_prefixes_remove": {},
344
402
  "ignore_paths": ["/myenv", "services/generated"]
345
403
  }
346
404
  ```
@@ -369,6 +427,30 @@ Parallelism settings:
369
427
  | `file_retry_attempts` | Number of sequential retries for a failed file. Default: `1`. |
370
428
  | `max_consecutive_failures` | Stops the run after repeated failures so provider/API problems are visible quickly. Default: `5`. |
371
429
 
430
+ Configurable defaults added in 0.8.1:
431
+
432
+ | Setting | Purpose |
433
+ | --- | --- |
434
+ | `skip_dirs`, `skip_dirs_add`, `skip_dirs_remove` | Replace, extend, or reduce directory names skipped anywhere in the tree. Use `--remove-skip-dir codedoc` to document this package source while codedoc still skips its output directory. |
435
+ | `extension_language_map`, `extension_language_map_add`, `extension_language_map_remove` | Control which extensions are scanned and what language label each gets. Any extension in the resolved map is supported. |
436
+ | `auto_entry_candidates`, `auto_entry_candidates_add`, `auto_entry_candidates_remove` | Control first-run entry auto-detection when `--entry` is omitted. |
437
+ | `provider_prefixes`, `provider_prefixes_add`, `provider_prefixes_remove` | Control model-name based provider auto-detection and matching API-key lookup. |
438
+
439
+ Configurable settings added in 0.9.0:
440
+
441
+ | Setting | Default | Purpose |
442
+ | --- | --- | --- |
443
+ | `max_content_chars` | `12000` | Maximum characters sent to the LLM per file. Longer files are truncated once, a single WARNING reports the path and the original and truncated character counts, and the truncation marker fits within this limit. Must be at least `1000`. |
444
+
445
+ Planning and CI settings added in 0.9.2:
446
+
447
+ | Setting | Default | Purpose |
448
+ | --- | --- | --- |
449
+ | `dry_run` | `false` | Compute the real routing plan without filesystem mutation or provider/API interaction. |
450
+ | `max_files` | `0` | Maximum files allowed to make LLM calls after reuse and resume decisions. `0` is unlimited. |
451
+ | `force_files` | `[]` | Selected project paths to reprocess explicitly before dependency propagation. |
452
+ | `allow_partial` | `false` | When enabled, exit 0 for a run that produced partial output after file failures, but only if the run completed. |
453
+
372
454
  ## Environment Variables
373
455
 
374
456
  Secrets should live in environment variables or a local `.env` file that is ignored by Git. Use [.env.example](.env.example) as the template.
@@ -393,6 +475,11 @@ Supported variables:
393
475
  | `CODEDOC_MAX_CONSECUTIVE_FAILURES` | Consecutive failure threshold before stopping. |
394
476
  | `LOG_LEVEL` | `INFO`, `DEBUG`, etc. |
395
477
  | `CODEDOC_IGNORE_PATHS` | Semicolon-separated ignore paths. |
478
+ | `CODEDOC_MAX_CONTENT_CHARS` | Maximum characters of file content sent to the LLM. Equivalent to `max_content_chars` in config. |
479
+ | `CODEDOC_DRY_RUN` | Boolean planning-only mode. |
480
+ | `CODEDOC_MAX_FILES` | Non-negative paid-file cap; `0` is unlimited. |
481
+ | `CODEDOC_FORCE_FILES` | Semicolon-separated forced project paths. |
482
+ | `CODEDOC_ALLOW_PARTIAL` | Boolean partial-output exit-code override. |
396
483
 
397
484
  Example `.env` for OpenAI:
398
485
 
@@ -582,14 +669,13 @@ On each run, `codedoc` follows this process:
582
669
  3. Scan supported files while respecting `skip_dirs` and `ignore_paths`.
583
670
  4. Build a dependency graph from parsed imports.
584
671
  5. Select files reachable from the entry point.
585
- 6. Compute each selected file's SHA-256 hash.
586
- 7. Skip files whose path and hash already match the existing output.
587
- 8. Reuse existing documentation if another file has the same content hash.
588
- 9. If `propagate_changes` is true, reprocess files that depend on changed files.
589
- 10. Send only remaining files to the selected LLM, up to `max_parallel_files` at a time.
590
- 11. Retry failed parallel files sequentially so errors are easier to diagnose.
591
- 12. Stop early if repeated failures suggest the API or provider is unavailable.
592
- 13. Rebuild the selected output file from processed records, embedding metadata for the next run.
672
+ 6. Normalize forced paths and add valid forced files before dependency propagation.
673
+ 7. Compute one immutable plan covering changed, unchanged, reused, resumed, and paid-agent files.
674
+ 8. In `--dry-run`, return that plan and approximate lower-bound usage without writing to disk or creating a provider.
675
+ 9. In a real run, enforce ownership and `max_files` before creating directories, writers, logs, or providers.
676
+ 10. Materialize identical-content and checkpoint reuse exactly as planned.
677
+ 11. Send only paid-agent files to the LLM, retry failures, and write final output.
678
+ 12. Report actual call attempts and approximate input/output token totals.
593
679
 
594
680
  This means repeated runs should only send new or changed code to the LLM. Unchanged code and exact duplicate content are reused.
595
681
 
@@ -634,7 +720,7 @@ with the final clean output.
634
720
  and now has no effect — live backup is always on. Passing it prints a deprecation
635
721
  notice. It will be removed in a future release.
636
722
 
637
- ### Adaptive rate-limit parallelism (0.8.0)
723
+ ### Adaptive rate-limit parallelism (0.8.1)
638
724
 
639
725
  When a provider signals 429 / rate-limit / quota-exceeded, codedoc automatically
640
726
  steps down file-level concurrency instead of hammering the API:
@@ -656,9 +742,34 @@ Customize it in config:
656
742
  }
657
743
  ```
658
744
 
659
- Provider-specific rate-limit signals are recognised for OpenAI (`429`,
660
- `rate_limit_exceeded`, `tpm`), Anthropic (`529`, `overloaded`), and Gemini
661
- (`RESOURCE_EXHAUSTED`, `quota`). Non-rate-limit errors never trigger a step-down.
745
+ Provider-specific rate-limit signals are recognised for OpenAI (`429`, `rate limit`,
746
+ `rate_limit`, `too many requests`, `tokens per min`, `tpm`, `quota`), Anthropic
747
+ (`529`, `overloaded`, `rate_limit`, `429`), and Gemini (`resource_exhausted`,
748
+ `quota`, `429`, `503`). Non-rate-limit errors never trigger a step-down.
749
+
750
+ In 0.8.1, codedoc sleeps between parallel step-down rungs using provider-aware
751
+ backoff. You can tune this in config:
752
+
753
+ ```json
754
+ {
755
+ "rate_limit_backoff_s": null,
756
+ "rate_limit_backoff_scale": null,
757
+ "rate_limit_signals_add": ["capacity exceeded", "throttled"],
758
+ "rate_limit_signals_remove": ["503"]
759
+ }
760
+ ```
761
+
762
+ Set `rate_limit_backoff_s` to `0` to disable computed inter-rung backoff.
763
+ `Retry-After` hints are still honored when `respect_retry_after` is true.
764
+
765
+ ### Lossless Markdown regeneration (0.8.1)
766
+
767
+ Markdown output remains human-readable, but codedoc now embeds a hidden
768
+ base64-encoded public JSON view in a `<!-- codedoc-ai-view-base64 ... -->`
769
+ comment. This lets later Markdown-to-JSON conversion and incremental re-runs
770
+ recover dependency catalogs, per-file dependency metadata, links, and hashes
771
+ without another LLM call. Legacy Markdown without the embedded view still uses
772
+ the best-effort visible Markdown parser.
662
773
 
663
774
  ### Issue log (`error.log`)
664
775
 
@@ -676,11 +787,64 @@ Only hard file failures are surfaced there.
676
787
 
677
788
  ### Ownership guard
678
789
 
679
- Before writing, `codedoc` checks that any existing file at the target path was
680
- produced by codedoc (a `_codedoc` metadata block in JSON, or a `<!-- codedoc-ai: -->`
681
- comment in Markdown). If the file is foreign, malformed, or empty, the run stops
682
- with a clear `ConfigError` instead of overwriting it. Choose a different
683
- `--output` directory or remove the conflicting file to proceed.
790
+ `codedoc` checks that any existing file at the target path was produced by
791
+ codedoc (a `_codedoc` metadata block in JSON, or a `<!-- codedoc-ai: -->` comment
792
+ in Markdown). If the file is foreign, malformed, or empty, the run stops with a
793
+ clear `ConfigError`. Choose a different `--output` directory or remove the
794
+ conflicting file to proceed.
795
+
796
+ **Preflight (0.9.0).** The ownership check now runs *before* any filesystem
797
+ changes, directory creation, scanning, or LLM calls. A foreign target that would
798
+ block the final write is caught immediately — no tokens are spent and no output
799
+ directory is created.
800
+
801
+ ## Planning, Cost Guardrails, and CI
802
+
803
+ Use `codedoc run --dry-run --max-files 25` to inspect a run safely. Dry-run
804
+ uses the same routing plan as real execution. It may read source, existing
805
+ outputs, live backups, and legacy checkpoints, but it does not create an output
806
+ directory, write `error.log`, initialize `SafeWriter`, create a provider, or
807
+ call an API. It works without an API key.
808
+
809
+ Token figures use a simple character heuristic. Dry-run input totals are
810
+ explicitly lower bounds because the documentation prompt includes earlier
811
+ agent responses that do not exist during planning. No monetary estimate is
812
+ provided.
813
+
814
+ `--max-files N` counts only files that would actually make LLM calls after
815
+ unchanged-file skipping, identical-content reuse, and eligible checkpoint reuse. A
816
+ real run exceeding the cap exits `2` before persistent mutation or provider
817
+ creation. Dry-run still exits `0` and reports that the equivalent real run
818
+ would fail.
819
+
820
+ Force selected files with repeatable options:
821
+
822
+ ```bash
823
+ codedoc run --force-files src/a.py --force-files src/b.py
824
+ ```
825
+
826
+ Explicitly forced files bypass unchanged-file, identical-content, and checkpoint
827
+ reuse. They are added before normal dependency propagation; propagated
828
+ dependents retain normal reuse behavior.
829
+
830
+ CLI exit codes:
831
+
832
+ | Code | Meaning |
833
+ | --- | --- |
834
+ | `0` | Success, dry-run success, or explicitly allowed partial output. |
835
+ | `1` | File-processing failure, output/write failure, or unexpected fatal error. |
836
+ | `2` | Invalid input/config/path, ownership conflict, cap exceeded, or provider initialization failure. |
837
+ | `130` | Keyboard interrupt. |
838
+
839
+ `--allow-partial` changes only the exit code of completed runs with file-level failures. Setup,
840
+ ownership, cap, provider initialization, write, and unexpected fatal errors
841
+ remain nonzero.
842
+
843
+ A packaged manual-only GitHub Actions example is installed at
844
+ `codedoc/templates/github-actions-codedoc.yml`. It performs a dry-run before
845
+ the paid run, applies the same cap to both, uploads documentation as an
846
+ artifact, uses `contents: read`, and never commits or pushes. Selected source
847
+ is sent to an external provider and API usage may cost money.
684
848
 
685
849
  ### More detail
686
850
 
@@ -757,7 +921,10 @@ CLI flags map directly to config keys:
757
921
  | `--output` | `output_dir` |
758
922
  | `--format` | `output_format` |
759
923
  | `--ignore` | `ignore_paths` |
760
- | `--safe-mode` | `safe_mode: True` |
924
+ | `--dry-run` | `dry_run: True` |
925
+ | `--max-files` | `max_files` |
926
+ | `--force-files` | `force_files` |
927
+ | `--allow-partial` | `allow_partial: True` |
761
928
  | `--no-parallel` | `parallel_agents: False` |
762
929
  | `--max-parallel-files` | `max_parallel_files` |
763
930
  | `--verbose` | `log_level: "DEBUG"` |