codedoc-ai 0.7.1__tar.gz → 0.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. codedoc_ai-0.9.1/CHANGELOG.md +800 -0
  2. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/MANIFEST.in +1 -0
  3. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/PKG-INFO +214 -12
  4. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/README.md +841 -639
  5. codedoc_ai-0.9.1/RUN_FLOW.md +665 -0
  6. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/__init__.py +18 -18
  7. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/agents/base_agent.py +11 -11
  8. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/agents/dependency_agent.py +36 -36
  9. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/agents/documentation_agent.py +10 -10
  10. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/agents/orchestrator.py +48 -8
  11. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/agents/structure_agent.py +10 -10
  12. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/cli/cli.py +116 -19
  13. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/core/__init__.py +10 -0
  14. codedoc_ai-0.9.1/codedoc/core/checkpoint.py +210 -0
  15. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/core/graph.py +9 -2
  16. codedoc_ai-0.9.1/codedoc/core/loader.py +618 -0
  17. codedoc_ai-0.9.1/codedoc/core/output.py +209 -0
  18. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/core/project_view.py +268 -14
  19. codedoc_ai-0.9.1/codedoc/core/safe_writer.py +393 -0
  20. codedoc_ai-0.9.1/codedoc/core/scanner.py +270 -0
  21. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/llm/factory.py +63 -29
  22. codedoc_ai-0.9.1/codedoc/llm/rate_limit_profile.py +192 -0
  23. codedoc_ai-0.9.1/codedoc/parser/generic_parser.py +241 -0
  24. codedoc_ai-0.9.1/codedoc/pipeline.py +1475 -0
  25. codedoc_ai-0.9.1/codedoc/utils/errors.py +186 -0
  26. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/utils/logger.py +58 -47
  27. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc_ai.egg-info/PKG-INFO +214 -12
  28. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc_ai.egg-info/SOURCES.txt +11 -0
  29. codedoc_ai-0.9.1/codedoc_ai.egg-info/requires.txt +14 -0
  30. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/pyproject.toml +66 -60
  31. codedoc_ai-0.9.1/tests/conftest.py +30 -0
  32. codedoc_ai-0.9.1/tests/test_080_features.py +1188 -0
  33. codedoc_ai-0.9.1/tests/test_081_configurable_defaults.py +704 -0
  34. codedoc_ai-0.9.1/tests/test_081_lossless_md.py +1051 -0
  35. codedoc_ai-0.9.1/tests/test_081_placeholder.py +500 -0
  36. codedoc_ai-0.9.1/tests/test_081_rate_limit_profiles.py +848 -0
  37. codedoc_ai-0.9.1/tests/test_090_features.py +483 -0
  38. codedoc_ai-0.9.1/tests/test_graph.py +158 -0
  39. codedoc_ai-0.9.1/tests/test_parser.py +310 -0
  40. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/test_pipeline.py +293 -24
  41. codedoc_ai-0.9.1/tests/test_scanner.py +141 -0
  42. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/test_scenarios.py +955 -798
  43. codedoc_ai-0.7.1/CHANGELOG.md +0 -223
  44. codedoc_ai-0.7.1/codedoc/core/loader.py +0 -282
  45. codedoc_ai-0.7.1/codedoc/core/output.py +0 -103
  46. codedoc_ai-0.7.1/codedoc/core/scanner.py +0 -192
  47. codedoc_ai-0.7.1/codedoc/parser/generic_parser.py +0 -88
  48. codedoc_ai-0.7.1/codedoc/pipeline.py +0 -768
  49. codedoc_ai-0.7.1/codedoc/utils/errors.py +0 -112
  50. codedoc_ai-0.7.1/codedoc_ai.egg-info/requires.txt +0 -14
  51. codedoc_ai-0.7.1/tests/test_graph.py +0 -83
  52. codedoc_ai-0.7.1/tests/test_parser.py +0 -96
  53. codedoc_ai-0.7.1/tests/test_scanner.py +0 -69
  54. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/.env.example +0 -0
  55. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  56. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  57. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  58. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/CODE_OF_CONDUCT.md +0 -0
  59. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/CONTRIBUTING.md +0 -0
  60. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/LICENSE +0 -0
  61. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/SECURITY.md +0 -0
  62. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/__main__.py +0 -0
  63. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/agents/__init__.py +0 -0
  64. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/bootstrap.py +0 -0
  65. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/cli/__init__.py +0 -0
  66. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/core/db.py +0 -0
  67. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/core/queue.py +0 -0
  68. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/llm/__init__.py +0 -0
  69. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/llm/api_provider.py +0 -0
  70. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/llm/base.py +0 -0
  71. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/llm/local_provider.py +0 -0
  72. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/parser/__init__.py +0 -0
  73. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/parser/factory.py +0 -0
  74. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/parser/python_parser.py +0 -0
  75. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/parser/react_parser.py +0 -0
  76. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc/utils/__init__.py +0 -0
  77. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc_ai.egg-info/dependency_links.txt +0 -0
  78. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc_ai.egg-info/entry_points.txt +0 -0
  79. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/codedoc_ai.egg-info/top_level.txt +0 -0
  80. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/setup.cfg +0 -0
  81. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/__init__.py +0 -0
  82. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/flutter_app/app.dart +0 -0
  83. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/flutter_app/main.dart +0 -0
  84. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/java_app/Main.java +0 -0
  85. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/java_app/Service.java +0 -0
  86. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/python_app/main.py +0 -0
  87. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/python_app/models.py +0 -0
  88. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/python_app/utils.py +0 -0
  89. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/react_app/App.tsx +0 -0
  90. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/react_app/index.html +0 -0
  91. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/react_app/main.tsx +0 -0
  92. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/react_app/router.tsx +0 -0
  93. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/fixtures/react_sample.tsx +0 -0
  94. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/test_agents.py +0 -0
  95. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/test_llm_mock.py +0 -0
  96. {codedoc_ai-0.7.1 → codedoc_ai-0.9.1}/tests/test_queue.py +0 -0
@@ -0,0 +1,800 @@
1
+ # Changelog
2
+
3
+ ## 0.9.1 - 2026-06-08
4
+
5
+ ### Bug-fix stabilization patch (first PyPI release)
6
+
7
+ Corrective-only patch. No new features or output-shape changes.
8
+
9
+ - **A1 — entry-reachability is no longer silent.** Previously, when an entry was given,
10
+ files not reachable from it were dropped without notice. `_select_files` now
11
+ logs a clear WARNING listing the excluded files, records `stats["entry_excluded"]`,
12
+ and the CLI prints an excluded-files line. (The structural selection fix is
13
+ tracked for a later minor release; this patch only removes the silent failure.)
14
+ - **A2 — a wrong `--entry` no longer silently documents the whole repo.** An
15
+ explicitly specified entry that cannot be resolved, is not in the scanned set,
16
+ resolves outside the project root, or is given when **no** supported files are
17
+ scanned, now raises `ConfigError` instead of falling back to all files or
18
+ exiting successfully. Auto-detection with no entry still documents everything.
19
+ - **A3 — parser false imports fixed.** The Go parser no longer treats arbitrary
20
+ string literals (e.g. `fmt.Println("hi")`) as imports — only string-literal
21
+ paths in `import "..."` statements and `import ( ... )` blocks are read,
22
+ comments are ignored, and raw-string (backtick) paths are supported.
23
+ Interpreted literals use Go's byte-accurate escape semantics, including
24
+ multi-byte UTF-8 `\xNN` / octal sequences and Unicode escapes. The HTML parser
25
+ no longer treats CSS `<link href>` as a code import (kept `<script src>` and
26
+ JS imports).
27
+ - **A4 — no stale/empty record substituted for a real one.** In the parallel
28
+ batch, a rate-limited file was treated as "already recorded" using state that
29
+ also included records **preloaded** from a prior run, so a *changed* file could
30
+ be restored from stale documentation instead of retried. `SafeWriter` now
31
+ tracks records written *this run* (`recorded_this_run()`); a changed,
32
+ rate-limited file is retried, and a file genuinely recorded this run recovers
33
+ its real record via `get_record()` (never an empty `{}`).
34
+ - **A5 — honest interrupt message.** Removed dead code; the Ctrl-C message is now
35
+ conditional ("…if the run reached file processing") so it never falsely claims
36
+ progress was saved when interrupted before any file was processed.
37
+ - **A6 — scanner is re-entrant.** The directory walker no longer stores state on
38
+ the function object; state lives on a per-scan `_Walker` instance.
39
+ - **Version identity.** `pyproject.toml`, `codedoc.__version__`, the CLI
40
+ `--version`, and the README all report `0.9.1`, and an automated test
41
+ (`test_version_identity_consistent`) enforces agreement across **all four**,
42
+ including the README "Current release" line.
43
+ - **Reliable tests.** `tests/conftest.py` redirects the temp root into the repo
44
+ (`.pyt_tmp`) so a locked system temp dir does not make the suite unrunnable.
45
+ (This addresses the observed locked-system-temp failure; it is not a guarantee
46
+ for every environment.)
47
+
48
+ ## 0.9.0 - 2026-06-04
49
+
50
+ ### Output preflight safety, clean INFO logs, extension list fix, configurable content truncation
51
+
52
+ ---
53
+
54
+ #### G0 — Output Preflight Safety
55
+
56
+ Foreign output targets (existing files that codedoc did not create) now fail immediately with a `ConfigError` before the
57
+ scanner runs, the provider initialises, or any LLM API call is made. Previously
58
+ a foreign file at the target path would only be detected inside
59
+ `write_project_outputs`, after all tokens had already been spent.
60
+
61
+ - **`codedoc/core/output.py`**: Added `preflight_output_targets()` which calls
62
+ `_check_file_ownership()` for all final public targets (JSON, MD, both) and a
63
+ new `_check_md_live_backup_ownership()` for the MD live-backup JSON sibling.
64
+ - **`codedoc/pipeline.py`**: Calls `preflight_output_targets()` immediately after
65
+ output spec resolution, before `scan_files()` and `create_provider()`.
66
+ - **`codedoc/core/loader.py`**: `_resolve_output_spec()` now only emits the
67
+ format-conflict warning when `--format` was explicitly passed by the user (not
68
+ when the default `"json"` value from DEFAULTS triggers a mismatch).
69
+
70
+ #### G1 — Clean Log Output
71
+
72
+ Third-party HTTP libraries (`httpx`, `httpcore`, `openai`, `anthropic`,
73
+ `google.auth`) are now limited to WARNING and above by default. At `--verbose` /
74
+ DEBUG the HTTP diagnostics are restored. Per-agent progress lines appear at INFO
75
+ so users can see what codedoc is doing at each step.
76
+
77
+ - **`codedoc/utils/logger.py`**: `_NOISY_LOGGERS` constant defines the list;
78
+ `_configure()` sets those loggers to WARNING; `set_level()` lowers them to
79
+ DEBUG when the root logger is set to DEBUG.
80
+ - **`codedoc/agents/orchestrator.py`**: Added timing via `time.monotonic()` and
81
+ INFO/WARNING log lines after each agent: `[FILE] path | structure ok 0.8s`,
82
+ `[FILE] path | dependencies ok 0.9s`, `[FILE] path | documentation ok 1.2s`.
83
+ Fallbacks emit WARNING with `"fallback"` in the message.
84
+
85
+ #### G5 — Extension List Consistency
86
+
87
+ `_candidate_variants()` in `graph.py` previously used a hardcoded 9-extension list that
88
+ was out of sync with `_KNOWN_EXTENSIONS` and `DEFAULTS["extension_language_map"]`.
89
+ Import resolution for Go, Kotlin, Swift, Rust, Ruby, and C-family files silently
90
+ produced no candidates.
91
+
92
+ - **`codedoc/core/graph.py`**: `_KNOWN_EXTENSIONS` expanded to all 19 extensions
93
+ in `DEFAULTS["extension_language_map"]`. `_candidate_variants()` now uses
94
+ `sorted(_KNOWN_EXTENSIONS)` instead of a separate hardcoded list. A comment
95
+ notes the sync requirement with `loader.py`.
96
+
97
+ #### G6 — Configurable Content Truncation
98
+
99
+ Previously, files above 12,000 characters were silently truncated with a DEBUG-only log.
100
+ Users saw degraded documentation for large files with no indication why.
101
+
102
+ - **`codedoc/core/loader.py`**: `max_content_chars` added to `DEFAULTS` (12000)
103
+ and `_ENV_KEY_MAP` (`CODEDOC_MAX_CONTENT_CHARS`). Validation requires an
104
+ integer ≥ 1000.
105
+ - **`codedoc/agents/base_agent.py`**: Removed module-level `_MAX_CONTENT_CHARS`
106
+ constant. `BaseAgent.__init__` now accepts `max_content_chars: int = 12000`.
107
+ `_truncate()` uses `self._max_content_chars` and logs at INFO with the file
108
+ path and original / truncated character counts.
109
+ - **`codedoc/agents/orchestrator.py`**: `Orchestrator.__init__` accepts
110
+ `max_content_chars: int = 12000` and forwards it to each agent.
111
+ - **`codedoc/pipeline.py`**: Passes `config.get("max_content_chars", 12000)` to
112
+ the `Orchestrator` constructor.
113
+ - All three agent subclasses pass `file_path` to `_truncate()` for accurate logs.
114
+
115
+ ---
116
+
117
+ ## 0.8.1 - 2026-06-02
118
+
119
+ ### Lossless Markdown, placeholder sanitization, configurable defaults, provider-aware rate-limit backoff
120
+
121
+ ---
122
+
123
+ #### Workstream A — Lossless Markdown View
124
+
125
+ Markdown output now embeds the complete public JSON view as a hidden base64
126
+ comment so `json_from_markdown()` (and incremental re-runs that read a `.md`
127
+ file) recover the full dependency catalog, per-file hashes, and all dependency
128
+ metadata without any information loss.
129
+
130
+ - **`codedoc/core/project_view.py`**:
131
+ - `markdown_from_view()` writes a `<!-- codedoc-ai-view-base64 ... -->` block
132
+ immediately after the legacy `<!-- codedoc-ai: ... -->` metadata comment.
133
+ The block is standard base64-encoded UTF-8 JSON, which avoids comment-safety
134
+ issues with raw `--` or `-->` sequences in generated text.
135
+ - `markdown_to_view()` now tries the embedded view first (fast, lossless path);
136
+ falls back to the existing visible Markdown parser for pre-0.8.1 files.
137
+ - New public helper `read_embedded_view(markdown)` decodes and validates the
138
+ embedded block; returns `None` on any failure so callers fall back safely.
139
+ - `read_codedoc_meta()` no longer raises `ConfigError` when `entry_file` is
140
+ `null`; a valid CodeDoc file with no entry point is now correctly identified
141
+ as owned rather than foreign.
142
+ - **`codedoc/pipeline.py`**:
143
+ - `_load_existing_file_docs_from_md()` preserves file hashes from the embedded
144
+ view when the lightweight metadata comment has no hash for a path.
145
+ - `_resolve_entry_and_docs()` no longer raises unconditionally when no existing
146
+ output is found; first runs without `--entry` now reach `detect_entry_file()`
147
+ for auto-detection instead of failing immediately.
148
+
149
+ #### Workstream B — Placeholder Usage Example Sanitization
150
+
151
+ LLM-generated usage examples that contain placeholder package names (e.g.
152
+ `import 'package:your_package/...'`) are now removed before any output is
153
+ written or cached.
154
+
155
+ - **`codedoc/core/project_view.py`**: `_clean_file()` calls the new
156
+ `_sanitize_usage_example()` helper, which checks against `_PLACEHOLDER_PATTERN`
157
+ (a compiled `re.IGNORECASE` regex with word-boundary guards). Covered
158
+ placeholders: `your_package_name`, `your_package`, `your_project`, `your_app`,
159
+ `example_package`, `my_package`, and Dart-style `package:example/`.
160
+ Sanitization is idempotent and applies to both freshly generated records and
161
+ cached/reused records loaded from prior output files.
162
+
163
+ #### Workstream C — Configurable Hardcoded Defaults
164
+
165
+ All previously hardcoded scanner and provider defaults are now driven by a
166
+ single source of truth in `DEFAULTS` (`loader.py`) and support `_add` / `_remove`
167
+ override keys.
168
+
169
+ - **`codedoc/core/loader.py`**:
170
+ - `DEFAULTS` gains eleven new keys: `skip_dirs_add`, `skip_dirs_remove`,
171
+ `extension_language_map` (full 18-entry map), `extension_language_map_add`,
172
+ `extension_language_map_remove`, `auto_entry_candidates`,
173
+ `auto_entry_candidates_add`, `auto_entry_candidates_remove`,
174
+ `provider_prefixes`, `provider_prefixes_add`, `provider_prefixes_remove`.
175
+ - Three resolver helpers implement the resolution order (replace → `_add` →
176
+ `_remove`): `_resolve_list_override`, `_resolve_dict_override`,
177
+ `_resolve_nested_list_dict_override`.
178
+ - `_apply_config_overrides()` is called after all config sources are merged;
179
+ it resolves all four configurable keys and derives `supported_extensions`
180
+ from the resolved `extension_language_map`.
181
+ - Backward-compat bridge: if `supported_extensions` was explicitly set to a
182
+ value different from the defaults, it is used as a filter on
183
+ `extension_language_map` so old configs continue to restrict scanning as
184
+ intended.
185
+ - **`codedoc/core/scanner.py`**:
186
+ - Hardcoded `SKIP_DIRS` and `EXTENSION_LANGUAGE_MAP` removed.
187
+ - `scan_files()` receives `extension_language_map` (primary) instead of
188
+ `supported_extensions`. A positional-list guard handles legacy callers
189
+ that pass a list as the second argument.
190
+ - `detect_entry_file()` receives the resolved `auto_entry_candidates` list;
191
+ falls back to a module-level default for direct callers.
192
+ - **`codedoc/pipeline.py`**: passes `extension_language_map` and
193
+ `auto_entry_candidates` to the scanner; always appends the output directory
194
+ name to the scan skip list (even when the user removed it via
195
+ `--remove-skip-dir`) to prevent codedoc from documenting its own output.
196
+ - **`codedoc/cli/cli.py`**: three new flags: `--skip-dirs DIR [...]`,
197
+ `--add-skip-dir DIR` (repeatable), `--remove-skip-dir DIR` (repeatable).
198
+ - **`codedoc/llm/factory.py`**: `create_provider()`, `_make_api()`,
199
+ `_resolve_api_provider()`, and `_provider_api_key()` all accept and use
200
+ `provider_prefixes` from config; module-level tuples kept as fallbacks.
201
+
202
+ #### Workstream D — Provider-Aware Rate-Limit Backoff
203
+
204
+ Parallel ladder step-downs now sleep between rungs using provider-aware
205
+ exponential backoff, with optional `Retry-After` hint parsing.
206
+
207
+ - **`codedoc/llm/rate_limit_profile.py`** *(new)*:
208
+ - `RateLimitProfile` dataclass — `provider`, `signals`, `min_backoff_s`,
209
+ `backoff_scale`.
210
+ - `PROVIDER_PROFILES` — preconfigured profiles for `openai`, `anthropic`,
211
+ `gemini`, and `default`.
212
+ - `get_rate_limit_profile(provider_name, config)` — returns the resolved
213
+ profile with `rate_limit_backoff_s`, `rate_limit_backoff_scale`,
214
+ `rate_limit_signals_add`, and `rate_limit_signals_remove` applied without
215
+ mutating module defaults.
216
+ - **`codedoc/pipeline.py`**:
217
+ - `_is_rate_limit_error(exc, profile=None)` — when a `profile` is supplied,
218
+ checks only `profile.signals`; falls back to `_RATE_LIMIT_SIGNALS` for
219
+ backward compatibility with callers without a profile.
220
+ - `_detect_limit_type(error_msg)` — classifies errors as `"tpm"`, `"rpm"`,
221
+ `"quota"`, `"overloaded"`, or `None`.
222
+ - `_process_descriptor_batch()` return type changed:
223
+ `retry_rate_limited` is now `list[tuple[dict, Exception]]` so the causing
224
+ exception is preserved for `Retry-After` parsing and error sampling.
225
+ - `_process_agent_files()`: fetches the provider profile, passes it to
226
+ `_process_descriptor_batch()`, and sleeps between rungs using:
227
+ - `min(Retry-After, retry_after_cap_s)` when a hint is present and
228
+ `respect_retry_after = True`,
229
+ - `min(min_backoff_s × backoff_scale ^ rung, retry_after_cap_s)` otherwise,
230
+ - no sleep when `rate_limit_backoff_s = 0`.
231
+ - Rate-limit warning dicts now include: `retry_after_s`, `sleep_s`,
232
+ `error_sample`, `limit_type`, `event_number`, `rung_index`.
233
+ - **`codedoc/core/loader.py`**: four new `DEFAULTS` keys:
234
+ `rate_limit_backoff_s`, `rate_limit_backoff_scale`, `rate_limit_signals_add`,
235
+ `rate_limit_signals_remove`.
236
+ - **`codedoc/cli/cli.py`**: compact rate-limit summary line printed only when
237
+ step-down events occurred; shows event count, providers, and total sleep time.
238
+
239
+ #### Version
240
+
241
+ - `codedoc/__init__.py`, `pyproject.toml`, `cli.py`: `0.8.0` → `0.8.1`.
242
+
243
+ #### Validation
244
+
245
+ - Added regression coverage for lossless Markdown regeneration, placeholder
246
+ sanitization, configurable defaults, provider-aware rate-limit backoff, and
247
+ rate-limit edge cases.
248
+ - Full test suite passes.
249
+ - Built sdist/wheel and verified release metadata with `twine check`.
250
+
251
+ ---
252
+
253
+ ## 0.8.0 - 2026-05-31
254
+
255
+ ### Always-on live JSON crash backup, parallel crash-safety, rate-limit adaptive parallelism, error.log overhaul
256
+
257
+ 0.8.0 closes all known crash-safety and output-safety gaps end to end.
258
+
259
+ ---
260
+
261
+ #### Work Item 1 — Always-on live JSON backup (replaces hidden checkpoint)
262
+
263
+ Every run now writes a visible live JSON backup that is updated after each completed file.
264
+ `--safe-mode` is deprecated and kept only for backwards compatibility — it now prints a
265
+ deprecation notice and has no additional effect.
266
+
267
+ - **`codedoc/core/safe_writer.py`** (overhauled): `SafeWriter` is now the default recorder.
268
+ Constructor now accepts a pre-computed `backup_path: Path` directly. The live backup
269
+ always starts with a `_crash_safety` banner as the first JSON key so interrupted files are
270
+ immediately recognisable as crash-recovery backups. Three new methods:
271
+ `initialize_empty()` — writes the banner before any AI call;
272
+ `set_queue_order()` — controls the `files` array order (topological / queue order, not
273
+ alphabetical); `has_record()` — deduplication check for retry logic.
274
+ `delete()` removes the live backup for MD-only runs after a clean Markdown conversion.
275
+ If deletion fails (Windows file-lock) a warning is logged and the path is reported so the
276
+ user knows the leftover file is safe to remove manually.
277
+
278
+ - **`codedoc/pipeline.py`** — `_resolve_live_backup_path()` helper centralises all backup
279
+ path logic, including the named-MD sibling case (`--output docs/report.md` → live backup
280
+ at `docs/report.json`). `SafeWriter` is always created regardless of `--safe-mode`.
281
+ `initialize_empty()` is called before `create_provider()` so the backup exists even if
282
+ provider initialisation fails. The topological order is passed to `set_queue_order()`.
283
+ Old `.codedoc_progress.json` checkpoints are migrated on the first run that finds no live
284
+ backup, and the old checkpoint file is deleted afterwards. New stats keys returned:
285
+ `live_backup_path` (absolute path to live backup), `error_log` (absolute path, set when
286
+ any issue is recorded), `issues_recorded` (total count), `rate_limit_warnings` (list of
287
+ step-down events).
288
+
289
+ - **`codedoc/core/output.py`**: removed the intermediate `.codedoc_build.json` write for
290
+ `--format md` runs. Markdown is written directly from the in-memory view; crash safety
291
+ is provided by the live JSON backup. `BUILD_FILENAME` is kept only for reading/migrating
292
+ stale 0.7.x build files.
293
+
294
+ - **`codedoc/core/loader.py`**: updated `_load_existing_file_docs()` to accept
295
+ `live_backup_path` so the named-MD sibling (`report.json`) is probed before the default
296
+ `json_filename`.
297
+
298
+ #### Work Item 2 — Parallel crash-safety: record in worker thread
299
+
300
+ Previously a Ctrl-C or crash during parallel processing could discard a completed file's
301
+ result because `recorder.record()` was called in the main `as_completed` loop.
302
+
303
+ - **`codedoc/pipeline.py`** — `_process_and_record()` wrapper calls `recorder.record()`
304
+ inside the worker thread before returning, so a crash between worker completion and main
305
+ collection never loses a result. The main loop no longer calls `recorder.record()` in the
306
+ parallel path. `has_record()` is checked before adding a descriptor to the retry list so
307
+ a file that already recorded before batch cancellation is not submitted twice.
308
+
309
+ #### Work Item 3 — Adaptive parallelism on rate limits
310
+
311
+ When a provider signals 429 / rate-limit / too-many-requests, file concurrency is stepped
312
+ down through a ladder instead of hammering the API at the original concurrency.
313
+
314
+ - **`codedoc/pipeline.py`**:
315
+ - `_is_rate_limit_error()` — walks the full `__cause__`/`__context__` chain; covers
316
+ OpenAI (`429`, `rate_limit_exceeded`, `tpm`), Anthropic (`529`, `overloaded`), and
317
+ Gemini (`RESOURCE_EXHAUSTED`, `quota`).
318
+ - `_build_default_ladder()` — generates the step-down ladder for any
319
+ `max_parallel_files` value (e.g. `5 → [5, 2, 1]`, `10 → [10, 5, 1]`).
320
+ - `_process_descriptor_batch()` — processes one ladder level and classifies results as
321
+ succeeded / retry-rate-limited / failed-non-rate-limit.
322
+ - `_process_agent_files()` — iterates the ladder, collects step-down events into
323
+ `stats["rate_limit_warnings"]`, prints a provider-specific WARNING to stdout on each
324
+ step-down with the provider name and original `max_parallel_files` value.
325
+ - `_parse_retry_after()` — extracts `Retry-After` sleep delays from error messages;
326
+ applied in sequential mode too when `respect_retry_after = True`.
327
+ - **`codedoc/core/loader.py`**: added `rate_limit_adaptive`, `parallel_ladder`,
328
+ `respect_retry_after`, `retry_after_cap_s` to `DEFAULTS`; full `parallel_ladder`
329
+ validation in `_validate()` (strictly decreasing, clamped to `max_parallel_files`,
330
+ trailing `1` appended if missing).
331
+
332
+ #### Work Item 4 — `error.log` discoverability and `ErrorReporter` severity
333
+
334
+ - **`codedoc/utils/errors.py`**: `ErrorReporter.record()` gains a `level` parameter
335
+ (`"error"` / `"warning"`). `has_errors()` and `error_count()` count only error-level
336
+ entries. `has_issues()` and `issue_count()` count all entries. `summary()` returns `""`
337
+ for warning-only runs so recovered rate-limits never appear in the final `codedoc.json`
338
+ `errors` field or the Markdown `## Errors` section. Log header changed from `error(s)` to
339
+ `issue(s)`.
340
+ - **`codedoc/pipeline.py`**: `ErrorReporter` is now initialised with
341
+ `output_dir / "error.log"` instead of `root / "error.log"`. `stats["error_log"]` and
342
+ `stats["issues_recorded"]` are set on every return path (not only when `failed > 0`).
343
+ Rate-limit health-check notes are recorded as `level="warning"` so they appear in
344
+ `error.log` for diagnostics but do not surface as errors in the final output.
345
+ - **`codedoc/cli/cli.py`**: the error log path is always printed when
346
+ `stats["issues_recorded"] > 0`; message distinguishes "file(s) failed" from "issue(s)
347
+ recorded (all recovered)". Rate-limit step-down warnings are printed to stdout.
348
+ `--safe-mode` help updated to `[DEPRECATED]`.
349
+
350
+ #### Version
351
+
352
+ - `codedoc/__init__.py`, `pyproject.toml`, `cli.py`: `0.7.2` → `0.8.0`.
353
+
354
+ #### Tests
355
+
356
+ - `tests/test_scenarios.py`: updated 3 `SafeWriter` constructor calls to new `backup_path`
357
+ signature.
358
+ - `tests/test_080_features.py` *(new, 38 tests)*: covers live backup creation, banner
359
+ presence, queue order, parallel crash-safety, ownership guard, resume, hash-change
360
+ reprocess, checkpoint migration, rate-limit ladder, signal detector (OpenAI/Anthropic/
361
+ Gemini/false-positives/cause-chain), provider notifications, error.log location and stats,
362
+ deprecation notice, `--format both` behaviour, stats keys, ladder validation,
363
+ no-files early return, and warning exclusion from final output.
364
+
365
+ **All 163 tests pass** (125 existing + 38 new).
366
+
367
+ ---
368
+
369
+ **Behaviour on interrupt and resume (0.8.0 default — always-on live backup):**
370
+ 1. User runs `codedoc run --entry src/main.py` on a 100-file project.
371
+ 2. Before the first LLM call, `codedoc/codedoc.json` is created with a `_crash_safety`
372
+ banner and an empty `files` array.
373
+ 3. After every completed file, `codedoc/codedoc.json` is updated atomically (`.tmp` rename).
374
+ 4. Run is interrupted (Ctrl-C, crash) after 60 files. `codedoc/codedoc.json` contains 60
375
+ complete file records in topological order, clearly marked with `_crash_safety` as
376
+ partial output.
377
+ 5. User re-runs; `codedoc.json` is read (including in-progress entries), 60 unchanged files
378
+ are skipped, only the remaining 40 are sent to the LLM.
379
+ 6. On clean completion, `write_project_outputs` overwrites `codedoc.json` with a final
380
+ clean output (no `_crash_safety`, no `status = "in_progress"`).
381
+
382
+ **MD-only and named-MD runs:**
383
+ - `--format md`: live backup is `codedoc/codedoc.json`; removed automatically on clean
384
+ Markdown write. On interrupt, the JSON sibling remains as the resume source.
385
+ - `--output docs/report.md`: live backup is `docs/report.json` (sibling derived from the
386
+ Markdown stem); removed on clean success.
387
+
388
+ **Rate-limit step-down example:**
389
+ ```
390
+ [OpenAI] Rate limit detected - your configured max_parallel_files (5) has been
391
+ reduced to 2. Retrying 4 remaining file(s) at lower concurrency.
392
+ ```
393
+
394
+ ---
395
+
396
+ ## 0.7.2 - 2026-05-30
397
+
398
+ ### Added: incremental progress checkpoint + `--safe-mode` live output + MD intermediate + ownership guard
399
+
400
+ This release fully solves the data-loss-on-interrupt problem for every output format and run
401
+ mode. It also adds the first line of defence against codedoc accidentally overwriting files
402
+ it did not create.
403
+
404
+ ---
405
+
406
+ #### Checkpoint (always-on, default behaviour)
407
+
408
+ Reverses the 0.6.4 decision ("no per-file checkpoint writes during a run") by introducing a
409
+ lightweight, thread-safe checkpoint file that persists each file result to disk the moment it
410
+ completes, for all output formats (JSON, MD, and both).
411
+
412
+ - `codedoc/core/checkpoint.py` *(new)*: `Checkpoint` class — writes `.codedoc_progress.json`
413
+ to the output directory after every file. Writes are atomic: content is serialised to a
414
+ `.tmp` sibling first, then renamed into place so a crash mid-write never leaves a corrupt
415
+ file. Thread-safe via a per-instance lock; safe to call from parallel worker threads.
416
+ - `codedoc/core/__init__.py`: exported `Checkpoint` in `__all__` and the lazy `__getattr__`
417
+ dispatcher, consistent with all other public core exports.
418
+
419
+ #### `--safe-mode` (opt-in, visible partial output)
420
+
421
+ Adds a `--safe-mode` CLI flag and matching `safe_mode` config key / `CODEDOC_SAFE_MODE`
422
+ environment variable. When active, `Checkpoint` is replaced by `SafeWriter`, which writes
423
+ directly to the real output file after every completed file — so the output always contains
424
+ whatever has been documented so far, even if the run is interrupted.
425
+
426
+ - `codedoc/core/safe_writer.py` *(new)*: `SafeWriter` class — same thread-safe, atomic-write
427
+ design as `Checkpoint`, but the target is the real output file rather than a hidden
428
+ intermediate. The partial JSON embeds `_codedoc.status = "in_progress"` so subsequent runs
429
+ can distinguish it from a completed output and resume correctly.
430
+ - **JSON / both format**: target is `codedoc.json`. The final `write_project_outputs` call
431
+ overwrites it with the complete, polished output — no separate cleanup required.
432
+ - **MD-only format**: target is `.codedoc_build.json` (internal build file, see below).
433
+ After a successful MD write, `SafeWriter.delete()` removes it. On failure it is
434
+ preserved so the user still has partial output and a re-run resumes automatically.
435
+ - `codedoc/core/project_view.py`: added public `clean_file_record()` wrapper around the
436
+ internal `_clean_file()` so `SafeWriter` can produce structurally identical file entries to
437
+ what `build_project_view` would produce.
438
+ - `codedoc/core/__init__.py`: exported `SafeWriter`.
439
+ - `codedoc/core/loader.py`: added `"safe_mode": False` to `DEFAULTS`, `"CODEDOC_SAFE_MODE"`
440
+ to `_ENV_KEY_MAP`, and bool-coercion in `_validate()` (env vars arrive as strings).
441
+ - `codedoc/pipeline.py`:
442
+ - `run_pipeline`: creates either `SafeWriter` or `Checkpoint` depending on `safe_mode`;
443
+ both are referred to via the `recorder` variable. Calls `recorder.record()` /
444
+ `recorder.delete()` uniformly — the recorder type determines the behaviour.
445
+ - `_process_agent_files` / `_process_files_sequentially`: parameter renamed
446
+ `checkpoint` → `recorder`; type annotation updated to `Checkpoint | SafeWriter`.
447
+ - `_resolve_entry_and_docs`: always probes the JSON candidate and build file before MD,
448
+ regardless of the current `--format` setting, enabling cross-format and build-file resume.
449
+ - `codedoc/cli/cli.py`: added `--safe-mode` flag; `KeyboardInterrupt` message updated;
450
+ `Files resumed` summary line added.
451
+
452
+ #### MD-only runs now always produce a JSON intermediate before converting
453
+
454
+ Previously a `--format md` run held all results in RAM and wrote one file at the end — a
455
+ crash before that point lost everything. Now `write_project_outputs` for MD format writes
456
+ the full result to `.codedoc_build.json` **before** starting the Markdown conversion.
457
+
458
+ - On successful MD write → `.codedoc_build.json` is deleted automatically.
459
+ - On failure (exception, crash during conversion) → `.codedoc_build.json` is preserved;
460
+ codedoc logs its location. Re-running the same command loads it via the incremental hash
461
+ check and re-attempts the conversion without any LLM calls.
462
+
463
+ `--format both` is unaffected: the JSON output itself serves as the durable intermediate.
464
+
465
+ #### Internal build file (`.codedoc_build.json`)
466
+
467
+ `BUILD_FILENAME = ".codedoc_build.json"` (exported from `codedoc.core.output`) names the
468
+ internal intermediate file used by both `write_project_outputs` (MD-only runs) and
469
+ `SafeWriter` (safe-mode MD runs). The dot-prefix marks it as a system-managed file — not a
470
+ final output, not user-editable.
471
+
472
+ - `codedoc/pipeline.py` — `_load_existing_file_docs`: loads from both `codedoc.json`
473
+ (baseline) and `.codedoc_build.json` (newer-run overlay) and **merges** them. Build-file
474
+ records take priority per-file so that LLM work completed in an interrupted newer run is
475
+ never discarded just because an older `codedoc.json` already exists.
476
+ - `codedoc/pipeline.py` — `_resolve_entry_and_docs`: adds `.codedoc_build.json` to the
477
+ candidate list so the entry file is recoverable from a partial build file.
478
+
479
+ #### Ownership guard before writing output files
480
+
481
+ `write_project_outputs` and `SafeWriter` now verify that any existing file at the target path
482
+ was produced by codedoc before allowing an overwrite. If the file does **not** carry a
483
+ `_codedoc` metadata block (JSON) or `<!-- codedoc-ai: ... -->` comment (Markdown), a
484
+ `ConfigError` is raised — codedoc refuses to overwrite data it did not create.
485
+
486
+ - `codedoc/core/output.py`: `_check_file_ownership(path)` — raises `ConfigError` for
487
+ non-codedoc files; passes silently for new files or files codedoc owns. The check now
488
+ covers `json_path`, `md_path`, **and** `build_path` (`.codedoc_build.json`).
489
+ - `codedoc/core/safe_writer.py`: `load()` now raises `ConfigError` at startup when the
490
+ target file exists but has no `_codedoc` block, preventing SafeWriter from ever flushing
491
+ over a foreign file during the run.
492
+ - `codedoc/cli/cli.py`: `ConfigError` is surfaced with an `"Error: ..."` prefix (matching
493
+ `FileNotFoundError`) rather than `"Fatal error: ..."`, giving the user a clean actionable
494
+ message without a traceback.
495
+
496
+ #### Fixed: modified files are re-documented when resuming from a checkpoint
497
+
498
+ When a run is interrupted and a file is edited before the user re-runs, the checkpoint entry
499
+ for that file is discarded and the file is re-documented rather than silently restoring stale
500
+ documentation.
501
+
502
+ - `codedoc/core/checkpoint.py`: `record()` now accepts an optional `file_hash` parameter.
503
+ When provided, the hash is stored inside the checkpoint entry under the reserved key
504
+ `"_checkpoint_hash"`.
505
+ - `codedoc/core/safe_writer.py`: `record()` updated with the same optional `file_hash`
506
+ parameter for interface consistency.
507
+ - `codedoc/pipeline.py`:
508
+ - Added `_safe_file_hash()` helper.
509
+ - Both `_process_agent_files` (parallel path) and `_process_files_sequentially` compute
510
+ and forward the file hash to `recorder.record()`.
511
+ - The routing loop uses three explicit branches:
512
+ 1. **No hash stored** (`stored_hash == ""`): checkpoint was written by code older than
513
+ 0.7.2 and cannot be verified — reprocess to avoid silently restoring potentially
514
+ stale documentation.
515
+ 2. **Hash mismatch** (`content_hash != stored_hash`): file was modified after it was
516
+ checkpointed — discard entry, reprocess.
517
+ 3. **Hash matches**: checkpoint entry is current — restore it and skip the LLM.
518
+ - The `"_checkpoint_hash"` key is stripped before the entry is stored in
519
+ `new_results`, so it never surfaces in the final output.
520
+
521
+ #### Fixed: hardening of the recovery / ownership work (review follow-ups)
522
+
523
+ Follow-up fixes to the recovery and ownership features above, found while
524
+ reviewing the release.
525
+
526
+ - `codedoc/core/safe_writer.py` — `SafeWriter.load()`:
527
+ - **No longer erases prior work on a safe-mode interrupt.** When a *completed*
528
+ `codedoc.json` already exists, its records are now pre-loaded into memory, so
529
+ the first per-file flush preserves them. Previously the first flush wrote
530
+ only the files processed in the current run, erasing previously completed
531
+ records if the run was then interrupted — making `--safe-mode` worse than the
532
+ default checkpoint. Records are now pre-loaded for both `in_progress`
533
+ intermediates and completed outputs.
534
+ - **Refuses to overwrite malformed / unreadable target files.** `load()` now
535
+ raises `ConfigError` when the target file cannot be parsed as JSON or is not a
536
+ JSON object with a `_codedoc` block, instead of logging a warning and starting
537
+ fresh (which would overwrite the foreign file on the first flush). This brings
538
+ `SafeWriter` in line with `_check_file_ownership` in `output.py`, which already
539
+ treated malformed files as foreign.
540
+ - The stale module docstring describing `codedoc.json` as the MD-only
541
+ intermediate was corrected to `.codedoc_build.json`.
542
+ - `codedoc/pipeline.py` — `_load_existing_file_docs()`: the `.codedoc_build.json`
543
+ overlay is now **freshness-gated**. A build file is only overlaid onto
544
+ `codedoc.json` when it is at least as new as that file (by modification time). A build file
545
+ left behind by an earlier crashed MD run, then superseded when a later `--format json` run
546
+ rewrote `codedoc.json`, is now detected as stale, skipped, and removed — so older
547
+ build-file records can no longer silently replace newer JSON documentation (the
548
+ inverse of the merge case the overlay was added for).
549
+ - `codedoc/__init__.py`: `__version__` corrected from `0.7.0` to `0.7.2` to match
550
+ the CLI `--version` output and `pyproject.toml`.
551
+ - `OPENAI_RUN_FLOW.md` → `RUN_FLOW.md`: the run-flow / scenario reference was
552
+ renamed and generalised from OpenAI-only to cover all three providers (OpenAI,
553
+ Anthropic, Gemini) — correcting the API-key resolution and JSON-mode sections —
554
+ and four scenarios were added: newer vs. stale build-file overlay, safe-mode
555
+ resume with a completed output present, and malformed/foreign target files.
556
+ - `README.md`: documented the checkpoint recovery, `--safe-mode`, the
557
+ `.codedoc_build.json` intermediate, the ownership guard, and the
558
+ `CODEDOC_SAFE_MODE` environment variable; bumped the documented release to
559
+ `0.7.2`.
560
+
561
+ ---
562
+
563
+ **Behaviour on interrupt and resume (default — Checkpoint):**
564
+ 1. User runs `codedoc run --entry src/main.py` on a 100-file project.
565
+ 2. Run is interrupted (Ctrl-C, crash) after 60 files complete.
566
+ 3. `.codedoc_progress.json` in the output directory holds all 60 results.
567
+ 4. User re-runs the same command; 60 files are restored from the checkpoint (hash-verified),
568
+ only the remaining 40 are sent to the LLM.
569
+ 5. On clean completion the checkpoint file is deleted automatically.
570
+
571
+ **Behaviour on interrupt and resume (`--safe-mode`):**
572
+ 1. User runs `codedoc run --safe-mode --entry src/main.py` on a 100-file project.
573
+ 2. After every file, the output file is updated with the results so far.
574
+ 3. Run is interrupted after 60 files; the output contains 60 complete file records.
575
+ 4. User re-runs; the existing hash-based incremental logic detects all 60 files as unchanged
576
+ and skips them automatically — only the remaining 40 are sent to the LLM.
577
+ 5. On clean completion `write_project_outputs` overwrites the output with the final polished
578
+ result (and `SafeWriter.delete()` removes the intermediate for MD-only runs).
579
+
580
+ ## 0.7.1 - 2026-05-25
581
+
582
+ ### Fixed: provider-specific default models not applied when `--model` is omitted (GitHub Issue #2)
583
+
584
+ - `codedoc/core/loader.py`: changed `DEFAULTS["model_name"]` from `"gpt-4o-mini"` to `""`.
585
+ - Previously, the global default `"gpt-4o-mini"` was a truthy string that short-circuited the `or` fallbacks in the provider factory for every provider. Running `--provider gemini` without `--model` would silently send requests to Gemini using the OpenAI model name `gpt-4o-mini`, causing a 404 from the Gemini API. The same bug applied to `--provider anthropic` without `--model`, which would have called Anthropic with `gpt-4o-mini` and failed.
586
+ - With an empty string default, the factory's per-provider fallbacks now activate correctly:
587
+ - Gemini with no model → `gemini-2.5-flash`
588
+ - Anthropic with no model → `claude-haiku-4-5-20251001`
589
+ - OpenAI / auto with no model → `gpt-4o-mini` (unchanged)
590
+ - Behaviour when `--model` is explicitly passed is unchanged.
591
+
592
+ ## 0.7.0 - 2026-05-24
593
+
594
+ ### MD-only incremental now works (Issue 1)
595
+ - `_build_meta_comment` now embeds a `file_hashes` dict inside the `<!-- codedoc-ai: ... -->` metadata comment written at the top of every `codedoc.md`. Each entry maps a relative file path to its SHA-256 hash.
596
+ - `_load_existing_file_docs` now falls back to the MD file when no JSON exists. It reads hashes from the metadata comment and file records from the parsed MD content. Users who only ever run `--format md` no longer pay full LLM cost on every run.
597
+ - MD files generated before 0.7.0 have no `file_hashes`; the first 0.7.0 run re-processes everything once, then subsequent runs are incremental.
598
+ - Zero extra files: MD-only output remains a single file.
599
+
600
+ ### Cross-format resume (Issue 2)
601
+ - `_resolve_entry_and_docs` now checks for a same-stem `.md` sibling when a `.json` candidate does not exist (e.g. `--output codedoc/claude.json` after a previous run wrote `codedoc/claude.md`).
602
+ - `_load_existing_file_docs` checks the same-stem MD sibling before falling back to the configured MD filename.
603
+
604
+ ### Warning when entry file not in scanned set (Issue 3)
605
+ - `_select_files` now logs a `WARNING` when the entry file exists on disk but is absent from the scanner's file map (unsupported extension, too large, or in a skip directory).
606
+
607
+ ### Removed dead `write_outputs` function (Issue 4)
608
+ - `codedoc/core/output.py`: removed the never-called `write_outputs()` backward-compat wrapper that still referenced removed fields (`id`, `format`, `last_processed`, `git_commit`, `author`). Unused `datetime`/`timezone` imports were also removed.
609
+
610
+ ### `--format both` with a named file is now a hard error (Issue 5)
611
+ - `_resolve_output_spec` raises `ConfigError` when `output_format` is `"both"` and a named file path is given. Previously this silently downgraded to a single format. The error message directs developers to use a directory path instead.
612
+
613
+ ### Tests
614
+ - Added 5 regression tests covering all fixes above.
615
+
616
+ ## 0.6.4 - 2026-05-24
617
+
618
+ - Removed `codedoc_db.json` entirely — the public `codedoc.json` output already stores `hash` per file, which is sufficient for incremental processing.
619
+ - Hash-based incremental check now compares `compute_file_hash(path)` against `existing_docs[rel].get("hash")` from the public JSON, replacing the DB lookup.
620
+ - Added `_deps` field per file in the public JSON: stores the raw `dependencies_analysis` dict so the dependency catalog can be fully rebuilt from unchanged files on the next incremental run without an LLM call. Not rendered in Markdown output.
621
+ - `_public_record_to_doc` now reads `_deps` back and sets it as `dependencies_analysis`; falls back to `links.external_dependencies` for old-format JSON files.
622
+ - No per-file checkpoint writes during a run — crash recovery now means re-running the affected files.
623
+ - Legacy cleanup: if `codedoc_db.json` exists in the output directory at run time, it is deleted and a log message is emitted.
624
+ - `codedoc/core/db.py` stripped to just the `compute_file_hash` utility; `CodeDocDB` class removed.
625
+
626
+ ## 0.6.3 - 2026-05-24
627
+
628
+ - Trimmed `codedoc_db.json` to the minimum needed for incremental runs:
629
+ - Removed `history` array entirely — every field it contained (`file_path`, `processed_at`, `hash`, `author`) was already present in the `files` section, making it pure duplication. It was also never read anywhere in the pipeline.
630
+ - Removed `author` and `git_commit` fields from per-file DB entries — no longer stored in any output since 0.6.2, so they served no purpose in the cache.
631
+ - Removed git subprocess calls (`git rev-parse`, `git config user.name`) from the DB write path — nothing reads their output anymore, so there is no reason to shell out on every file write.
632
+ - Each DB entry now contains only: `hash`, `last_processed`, and (when present) `dependencies_analysis`.
633
+ - Existing `codedoc_db.json` files with the old format are migrated transparently on the next run (history is silently dropped).
634
+
635
+ ## 0.6.2 - 2026-05-23
636
+
637
+ - Cleaned public output for better AI scannability (schema version 1.4):
638
+ - Removed `id` field per file (always identical to `hash` — pure duplication).
639
+ - Removed `last_processed` field per file (internal processing timestamp, not documentation content).
640
+ - Removed `state` field per file (always `"checked"` in public output — carries no signal).
641
+ - Removed `format` field per file (file extension is already in `path`; `language` covers the language name).
642
+ - Result: each file record is smaller and contains only documentation-relevant content.
643
+ - Markdown output no longer renders `**ID:**` or `**Format:**` header lines per file.
644
+
645
+ ## 0.6.1 - 2026-05-23
646
+
647
+ - Improved run logging:
648
+ - Replaced animated file progress bars with stable log lines.
649
+ - Logs now show provider/model, configured file concurrency, file start events, completion percentage, and remaining file count.
650
+ - Format switches now log when an unselected public output file is removed.
651
+ - Parallel file processing is now visible in log output.
652
+ - Internal agent processing events demoted to debug level to reduce noise.
653
+
654
+ ## 0.6.0 - 2026-05-23
655
+
656
+ - Added metadata-backed reruns:
657
+ - JSON output now includes a top-level `_codedoc` metadata block.
658
+ - Markdown output now includes a hidden `codedoc-ai` metadata comment.
659
+ - Stored metadata includes the entry file, schema version, and generation time.
660
+ - Subsequent runs can recover the entry file from a previously generated `.json` or `.md` documentation file.
661
+ - Changed first-run/resume behavior:
662
+ - First runs require an explicit entry file when no valid previous CodeDoc output is available.
663
+ - If no output path is provided, CodeDoc checks the default `codedoc/` folder for previous docs.
664
+ - Invalid or metadata-free documentation files now fail clearly instead of being treated as valid resume sources.
665
+ - Changed default generated output location from `docs_output/` to `codedoc/`.
666
+ - Kept JSON as the default public output format.
667
+ - Added support for output file paths:
668
+ - `--output docs/report.json` writes a named JSON file.
669
+ - `--output docs/report.md` writes a named Markdown file.
670
+ - File extension now determines the selected output format for explicit file paths.
671
+ - Unsupported output file extensions now raise a configuration error.
672
+ - Moved the incremental cache into the selected output directory:
673
+ - `codedoc_db.json` is now stored beside generated docs.
674
+ - Existing root-level `codedoc_db.json` files are migrated into the output directory when possible.
675
+ - Improved output cleanup:
676
+ - Default managed files (`codedoc.json`, `codedoc.md`) are removed when switching formats.
677
+ - Legacy per-file outputs such as `main.py.json` and `main.py.md` are cleaned up.
678
+ - Custom-named output files are preserved across runs.
679
+ - Simplified provider mode support for this release:
680
+ - Active providers are OpenAI/OpenAI-compatible, Anthropic, and Gemini.
681
+ - Local provider code remains in the package but is not exposed through the CLI/factory in 0.6.0.
682
+ - Removed `--llm` / `LLM_MODE` from the documented public workflow.
683
+ - Improved provider implementations:
684
+ - Reused Anthropic clients instead of creating a client per request.
685
+ - Added native JSON-mode handling for OpenAI and Gemini where available.
686
+ - Improved Gemini system-instruction handling.
687
+ - Updated CLI help, README, and version metadata for the 0.6.0 workflow.
688
+ - Added regression coverage for:
689
+ - Missing entry plus missing docs raising a clear configuration error.
690
+ - Resuming from existing JSON metadata.
691
+ - Custom output filename behavior.
692
+ - JSON remaining the default format.
693
+ - Cache/output cleanup and metadata preservation.
694
+
695
+ ## 0.5.2 - 2026-05-13
696
+
697
+ - Fixed cache structure duplication issues in generated documentation output.
698
+ - Improved dependency/import resolution to prevent incorrect file mappings and false dependency relationships.
699
+ - Cleaned and normalized public dependency output generation.
700
+ - Reduced noisy dependency cycles in generated Markdown and JSON outputs.
701
+ - Added regression coverage for cache structure and dependency resolution behavior.
702
+
703
+ ## 0.5.1 - 2026-05-13
704
+
705
+ - Cleaned generated cache and public JSON by pruning empty arrays, empty objects, nulls, and duplicate nested fields.
706
+ - Removed the top-level cache `version` field from newly written `codedoc_db.json`.
707
+ - Improved Markdown-to-JSON conversion so it no longer recreates empty default sections.
708
+ - Tightened agent prompts to avoid placeholder package names and empty output fields.
709
+
710
+ ## 0.5.0 - 2026-05-13
711
+
712
+ - Promoted `codedoc-ai` to the 0.5.0 feature line.
713
+ - Added bounded file-level parallelism:
714
+ - Processes up to 5 files at a time by default.
715
+ - Adds `--max-parallel-files N` for CLI control.
716
+ - Adds `max_parallel_files`, `file_retry_attempts`, and `max_consecutive_failures` config options.
717
+ - Added sequential retry fallback for files that fail during parallel execution.
718
+ - Added provider/API health diagnostics when repeated file processing failures suggest bad credentials, rate limits, model errors, network issues, or provider downtime.
719
+ - Kept cache writes ordered and centralized so `codedoc_db.json` remains structured even when files are processed concurrently.
720
+ - Added AI-friendly dependency cataloging:
721
+ - File-level dependencies remain on each file.
722
+ - AI can suggest `catalog_updates` internally.
723
+ - Public output receives a merged `dependency_catalog`.
724
+ - Repeated dependency explanations are deduplicated across JSON and Markdown.
725
+ - Added deterministic JSON/Markdown conversion helpers so public JSON can become Markdown without another AI call, and generated Markdown can be parsed back into the public JSON shape.
726
+ - Clarified DependencyAgent output so generic import notes stay out of repeated file records unless they are file-specific.
727
+ - Added Google Gemini support through the official `google-genai` SDK.
728
+ - Added `llm_provider` config and `--provider auto|openai|anthropic|gemini` CLI selection.
729
+ - Expanded README with Codex/AI-agent analysis covering token savings, hallucination reduction, complex edit safety, and recommended workflows.
730
+ - Added tests for:
731
+ - File-level parallel processing.
732
+ - Retry behavior.
733
+ - Dependency catalog output.
734
+ - JSON/Markdown conversion.
735
+ - Format switching from cache.
736
+
737
+ ## 0.1.4 - 2026-05-02
738
+
739
+ - Redesigned **public output structure** for cleaner, AI-friendly documentation.
740
+ - Separated **internal cache (`codedoc_db.json`)** from **public output (`codedoc.json` / `codedoc.md`)**.
741
+ - Added **project-level overview** including entry file, file count, languages, and folder summary.
742
+ - Added **project tree visualization** in both JSON and Markdown outputs.
743
+ - Added **folder-based grouping** with summarized purpose and file listings.
744
+ - Introduced **dependency graph** with internal file relationships and external dependencies.
745
+ - Flattened file structure in public output:
746
+ - Removed nested and duplicated `result` / `documentation` blocks.
747
+ - Consolidated descriptions, roles, functions, classes, and exports into a single clean structure.
748
+ - Added **file-level linking metadata**:
749
+ - `internal_dependencies`
750
+ - `external_dependencies`
751
+ - `imported_by`
752
+ - Removed **author and git metadata** from public output by default.
753
+ - Improved **Markdown output (`--format md`)**:
754
+ - Added Project Overview, Tree, Folder Map, Dependency Map, and structured file summaries.
755
+ - Ensured **format-specific output behavior**:
756
+ - `--format md` → only `codedoc.md`
757
+ - `--format json` → only `codedoc.json`
758
+ - `--format both` → both files
759
+ - Added **clear CLI and pipeline logging**:
760
+ - Displays selected output format
761
+ - Displays exact output file path
762
+ - Added **BOM-safe file reading (`utf-8-sig`)** across Python, JS/TS, and generic parsers.
763
+ - Ensured **language-agnostic processing** (no Python-only assumptions).
764
+ - Added tests for:
765
+ - New public output structure
766
+ - Markdown generation
767
+ - Dependency graph presence
768
+ - Cross-language compatibility (including TS/TSX)
769
+ - Cleaned up public output by removing:
770
+ - Cache history
771
+ - Raw agent responses
772
+ - Redundant description fields
773
+
774
+ ## 0.1.3 - 2026-05-02
775
+
776
+ - Changed generated docs to one combined JSON file by default.
777
+ - Added `--format json|md|both` output selection.
778
+ - Added smart content-hash reuse for unchanged and duplicate files.
779
+ - Added cache-based output regeneration when selected docs are missing.
780
+ - Redesigned public output with project overview, tree, folder map, dependency graph, and flattened file summaries.
781
+ - Removed local author metadata and raw agent result duplication from public output.
782
+ - Expanded public README with provider setup, defaults, config, output, and cache behavior.
783
+
784
+ ## 0.1.1 - 2026-05-01
785
+
786
+ - Added safer default scanning for virtual environments such as `myenv`.
787
+ - Added configurable `skip_dirs`.
788
+ - Added strict project-relative ignore paths through CLI, config, environment, and Python API.
789
+ - Added `--ignore PATH` CLI option.
790
+ - Added scanner tests for virtual environment and strict path ignores.
791
+ - Fixed misleading API key warning when CLI overrides select local LLM mode.
792
+
793
+ ## 0.1.0 - 2026-05-01
794
+
795
+ - Initial alpha release.
796
+ - Added entry-file dependency traversal.
797
+ - Added local and API LLM provider support.
798
+ - Added per-file Markdown and JSON output.
799
+ - Added `_index.json`, `_summary.md`, and incremental `codedoc_db.json` memory.
800
+ - Added CLI and Python API entry points.