codedoc-ai 0.9.2__tar.gz → 0.9.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/CHANGELOG.md +191 -1
  2. codedoc_ai-0.9.6/CONTRIBUTING.md +102 -0
  3. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/MANIFEST.in +2 -4
  4. {codedoc_ai-0.9.2/codedoc_ai.egg-info → codedoc_ai-0.9.6}/PKG-INFO +16 -15
  5. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/README.md +109 -106
  6. codedoc_ai-0.9.6/RUN_FLOW.md +112 -0
  7. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/__init__.py +1 -1
  8. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/agents/documentation_agent.py +33 -10
  9. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/agents/orchestrator.py +0 -20
  10. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/__init__.py +1 -1
  11. codedoc_ai-0.9.6/codedoc/core/block_manager.py +92 -0
  12. codedoc_ai-0.9.6/codedoc/core/dependency_kind.py +223 -0
  13. codedoc_ai-0.9.6/codedoc/core/discovery.py +221 -0
  14. codedoc_ai-0.9.6/codedoc/core/document.py +435 -0
  15. codedoc_ai-0.9.6/codedoc/core/execution.py +729 -0
  16. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/graph.py +16 -13
  17. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/loader.py +53 -1
  18. codedoc_ai-0.9.6/codedoc/core/markdown_view.py +641 -0
  19. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/output.py +105 -32
  20. codedoc_ai-0.9.6/codedoc/core/project_view.py +715 -0
  21. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/queue.py +2 -3
  22. codedoc_ai-0.9.6/codedoc/core/record_meta.py +41 -0
  23. codedoc_ai-0.9.6/codedoc/core/resume.py +316 -0
  24. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/safe_writer.py +49 -30
  25. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/scanner.py +217 -18
  26. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/llm/factory.py +2 -18
  27. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/llm/local_provider.py +12 -11
  28. codedoc_ai-0.9.6/codedoc/pipeline.py +633 -0
  29. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/utils/errors.py +12 -0
  30. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6/codedoc_ai.egg-info}/PKG-INFO +16 -15
  31. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc_ai.egg-info/SOURCES.txt +9 -33
  32. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc_ai.egg-info/requires.txt +0 -1
  33. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/pyproject.toml +45 -47
  34. codedoc_ai-0.9.2/.github/ISSUE_TEMPLATE/bug_report.md +0 -43
  35. codedoc_ai-0.9.2/.github/ISSUE_TEMPLATE/feature_request.md +0 -23
  36. codedoc_ai-0.9.2/.github/PULL_REQUEST_TEMPLATE.md +0 -21
  37. codedoc_ai-0.9.2/CONTRIBUTING.md +0 -65
  38. codedoc_ai-0.9.2/RUN_FLOW.md +0 -698
  39. codedoc_ai-0.9.2/codedoc/core/project_view.py +0 -1124
  40. codedoc_ai-0.9.2/codedoc/pipeline.py +0 -1653
  41. codedoc_ai-0.9.2/tests/__init__.py +0 -0
  42. codedoc_ai-0.9.2/tests/conftest.py +0 -30
  43. codedoc_ai-0.9.2/tests/fixtures/flutter_app/app.dart +0 -0
  44. codedoc_ai-0.9.2/tests/fixtures/flutter_app/main.dart +0 -0
  45. codedoc_ai-0.9.2/tests/fixtures/java_app/Main.java +0 -0
  46. codedoc_ai-0.9.2/tests/fixtures/java_app/Service.java +0 -0
  47. codedoc_ai-0.9.2/tests/fixtures/python_app/main.py +0 -0
  48. codedoc_ai-0.9.2/tests/fixtures/python_app/models.py +0 -0
  49. codedoc_ai-0.9.2/tests/fixtures/python_app/utils.py +0 -0
  50. codedoc_ai-0.9.2/tests/fixtures/react_app/App.tsx +0 -0
  51. codedoc_ai-0.9.2/tests/fixtures/react_app/index.html +0 -0
  52. codedoc_ai-0.9.2/tests/fixtures/react_app/main.tsx +0 -0
  53. codedoc_ai-0.9.2/tests/fixtures/react_app/router.tsx +0 -0
  54. codedoc_ai-0.9.2/tests/fixtures/react_sample.tsx +0 -0
  55. codedoc_ai-0.9.2/tests/test_080_features.py +0 -1188
  56. codedoc_ai-0.9.2/tests/test_081_configurable_defaults.py +0 -704
  57. codedoc_ai-0.9.2/tests/test_081_lossless_md.py +0 -1051
  58. codedoc_ai-0.9.2/tests/test_081_placeholder.py +0 -500
  59. codedoc_ai-0.9.2/tests/test_081_rate_limit_profiles.py +0 -848
  60. codedoc_ai-0.9.2/tests/test_090_features.py +0 -487
  61. codedoc_ai-0.9.2/tests/test_092_features.py +0 -600
  62. codedoc_ai-0.9.2/tests/test_agents.py +0 -143
  63. codedoc_ai-0.9.2/tests/test_graph.py +0 -158
  64. codedoc_ai-0.9.2/tests/test_llm_mock.py +0 -382
  65. codedoc_ai-0.9.2/tests/test_parser.py +0 -310
  66. codedoc_ai-0.9.2/tests/test_pipeline.py +0 -1392
  67. codedoc_ai-0.9.2/tests/test_queue.py +0 -83
  68. codedoc_ai-0.9.2/tests/test_scanner.py +0 -141
  69. codedoc_ai-0.9.2/tests/test_scenarios.py +0 -955
  70. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/.env.example +0 -0
  71. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/CODE_OF_CONDUCT.md +0 -0
  72. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/LICENSE +0 -0
  73. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/SECURITY.md +0 -0
  74. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/__main__.py +0 -0
  75. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/agents/__init__.py +0 -0
  76. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/agents/base_agent.py +0 -0
  77. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/agents/dependency_agent.py +0 -0
  78. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/agents/structure_agent.py +0 -0
  79. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/bootstrap.py +0 -0
  80. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/cli/__init__.py +0 -0
  81. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/cli/cli.py +0 -0
  82. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/checkpoint.py +0 -0
  83. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/db.py +0 -0
  84. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/planning.py +0 -0
  85. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/core/usage.py +0 -0
  86. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/llm/__init__.py +0 -0
  87. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/llm/api_provider.py +0 -0
  88. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/llm/base.py +0 -0
  89. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/llm/rate_limit_profile.py +0 -0
  90. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/parser/__init__.py +0 -0
  91. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/parser/factory.py +0 -0
  92. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/parser/generic_parser.py +0 -0
  93. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/parser/python_parser.py +0 -0
  94. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/parser/react_parser.py +0 -0
  95. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/templates/github-actions-codedoc.yml +0 -0
  96. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/utils/__init__.py +0 -0
  97. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc/utils/logger.py +0 -0
  98. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc_ai.egg-info/dependency_links.txt +0 -0
  99. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc_ai.egg-info/entry_points.txt +0 -0
  100. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/codedoc_ai.egg-info/top_level.txt +0 -0
  101. {codedoc_ai-0.9.2 → codedoc_ai-0.9.6}/setup.cfg +0 -0
@@ -1,5 +1,195 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.9.6 - 2026-06-17
4
+
5
+ ### Scan robustness and resolution precision (corrective patch)
6
+
7
+ A corrective patch release. It fixes correctness and robustness defects found
8
+ in a code audit and makes the existing CI green by fixing two non-portable
9
+ tests. No new documentation scopes, prompts, providers, response-schema
10
+ changes, or output artifacts. The only new configuration key is the safety
11
+ control `follow_symlinks` (default `False`).
12
+
13
+ - **Symlink-safe, iterative scanner.** The directory walk is now an explicit
14
+ stack instead of recursion, so a deeply nested acyclic tree can no longer
15
+ raise `RecursionError`. Every traversed directory's resolved identity
16
+ (`(st_dev, st_ino)` where meaningful, else the normalized resolved path) is
17
+ tracked, so symlink/junction cycles and multiple aliases to one real
18
+ directory are visited at most once. By default (`follow_symlinks=False`) all
19
+ symlinked directories and files are skipped — preventing both link cycles and
20
+ escapes outside the project root. With `follow_symlinks=True`, links are
21
+ followed only when their target exists, has the expected type, and resolves
22
+ inside the project root; broken, inaccessible, type-mismatched, and
23
+ out-of-root links are skipped. Lexical skip/dot/ignore rules are applied to a
24
+ link's in-root alias before it is resolved, so a link cannot bypass an ignored
25
+ path, and only project-relative paths are ever emitted as `rel_path`.
26
+ - **Deterministic, exact-case import resolution.** Import resolution no longer
27
+ probes the filesystem, so the same repository resolves to the same dependency
28
+ graph on case-sensitive and case-insensitive hosts. The filesystem-dependent
29
+ case-folded matching and the standalone bare final-segment candidate (which
30
+ could link `collections.abc` or `com.example.Bar` to an unrelated root-level
31
+ file) are removed. Dotted imports resolve only through their directory-anchored
32
+ forms; relative, Python dotted-relative, and Dart `package:` imports are
33
+ unchanged. A case-mismatched or otherwise unresolved import stays in the
34
+ per-file `imports` list but creates no internal graph edge, so the dependency
35
+ graph, entry reachability, and catalog reflect only real resolved edges. The
36
+ now-unused `_filesystem_is_case_insensitive` / `_swap_case_letter` helpers were
37
+ removed from `graph.py`; `resolve_import()`'s `root` parameter is retained for
38
+ compatibility only and is no longer read.
39
+ - **Atomic legacy summary writer.** The backward-compatible `write_summary()`
40
+ helper now routes through `atomic_write_text` like every other final writer,
41
+ so no completed public artifact is ever written via truncate-in-place.
42
+ - **Stricter configuration bounds.** `max_file_size_kb` must be a positive
43
+ integer (`>= 1`); `0`, negatives, and booleans are rejected before scanning
44
+ instead of silently skipping every file. `retry_after_cap_s` must be `>= 0`
45
+ (zero still disables the cap); negatives and booleans are rejected.
46
+ - **Portable CI tests.** Two environment-coupled tests were made
47
+ platform- and checkout-name independent (force-path normalization is asserted
48
+ per platform; the `run`-alias test compares the captured root to the actual
49
+ working directory) so the existing 3.10/3.11/3.12 CI matrix passes unchanged.
50
+ No product code, CI workflow, configuration default, prompt, schema, or output
51
+ artifact changed for this work.
52
+
53
+ ## 0.9.5 - 2026-06-15
54
+
55
+ ### Correctness and reliability (behavior changes are bounded and listed below)
56
+
57
+ A corrective patch release. The only intentional change to successful output is
58
+ the dependency-catalog correction; all other changes harden persistence,
59
+ packaging, and CI without altering successful serialized contents. No schema
60
+ bump, no new configuration, CLI feature, prompt, provider, or output artifact.
61
+
62
+ - **Evidence-based dependency catalog.** A catalog entry is now admitted only
63
+ when its `(type, canonical_name)` key is authorized by a file's finalized
64
+ links (graph-resolved `internal_dependencies`, or deterministically classified
65
+ `external_dependencies` / `sdk_dependencies`). Model `catalog_updates`,
66
+ `dependency_refs`, and `usage_notes` may enrich a proven dependency with
67
+ `used_for` text but can no longer create or retype one; unresolved hints are
68
+ discarded rather than reclassified. A Python external whose canonical root is
69
+ the project's own package and is resolved internally by the graph is dropped as
70
+ a false external. Every emitted entry now carries non-empty `used_for` text and
71
+ a backing file. Deterministic output remains byte-identical except where an
72
+ entry lacked authoritative evidence.
73
+ - **Atomic completed output.** Final JSON and Markdown are written through a
74
+ single canonical `atomic_write_text` helper (unique temp sibling, flush +
75
+ fsync, rename) so a completed artifact can never be truncated in place. In
76
+ `both` mode both payloads are rendered before any target is mutated, Markdown
77
+ is replaced first and JSON last (the JSON path is also the live backup), giving
78
+ per-artifact atomicity.
79
+ - **Fatal live-backup persistence.** A failed live-backup write now raises
80
+ `LiveBackupWriteError` (an `OutputError`) instead of being silently swallowed.
81
+ `SafeWriter.record()` rolls back all in-memory markers on failure, and the
82
+ execution layer treats persistence failure as fatal on both the sequential and
83
+ parallel paths — no retry, no rate-limit reclassification, pending work
84
+ cancelled — so the run never continues under a false crash-safety guarantee.
85
+ - **Artifact-path collision rejection.** Distinct generated artifacts that would
86
+ target the same normalized path are rejected before scanning or mutation, while
87
+ the intentional final-JSON / live-backup phase alias is accepted.
88
+ - **Three-provider contract matrix.** OpenAI, Anthropic, and Gemini are verified
89
+ through one shared contract using injected fake SDK clients (no network or
90
+ credentials in normal tests).
91
+ - **Active CI and metadata honesty.** Added a least-privilege CI workflow
92
+ (tests, lint, build, `twine check`, clean-wheel smoke). The declared
93
+ `requires-python` is now `>=3.10,<3.13`, the classifiers drop Python 3.9, and
94
+ the CI matrix tests exactly 3.10, 3.11, and 3.12.
95
+ - **Release hygiene.** Import resolution now follows the actual target
96
+ filesystem's case
97
+ semantics instead of folding case unconditionally. Documentation-agent
98
+ fallback handling is defined on the agent class rather than installed by a
99
+ runtime monkey-patch. The dormant local-provider compatibility module now
100
+ uses the standard library for its optional liveness check, so `requests` is
101
+ no longer a runtime dependency, and repository tests are no longer bundled
102
+ into the source distribution.
103
+
104
+ ## 0.9.4 - 2026-06-14
105
+
106
+ ### Internal decomposition (structural only — no behavior change)
107
+
108
+ This release reorganizes two oversized modules into cohesive,
109
+ single-responsibility units. It does **not** change file selection, provider
110
+ calls, prompts, retries, the output schema, output contents, the dependency
111
+ catalog, configuration defaults, or the CLI. For the same inputs the run
112
+ behaves identically and the serialized JSON/Markdown is byte-identical to
113
+ 0.9.3 output.
114
+
115
+ - **Pipeline decomposition.** `codedoc/pipeline.py` is now a thin lifecycle
116
+ coordinator. Its internals moved into three modules behind the unchanged
117
+ `run_pipeline()` facade and phase ordering:
118
+ - `codedoc/core/resume.py` — live-backup path resolution, existing JSON/MD
119
+ record loading, public→internal record reconstruction, final
120
+ documentation-record construction, and stale-build / legacy-db cleanup.
121
+ - `codedoc/core/discovery.py` — entry recovery from existing CodeDoc
122
+ metadata, dependency-graph construction, entry-reachability selection, and
123
+ graph-edge serialization (selection behavior moved unchanged).
124
+ - `codedoc/core/execution.py` — rate-limit / retry-after classification, the
125
+ adaptive-parallelism ladder, and sequential/parallel processing behind a
126
+ new `ExecutionContext` / `ExecutionOptions` boundary and the
127
+ `execute_agent_files()` entry point. The provider-aware `RateLimitProfile`
128
+ and execution policy are built by the pipeline and passed in; execution no
129
+ longer reads the configuration dictionary.
130
+ - **Serializer extraction.** Markdown serialization/parsing moved from
131
+ `codedoc/core/project_view.py` into `codedoc/core/markdown_view.py`
132
+ (`markdown_from_view`, `markdown_to_view`, `json_from_markdown`,
133
+ `markdown_from_json`, the embedded-view readers, and the visible-Markdown
134
+ parsers/render helpers). `project_view.py` retains view assembly, the
135
+ dependency catalog, pruning, usage-example sanitization, and
136
+ `read_codedoc_meta`.
137
+ - **Compatibility.** The moved private helpers remain importable from their
138
+ previous modules for one release: pipeline helpers from `codedoc.pipeline`,
139
+ and the serializer helpers from `codedoc.core.project_view` (forwarded
140
+ lazily to `codedoc.core.markdown_view`). These re-exports are deprecated but
141
+ emit no runtime warning. No schema change; `SCHEMA_VERSION` stays `1.4`.
142
+ - **Tests.** Added `tests/test_094_pipeline_boundaries.py` and
143
+ `tests/test_094_project_view_split.py`, including byte-identical
144
+ golden-output fixtures (`tests/fixtures/golden_094_*`). One existing
145
+ monkeypatch target (`codedoc.pipeline.time.sleep`) was retargeted to the
146
+ defining module (`codedoc.core.execution.time.sleep`).
147
+
148
+ ## 0.9.3 - 2026-06-13
149
+
150
+ ### Deterministic output, dependency categories, and centralized reading
151
+
152
+ - **SDK/standard-library separation.** A new pure, deterministic, language-aware
153
+ classifier (`codedoc/core/dependency_kind.py`) splits non-project imports into
154
+ third-party `external_dependencies` and standard-library / SDK
155
+ `sdk_dependencies` (additive field). Dart `dart:*`, Python stdlib (via
156
+ `sys.stdlib_module_names` with a committed Python 3.9 fallback), and Node
157
+ built-ins / `node:*` are recognized as SDK; package subpaths and scoped npm
158
+ packages are canonicalized to their package root. Importability is never used
159
+ to classify modules.
160
+ - **Internal links only from the graph.** `links.internal_dependencies` /
161
+ `links.imported_by` now come exclusively from resolved dependency-graph edges.
162
+ Unresolved agent text can no longer create an internal link, and an `internal`
163
+ catalog hint is accepted only when it exactly matches a resolved internal path
164
+ for that file; otherwise it is reclassified as non-project data. The catalog is
165
+ grouped by `(type, canonical_name)`.
166
+ - **Centralized document reader.** A single read-only parser
167
+ (`codedoc/core/document.py`, `read_codedoc_document`) owns CodeDoc JSON /
168
+ Markdown parsing and structural ownership. Output ownership, `SafeWriter`,
169
+ metadata reads, existing-record reads, resume candidates, and stale-build
170
+ migration all route through it while keeping their own missing/malformed
171
+ policy. It reads UTF-8 with optional BOM, rejects invalid UTF-8, validates
172
+ collection types, rejects duplicate paths, prefers a valid embedded view, and
173
+ fails closed on unknown/missing-schema completed output and unsupported
174
+ extensions.
175
+ - **Ownership tightening (intentional).** Markdown that merely contains a
176
+ `<!-- codedoc-ai:` marker but whose metadata is malformed (and which has no
177
+ valid embedded view) is now treated as foreign and is never overwritten. Valid
178
+ legacy Markdown remains accepted.
179
+ - **Deterministic, timestamp-free completed output.** `generated_at` is removed
180
+ from the completed JSON `_codedoc` block, the Markdown metadata comment, and
181
+ the embedded lossless view. Two runs with identical sources, documentation,
182
+ configuration, and stats now produce byte-identical JSON and Markdown. Old
183
+ outputs containing `generated_at` remain readable. Live backups keep
184
+ `created_at` / `updated_at` diagnostics (new backups write `created_at`).
185
+ - **Private record metadata plumbing.** A registry
186
+ (`codedoc/core/record_meta.py`) preserves explicitly registered private keys
187
+ through JSON, Markdown (embedded view only — never visible prose), live backup,
188
+ and resume reconstruction. The production registry is empty in this release;
189
+ arbitrary underscore-prefixed model output is not preserved.
190
+ - No schema bump: `sdk_dependencies` and private keys are additive; missing
191
+ `sdk_dependencies` loads as an empty list.
192
+
3
193
  ## 0.9.2 - 2026-06-12
4
194
 
5
195
  ### Safe planning and CI ergonomics
@@ -791,7 +981,7 @@ reviewing the release.
791
981
  - Cache history
792
982
  - Raw agent responses
793
983
  - Redundant description fields
794
-
984
+
795
985
  ## 0.1.3 - 2026-05-02
796
986
 
797
987
  - Changed generated docs to one combined JSON file by default.
@@ -0,0 +1,102 @@
1
+ # Contributing
2
+
3
+ Thanks for helping improve `codedoc-ai`.
4
+
5
+ ## How Contributions Work
6
+
7
+ 1. Fork the repository.
8
+ 2. Create a branch from `main`.
9
+ 3. Make your changes.
10
+ 4. Add or update tests when behavior changes.
11
+ 5. Run the verification commands.
12
+ 6. Open a pull request with a clear description.
13
+
14
+ ## Development Setup
15
+
16
+ ```bash
17
+ git clone https://github.com/atharvm416/codedoc-ai.git
18
+ cd codedoc-ai
19
+ python -m venv .venv
20
+ .venv\Scripts\activate
21
+ pip install -e ".[dev]"
22
+ ```
23
+
24
+ The activation command above is for Windows. On macOS/Linux, use this instead:
25
+
26
+ ```bash
27
+ source .venv/bin/activate
28
+ ```
29
+
30
+ ## Supported Python versions
31
+
32
+ `codedoc-ai` supports Python 3.10, 3.11, and 3.12. The declared
33
+ `requires-python` range, the package classifiers, and the CI test matrix are
34
+ kept in agreement; CI tests every interpreter the package claims to support.
35
+
36
+ ## Verification
37
+
38
+ Run these before opening a pull request:
39
+
40
+ ```bash
41
+ python -m pytest
42
+ python -m ruff check codedoc
43
+ python -m ruff check tests/test_095_*.py tests/test_093_dependency_view.py tests/test_graph.py tests/test_agents.py
44
+ python -m build
45
+ python -m twine check dist/*
46
+ ```
47
+
48
+ Continuous integration (`.github/workflows/ci.yml`) runs the full test suite on
49
+ Python 3.10–3.12, Ruff over production and the release-touched tests, and a
50
+ packaging job that builds the sdist and wheel, runs `twine check`, installs the
51
+ wheel into a clean environment, and smoke-tests `import codedoc`,
52
+ `codedoc --version`, and `codedoc --help`. The older test files still have known
53
+ Ruff findings, so the lint gate is intentionally scoped until those are cleaned
54
+ in a dedicated behavior-free change. CI never publishes, makes paid provider
55
+ calls, or requires provider secrets.
56
+
57
+ ## Run lifecycle
58
+
59
+ The verified phase ordering of a run — read-only preflight, scan/plan, the
60
+ mutation boundary, live-backup initialization before provider creation,
61
+ execution, atomic finalization, diagnostics, and cleanup — is documented in
62
+ [`RUN_FLOW.md`](RUN_FLOW.md).
63
+
64
+ ## Good First Contributions
65
+
66
+ - Add parser fixtures for more frameworks.
67
+ - Improve import resolution for a specific language.
68
+ - Add tests for edge cases in dependency traversal.
69
+ - Improve documentation examples.
70
+ - Add provider support for another OpenAI-compatible local server.
71
+
72
+ ## Module Size and Cohesion
73
+
74
+ File size is a review *signal*, not a hard gate. Production modules should
75
+ normally stay cohesive and under roughly 700 lines; a module that grows well
76
+ past that is usually doing too many jobs and is a good candidate for
77
+ extraction into single-responsibility modules (as the pipeline and the
78
+ project-view serializer were split in 0.9.4).
79
+
80
+ There is no CI line-count rule, and you should never split code merely to hit
81
+ a number. Generated files, large data tables, parsers, and tightly coupled
82
+ serializers may legitimately justify more lines. Prefer splitting along clear
83
+ responsibilities (and one-way import boundaries) rather than by line count.
84
+
85
+ ## Pull Request Guidelines
86
+
87
+ - Keep changes focused.
88
+ - Explain why the change is needed.
89
+ - Include before/after behavior when fixing a bug.
90
+ - Do not commit `.env`, API keys, `docs_output`, `codedoc_db.json`, `dist`, or virtual environments.
91
+ - Keep generated artifacts out of pull requests unless a maintainer asks for them.
92
+
93
+ ## Reporting Bugs
94
+
95
+ Please include:
96
+
97
+ - Your operating system.
98
+ - Python version.
99
+ - `codedoc-ai` version.
100
+ - The command you ran.
101
+ - Relevant config without secrets.
102
+ - A small reproduction project or fixture when possible.
@@ -1,14 +1,12 @@
1
1
  include README.md
2
- include RUN_FLOW.md
3
2
  include LICENSE
4
3
  include CHANGELOG.md
5
4
  include CONTRIBUTING.md
5
+ include RUN_FLOW.md
6
6
  include SECURITY.md
7
7
  include CODE_OF_CONDUCT.md
8
8
  include .env.example
9
9
  recursive-include codedoc/templates *.yml
10
- recursive-include tests *.py
11
- recursive-include tests/fixtures *
12
- recursive-include .github *.md
10
+ prune tests
13
11
  global-exclude __pycache__/*
14
12
  global-exclude *.py[cod]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codedoc-ai
3
- Version: 0.9.2
3
+ Version: 0.9.6
4
4
  Summary: Generate structured, incremental documentation for any codebase using OpenAI, Anthropic, or Gemini
5
5
  Author: Atharv Mannur
6
6
  License-Expression: MIT
@@ -16,17 +16,15 @@ Classifier: Environment :: Console
16
16
  Classifier: Intended Audience :: Developers
17
17
  Classifier: Operating System :: OS Independent
18
18
  Classifier: Programming Language :: Python :: 3
19
- Classifier: Programming Language :: Python :: 3.9
20
19
  Classifier: Programming Language :: Python :: 3.10
21
20
  Classifier: Programming Language :: Python :: 3.11
22
21
  Classifier: Programming Language :: Python :: 3.12
23
22
  Classifier: Topic :: Software Development :: Documentation
24
23
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
24
  Classifier: Topic :: Utilities
26
- Requires-Python: >=3.9
25
+ Requires-Python: <3.13,>=3.10
27
26
  Description-Content-Type: text/markdown
28
27
  License-File: LICENSE
29
- Requires-Dist: requests~=2.33
30
28
  Requires-Dist: openai~=2.36
31
29
  Requires-Dist: anthropic~=0.102
32
30
  Requires-Dist: google-genai~=2.3
@@ -47,7 +45,7 @@ Dynamic: license-file
47
45
 
48
46
  The tool scans source files, resolves project-local imports into a dependency graph, sends only files that need analysis to an LLM, and writes one combined, structured documentation artifact designed for both humans and AI. By default that artifact is JSON.
49
47
 
50
- Current release: `0.9.2`.
48
+ Current release: `0.9.6`.
51
49
 
52
50
  ## What It Does
53
51
 
@@ -353,6 +351,7 @@ Create `codedoc.config.json` in the project being documented:
353
351
  "max_consecutive_failures": 5,
354
352
  "log_level": "INFO",
355
353
  "max_file_size_kb": 500,
354
+ "follow_symlinks": false,
356
355
  "propagate_changes": true,
357
356
  "rate_limit_adaptive": true,
358
357
  "parallel_ladder": null,
@@ -426,6 +425,8 @@ Parallelism settings:
426
425
  | `max_parallel_files` | Maximum number of files processed at the same time. Default: `5`. |
427
426
  | `file_retry_attempts` | Number of sequential retries for a failed file. Default: `1`. |
428
427
  | `max_consecutive_failures` | Stops the run after repeated failures so provider/API problems are visible quickly. Default: `5`. |
428
+ | `max_file_size_kb` | Files larger than this are skipped. Must be a positive integer (at least `1`). Default: `500`. |
429
+ | `follow_symlinks` | When `false` (default) symlinked directories and files are skipped, so a scan never follows a link cycle or escapes the project root. When `true`, links are followed only when their target exists, has the expected type, and resolves inside the project root. Settable via config file or the Python API only — there is no CLI flag or environment variable for it. |
429
430
 
430
431
  Configurable defaults added in 0.8.1:
431
432
 
@@ -582,13 +583,14 @@ In JSON files the block is the first key in the document:
582
583
  {
583
584
  "_codedoc": {
584
585
  "entry_file": "src/main.py",
585
- "schema_version": "1.4",
586
- "generated_at": "2025-..."
586
+ "schema_version": "1.4"
587
587
  },
588
588
  ...
589
589
  }
590
590
  ```
591
591
 
592
+ Since 0.9.3 the completed output contains no run-varying timestamp: two runs over identical sources, documentation, configuration, and stats produce byte-identical JSON and Markdown. Older outputs that still contain a `generated_at` field remain fully readable. (Live crash-safety backups keep `created_at` / `updated_at` diagnostics.)
593
+
592
594
  In Markdown files it is an HTML comment at the very top. It also embeds `file_hashes` so that subsequent Markdown-only runs can perform incremental hash checks without requiring a sibling JSON file:
593
595
 
594
596
  ```text
@@ -614,7 +616,9 @@ The public `codedoc.json` and `codedoc.md` are structured, human- and AI-readabl
614
616
  - Project-level dependency catalog with deduplicated dependency purpose.
615
617
  - Flattened file summaries (no nested duplication).
616
618
  - Imports, exports, functions, classes.
617
- - Internal, external, and reverse dependencies (`imported_by`).
619
+ - Internal, external, SDK/standard-library, and reverse dependencies (`imported_by`).
620
+
621
+ Since 0.9.3, third-party packages and language standard-library / SDK modules are separated: each file's `links` carry `external_dependencies` (third-party) and `sdk_dependencies` (e.g. Python stdlib, Dart `dart:*`, Node built-ins). The `SDK / Standard Library` Markdown section is rendered only when non-empty, and `internal_dependencies` / `imported_by` are derived **only** from resolved project-graph edges — unresolved agent text can never become an internal link. Missing `sdk_dependencies` loads as an empty list for older outputs.
618
622
 
619
623
  They exclude internal processing data such as raw LLM responses and per-file history.
620
624
 
@@ -639,13 +643,16 @@ Example public JSON:
639
643
  {
640
644
  "path": "schemas/userschema.py",
641
645
  "links": {
642
- "external_dependencies": ["pydantic"]
646
+ "external_dependencies": ["pydantic"],
647
+ "sdk_dependencies": ["typing"]
643
648
  }
644
649
  }
645
650
  ]
646
651
  }
647
652
  ```
648
653
 
654
+ The catalog is grouped by `(type, canonical_name)`, so the same package seen across files merges into one entry, while `external` and `sdk` entries stay distinct. An `internal` catalog hint from the model is kept only when it exactly matches a resolved internal path for that file; otherwise it is reclassified as a third-party / SDK dependency (since 0.9.5, unresolved hints are discarded rather than reclassified).
655
+
649
656
  The file still says what it uses. The shared explanation lives once in the catalog. This keeps JSON smaller, Markdown cleaner, and later agent analysis less noisy.
650
657
 
651
658
  ### JSON and Markdown Conversion
@@ -846,12 +853,6 @@ the paid run, applies the same cap to both, uploads documentation as an
846
853
  artifact, uses `contents: read`, and never commits or pushes. Selected source
847
854
  is sent to an external provider and API usage may cost money.
848
855
 
849
- ### More detail
850
-
851
- [`RUN_FLOW.md`](RUN_FLOW.md) documents the full end-to-end pipeline and every
852
- success, interrupt/resume, and failure scenario across OpenAI, Anthropic, and
853
- Gemini.
854
-
855
856
  ## Python API
856
857
 
857
858
  The CLI is not required. You can run the same workflow from Python with `run_pipeline(...)`.