docforge-gen 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. docforge_gen-0.1.0/.gitignore +21 -0
  2. docforge_gen-0.1.0/LICENSE +21 -0
  3. docforge_gen-0.1.0/PKG-INFO +107 -0
  4. docforge_gen-0.1.0/README.md +43 -0
  5. docforge_gen-0.1.0/docforge/__init__.py +3 -0
  6. docforge_gen-0.1.0/docforge/analysis/__init__.py +5 -0
  7. docforge_gen-0.1.0/docforge/analysis/context_builder.py +131 -0
  8. docforge_gen-0.1.0/docforge/analysis/dependency_detector.py +120 -0
  9. docforge_gen-0.1.0/docforge/analysis/file_walker.py +63 -0
  10. docforge_gen-0.1.0/docforge/analysis/repo_analyzer.py +53 -0
  11. docforge_gen-0.1.0/docforge/cli/__init__.py +3 -0
  12. docforge_gen-0.1.0/docforge/cli/app.py +29 -0
  13. docforge_gen-0.1.0/docforge/cli/commands/__init__.py +0 -0
  14. docforge_gen-0.1.0/docforge/cli/commands/generate.py +171 -0
  15. docforge_gen-0.1.0/docforge/cli/console.py +3 -0
  16. docforge_gen-0.1.0/docforge/config/__init__.py +4 -0
  17. docforge_gen-0.1.0/docforge/config/loader.py +48 -0
  18. docforge_gen-0.1.0/docforge/config/schema.py +40 -0
  19. docforge_gen-0.1.0/docforge/generators/__init__.py +16 -0
  20. docforge_gen-0.1.0/docforge/generators/api_generator.py +111 -0
  21. docforge_gen-0.1.0/docforge/generators/architecture_generator.py +18 -0
  22. docforge_gen-0.1.0/docforge/generators/base_generator.py +29 -0
  23. docforge_gen-0.1.0/docforge/generators/changelog_generator.py +25 -0
  24. docforge_gen-0.1.0/docforge/generators/installation_generator.py +22 -0
  25. docforge_gen-0.1.0/docforge/generators/readme_generator.py +20 -0
  26. docforge_gen-0.1.0/docforge/git/__init__.py +4 -0
  27. docforge_gen-0.1.0/docforge/git/changelog_builder.py +88 -0
  28. docforge_gen-0.1.0/docforge/git/repo_reader.py +44 -0
  29. docforge_gen-0.1.0/docforge/llm/__init__.py +5 -0
  30. docforge_gen-0.1.0/docforge/llm/chunker.py +69 -0
  31. docforge_gen-0.1.0/docforge/llm/client.py +49 -0
  32. docforge_gen-0.1.0/docforge/llm/prompt_manager.py +23 -0
  33. docforge_gen-0.1.0/docforge/output/__init__.py +5 -0
  34. docforge_gen-0.1.0/docforge/output/github_action_writer.py +34 -0
  35. docforge_gen-0.1.0/docforge/output/mkdocs_builder.py +29 -0
  36. docforge_gen-0.1.0/docforge/output/writer.py +21 -0
  37. docforge_gen-0.1.0/docforge/parsing/__init__.py +4 -0
  38. docforge_gen-0.1.0/docforge/parsing/base_parser.py +39 -0
  39. docforge_gen-0.1.0/docforge/parsing/go_parser.py +99 -0
  40. docforge_gen-0.1.0/docforge/parsing/java_parser.py +113 -0
  41. docforge_gen-0.1.0/docforge/parsing/javascript_parser.py +84 -0
  42. docforge_gen-0.1.0/docforge/parsing/python_parser.py +103 -0
  43. docforge_gen-0.1.0/docforge/parsing/registry.py +29 -0
  44. docforge_gen-0.1.0/docforge/parsing/rust_parser.py +107 -0
  45. docforge_gen-0.1.0/docforge/parsing/typescript_parser.py +105 -0
  46. docforge_gen-0.1.0/docforge/source/__init__.py +3 -0
  47. docforge_gen-0.1.0/docforge/source/github_fetcher.py +58 -0
  48. docforge_gen-0.1.0/docforge/source/local_fetcher.py +12 -0
  49. docforge_gen-0.1.0/docforge/source/resolver.py +17 -0
  50. docforge_gen-0.1.0/docforge/templates/outputs/github_action.yml.j2 +50 -0
  51. docforge_gen-0.1.0/docforge/templates/outputs/mkdocs.yml.j2 +53 -0
  52. docforge_gen-0.1.0/docforge/templates/prompts/api_module.j2 +38 -0
  53. docforge_gen-0.1.0/docforge/templates/prompts/architecture.j2 +42 -0
  54. docforge_gen-0.1.0/docforge/templates/prompts/changelog.j2 +22 -0
  55. docforge_gen-0.1.0/docforge/templates/prompts/installation.j2 +41 -0
  56. docforge_gen-0.1.0/docforge/templates/prompts/readme.j2 +47 -0
  57. docforge_gen-0.1.0/pyproject.toml +71 -0
  58. docforge_gen-0.1.0/tests/__init__.py +0 -0
  59. docforge_gen-0.1.0/tests/conftest.py +15 -0
  60. docforge_gen-0.1.0/tests/fixtures/sample_go/main.go +25 -0
  61. docforge_gen-0.1.0/tests/fixtures/sample_python/main.py +26 -0
  62. docforge_gen-0.1.0/tests/unit/__init__.py +0 -0
  63. docforge_gen-0.1.0/tests/unit/test_generators.py +38 -0
  64. docforge_gen-0.1.0/tests/unit/test_parsers.py +66 -0
@@ -0,0 +1,21 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ build/
7
+ dist/
8
+ *.egg-info/
9
+ .eggs/
10
+ .env
11
+ .venv
12
+ venv/
13
+ env/
14
+ .pytest_cache/
15
+ .ruff_cache/
16
+ .mypy_cache/
17
+ htmlcov/
18
+ .coverage
19
+ *.log
20
+ docs/
21
+ .docforge_tmp/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 DocForge Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.4
2
+ Name: docforge-gen
3
+ Version: 0.1.0
4
+ Summary: Automatically generate documentation for any GitHub repository using LLMs
5
+ Project-URL: Homepage, https://github.com/your-org/docforge
6
+ Project-URL: Repository, https://github.com/your-org/docforge
7
+ Project-URL: Issues, https://github.com/your-org/docforge/issues
8
+ License: MIT License
9
+
10
+ Copyright (c) 2026 DocForge Contributors
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ License-File: LICENSE
30
+ Keywords: cli,code-analysis,documentation,llm,tree-sitter
31
+ Classifier: Development Status :: 3 - Alpha
32
+ Classifier: Environment :: Console
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: Topic :: Software Development :: Documentation
38
+ Requires-Python: >=3.11
39
+ Requires-Dist: gitpython>=3.1.40
40
+ Requires-Dist: httpx>=0.27.0
41
+ Requires-Dist: jinja2>=3.1.4
42
+ Requires-Dist: litellm>=1.52.0
43
+ Requires-Dist: pydantic>=2.8.0
44
+ Requires-Dist: rich>=14.0.0
45
+ Requires-Dist: tenacity>=9.0.0
46
+ Requires-Dist: tiktoken>=0.7.0
47
+ Requires-Dist: tree-sitter-go>=0.23.0
48
+ Requires-Dist: tree-sitter-java>=0.23.0
49
+ Requires-Dist: tree-sitter-javascript>=0.23.0
50
+ Requires-Dist: tree-sitter-python>=0.23.0
51
+ Requires-Dist: tree-sitter-rust>=0.23.0
52
+ Requires-Dist: tree-sitter-typescript>=0.23.0
53
+ Requires-Dist: tree-sitter>=0.23.0
54
+ Requires-Dist: typer>=0.12.0
55
+ Provides-Extra: dev
56
+ Requires-Dist: coverage>=7.5.0; extra == 'dev'
57
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
58
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
59
+ Requires-Dist: ruff>=0.4.0; extra == 'dev'
60
+ Provides-Extra: mkdocs
61
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'mkdocs'
62
+ Requires-Dist: mkdocs>=1.6.0; extra == 'mkdocs'
63
+ Description-Content-Type: text/markdown
64
+
65
+ # DocForge
66
+
67
+ AI-powered documentation generator for any GitHub repository or local project.
68
+
69
+ ## Quick Start
70
+
71
+ ```bash
72
+ pip install docforge-gen
73
+
74
+ # From GitHub URL
75
+ docforge generate https://github.com/user/repo
76
+
77
+ # From local path
78
+ docforge generate ./my-project
79
+
80
+ # With Ollama (local, free)
81
+ docforge generate ./my-project --model ollama/llama3 --api-base http://localhost:11434
82
+ ```
83
+
84
+ ## What it generates
85
+
86
+ - `README.md` — Comprehensive project overview
87
+ - `INSTALLATION.md` — Setup and installation guide
88
+ - `ARCHITECTURE.md` — System architecture with Mermaid diagrams
89
+ - `CHANGELOG.md` — Formatted changelog from git history
90
+ - `api/*.md` — Per-module API reference docs
91
+ - `mkdocs.yml` — Optional MkDocs site config (`--mkdocs`)
92
+ - `.github/workflows/auto-docs.yml` — Optional GitHub Action (`--github-action`)
93
+
94
+ ## Supported Languages
95
+
96
+ Python, JavaScript, TypeScript, Go, Rust, Java
97
+
98
+ ## Supported LLM Providers
99
+
100
+ - **OpenAI** (default): `--model gpt-4o`
101
+ - **Anthropic Claude**: `--model claude-sonnet-4-6`
102
+ - **Ollama (local)**: `--model ollama/llama3 --api-base http://localhost:11434`
103
+ - Any provider supported by [LiteLLM](https://docs.litellm.ai)
104
+
105
+ ## License
106
+
107
+ MIT
@@ -0,0 +1,43 @@
1
+ # DocForge
2
+
3
+ AI-powered documentation generator for any GitHub repository or local project.
4
+
5
+ ## Quick Start
6
+
7
+ ```bash
8
+ pip install docforge-gen
9
+
10
+ # From GitHub URL
11
+ docforge generate https://github.com/user/repo
12
+
13
+ # From local path
14
+ docforge generate ./my-project
15
+
16
+ # With Ollama (local, free)
17
+ docforge generate ./my-project --model ollama/llama3 --api-base http://localhost:11434
18
+ ```
19
+
20
+ ## What it generates
21
+
22
+ - `README.md` — Comprehensive project overview
23
+ - `INSTALLATION.md` — Setup and installation guide
24
+ - `ARCHITECTURE.md` — System architecture with Mermaid diagrams
25
+ - `CHANGELOG.md` — Formatted changelog from git history
26
+ - `api/*.md` — Per-module API reference docs
27
+ - `mkdocs.yml` — Optional MkDocs site config (`--mkdocs`)
28
+ - `.github/workflows/auto-docs.yml` — Optional GitHub Action (`--github-action`)
29
+
30
+ ## Supported Languages
31
+
32
+ Python, JavaScript, TypeScript, Go, Rust, Java
33
+
34
+ ## Supported LLM Providers
35
+
36
+ - **OpenAI** (default): `--model gpt-4o`
37
+ - **Anthropic Claude**: `--model claude-sonnet-4-6`
38
+ - **Ollama (local)**: `--model ollama/llama3 --api-base http://localhost:11434`
39
+ - Any provider supported by [LiteLLM](https://docs.litellm.ai)
40
+
41
+ ## License
42
+
43
+ MIT
@@ -0,0 +1,3 @@
1
+ """DocForge - Automatically generate documentation for any GitHub repository using LLMs."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,5 @@
1
+ from .context_builder import RepoContext, CommitGroup, build_context
2
+ from .repo_analyzer import analyze_repo
3
+ from .dependency_detector import detect_dependencies
4
+
5
+ __all__ = ["RepoContext", "CommitGroup", "build_context", "analyze_repo", "detect_dependencies"]
@@ -0,0 +1,131 @@
1
+ from dataclasses import dataclass, field
2
+ from pathlib import Path
3
+
4
+ from ..parsing.base_parser import ParsedFile
5
+
6
+
7
@dataclass
class CommitGroup:
    """A batch of commits that belong under one changelog heading."""

    tag: str  # "v1.2.0" or "Unreleased"
    date: str  # date label for the group; the exact format is decided by the producer
    commits: list[dict] = field(default_factory=list)  # one dict per commit; keys are set by the producer
12
+
13
+
14
@dataclass
class RepoContext:
    """Everything known about one repository, as assembled by build_context()."""

    name: str  # GitHub metadata "name", else the repo directory name
    description: str  # GitHub metadata "description", else a line inferred from the README
    primary_language: str  # first entry of `languages`, or "unknown" when that list is empty
    languages: list[str]  # languages of the parsed files, most frequent first
    file_tree: list[str]  # sorted parsed-file paths, relative to the repo root where possible
    parsed_files: list[ParsedFile]
    dependencies: dict[str, list[str]]  # ecosystem name -> dependency entries
    commit_groups: list[CommitGroup]
    github_url: str | None = None  # "html_url" from GitHub metadata, when available
    topics: list[str] = field(default_factory=list)  # "topics" from GitHub metadata
    license: str | None = None  # license "name" from GitHub metadata, when available
    has_tests: bool = False
    has_ci: bool = False
    entry_points: list[str] = field(default_factory=list)  # well-known entry files found at the repo root
30
+
31
+
32
def build_context(
    repo_path: Path,
    parsed_files: list[ParsedFile],
    dependencies: dict[str, list[str]],
    commit_groups: list[CommitGroup],
    gh_metadata: dict | None = None,
) -> RepoContext:
    """Combine parsed files, dependencies, commits and GitHub metadata into a RepoContext.

    Args:
        repo_path: Root directory of the checked-out repository.
        parsed_files: Parser output for the repository's source files.
        dependencies: Dependency entries keyed by ecosystem.
        commit_groups: Commits already grouped for the changelog.
        gh_metadata: Optional GitHub repository metadata dict; the keys read
            here are "name", "description", "html_url", "topics" and "license".

    Returns:
        A fully populated RepoContext.
    """
    meta = gh_metadata or {}

    # Values supplied by GitHub win; local inference is only the fallback.
    repo_name = meta.get("name") or repo_path.name
    summary = meta.get("description") or _infer_description(parsed_files, repo_path)

    ranked_languages = _detect_languages(parsed_files)
    license_info = meta.get("license")

    return RepoContext(
        name=repo_name,
        description=summary,
        primary_language=ranked_languages[0] if ranked_languages else "unknown",
        languages=ranked_languages,
        file_tree=_build_file_tree(repo_path, parsed_files),
        parsed_files=parsed_files,
        dependencies=dependencies,
        commit_groups=commit_groups,
        github_url=meta.get("html_url"),
        topics=meta.get("topics", []),
        # "license" may be missing or null in the metadata; only dig into it when present.
        license=license_info.get("name") if license_info else None,
        has_tests=_has_tests(repo_path),
        has_ci=_has_ci(repo_path),
        entry_points=_find_entry_points(repo_path, parsed_files),
    )
66
+
67
+
68
+ def _detect_languages(parsed_files: list[ParsedFile]) -> list[str]:
69
+ counts: dict[str, int] = {}
70
+ for f in parsed_files:
71
+ counts[f.language] = counts.get(f.language, 0) + 1
72
+ return sorted(counts, key=lambda k: counts[k], reverse=True)
73
+
74
+
75
+ def _build_file_tree(repo_path: Path, parsed_files: list[ParsedFile]) -> list[str]:
76
+ paths = []
77
+ for f in parsed_files:
78
+ try:
79
+ rel = Path(f.path).relative_to(repo_path)
80
+ paths.append(str(rel))
81
+ except ValueError:
82
+ paths.append(f.path)
83
+ return sorted(paths)
84
+
85
+
86
+ def _infer_description(parsed_files: list[ParsedFile], repo_path: Path) -> str:
87
+ # Try to read existing README for description
88
+ for readme_name in ["README.md", "README.rst", "README.txt", "README"]:
89
+ readme = repo_path / readme_name
90
+ if readme.exists():
91
+ try:
92
+ content = readme.read_text(errors="replace")
93
+ lines = [l.strip() for l in content.splitlines() if l.strip()]
94
+ # Find first non-heading line
95
+ for line in lines:
96
+ if not line.startswith("#") and len(line) > 20:
97
+ return line[:200]
98
+ except Exception:
99
+ pass
100
+ return "A software project"
101
+
102
+
103
+ def _has_tests(repo_path: Path) -> bool:
104
+ test_indicators = ["tests/", "test/", "spec/", "__tests__/", "test_*.py", "*_test.go"]
105
+ for indicator in test_indicators:
106
+ if list(repo_path.glob(f"**/{indicator}")):
107
+ return True
108
+ return False
109
+
110
+
111
+ def _has_ci(repo_path: Path) -> bool:
112
+ ci_paths = [
113
+ ".github/workflows",
114
+ ".gitlab-ci.yml",
115
+ ".circleci/config.yml",
116
+ "Jenkinsfile",
117
+ ".travis.yml",
118
+ ]
119
+ for ci_path in ci_paths:
120
+ if (repo_path / ci_path).exists():
121
+ return True
122
+ return False
123
+
124
+
125
+ def _find_entry_points(repo_path: Path, parsed_files: list[ParsedFile]) -> list[str]:
126
+ entry_points = []
127
+ candidates = ["main.py", "app.py", "cli.py", "main.go", "main.rs", "index.js", "index.ts"]
128
+ for candidate in candidates:
129
+ if (repo_path / candidate).exists():
130
+ entry_points.append(candidate)
131
+ return entry_points
@@ -0,0 +1,120 @@
1
+ import json
2
+ import tomllib
3
+ from pathlib import Path
4
+
5
+
6
def detect_dependencies(repo_path: Path) -> dict[str, list[str]]:
    """Collect dependencies from the repo's manifest files, keyed by ecosystem.

    Ecosystems for which nothing was found are left out of the result.
    """
    # Order matters only for the key order of the returned dict.
    detectors = (
        ("python", _detect_python),
        ("javascript", _detect_javascript),  # also covers TypeScript projects
        ("go", _detect_go),
        ("rust", _detect_rust),
        ("java", _detect_java),
    )

    found: dict[str, list[str]] = {}
    for ecosystem, detect in detectors:
        entries = detect(repo_path)
        if entries:
            found[ecosystem] = entries
    return found
36
+
37
+
38
+ def _detect_python(repo_path: Path) -> list[str]:
39
+ deps = []
40
+
41
+ pyproject = repo_path / "pyproject.toml"
42
+ if pyproject.exists():
43
+ try:
44
+ with open(pyproject, "rb") as f:
45
+ data = tomllib.load(f)
46
+ deps += data.get("project", {}).get("dependencies", [])
47
+ except Exception:
48
+ pass
49
+
50
+ requirements = repo_path / "requirements.txt"
51
+ if requirements.exists():
52
+ try:
53
+ lines = requirements.read_text().splitlines()
54
+ deps += [l.strip() for l in lines if l.strip() and not l.startswith("#")]
55
+ except Exception:
56
+ pass
57
+
58
+ return deps
59
+
60
+
61
+ def _detect_javascript(repo_path: Path) -> list[str]:
62
+ package_json = repo_path / "package.json"
63
+ if not package_json.exists():
64
+ return []
65
+ try:
66
+ data = json.loads(package_json.read_text())
67
+ deps = list(data.get("dependencies", {}).keys())
68
+ deps += list(data.get("devDependencies", {}).keys())
69
+ return deps
70
+ except Exception:
71
+ return []
72
+
73
+
74
+ def _detect_go(repo_path: Path) -> list[str]:
75
+ go_mod = repo_path / "go.mod"
76
+ if not go_mod.exists():
77
+ return []
78
+ deps = []
79
+ try:
80
+ for line in go_mod.read_text().splitlines():
81
+ line = line.strip()
82
+ if line.startswith("require ") or (line and not line.startswith("//") and " v" in line):
83
+ parts = line.split()
84
+ if len(parts) >= 2 and "/" in parts[0]:
85
+ deps.append(parts[0])
86
+ except Exception:
87
+ pass
88
+ return deps
89
+
90
+
91
+ def _detect_rust(repo_path: Path) -> list[str]:
92
+ cargo_toml = repo_path / "Cargo.toml"
93
+ if not cargo_toml.exists():
94
+ return []
95
+ try:
96
+ with open(cargo_toml, "rb") as f:
97
+ data = tomllib.load(f)
98
+ deps = list(data.get("dependencies", {}).keys())
99
+ deps += list(data.get("dev-dependencies", {}).keys())
100
+ return deps
101
+ except Exception:
102
+ return []
103
+
104
+
105
+ def _detect_java(repo_path: Path) -> list[str]:
106
+ # Basic pom.xml detection
107
+ pom = repo_path / "pom.xml"
108
+ if pom.exists():
109
+ try:
110
+ content = pom.read_text()
111
+ # Very basic extraction - just flag that Maven is used
112
+ return ["[Maven - see pom.xml]"]
113
+ except Exception:
114
+ pass
115
+
116
+ build_gradle = repo_path / "build.gradle"
117
+ if build_gradle.exists():
118
+ return ["[Gradle - see build.gradle]"]
119
+
120
+ return []
@@ -0,0 +1,63 @@
1
+ import fnmatch
2
+ from pathlib import Path
3
+
4
+
5
# Lower-case file suffixes that walk_repo() skips outright, before any other check.
# Mostly binary formats (images, archives, executables, fonts, media, databases);
# a few non-source suffixes such as ".svg" and ".lock" are listed here as well.
BINARY_EXTENSIONS = {
    ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg",
    ".pdf", ".zip", ".tar", ".gz", ".bz2", ".xz", ".rar",
    ".exe", ".dll", ".so", ".dylib", ".a", ".o",
    ".woff", ".woff2", ".ttf", ".eot",
    ".mp3", ".mp4", ".wav", ".avi", ".mov",
    ".db", ".sqlite", ".lock",
}
13
+
14
+
15
def walk_repo(
    repo_path: Path,
    exclude_patterns: list[str],
    max_file_size_kb: int = 512,
    supported_extensions: list[str] | None = None,
):
    """
    Iterate over the code files below repo_path in sorted path order.

    Yields (file_path, extension) pairs, where extension is the lower-cased
    suffix. A file is skipped when its suffix is in BINARY_EXTENSIONS, when
    its repo-relative path matches an exclude pattern, when
    supported_extensions is given and does not contain its suffix, or when it
    is larger than max_file_size_kb (or its size cannot be read).
    """
    size_cap = max_file_size_kb * 1024

    for candidate in sorted(repo_path.rglob("*")):
        if not candidate.is_file():
            continue

        ext = candidate.suffix.lower()
        if ext in BINARY_EXTENSIONS:
            continue

        if _is_excluded(str(candidate.relative_to(repo_path)), exclude_patterns):
            continue

        if supported_extensions and ext not in supported_extensions:
            continue

        try:
            oversized = candidate.stat().st_size > size_cap
        except OSError:
            # The file vanished or is unreadable: treat it like any other skip.
            continue
        if oversized:
            continue

        yield candidate, ext
53
+
54
+
55
+ def _is_excluded(relative_path: str, patterns: list[str]) -> bool:
56
+ for pattern in patterns:
57
+ if fnmatch.fnmatch(relative_path, pattern):
58
+ return True
59
+ # Also match path components
60
+ parts = relative_path.replace("\\", "/")
61
+ if fnmatch.fnmatch(parts, pattern):
62
+ return True
63
+ return False
@@ -0,0 +1,53 @@
1
+ from concurrent.futures import ThreadPoolExecutor, as_completed
2
+ from pathlib import Path
3
+
4
+ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
5
+
6
+ from ..config.schema import DocForgeConfig
7
+ from ..parsing.base_parser import ParsedFile
8
+ from ..parsing.registry import get_parser, supported_extensions
9
+ from .file_walker import walk_repo
10
+
11
+
12
def analyze_repo(repo_path: Path, config: DocForgeConfig) -> list[ParsedFile]:
    """Parse every supported code file under repo_path.

    Files are discovered with walk_repo() using the exclude patterns and size
    limit from ``config``, then parsed on a small thread pool while a
    transient progress bar is shown. Files with no registered parser, and
    files whose read or parse raises, are silently left out.

    Args:
        repo_path: Root directory of the repository.
        config: Supplies ``exclude_patterns`` and ``max_file_size_kb``.

    Returns:
        The successfully parsed files, in the same order walk_repo() yielded
        them.
    """
    exts = supported_extensions()
    files_to_parse = list(walk_repo(
        repo_path,
        exclude_patterns=config.exclude_patterns,
        max_file_size_kb=config.max_file_size_kb,
        supported_extensions=exts,
    ))

    with Progress(
        SpinnerColumn(),
        TextColumn("[bold blue]Parsing files..."),
        BarColumn(),
        TaskProgressColumn(),
        transient=True,
    ) as progress:
        task = progress.add_task("parsing", total=len(files_to_parse))

        def parse_file(args):
            file_path, ext = args
            parser = get_parser(ext)
            if parser is None:
                return None
            try:
                source = file_path.read_bytes()
                return parser.parse(file_path, source)
            except Exception:
                # Best effort: one unreadable or unparsable file must not abort the run.
                return None
            finally:
                # Runs on every path above, so the bar always reaches its total.
                progress.advance(task)

        with ThreadPoolExecutor(max_workers=8) as executor:
            # map() returns results in submission order, so the output is
            # deterministic; collecting in completion order made downstream
            # tie-breaking (e.g. the primary language) vary between runs.
            results = executor.map(parse_file, files_to_parse)
            parsed: list[ParsedFile] = [result for result in results if result is not None]

    return parsed
@@ -0,0 +1,3 @@
1
+ from docforge import __version__
2
+
3
+ __all__ = ["__version__"]
@@ -0,0 +1,29 @@
1
+ import typer
2
+ from . import __version__
3
+ from .commands.generate import generate
4
+
5
# Root Typer application object for the `docforge` command line.
app = typer.Typer(
    name="docforge",
    help="AI-powered documentation generator for any GitHub repository.",
    add_completion=False,
    rich_markup_mode="rich",
)

# Register the imported `generate` function as the `generate` sub-command.
app.command(name="generate", help="Generate documentation for a repo.")(generate)
13
+
14
+
15
def version_callback(value: bool):
    """Print the DocForge version and stop the CLI when the flag was given."""
    if not value:
        return
    typer.echo(f"DocForge v{__version__}")
    raise typer.Exit()
19
+
20
+
21
@app.callback()
def main_callback(
    # --version / -v is handled by version_callback, which prints the version and exits;
    # is_eager=True asks for this option to be processed ahead of the others.
    version: bool = typer.Option(None, "--version", "-v", callback=version_callback, is_eager=True),
):
    # Intentionally empty: this callback exists only to attach the top-level option above.
    pass
26
+
27
+
28
def main():
    """Run the DocForge Typer application."""
    app()
File without changes