codeannex 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Tanhleno Teixeira de Sousa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,147 @@
1
+ Metadata-Version: 2.4
2
+ Name: codeannex
3
+ Version: 0.1.1
4
+ Summary: Generates a professional PDF source code annex with Smart Index, Images and Emoji support.
5
+ License: MIT
6
+ Project-URL: Repository, https://github.com/tanhleno/codeannex
7
+ Keywords: pdf,source-code,documentation,annex,reportlab,emoji
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Documentation
17
+ Classifier: Topic :: Software Development :: Documentation
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: reportlab>=4.0
22
+ Requires-Dist: Pillow>=10.0
23
+ Requires-Dist: Pygments>=2.16
24
+ Requires-Dist: pdfminer.six>=20231228
25
+ Provides-Extra: svg
26
+ Requires-Dist: cairosvg>=2.7; extra == "svg"
27
+ Requires-Dist: svglib>=1.5; extra == "svg"
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.0; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # 📂 codeannex
33
+
34
+ Generates a professional PDF annex from a project's source code — with syntax highlighting, a clickable table of contents, image rendering, non-selectable line numbers, and configurable fonts.
35
+
36
+ ## Features
37
+
38
+ - **Syntax highlighting** via Pygments (Catppuccin Mocha theme)
39
+ - **Clickable table of contents** with page numbers (two-pass layout)
40
+ - **Smart word wrap** — breaks at word boundaries, respects emoji codepoints
41
+ - **Non-selectable line numbers** rendered as images
42
+ - **Image support** — embeds PNG, JPG, GIF, WebP, BMP, ICO, and SVG files
43
+ - **Git-aware** — uses `git ls-files` when available; falls back to manual `.gitignore` parsing
44
+ - **Cross-platform font discovery** — finds monospace and emoji fonts on Windows, macOS, and Linux
45
+ - **Emoji support** — configurable fonts for emojis, with fallback for better rendering.
46
+ - *Tip for Linux:* Install `fonts-symbola` for high-coverage monochromatic emoji support that works reliably with ReportLab.
47
+ - **Emoji descriptions** — option to print `[EMOJI NAME]` instead of glyphs for systems without emoji fonts.
48
+ - **Configurable fonts** — customize normal, bold, monospace, and emoji fonts.
49
+ - **Page numbering** — customizable font size and position.
50
+ - **Repository Link** — show a clickable repository link on the cover page.
51
+ - **Fast binary file detection** — quickly ignores common binary extensions.
52
+
53
+ ## Requirements
54
+
55
+ - Python >= 3.11
56
+ - `reportlab`, `Pillow`, `Pygments`, `pdfminer.six`
57
+
58
+ Optional SVG rendering (install at least one):
59
+ - `cairosvg` (preferred)
60
+ - `svglib`
61
+
62
+ For development and testing:
63
+ - `pytest` (install with `pip install -e ".[dev]"`)
64
+
65
+ ## Installation
66
+
67
+ ```bash
68
+ pip install codeannex
69
+ ```
70
+
71
+ With SVG support:
72
+
73
+ ```bash
74
+ pip install "codeannex[svg]"
75
+ ```
76
+ ## Requirements
77
+
78
+ - Python >= 3.11
79
+ - `reportlab`, `Pillow`, `Pygments`, `pdfminer.six`
80
+
81
+ Optional SVG rendering (install at least one):
82
+ ...
83
+ ## Usage
84
+
85
+ ```bash
86
+ # Annotate current directory
87
+ codeannex
88
+
89
+ # Annotate a specific project
90
+ codeannex /path/to/project
91
+
92
+ # Custom output filename
93
+ codeannex /path/to/project -o my_annex.pdf
94
+
95
+ # Customized configuration (margins in cm)
96
+ codeannex /path/to/project \
97
+ --name "My Project" \
98
+ --repo-url "https://github.com/user/repo" \
99
+ --margin 1.5 \
100
+ --margin-top 2.5 \
101
+ --page-number-size 10 \
102
+ --show-project
103
+ ```
104
+
105
+ ### Configuration Options
106
+
107
+ - `--name NAME` — Custom project name (default: directory name).
108
+ - `--repo-url URL` — Repository URL to show as a clickable link on the cover.
109
+ - `--start-page N` — Starting page number (default: 1).
110
+ - `--margin CM` — General margin in cm for all sides.
111
+ - `--margin-left CM` — Left margin in cm (default: 1.5).
112
+ - `--margin-right CM` — Right margin in cm (default: 1.5).
113
+ - `--margin-top CM` — Top margin in cm (default: 2.0).
114
+ - `--margin-bottom CM` — Bottom margin in cm (default: 2.0).
115
+ - `--show-project` — Show project name in page footer (default: off).
116
+ - `--page-number-size N` — Font size for page numbers (default: 8).
117
+ - `--normal-font FONT` — Font for normal text (default: Helvetica).
118
+ - `--bold-font FONT` — Font for bold text (default: Helvetica-Bold).
119
+ - `--mono-font FONT` — Monospace font for code (default: auto-detect).
120
+ - `--emoji-font FONT` — Font for emojis (default: auto-detect).
121
+ - `--emoji-description` — Print `[DESCRIPTION]` instead of emoji glyphs.
122
+ - `--check-emoji-font` — Check current emoji font style and exit.
123
+
124
+
125
+ ## Testing
126
+
127
+ Run the test suite with pytest:
128
+
129
+ ```bash
130
+ pytest
131
+ ```
132
+
133
+ ## Output
134
+
135
+ The generated PDF contains:
136
+
137
+ 1. **Cover page** — project name with page number
138
+ 2. **Table of contents** — tree-structured, with clickable links to each file
139
+ 3. **File pages** — syntax-highlighted source, with file path headers and line numbers
140
+
141
+ Files are ordered: root files first (alphabetical), then directories recursively (alphabetical).
142
+
143
+ Binary files (e.g., .pdf, .exe) are automatically ignored with warnings if not in .gitignore.
144
+
145
+ ## License
146
+
147
+ MIT
@@ -0,0 +1,116 @@
1
+ # 📂 codeannex
2
+
3
+ Generates a professional PDF annex from a project's source code — with syntax highlighting, a clickable table of contents, image rendering, non-selectable line numbers, and configurable fonts.
4
+
5
+ ## Features
6
+
7
+ - **Syntax highlighting** via Pygments (Catppuccin Mocha theme)
8
+ - **Clickable table of contents** with page numbers (two-pass layout)
9
+ - **Smart word wrap** — breaks at word boundaries, respects emoji codepoints
10
+ - **Non-selectable line numbers** rendered as images
11
+ - **Image support** — embeds PNG, JPG, GIF, WebP, BMP, ICO, and SVG files
12
+ - **Git-aware** — uses `git ls-files` when available; falls back to manual `.gitignore` parsing
13
+ - **Cross-platform font discovery** — finds monospace and emoji fonts on Windows, macOS, and Linux
14
+ - **Emoji support** — configurable fonts for emojis, with fallback for better rendering.
15
+ - *Tip for Linux:* Install `fonts-symbola` for high-coverage monochromatic emoji support that works reliably with ReportLab.
16
+ - **Emoji descriptions** — option to print `[EMOJI NAME]` instead of glyphs for systems without emoji fonts.
17
+ - **Configurable fonts** — customize normal, bold, monospace, and emoji fonts.
18
+ - **Page numbering** — customizable font size and position.
19
+ - **Repository Link** — show a clickable repository link on the cover page.
20
+ - **Fast binary file detection** — quickly ignores common binary extensions.
21
+
22
+ ## Requirements
23
+
24
+ - Python >= 3.11
25
+ - `reportlab`, `Pillow`, `Pygments`, `pdfminer.six`
26
+
27
+ Optional SVG rendering (install at least one):
28
+ - `cairosvg` (preferred)
29
+ - `svglib`
30
+
31
+ For development and testing:
32
+ - `pytest` (install with `pip install -e ".[dev]"`)
33
+
34
+ ## Installation
35
+
36
+ ```bash
37
+ pip install codeannex
38
+ ```
39
+
40
+ With SVG support:
41
+
42
+ ```bash
43
+ pip install "codeannex[svg]"
44
+ ```
45
+ ## Requirements
46
+
47
+ - Python >= 3.11
48
+ - `reportlab`, `Pillow`, `Pygments`, `pdfminer.six`
49
+
50
+ Optional SVG rendering (install at least one):
51
+ ...
52
+ ## Usage
53
+
54
+ ```bash
55
+ # Annotate current directory
56
+ codeannex
57
+
58
+ # Annotate a specific project
59
+ codeannex /path/to/project
60
+
61
+ # Custom output filename
62
+ codeannex /path/to/project -o my_annex.pdf
63
+
64
+ # Customized configuration (margins in cm)
65
+ codeannex /path/to/project \
66
+ --name "My Project" \
67
+ --repo-url "https://github.com/user/repo" \
68
+ --margin 1.5 \
69
+ --margin-top 2.5 \
70
+ --page-number-size 10 \
71
+ --show-project
72
+ ```
73
+
74
+ ### Configuration Options
75
+
76
+ - `--name NAME` — Custom project name (default: directory name).
77
+ - `--repo-url URL` — Repository URL to show as a clickable link on the cover.
78
+ - `--start-page N` — Starting page number (default: 1).
79
+ - `--margin CM` — General margin in cm for all sides.
80
+ - `--margin-left CM` — Left margin in cm (default: 1.5).
81
+ - `--margin-right CM` — Right margin in cm (default: 1.5).
82
+ - `--margin-top CM` — Top margin in cm (default: 2.0).
83
+ - `--margin-bottom CM` — Bottom margin in cm (default: 2.0).
84
+ - `--show-project` — Show project name in page footer (default: off).
85
+ - `--page-number-size N` — Font size for page numbers (default: 8).
86
+ - `--normal-font FONT` — Font for normal text (default: Helvetica).
87
+ - `--bold-font FONT` — Font for bold text (default: Helvetica-Bold).
88
+ - `--mono-font FONT` — Monospace font for code (default: auto-detect).
89
+ - `--emoji-font FONT` — Font for emojis (default: auto-detect).
90
+ - `--emoji-description` — Print `[DESCRIPTION]` instead of emoji glyphs.
91
+ - `--check-emoji-font` — Check current emoji font style and exit.
92
+
93
+
94
+ ## Testing
95
+
96
+ Run the test suite with pytest:
97
+
98
+ ```bash
99
+ pytest
100
+ ```
101
+
102
+ ## Output
103
+
104
+ The generated PDF contains:
105
+
106
+ 1. **Cover page** — project name with page number
107
+ 2. **Table of contents** — tree-structured, with clickable links to each file
108
+ 3. **File pages** — syntax-highlighted source, with file path headers and line numbers
109
+
110
+ Files are ordered: root files first (alphabetical), then directories recursively (alphabetical).
111
+
112
+ Binary files (e.g., .pdf, .exe) are automatically ignored with warnings if not in .gitignore.
113
+
114
+ ## License
115
+
116
+ MIT
File without changes
@@ -0,0 +1,157 @@
1
+ import argparse
2
+ import io
3
+ import os
4
+ from pathlib import Path
5
+
6
+ from .config import IMAGE_EXTENSIONS, BINARY_EXTENSIONS, PDFConfig
7
+ from .file_utils import get_project_files, classify_file, sort_files
8
+ from .fonts import init_sprites, register_best_font, register_emoji_font
9
+ from .pdf_builder import ModernAnnexPDF
10
+ from reportlab.lib.units import cm
11
+
12
+
13
def check_emoji_font_style():
    """Report which emoji font, if any, gets registered, and return it.

    Calls ``register_emoji_font()`` with its default arguments and prints a
    short status message to stdout.

    Returns:
        The value returned by ``register_emoji_font()``; a falsy value means
        no emoji font was registered.
    """
    from .fonts import register_emoji_font

    emoji_font = register_emoji_font()
    if emoji_font:
        # NOTE: this is a simplification - only the value returned by
        # register_emoji_font() is shown; the font file path is not stored here.
        print(f"✅ Emoji font registered: {emoji_font}")
        print("ℹ️ To check if using Google-like style, look for 'Noto' in the font path above")
        print("💡 Tip: Install Google Noto fonts for authentic Google emoji style")
    else:
        print("⚠️ No emoji font found - emojis may not render correctly")
    return emoji_font
27
+
28
+
29
def _build_arg_parser():
    """Build the argparse parser describing every command-line option."""
    parser = argparse.ArgumentParser(description="Generates a PDF code annex with Smart Index and Images.")
    parser.add_argument("dir", nargs="?", default=".", help="Project directory")
    parser.add_argument("-o", "--output", default=None, help="Output PDF filename")
    parser.add_argument("-n", "--name", default=None, help="Project name (default: directory name)")
    parser.add_argument("--margin", type=float, default=None, help="General margin in cm for all sides")
    parser.add_argument("--margin-left", type=float, default=None, help="Left margin in cm (default: 1.5cm)")
    parser.add_argument("--margin-right", type=float, default=None, help="Right margin in cm (default: 1.5cm)")
    parser.add_argument("--margin-top", type=float, default=None, help="Top margin in cm (default: 2.0cm)")
    parser.add_argument("--margin-bottom", type=float, default=None, help="Bottom margin in cm (default: 2.0cm)")
    parser.add_argument("--start-page", type=int, default=1, help="Starting page number (default: 1)")
    parser.add_argument("--show-project", action="store_true", help="Show project name in footer")
    parser.add_argument("--repo-url", default=None, help="Repository URL to show on cover")
    parser.add_argument("--page-number-size", type=int, default=8, help="Font size for page numbers (default: 8)")
    parser.add_argument("--normal-font", default=None, help="Normal text font (default: Helvetica)")
    parser.add_argument("--bold-font", default=None, help="Bold text font (default: Helvetica-Bold)")
    parser.add_argument("--mono-font", default=None, help="Monospace font for code (default: auto-detect)")
    parser.add_argument("--emoji-font", default=None, help="Font for emojis (default: auto-detect)")
    parser.add_argument("--emoji-description", action="store_true", help="Print [description] instead of emoji glyphs")
    parser.add_argument("--check-emoji-font", action="store_true", help="Check current emoji font style and exit")
    return parser


def main():
    """Command-line entry point: collect project files and render the PDF annex.

    Steps:
      1. Parse the command line (unknown options only produce a warning).
      2. With ``--check-emoji-font``, print the emoji font status and stop.
      3. Register fonts and build a ``PDFConfig`` from the options.
      4. List, sort and classify the project's files into images, text and
         ignored binaries.
      5. Build the PDF twice: a simulation pass into memory that produces
         ``summary_data``, then the final pass that reuses that data.
    """
    args, unknown = _build_arg_parser().parse_known_args()

    # Unknown options are reported but never abort the run; stray positional
    # values are dropped without a message.
    for u in unknown:
        if u.startswith("-"):
            print(f"⚠️ Warning: Unrecognized argument ignored: {u}")

    # Handle emoji font check
    if args.check_emoji_font:
        check_emoji_font_style()
        return

    root = Path(args.dir).resolve()
    output = args.output or f"{root.name}_anexo_codigo.pdf"

    # These two paths are excluded from the annex below.
    script_path = Path(os.path.abspath(__file__))
    output_path = Path(output).resolve()

    mono_font, is_ttf, ttf_path = register_best_font()
    # If --emoji-description is NOT set, we MUST have an emoji font or we exit with error
    emoji_font = register_emoji_font(error_on_missing=not args.emoji_description)
    init_sprites(is_ttf, ttf_path)

    # One default instance is enough to read every fallback value from.
    defaults = PDFConfig()

    # Determine margin values (specific margins override general --margin)
    def get_margin(spec, general, default):
        if spec is not None:
            return spec * cm
        if general is not None:
            return general * cm
        return default

    # Build the configuration
    config = PDFConfig(
        project_name=args.name or root.name,
        margin_left=get_margin(args.margin_left, args.margin, defaults.margin_left),
        margin_right=get_margin(args.margin_right, args.margin, defaults.margin_right),
        margin_top=get_margin(args.margin_top, args.margin, defaults.margin_top),
        margin_bottom=get_margin(args.margin_bottom, args.margin, defaults.margin_bottom),
        start_page_num=args.start_page,
        show_project_name=args.show_project,
        normal_font=args.normal_font or defaults.normal_font,
        bold_font=args.bold_font or defaults.bold_font,
        mono_font=args.mono_font or mono_font,
        emoji_font=args.emoji_font or emoji_font,
        emoji_description=args.emoji_description,
        repo_url=args.repo_url,
        page_number_size=args.page_number_size,
    )

    print(f"🔍 Analyzing directory: {root}")
    all_files = sort_files(get_project_files(root), root)
    included = []
    ignored_binaries = []
    unreadable = []

    for fp in all_files:
        try:
            if fp.resolve() in (script_path, output_path):
                continue
        except (OSError, RuntimeError):
            # resolve() can fail on broken or looping symlinks; keep going and
            # let the is_file() check below decide.
            pass
        if not fp.is_file():
            continue

        ext = fp.suffix.lower()

        # Check known extensions first (without reading file)
        if ext == ".svg":
            # SVG files are listed twice: rendered as an image and as source text.
            included += [(fp, "image"), (fp, "text")]
        elif ext in IMAGE_EXTENSIONS:
            included.append((fp, "image"))
        elif ext in BINARY_EXTENSIONS:
            ignored_binaries.append(fp)
        else:
            # Classify file with a single read
            file_type = classify_file(fp)
            if file_type == "text":
                included.append((fp, "text"))
            elif file_type == "binary":
                ignored_binaries.append(fp)
            else:
                # classify_file reports "error" when the file could not be read.
                unreadable.append(fp)

    for bp in ignored_binaries:
        rel_path = bp.relative_to(root)
        print(f"⚠️ Ignoring binary file: {rel_path}")

    for up in unreadable:
        print(f"⚠️ Skipping unreadable file: {up.relative_to(root)}")

    if not included:
        print("❌ No compatible files found.")
        return

    print(f"🧮 Step 1/2: Simulating layout of {len(included)} files to generate the Table of Contents...")
    pdf_sim = ModernAnnexPDF(io.BytesIO(), root, mono_font, emoji_font, config)
    pdf_sim.is_simulation = True
    pdf_sim.build(included)

    print("🚀 Step 2/2: Generating the final document with images and code...")
    pdf_final = ModernAnnexPDF(output, root, mono_font, emoji_font, config)
    # Reuse the summary data gathered by the simulation pass.
    pdf_final.summary_data = pdf_sim.summary_data
    pdf_final.build(included)

    print(f"✅ Success! The annex was saved to: {output}")
154
+
155
+
156
# Run the command-line entry point only when this module is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,65 @@
1
+ from dataclasses import dataclass
2
+ from reportlab.lib import colors
3
+ from reportlab.lib.pagesizes import A4
4
+ from reportlab.lib.units import mm, cm
5
+
6
# ── Page (default) ───────────────────────────────
# Page size comes from reportlab's A4; default margins are built from reportlab's `cm` unit.
PAGE_W, PAGE_H = A4
_DEFAULT_MARGIN_LEFT = 1.5 * cm
_DEFAULT_MARGIN_RIGHT = 1.5 * cm
_DEFAULT_MARGIN_TOP = 2.0 * cm
_DEFAULT_MARGIN_BOTTOM = 2.0 * cm

# ── Code ───────────────────────────────────────
CODE_FONT_SIZE = 10
CODE_LINE_H = CODE_FONT_SIZE * 1.4  # line height is 1.4x the code font size
GUTTER_W = 14 * mm  # added to the left margin to find where code starts (see PDFConfig.get_code_x)

# ── Colors ────────────────────────────────────────
# Each constant is a reportlab color built from a hex string; BG = background, FG = foreground.
COLOR_PAGE_BG = colors.HexColor("#ffffff")
COLOR_TEXT_MAIN = colors.HexColor("#4c4f69")
COLOR_CODE_BG = colors.HexColor("#1e1e2e")
COLOR_GUTTER_BG = colors.HexColor("#181825")
COLOR_GUTTER_FG = colors.HexColor("#bac2de")
COLOR_HEADER_BG = colors.HexColor("#313244")
COLOR_HEADER_FG = colors.HexColor("#cdd6f4")
COLOR_ACCENT = colors.HexColor("#89b4fa")

# ── Base fonts ──────────────────────────────────
# Used as the PDFConfig defaults for normal and bold text.
NORMAL_FONT = "Helvetica"
BOLD_FONT = "Helvetica-Bold"

# ── Supported image extensions ───────────────
# Lower-case suffixes, including the leading dot.
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".ico", ".gif", ".webp", ".bmp", ".svg"}

# ── Binary extensions to ignore quickly ──────
# Lower-case suffixes, including the leading dot, recognized by name alone.
BINARY_EXTENSIONS = {".pdf", ".pyc", ".pyo", ".exe", ".dll", ".so", ".dylib",
                     ".zip", ".tar", ".gz", ".rar", ".7z", ".jar", ".whl",
                     ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx"}
39
+
40
+
41
@dataclass
class PDFConfig:
    """Layout and font settings for one code-annex PDF.

    Margins are lengths in the same unit as the module's page constants
    (the defaults are built from reportlab's ``cm``).
    """
    # Name of the project; None when not provided
    project_name: str | None = None
    # Page margins
    margin_left: float = _DEFAULT_MARGIN_LEFT
    margin_right: float = _DEFAULT_MARGIN_RIGHT
    margin_top: float = _DEFAULT_MARGIN_TOP
    margin_bottom: float = _DEFAULT_MARGIN_BOTTOM
    # Number assigned to the first page
    start_page_num: int = 1
    # Whether the project name is shown (off by default)
    show_project_name: bool = False
    # Fonts for regular and bold text
    normal_font: str = NORMAL_FONT
    bold_font: str = BOLD_FONT
    # Filled in from register_best_font when not given
    mono_font: str | None = None
    # Filled in from register_emoji_font when not given
    emoji_font: str | None = None
    # Print descriptions instead of emoji glyphs
    emoji_description: bool = False
    # Repository URL for the cover page
    repo_url: str | None = None
    # Font size used for page numbers
    page_number_size: int = 8

    def get_code_x(self) -> float:
        """Return the X position where code starts: left margin plus gutter width."""
        left_edge = self.margin_left
        return left_edge + GUTTER_W

    def get_code_w(self) -> float:
        """Return the width left for code between its start X and the right margin."""
        code_start = self.get_code_x()
        return PAGE_W - code_start - self.margin_right
@@ -0,0 +1,116 @@
1
+ import fnmatch
2
+ import os
3
+ import subprocess
4
+ from pathlib import Path
5
+
6
+
7
def classify_file(filepath: Path) -> str:
    """Classify a file as text or binary by looking for NUL bytes.

    Only the first 512 bytes are inspected. An empty file counts as text.

    Returns:
        "text", "binary", or "error" when the file cannot be inspected.
    """
    try:
        # A zero-length file has nothing to inspect and is treated as text.
        if not filepath.stat().st_size:
            return "text"

        with open(filepath, "rb") as handle:
            head = handle.read(512)
    except Exception:
        return "error"

    # A NUL byte in the leading chunk marks the file as binary.
    return "binary" if b"\0" in head else "text"
30
+
31
+
32
def is_text_file(filepath: Path) -> bool:
    """Return True when *filepath* is classified as text.

    Files with a well-known binary suffix are rejected by name alone, without
    being opened; everything else is decided by ``classify_file``.
    """
    # Quick rejection of known binary extensions
    known_binary_suffixes = {".pdf", ".pyc", ".pyo", ".exe", ".dll", ".so", ".dylib",
                             ".zip", ".tar", ".gz", ".rar", ".7z", ".jar", ".whl"}
    suffix = filepath.suffix.lower()
    return suffix not in known_binary_suffixes and classify_file(filepath) == "text"
40
+
41
+
42
def is_binary_file(filepath: Path) -> bool:
    """Return True when ``classify_file`` reports *filepath* as binary."""
    verdict = classify_file(filepath)
    return verdict == "binary"
44
+
45
+
46
class FallbackGitignoreFilter:
    """Approximate ``.gitignore`` matcher for projects scanned without Git.

    Only the ``.gitignore`` directly inside ``root`` is read. Patterns are
    matched with :mod:`fnmatch`, so this is a simplification of Git's rules:

    - blank lines and lines starting with ``#`` are skipped;
    - a trailing ``/`` is dropped, so directory patterns match by name;
    - a leading ``/`` anchors the pattern to ``root``;
    - other patterns match the whole relative path, anything beneath it, or
      any single path component;
    - a leading ``!`` negates the pattern, and the last matching rule wins.

    Attributes:
        root: Directory the rules are relative to.
        rules: Raw rule strings in file order (trailing ``/`` removed).
    """

    def __init__(self, root: Path):
        """Load the rules from ``root/.gitignore`` when that file exists."""
        self.root = root
        self.rules = []
        gi = root / ".gitignore"
        # is_file() rather than exists(): a directory named .gitignore must
        # not make read_text() raise.
        if gi.is_file():
            # utf-8-sig drops a leading BOM that would otherwise become part
            # of the first rule.
            for line in gi.read_text(encoding="utf-8-sig", errors="replace").splitlines():
                line = line.strip()
                if line and not line.startswith("#"):
                    self.rules.append(line.rstrip("/"))

    @staticmethod
    def _rule_matches(pattern: str, rel_str: str, parts: tuple) -> bool:
        """Tell whether one non-negated pattern matches the relative path."""
        if pattern.startswith("/"):
            # Anchored rule: compare against the path from the root only.
            anchored = pattern[1:]
            return fnmatch.fnmatch(rel_str, anchored) or fnmatch.fnmatch(rel_str, f"{anchored}/*")
        if fnmatch.fnmatch(rel_str, pattern) or fnmatch.fnmatch(rel_str, f"{pattern}/*"):
            return True
        # Unanchored rule: any single component may match, at any depth.
        return any(fnmatch.fnmatch(p, pattern) for p in parts)

    def is_ignored(self, path: Path) -> bool:
        """Return True when *path* is excluded by the loaded rules.

        Paths outside ``root`` are never ignored. Rules are applied in file
        order; a later ``!pattern`` re-includes a path that an earlier rule
        excluded.
        """
        try:
            rel = path.relative_to(self.root)
        except ValueError:
            return False
        rel_str = rel.as_posix()
        parts = rel.parts

        ignored = False
        for rule in self.rules:
            negated = rule.startswith("!")
            pattern = rule[1:] if negated else rule
            if not pattern:
                # Nothing left to match (for example a lone "!" or "/").
                continue
            if self._rule_matches(pattern, rel_str, parts):
                ignored = not negated
        return ignored
75
+
76
+
77
def sort_files(files: list[Path], root: Path) -> list[Path]:
    """Order files so each level lists its own files before its subdirectories.

    Within every directory, starting at ``root``, files come first in
    case-insensitive alphabetical order, followed by each subdirectory
    (also case-insensitive alphabetical), processed recursively.
    """
    def ordering(path: Path):
        segments = path.relative_to(root).parts
        filename = segments[-1]
        # Each directory level contributes (1, name) and the file itself
        # contributes (0, name), so a file sorts ahead of any sibling directory.
        key = []
        for folder in segments[:-1]:
            key += [1, folder.lower()]
        key += [0, filename.lower()]
        return tuple(key)

    ordered = list(files)
    ordered.sort(key=ordering)
    return ordered
93
+
94
+
95
def _list_files_with_git(root: Path) -> list[Path]:
    """List tracked plus untracked-but-not-ignored files by asking Git."""
    def run_git(*git_args: str) -> bytes:
        return subprocess.run(["git", *git_args],
                              cwd=root, capture_output=True, check=True).stdout

    # Fails with CalledProcessError when root is not inside a work tree.
    run_git("rev-parse", "--is-inside-work-tree")
    tracked = run_git("ls-files", "-z").split(b"\0")
    untracked = run_git("ls-files", "-z", "--others", "--exclude-standard").split(b"\0")
    # os.fsdecode copes with file names that are not valid UTF-8, where
    # bytes.decode() would raise UnicodeDecodeError.
    return sorted(root / os.fsdecode(name) for name in set(tracked + untracked) if name)


def _walk_project_files(root: Path) -> list[Path]:
    """List files by walking the tree, applying built-in and .gitignore rules."""
    system_ignores = {".git", "node_modules", "__pycache__", "venv", ".venv", "dist", "build"}
    filt = FallbackGitignoreFilter(root)
    files = []
    for dp, dns, fns in os.walk(root):
        curr = Path(dp)
        # Prune in place so os.walk does not descend into ignored directories.
        dns[:] = [d for d in dns if d not in system_ignores and not filt.is_ignored(curr / d)]
        files.extend(curr / f for f in fns if not filt.is_ignored(curr / f))
    return sorted(files)


def get_project_files(root: Path) -> list[Path]:
    """Return the sorted list of candidate files under *root*.

    Git is tried first: tracked files plus untracked files that are not
    excluded by the standard ignore rules. When Git is not installed or
    *root* is not inside a work tree, the directory is walked manually with
    ``FallbackGitignoreFilter`` and a fixed set of ignored directory names.

    Args:
        root: Project directory to scan.

    Returns:
        Sorted paths, each prefixed with *root*.
    """
    try:
        files = _list_files_with_git(root)
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("⚠️ Git not detected in the project. Using manual scan with Fallback.")
        return _walk_project_files(root)

    print("✅ Using Git's native engine to perfectly interpret .gitignore.")
    return files