code-to-txt 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_to_txt/.extensions +34 -0
- code_to_txt/.ignore +20 -0
- code_to_txt/__init__.py +1 -0
- code_to_txt/cli.py +113 -96
- code_to_txt/code_to_txt.py +190 -120
- code_to_txt/config.py +13 -7
- code_to_txt/utils.py +13 -0
- {code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/METADATA +93 -67
- code_to_txt-0.3.0.dist-info/RECORD +12 -0
- code_to_txt-0.2.0.dist-info/RECORD +0 -9
- {code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/WHEEL +0 -0
- {code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/entry_points.txt +0 -0
- {code_to_txt-0.2.0.dist-info → code_to_txt-0.3.0.dist-info}/licenses/LICENSE +0 -0
code_to_txt/.extensions
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
.py
|
|
2
|
+
.js
|
|
3
|
+
.ts
|
|
4
|
+
.jsx
|
|
5
|
+
.tsx
|
|
6
|
+
.java
|
|
7
|
+
.c
|
|
8
|
+
.cpp
|
|
9
|
+
.h
|
|
10
|
+
.hpp
|
|
11
|
+
.cs
|
|
12
|
+
.go
|
|
13
|
+
.rs
|
|
14
|
+
.rb
|
|
15
|
+
.php
|
|
16
|
+
.swift
|
|
17
|
+
.kt
|
|
18
|
+
.scala
|
|
19
|
+
.r
|
|
20
|
+
.sql
|
|
21
|
+
.sh
|
|
22
|
+
.bash
|
|
23
|
+
.zsh
|
|
24
|
+
.yaml
|
|
25
|
+
.yml
|
|
26
|
+
.json
|
|
27
|
+
.toml
|
|
28
|
+
.xml
|
|
29
|
+
.html
|
|
30
|
+
.css
|
|
31
|
+
.scss
|
|
32
|
+
.md
|
|
33
|
+
.txt
|
|
34
|
+
.rst
|
code_to_txt/.ignore
ADDED
code_to_txt/__init__.py
CHANGED
code_to_txt/cli.py
CHANGED
|
@@ -4,96 +4,81 @@ from pathlib import Path
|
|
|
4
4
|
import click
|
|
5
5
|
import pyperclip
|
|
6
6
|
|
|
7
|
+
from . import __version__
|
|
7
8
|
from .code_to_txt import CodeToText
|
|
8
9
|
from .config import create_default_config, load_config
|
|
9
10
|
|
|
10
11
|
|
|
12
|
+
def display_statistics(stats: dict) -> None:
|
|
13
|
+
"""Display statistics about the codebase."""
|
|
14
|
+
divider = "=" * 60
|
|
15
|
+
click.echo(f"\n{divider}")
|
|
16
|
+
click.echo("CODEBASE STATISTICS")
|
|
17
|
+
click.echo(divider)
|
|
18
|
+
click.echo(f"Total files: {stats['total_files']}")
|
|
19
|
+
click.echo(f"Total size: {stats['total_size_bytes'] / 1024 / 1024:.2f} MB")
|
|
20
|
+
click.echo(f"Total lines: {stats['total_lines']:,}")
|
|
21
|
+
|
|
22
|
+
if stats["skipped_files"] > 0:
|
|
23
|
+
click.echo(f"Skipped files: {stats['skipped_files']}")
|
|
24
|
+
|
|
25
|
+
click.echo("\nFiles by extension:")
|
|
26
|
+
by_ext = sorted(stats["by_extension"].items(), key=lambda x: x[1]["count"], reverse=True)
|
|
27
|
+
for ext, data in by_ext[:10]:
|
|
28
|
+
size_mb = data["size"] / 1024 / 1024
|
|
29
|
+
click.echo(f" {ext:15} {data['count']:5} files {size_mb:8.2f} MB")
|
|
30
|
+
|
|
31
|
+
if len(by_ext) > 10:
|
|
32
|
+
click.echo(f" ... and {len(by_ext) - 10} more extensions")
|
|
33
|
+
|
|
34
|
+
if stats["largest_files"]:
|
|
35
|
+
click.echo("\nLargest files:")
|
|
36
|
+
for file_info in stats["largest_files"][:5]:
|
|
37
|
+
click.echo(f" {file_info['size_kb']:8.2f} KB {file_info['path']}")
|
|
38
|
+
|
|
39
|
+
click.echo(f"{divider}\n")
|
|
40
|
+
|
|
41
|
+
|
|
11
42
|
@click.command()
|
|
12
43
|
@click.argument("path", type=click.Path(exists=True), default=".")
|
|
13
|
-
@click.option(
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
)
|
|
20
|
-
@click.option(
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
)
|
|
26
|
-
@click.option(
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
help="Patterns to exclude (gitignore style). Can be specified multiple times.",
|
|
31
|
-
)
|
|
32
|
-
@click.option(
|
|
33
|
-
"-g",
|
|
34
|
-
"--glob",
|
|
35
|
-
multiple=True,
|
|
36
|
-
help="Glob patterns to include (e.g., '*.py' 'src/**/*.js'). Can be specified multiple times.",
|
|
37
|
-
)
|
|
38
|
-
@click.option(
|
|
39
|
-
"--no-gitignore",
|
|
40
|
-
is_flag=True,
|
|
41
|
-
help="Don't respect .gitignore files",
|
|
42
|
-
)
|
|
43
|
-
@click.option(
|
|
44
|
-
"--no-tree",
|
|
45
|
-
is_flag=True,
|
|
46
|
-
help="Don't include directory tree in output",
|
|
47
|
-
)
|
|
48
|
-
@click.option(
|
|
49
|
-
"--separator",
|
|
50
|
-
default="=" * 80,
|
|
51
|
-
help="Separator between files",
|
|
52
|
-
)
|
|
53
|
-
@click.option(
|
|
54
|
-
"--clipboard",
|
|
55
|
-
"-c",
|
|
56
|
-
is_flag=True,
|
|
57
|
-
help="Copy output to clipboard in addition to saving to file",
|
|
58
|
-
)
|
|
59
|
-
@click.option(
|
|
60
|
-
"--clipboard-only",
|
|
61
|
-
is_flag=True,
|
|
62
|
-
help="Copy output to clipboard only (don't save to file)",
|
|
63
|
-
)
|
|
64
|
-
@click.option(
|
|
65
|
-
"--config",
|
|
66
|
-
type=click.Path(exists=True),
|
|
67
|
-
help="Path to config file (.yml or .yaml)",
|
|
68
|
-
)
|
|
69
|
-
@click.option(
|
|
70
|
-
"--init-config",
|
|
71
|
-
is_flag=True,
|
|
72
|
-
help="Create default configuration file (.code-to-txt.yml)",
|
|
73
|
-
)
|
|
74
|
-
@click.option(
|
|
75
|
-
"--timestamp",
|
|
76
|
-
"-t",
|
|
77
|
-
is_flag=True,
|
|
78
|
-
help="Add timestamp to output filename",
|
|
79
|
-
)
|
|
44
|
+
@click.option("-o", "--output", default=None, type=click.Path(),
|
|
45
|
+
help="Output file path (default: codetotxt_YYYYMMDD_HHMMSS.txt)")
|
|
46
|
+
@click.option("-e", "--extensions", default=None, help="File extensions to include (space or comma separated)")
|
|
47
|
+
@click.option("-x", "--exclude", multiple=True, help="Patterns to exclude (can be used multiple times)")
|
|
48
|
+
@click.option("-g", "--glob", multiple=True, help="Glob patterns to include (can be used multiple times)")
|
|
49
|
+
@click.option("--no-gitignore", is_flag=True, help="Don't respect .gitignore files")
|
|
50
|
+
@click.option("--no-tree", is_flag=True, help="Don't include directory tree in output")
|
|
51
|
+
@click.option("--separator", default="=" * 80, help="Separator between files")
|
|
52
|
+
@click.option("-c", "--clipboard", is_flag=True, help="Copy output to clipboard in addition to file")
|
|
53
|
+
@click.option("--clipboard-only", is_flag=True, help="Copy to clipboard only (don't save file)")
|
|
54
|
+
@click.option("--config", type=click.Path(exists=True), help="Path to config file (.yml or .yaml)")
|
|
55
|
+
@click.option("--init-config", is_flag=True, help="Create default configuration file")
|
|
56
|
+
@click.option("-t", "--timestamp", is_flag=True, help="Add timestamp to output filename")
|
|
57
|
+
@click.option("-v", "--version", is_flag=True, help="Show version and exit")
|
|
58
|
+
@click.option("--dry-run", is_flag=True, help="Show which files would be processed without creating output")
|
|
59
|
+
@click.option("--stats", is_flag=True, help="Show detailed statistics about the codebase")
|
|
60
|
+
@click.option("--max-file-size", type=int, default=None, help="Skip files larger than N KB")
|
|
80
61
|
def main(
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
62
|
+
path: str,
|
|
63
|
+
output: str | None,
|
|
64
|
+
extensions: str | None,
|
|
65
|
+
exclude: tuple[str, ...],
|
|
66
|
+
glob: tuple[str, ...],
|
|
67
|
+
no_gitignore: bool,
|
|
68
|
+
no_tree: bool,
|
|
69
|
+
separator: str,
|
|
70
|
+
clipboard: bool,
|
|
71
|
+
clipboard_only: bool,
|
|
72
|
+
config: str | None,
|
|
73
|
+
init_config: bool,
|
|
74
|
+
timestamp: bool,
|
|
75
|
+
version: bool,
|
|
76
|
+
dry_run: bool,
|
|
77
|
+
stats: bool,
|
|
78
|
+
max_file_size: int | None,
|
|
94
79
|
) -> None:
|
|
95
80
|
"""
|
|
96
|
-
Convert code files to a single text file for
|
|
81
|
+
Convert code files to a single text file for LLM consumption.
|
|
97
82
|
|
|
98
83
|
PATH: Directory to scan (default: current directory)
|
|
99
84
|
|
|
@@ -127,13 +112,14 @@ def main(
|
|
|
127
112
|
# Use config file
|
|
128
113
|
code-to-txt --config .code-to-txt.yml
|
|
129
114
|
"""
|
|
115
|
+
if version:
|
|
116
|
+
click.echo(f"v{__version__}")
|
|
117
|
+
return
|
|
118
|
+
|
|
130
119
|
if init_config:
|
|
131
120
|
config_path = Path(".code-to-txt.yml")
|
|
132
121
|
if config_path.exists():
|
|
133
|
-
click.confirm(
|
|
134
|
-
f"Config file {config_path} already exists. Overwrite?",
|
|
135
|
-
abort=True,
|
|
136
|
-
)
|
|
122
|
+
click.confirm(f"Config file {config_path} already exists. Overwrite?", abort=True)
|
|
137
123
|
create_default_config(config_path)
|
|
138
124
|
click.echo(f"Created default config file: {config_path}")
|
|
139
125
|
click.echo("You can now edit this file and use it with --config flag")
|
|
@@ -155,6 +141,7 @@ def main(
|
|
|
155
141
|
clipboard = clipboard or config_data.get("clipboard", False)
|
|
156
142
|
clipboard_only = clipboard_only or config_data.get("clipboard_only", False)
|
|
157
143
|
timestamp = timestamp or config_data.get("timestamp", False)
|
|
144
|
+
max_file_size = max_file_size or config_data.get("max_file_size")
|
|
158
145
|
|
|
159
146
|
if not output or timestamp:
|
|
160
147
|
timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
@@ -183,30 +170,54 @@ def main(
|
|
|
183
170
|
if ext:
|
|
184
171
|
include_extensions.add(ext)
|
|
185
172
|
|
|
186
|
-
|
|
173
|
+
code_to_txt = CodeToText(
|
|
187
174
|
root_path=path,
|
|
188
|
-
output_file=output if not clipboard_only else None,
|
|
175
|
+
output_file=output if not clipboard_only and not dry_run and not stats else None,
|
|
189
176
|
include_extensions=include_extensions,
|
|
190
177
|
exclude_patterns=list(exclude),
|
|
191
178
|
glob_patterns=list(glob_patterns),
|
|
192
179
|
gitignore=not no_gitignore,
|
|
180
|
+
max_file_size_kb=max_file_size,
|
|
193
181
|
)
|
|
194
182
|
|
|
195
183
|
try:
|
|
184
|
+
if stats or dry_run:
|
|
185
|
+
statistics = code_to_txt.calculate_statistics()
|
|
186
|
+
display_statistics(statistics)
|
|
187
|
+
|
|
188
|
+
if stats:
|
|
189
|
+
return
|
|
190
|
+
|
|
191
|
+
if dry_run:
|
|
192
|
+
files = code_to_txt._collect_files()
|
|
193
|
+
click.echo("Files that would be processed:")
|
|
194
|
+
for i, file_path in enumerate(files, 1):
|
|
195
|
+
relative_path = file_path.relative_to(Path(path).resolve())
|
|
196
|
+
size_kb = file_path.stat().st_size / 1024
|
|
197
|
+
click.echo(f" {i:4}. {relative_path} ({size_kb:.1f} KB)")
|
|
198
|
+
|
|
199
|
+
if code_to_txt.skipped_files:
|
|
200
|
+
click.echo(f"\nSkipped {len(code_to_txt.skipped_files)} files:")
|
|
201
|
+
for file_path, reason in code_to_txt.skipped_files[:20]:
|
|
202
|
+
relative_path = file_path.relative_to(Path(path).resolve())
|
|
203
|
+
click.echo(f" - {relative_path} ({reason})")
|
|
204
|
+
if len(code_to_txt.skipped_files) > 20:
|
|
205
|
+
click.echo(f" ... and {len(code_to_txt.skipped_files) - 20} more")
|
|
206
|
+
|
|
207
|
+
if not stats or dry_run:
|
|
208
|
+
return
|
|
209
|
+
|
|
196
210
|
if clipboard_only:
|
|
197
|
-
content =
|
|
198
|
-
add_tree=not no_tree,
|
|
199
|
-
separator=separator,
|
|
200
|
-
)
|
|
211
|
+
content = code_to_txt.generate_content(add_tree=not no_tree, separator=separator)
|
|
201
212
|
pyperclip.copy(content)
|
|
202
213
|
click.echo("Content copied to clipboard")
|
|
203
|
-
click.echo(f"Processed {
|
|
214
|
+
click.echo(f"Processed {code_to_txt.file_count} files")
|
|
204
215
|
click.echo(f"Content size: {len(content) / 1024:.2f} KB")
|
|
216
|
+
|
|
217
|
+
estimated_tokens = len(content) / 4
|
|
218
|
+
click.echo(f"Estimated tokens: ~{estimated_tokens:,.0f}")
|
|
205
219
|
else:
|
|
206
|
-
num_files =
|
|
207
|
-
add_tree=not no_tree,
|
|
208
|
-
separator=separator,
|
|
209
|
-
)
|
|
220
|
+
num_files = code_to_txt.convert(add_tree=not no_tree, separator=separator)
|
|
210
221
|
|
|
211
222
|
output_path = Path(output).resolve()
|
|
212
223
|
click.echo(f"Successfully processed {num_files} files")
|
|
@@ -215,11 +226,17 @@ def main(
|
|
|
215
226
|
size_kb = output_path.stat().st_size / 1024
|
|
216
227
|
click.echo(f"File size: {size_kb:.2f} KB")
|
|
217
228
|
|
|
229
|
+
estimated_tokens = size_kb * 1024 / 4
|
|
230
|
+
click.echo(f"Estimated tokens: ~{estimated_tokens:,.0f}")
|
|
231
|
+
|
|
218
232
|
if clipboard:
|
|
219
233
|
content = output_path.read_text(encoding="utf-8")
|
|
220
234
|
pyperclip.copy(content)
|
|
221
235
|
click.echo("Content also copied to clipboard")
|
|
222
236
|
|
|
237
|
+
if code_to_txt.skipped_files:
|
|
238
|
+
click.echo(f"\nNote: Skipped {len(code_to_txt.skipped_files)} files (use --dry-run to see details)")
|
|
239
|
+
|
|
223
240
|
except Exception as e:
|
|
224
241
|
click.echo(f"Error: {e}", err=True)
|
|
225
242
|
raise click.Abort()
|
code_to_txt/code_to_txt.py
CHANGED
|
@@ -6,41 +6,11 @@ from typing import Any
|
|
|
6
6
|
import pathspec
|
|
7
7
|
from pathspec import PathSpec
|
|
8
8
|
|
|
9
|
+
from .utils import load_patterns_from_file
|
|
10
|
+
|
|
9
11
|
|
|
10
12
|
class CodeToText:
|
|
11
|
-
|
|
12
|
-
"__pycache__",
|
|
13
|
-
"*.pyc",
|
|
14
|
-
"*.pyo",
|
|
15
|
-
"*.pyd",
|
|
16
|
-
".git",
|
|
17
|
-
".svn",
|
|
18
|
-
".hg",
|
|
19
|
-
"node_modules",
|
|
20
|
-
".venv",
|
|
21
|
-
"venv",
|
|
22
|
-
".env",
|
|
23
|
-
"*.egg-info",
|
|
24
|
-
"dist",
|
|
25
|
-
"build",
|
|
26
|
-
".pytest_cache",
|
|
27
|
-
".mypy_cache",
|
|
28
|
-
".ruff_cache",
|
|
29
|
-
"*.so",
|
|
30
|
-
"*.dylib",
|
|
31
|
-
"*.dll",
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
DEFAULT_EXTENSIONS = {
|
|
35
|
-
".py", ".js", ".ts", ".jsx", ".tsx",
|
|
36
|
-
".java", ".c", ".cpp", ".h", ".hpp",
|
|
37
|
-
".cs", ".go", ".rs", ".rb", ".php",
|
|
38
|
-
".swift", ".kt", ".scala", ".r",
|
|
39
|
-
".sql", ".sh", ".bash", ".zsh",
|
|
40
|
-
".yaml", ".yml", ".json", ".toml",
|
|
41
|
-
".xml", ".html", ".css", ".scss",
|
|
42
|
-
".md", ".txt", ".rst",
|
|
43
|
-
}
|
|
13
|
+
"""Convert code files to a single text file for LLM consumption."""
|
|
44
14
|
|
|
45
15
|
def __init__(
|
|
46
16
|
self,
|
|
@@ -50,9 +20,10 @@ class CodeToText:
|
|
|
50
20
|
exclude_patterns: list[str] | None = None,
|
|
51
21
|
glob_patterns: list[str] | None = None,
|
|
52
22
|
gitignore: bool = True,
|
|
23
|
+
max_file_size_kb: int | None = None,
|
|
53
24
|
):
|
|
54
25
|
"""
|
|
55
|
-
Initialize
|
|
26
|
+
Initialize CodeToText instance.
|
|
56
27
|
|
|
57
28
|
Args:
|
|
58
29
|
root_path: Root directory to scan
|
|
@@ -61,89 +32,188 @@ class CodeToText:
|
|
|
61
32
|
exclude_patterns: List of patterns to exclude (gitignore style)
|
|
62
33
|
glob_patterns: List of glob patterns to include (e.g., '*.py', 'src/**/*.js')
|
|
63
34
|
gitignore: Whether to respect .gitignore files
|
|
35
|
+
max_file_size_kb: Skip files larger than this size in KB
|
|
64
36
|
"""
|
|
65
37
|
self.root_path = Path(root_path).resolve()
|
|
66
38
|
self.output_file = output_file
|
|
67
|
-
self.include_extensions = include_extensions or self.DEFAULT_EXTENSIONS
|
|
68
|
-
self.exclude_patterns = exclude_patterns or []
|
|
69
39
|
self.glob_patterns = glob_patterns or []
|
|
70
40
|
self.gitignore = gitignore
|
|
41
|
+
self.max_file_size_kb = max_file_size_kb
|
|
71
42
|
self.spec: PathSpec | None = None
|
|
72
43
|
self.file_count = 0
|
|
44
|
+
self.skipped_files: list[tuple[Path, str]] = []
|
|
73
45
|
|
|
74
|
-
|
|
75
|
-
|
|
46
|
+
config_dir = Path(__file__).parent
|
|
47
|
+
default_extensions = load_patterns_from_file(config_dir / ".extensions")
|
|
48
|
+
default_ignore = load_patterns_from_file(config_dir / ".ignore")
|
|
76
49
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
patterns = list(self.DEFAULT_IGNORE)
|
|
50
|
+
self.include_extensions = include_extensions or default_extensions
|
|
51
|
+
self.exclude_patterns = exclude_patterns or []
|
|
52
|
+
self.default_ignore = default_ignore
|
|
81
53
|
|
|
82
|
-
if
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
54
|
+
if self.gitignore:
|
|
55
|
+
self._init_pathspec()
|
|
56
|
+
|
|
57
|
+
def _init_pathspec(self) -> None:
|
|
58
|
+
"""Initialize pathspec from .gitignore files and default patterns."""
|
|
59
|
+
patterns = list(self.default_ignore)
|
|
60
|
+
current_path = self.root_path
|
|
61
|
+
|
|
62
|
+
for _ in range(5):
|
|
63
|
+
gitignore_path = current_path / ".gitignore"
|
|
64
|
+
if gitignore_path.exists():
|
|
65
|
+
try:
|
|
66
|
+
with open(gitignore_path, encoding="utf-8") as f:
|
|
67
|
+
for line in f:
|
|
68
|
+
clean_line = line.strip()
|
|
69
|
+
if clean_line and not clean_line.startswith("#"):
|
|
70
|
+
patterns.append(clean_line)
|
|
71
|
+
except Exception:
|
|
72
|
+
pass
|
|
73
|
+
|
|
74
|
+
parent = current_path.parent
|
|
75
|
+
if parent == current_path:
|
|
76
|
+
break
|
|
77
|
+
current_path = parent
|
|
88
78
|
|
|
89
79
|
patterns.extend(self.exclude_patterns)
|
|
90
80
|
self.spec = pathspec.PathSpec.from_lines("gitignore", patterns)
|
|
91
81
|
|
|
92
|
-
def
|
|
82
|
+
def _check_glob_match(self, file_path: Path) -> bool:
|
|
93
83
|
"""Check if file matches any glob pattern."""
|
|
94
84
|
if not self.glob_patterns:
|
|
95
85
|
return False
|
|
96
86
|
|
|
97
87
|
relative_path = file_path.relative_to(self.root_path)
|
|
98
|
-
|
|
88
|
+
path_str = str(relative_path)
|
|
99
89
|
|
|
100
90
|
for pattern in self.glob_patterns:
|
|
101
|
-
if fnmatch(
|
|
91
|
+
if fnmatch(path_str, pattern):
|
|
102
92
|
return True
|
|
103
93
|
if fnmatch(file_path.name, pattern):
|
|
104
94
|
return True
|
|
95
|
+
if fnmatch(path_str.replace(os.sep, "/"), pattern):
|
|
96
|
+
return True
|
|
105
97
|
|
|
106
98
|
return False
|
|
107
99
|
|
|
108
|
-
def
|
|
109
|
-
"""
|
|
100
|
+
def _check_file_inclusion(self, file_path: Path) -> bool:
|
|
101
|
+
"""Determine if a file should be included in the output."""
|
|
102
|
+
if self.max_file_size_kb is not None:
|
|
103
|
+
try:
|
|
104
|
+
file_size_kb = file_path.stat().st_size / 1024
|
|
105
|
+
if file_size_kb > self.max_file_size_kb:
|
|
106
|
+
self.skipped_files.append(
|
|
107
|
+
(file_path, f"exceeds size limit ({file_size_kb:.1f}KB)")
|
|
108
|
+
)
|
|
109
|
+
return False
|
|
110
|
+
except Exception:
|
|
111
|
+
pass
|
|
112
|
+
|
|
110
113
|
if self.glob_patterns:
|
|
111
|
-
if not self.
|
|
112
|
-
return False
|
|
113
|
-
else:
|
|
114
|
-
if file_path.suffix not in self.include_extensions:
|
|
114
|
+
if not self._check_glob_match(file_path):
|
|
115
115
|
return False
|
|
116
116
|
|
|
117
117
|
if self.spec:
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
try:
|
|
119
|
+
relative_path = file_path.relative_to(self.root_path)
|
|
120
|
+
relative_str = str(relative_path).replace(os.sep, "/")
|
|
121
|
+
|
|
122
|
+
if self.spec.match_file(relative_str):
|
|
123
|
+
self.skipped_files.append((file_path, "matches ignore pattern"))
|
|
124
|
+
return False
|
|
125
|
+
except ValueError:
|
|
120
126
|
return False
|
|
121
127
|
|
|
128
|
+
if file_path.suffix not in self.include_extensions:
|
|
129
|
+
return False
|
|
130
|
+
|
|
122
131
|
return True
|
|
123
132
|
|
|
124
|
-
def
|
|
125
|
-
"""
|
|
133
|
+
def _collect_files(self) -> list[Path]:
|
|
134
|
+
"""Collect all files to process based on filters."""
|
|
126
135
|
files = []
|
|
136
|
+
self.skipped_files = []
|
|
137
|
+
|
|
127
138
|
for root, dirs, filenames in os.walk(self.root_path):
|
|
128
139
|
root_path = Path(root)
|
|
129
140
|
|
|
130
141
|
if self.spec:
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
142
|
+
try:
|
|
143
|
+
relative_root = root_path.relative_to(self.root_path)
|
|
144
|
+
root_str = str(relative_root).replace(os.sep, "/") if str(relative_root) != "." else ""
|
|
145
|
+
|
|
146
|
+
filtered_dirs = []
|
|
147
|
+
for d in dirs:
|
|
148
|
+
dir_path = f"{root_str}/{d}" if root_str else d
|
|
149
|
+
|
|
150
|
+
if not self.spec.match_file(dir_path) and not self.spec.match_file(f"{dir_path}/"):
|
|
151
|
+
filtered_dirs.append(d)
|
|
152
|
+
|
|
153
|
+
dirs[:] = filtered_dirs
|
|
154
|
+
except ValueError:
|
|
155
|
+
pass
|
|
136
156
|
|
|
137
157
|
for filename in filenames:
|
|
138
158
|
file_path = root_path / filename
|
|
139
|
-
if self.
|
|
159
|
+
if self._check_file_inclusion(file_path):
|
|
140
160
|
files.append(file_path)
|
|
141
161
|
|
|
142
162
|
return sorted(files)
|
|
143
163
|
|
|
164
|
+
def calculate_statistics(self) -> dict[str, Any]:
|
|
165
|
+
"""
|
|
166
|
+
Calculate statistics about the codebase.
|
|
167
|
+
|
|
168
|
+
Returns:
|
|
169
|
+
Dictionary containing total files, size, lines, breakdown by extension, etc.
|
|
170
|
+
"""
|
|
171
|
+
files = self._collect_files()
|
|
172
|
+
|
|
173
|
+
stats: dict = {
|
|
174
|
+
"total_files": len(files),
|
|
175
|
+
"total_size_bytes": 0,
|
|
176
|
+
"total_lines": 0,
|
|
177
|
+
"by_extension": {},
|
|
178
|
+
"skipped_files": len(self.skipped_files),
|
|
179
|
+
"largest_files": [],
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
file_sizes = []
|
|
183
|
+
|
|
184
|
+
for file_path in files:
|
|
185
|
+
try:
|
|
186
|
+
size = file_path.stat().st_size
|
|
187
|
+
stats["total_size_bytes"] += size
|
|
188
|
+
file_sizes.append((file_path, size))
|
|
189
|
+
|
|
190
|
+
try:
|
|
191
|
+
with open(file_path, encoding="utf-8") as f:
|
|
192
|
+
lines = sum(1 for _ in f)
|
|
193
|
+
stats["total_lines"] += lines
|
|
194
|
+
except Exception:
|
|
195
|
+
pass
|
|
196
|
+
|
|
197
|
+
ext = file_path.suffix or "(no extension)"
|
|
198
|
+
if ext not in stats["by_extension"]:
|
|
199
|
+
stats["by_extension"][ext] = {"count": 0, "size": 0}
|
|
200
|
+
stats["by_extension"][ext]["count"] += 1
|
|
201
|
+
stats["by_extension"][ext]["size"] += size
|
|
202
|
+
|
|
203
|
+
except Exception:
|
|
204
|
+
pass
|
|
205
|
+
|
|
206
|
+
file_sizes.sort(key=lambda x: x[1], reverse=True)
|
|
207
|
+
stats["largest_files"] = [
|
|
208
|
+
{"path": str(f.relative_to(self.root_path)), "size_kb": s / 1024}
|
|
209
|
+
for f, s in file_sizes[:10]
|
|
210
|
+
]
|
|
211
|
+
|
|
212
|
+
return stats
|
|
213
|
+
|
|
144
214
|
def generate_content(self, add_tree: bool = True, separator: str = "=" * 80) -> str:
|
|
145
215
|
"""
|
|
146
|
-
Generate content as string
|
|
216
|
+
Generate content as string without writing to file.
|
|
147
217
|
|
|
148
218
|
Args:
|
|
149
219
|
add_tree: Whether to add directory tree at the beginning
|
|
@@ -152,43 +222,43 @@ class CodeToText:
|
|
|
152
222
|
Returns:
|
|
153
223
|
Generated content as string
|
|
154
224
|
"""
|
|
155
|
-
files = self.
|
|
225
|
+
files = self._collect_files()
|
|
156
226
|
self.file_count = len(files)
|
|
157
227
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
228
|
+
output_lines = []
|
|
229
|
+
output_lines.append(f"Code Export from: {self.root_path}")
|
|
230
|
+
output_lines.append(f"Total files: {len(files)}")
|
|
231
|
+
output_lines.append(separator)
|
|
232
|
+
output_lines.append("")
|
|
163
233
|
|
|
164
234
|
if add_tree:
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
for
|
|
235
|
+
output_lines.append("DIRECTORY TREE:")
|
|
236
|
+
output_lines.append(separator)
|
|
237
|
+
output_lines.append(self._build_tree_structure())
|
|
238
|
+
output_lines.append("")
|
|
239
|
+
output_lines.append(separator)
|
|
240
|
+
output_lines.append("")
|
|
241
|
+
|
|
242
|
+
for idx, file_path in enumerate(files, 1):
|
|
173
243
|
relative_path = file_path.relative_to(self.root_path)
|
|
174
244
|
|
|
175
|
-
|
|
176
|
-
|
|
245
|
+
output_lines.append(f"FILE {idx}/{len(files)}: {relative_path}")
|
|
246
|
+
output_lines.append(separator)
|
|
177
247
|
|
|
178
248
|
try:
|
|
179
249
|
with open(file_path, encoding="utf-8") as f:
|
|
180
250
|
content = f.read()
|
|
181
|
-
|
|
251
|
+
output_lines.append(content)
|
|
182
252
|
except UnicodeDecodeError:
|
|
183
|
-
|
|
253
|
+
output_lines.append("[Binary file - skipped]")
|
|
184
254
|
except Exception as e:
|
|
185
|
-
|
|
255
|
+
output_lines.append(f"[Error reading file: {e}]")
|
|
186
256
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
257
|
+
output_lines.append("")
|
|
258
|
+
output_lines.append(separator)
|
|
259
|
+
output_lines.append("")
|
|
190
260
|
|
|
191
|
-
return "\n".join(
|
|
261
|
+
return "\n".join(output_lines)
|
|
192
262
|
|
|
193
263
|
def convert(self, add_tree: bool = True, separator: str = "=" * 80) -> int:
|
|
194
264
|
"""
|
|
@@ -211,49 +281,49 @@ class CodeToText:
|
|
|
211
281
|
|
|
212
282
|
return self.file_count
|
|
213
283
|
|
|
214
|
-
def
|
|
215
|
-
"""
|
|
216
|
-
|
|
217
|
-
files = self.
|
|
284
|
+
def _build_tree_structure(self) -> str:
|
|
285
|
+
"""Build a directory tree representation of included files."""
|
|
286
|
+
tree_output = []
|
|
287
|
+
files = self._collect_files()
|
|
218
288
|
|
|
219
289
|
if not files:
|
|
220
290
|
return "(no files to display)"
|
|
221
291
|
|
|
222
|
-
|
|
292
|
+
structure: dict[str, Any] = {}
|
|
223
293
|
for file_path in files:
|
|
224
294
|
relative_path = file_path.relative_to(self.root_path)
|
|
225
295
|
parts = relative_path.parts
|
|
226
296
|
|
|
227
|
-
|
|
297
|
+
current_level = structure
|
|
228
298
|
for part in parts[:-1]:
|
|
229
|
-
if part not in
|
|
230
|
-
|
|
231
|
-
|
|
299
|
+
if part not in current_level:
|
|
300
|
+
current_level[part] = {}
|
|
301
|
+
current_level = current_level[part]
|
|
232
302
|
|
|
233
|
-
if "__files__" not in
|
|
234
|
-
|
|
235
|
-
|
|
303
|
+
if "__files__" not in current_level:
|
|
304
|
+
current_level["__files__"] = []
|
|
305
|
+
current_level["__files__"].append(parts[-1])
|
|
236
306
|
|
|
237
|
-
def
|
|
238
|
-
|
|
239
|
-
for key in sorted(
|
|
307
|
+
def render_tree(node: dict[str, Any], prefix: str = "", is_final: bool = True) -> None:
|
|
308
|
+
entries = []
|
|
309
|
+
for key in sorted(node.keys()):
|
|
240
310
|
if key != "__files__":
|
|
241
|
-
|
|
311
|
+
entries.append((key, True))
|
|
242
312
|
|
|
243
|
-
if "__files__" in
|
|
244
|
-
for file in sorted(
|
|
245
|
-
|
|
313
|
+
if "__files__" in node:
|
|
314
|
+
for file in sorted(node["__files__"]):
|
|
315
|
+
entries.append((file, False))
|
|
246
316
|
|
|
247
|
-
for i, (name,
|
|
248
|
-
|
|
249
|
-
connector = "└── " if
|
|
250
|
-
|
|
317
|
+
for i, (name, is_directory) in enumerate(entries):
|
|
318
|
+
is_last_entry = i == len(entries) - 1
|
|
319
|
+
connector = "└── " if is_last_entry else "├── "
|
|
320
|
+
tree_output.append(f"{prefix}{connector}{name}{'/' if is_directory else ''}")
|
|
251
321
|
|
|
252
|
-
if
|
|
253
|
-
extension = " " if
|
|
254
|
-
|
|
322
|
+
if is_directory:
|
|
323
|
+
extension = " " if is_last_entry else "│ "
|
|
324
|
+
render_tree(node[name], prefix + extension, is_last_entry)
|
|
255
325
|
|
|
256
|
-
|
|
257
|
-
|
|
326
|
+
tree_output.append(f"{self.root_path.name}/")
|
|
327
|
+
render_tree(structure)
|
|
258
328
|
|
|
259
|
-
return "\n".join(
|
|
329
|
+
return "\n".join(tree_output)
|
code_to_txt/config.py
CHANGED
|
@@ -4,8 +4,8 @@ from typing import Any
|
|
|
4
4
|
import yaml
|
|
5
5
|
|
|
6
6
|
DEFAULT_CONFIG = {
|
|
7
|
-
"output": "
|
|
8
|
-
"extensions": None,
|
|
7
|
+
"output": "code-to-txt.txt",
|
|
8
|
+
"extensions": None,
|
|
9
9
|
"exclude": [
|
|
10
10
|
"tests/*",
|
|
11
11
|
"*.test.js",
|
|
@@ -13,13 +13,14 @@ DEFAULT_CONFIG = {
|
|
|
13
13
|
"*.spec.js",
|
|
14
14
|
"*.spec.ts",
|
|
15
15
|
],
|
|
16
|
-
"glob": [],
|
|
16
|
+
"glob": [],
|
|
17
17
|
"no_gitignore": False,
|
|
18
18
|
"no_tree": False,
|
|
19
19
|
"separator": "=" * 80,
|
|
20
20
|
"clipboard": False,
|
|
21
21
|
"clipboard_only": False,
|
|
22
|
-
"timestamp":
|
|
22
|
+
"timestamp": True,
|
|
23
|
+
"max_file_size": None,
|
|
23
24
|
}
|
|
24
25
|
|
|
25
26
|
|
|
@@ -31,7 +32,7 @@ def load_config(config_path: str) -> dict[str, Any]:
|
|
|
31
32
|
config_path: Path to the configuration file
|
|
32
33
|
|
|
33
34
|
Returns:
|
|
34
|
-
Dictionary with configuration values
|
|
35
|
+
Dictionary with validated configuration values
|
|
35
36
|
"""
|
|
36
37
|
path = Path(config_path)
|
|
37
38
|
|
|
@@ -75,6 +76,9 @@ def load_config(config_path: str) -> dict[str, Any]:
|
|
|
75
76
|
if field in config:
|
|
76
77
|
validated_config[field] = bool(config[field])
|
|
77
78
|
|
|
79
|
+
if "max_file_size" in config and config["max_file_size"] is not None:
|
|
80
|
+
validated_config["max_file_size"] = int(config["max_file_size"])
|
|
81
|
+
|
|
78
82
|
return validated_config
|
|
79
83
|
|
|
80
84
|
|
|
@@ -91,7 +95,7 @@ def create_default_config(config_path: Path) -> None:
|
|
|
91
95
|
|
|
92
96
|
# Output file name (supports strftime formatting)
|
|
93
97
|
# Use timestamp: true to automatically add timestamp
|
|
94
|
-
output:
|
|
98
|
+
output: code-to-txt.txt
|
|
95
99
|
|
|
96
100
|
# File extensions to include
|
|
97
101
|
# Can be a list or space/comma-separated string
|
|
@@ -136,7 +140,9 @@ clipboard: false
|
|
|
136
140
|
clipboard_only: false
|
|
137
141
|
|
|
138
142
|
# Add timestamp to output filename
|
|
139
|
-
timestamp:
|
|
143
|
+
timestamp: true
|
|
144
|
+
|
|
145
|
+
max_file_size: null
|
|
140
146
|
|
|
141
147
|
# Example configurations:
|
|
142
148
|
#
|
code_to_txt/utils.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def load_patterns_from_file(file_path: Path) -> set[str]:
|
|
5
|
+
"""Load patterns from a text file, one per line."""
|
|
6
|
+
patterns = set()
|
|
7
|
+
if file_path.exists():
|
|
8
|
+
with open(file_path, encoding="utf-8") as f:
|
|
9
|
+
for line in f:
|
|
10
|
+
stripped = line.strip()
|
|
11
|
+
if stripped and not stripped.startswith("#"):
|
|
12
|
+
patterns.add(stripped)
|
|
13
|
+
return patterns
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-to-txt
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Convert code files to a single text file for LLM consumption
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -28,15 +28,6 @@ Models (LLMs) or for easy code review and documentation.
|
|
|
28
28
|
|
|
29
29
|
## Features
|
|
30
30
|
|
|
31
|
-
✨ **New in v0.2.0:**
|
|
32
|
-
|
|
33
|
-
- 🕐 **Automatic timestamps** in output filenames
|
|
34
|
-
- 📋 **Clipboard support** - copy output directly to clipboard
|
|
35
|
-
- 🎯 **Better extension handling** - specify multiple extensions without repeating `-e` flag
|
|
36
|
-
- 🔍 **Glob pattern support** - use patterns like `*.py` or `src/**/*.js`
|
|
37
|
-
- ⚙️ **Configuration file support** - save your preferences in `.code-to-txt.yml`
|
|
38
|
-
- 🚀 **Enhanced defaults** - more file types and ignore patterns out of the box
|
|
39
|
-
|
|
40
31
|
**Core Features:**
|
|
41
32
|
|
|
42
33
|
- 📁 Convert entire directories of code into a single text file
|
|
@@ -63,9 +54,18 @@ poetry add code-to-txt
|
|
|
63
54
|
### Basic Usage
|
|
64
55
|
|
|
65
56
|
```bash
|
|
66
|
-
#
|
|
57
|
+
# Show version
|
|
58
|
+
code-to-txt --version
|
|
59
|
+
|
|
60
|
+
# Convert all code files with timestamp
|
|
67
61
|
code-to-txt -t
|
|
68
62
|
|
|
63
|
+
# Preview what would be processed
|
|
64
|
+
code-to-txt --dry-run
|
|
65
|
+
|
|
66
|
+
# Get codebase statistics
|
|
67
|
+
code-to-txt --stats
|
|
68
|
+
|
|
69
69
|
# Convert specific directory
|
|
70
70
|
code-to-txt ./my-project -o project.txt
|
|
71
71
|
|
|
@@ -88,6 +88,9 @@ code-to-txt -g "*.py" -g "*.md"
|
|
|
88
88
|
### Advanced Usage
|
|
89
89
|
|
|
90
90
|
```bash
|
|
91
|
+
# Limit file sizes (useful for LLM token limits)
|
|
92
|
+
code-to-txt --max-file-size 500
|
|
93
|
+
|
|
91
94
|
# Exclude patterns
|
|
92
95
|
code-to-txt -x "tests/*" -x "*.test.js"
|
|
93
96
|
|
|
@@ -116,7 +119,7 @@ This creates `.code-to-txt.yml` with default settings:
|
|
|
116
119
|
|
|
117
120
|
```yaml
|
|
118
121
|
# Output file name
|
|
119
|
-
output:
|
|
122
|
+
output: code-to-txt.txt
|
|
120
123
|
|
|
121
124
|
# File extensions to include (null = use defaults)
|
|
122
125
|
extensions: null
|
|
@@ -125,7 +128,12 @@ extensions: null
|
|
|
125
128
|
exclude:
|
|
126
129
|
- "tests/*"
|
|
127
130
|
- "*.test.js"
|
|
131
|
+
- "*.test.ts"
|
|
132
|
+
- "*.spec.js"
|
|
133
|
+
- "*.spec.ts"
|
|
128
134
|
- "node_modules/*"
|
|
135
|
+
- "__pycache__/*"
|
|
136
|
+
- "*.pyc"
|
|
129
137
|
|
|
130
138
|
# Glob patterns (alternative to extensions)
|
|
131
139
|
glob: [ ]
|
|
@@ -137,6 +145,7 @@ separator: "================"
|
|
|
137
145
|
clipboard: false
|
|
138
146
|
clipboard_only: false
|
|
139
147
|
timestamp: false
|
|
148
|
+
max_file_size: null
|
|
140
149
|
```
|
|
141
150
|
|
|
142
151
|
Use the config file:
|
|
@@ -155,6 +164,7 @@ code-to-txt --config .code-to-txt.yml
|
|
|
155
164
|
extensions: [ .py ]
|
|
156
165
|
exclude: [ "tests/*", "*.pyc", "__pycache__/*", "venv/*", ".venv/*" ]
|
|
157
166
|
timestamp: true
|
|
167
|
+
max_file_size: 500
|
|
158
168
|
```
|
|
159
169
|
|
|
160
170
|
**JavaScript/TypeScript Project:**
|
|
@@ -163,20 +173,18 @@ timestamp: true
|
|
|
163
173
|
extensions: [ .js, .ts, .jsx, .tsx ]
|
|
164
174
|
exclude: [ "node_modules/*", "dist/*", "build/*", "*.test.js", "*.spec.ts" ]
|
|
165
175
|
no_tree: false
|
|
176
|
+
max_file_size: 1000
|
|
166
177
|
```
|
|
167
178
|
|
|
168
|
-
**
|
|
169
|
-
|
|
170
|
-
```yaml
|
|
171
|
-
extensions: [ .c, .cpp, .h, .hpp ]
|
|
172
|
-
exclude: [ "build/*", "*.o", "*.a", "cmake-build-*" ]
|
|
173
|
-
```
|
|
174
|
-
|
|
175
|
-
**Using Glob Patterns:**
|
|
179
|
+
**LLM-Optimized:**
|
|
176
180
|
|
|
177
181
|
```yaml
|
|
178
|
-
|
|
179
|
-
|
|
182
|
+
extensions: [ .py, .js, .md ]
|
|
183
|
+
exclude: [ "tests/*", "*.test.*", "node_modules/*", "dist/*", "build/*" ]
|
|
184
|
+
timestamp: true
|
|
185
|
+
clipboard: true
|
|
186
|
+
max_file_size: 200
|
|
187
|
+
no_tree: false
|
|
180
188
|
```
|
|
181
189
|
|
|
182
190
|
## Command Line Options
|
|
@@ -194,12 +202,16 @@ Options:
|
|
|
194
202
|
-g, --glob TEXT Glob patterns to include (can be used multiple times)
|
|
195
203
|
--no-gitignore Don't respect .gitignore files
|
|
196
204
|
--no-tree Don't include directory tree in output
|
|
197
|
-
--separator TEXT Separator between files
|
|
205
|
+
--separator TEXT Separator between files
|
|
198
206
|
-c, --clipboard Copy output to clipboard in addition to file
|
|
199
207
|
--clipboard-only Copy to clipboard only (don't save file)
|
|
200
208
|
--config PATH Path to config file (.yml or .yaml)
|
|
201
209
|
--init-config Create default configuration file
|
|
202
210
|
-t, --timestamp Add timestamp to output filename
|
|
211
|
+
-v, --version Show version and exit
|
|
212
|
+
--dry-run Show which files would be processed
|
|
213
|
+
--stats Show detailed statistics
|
|
214
|
+
--max-file-size INT Skip files larger than N KB
|
|
203
215
|
--help Show this message and exit
|
|
204
216
|
```
|
|
205
217
|
|
|
@@ -210,15 +222,13 @@ Options:
|
|
|
210
222
|
```python
|
|
211
223
|
from code_to_txt import CodeToText
|
|
212
224
|
|
|
213
|
-
|
|
214
|
-
code_to_text = CodeToText(
|
|
225
|
+
code_to_txt = CodeToText(
|
|
215
226
|
root_path="./my-project",
|
|
216
227
|
output_file="output.txt",
|
|
217
228
|
include_extensions={".py", ".js"},
|
|
218
229
|
)
|
|
219
230
|
|
|
220
|
-
|
|
221
|
-
num_files = code_to_text.convert(add_tree=True)
|
|
231
|
+
num_files = code_to_txt.convert(add_tree=True)
|
|
222
232
|
print(f"Processed {num_files} files")
|
|
223
233
|
```
|
|
224
234
|
|
|
@@ -226,54 +236,47 @@ print(f"Processed {num_files} files")
|
|
|
226
236
|
|
|
227
237
|
```python
|
|
228
238
|
from code_to_txt import CodeToText
|
|
239
|
+
import pyperclip
|
|
229
240
|
|
|
230
|
-
|
|
231
|
-
code_to_text = CodeToText(
|
|
241
|
+
code_to_txt = CodeToText(
|
|
232
242
|
root_path="./my-project",
|
|
233
|
-
output_file=None,
|
|
243
|
+
output_file=None,
|
|
234
244
|
include_extensions={".py"},
|
|
235
245
|
)
|
|
236
246
|
|
|
237
|
-
content =
|
|
238
|
-
print(f"Generated {len(content)} characters")
|
|
239
|
-
|
|
240
|
-
# Copy to clipboard using pyperclip
|
|
241
|
-
import pyperclip
|
|
242
|
-
|
|
247
|
+
content = code_to_txt.generate_content(add_tree=True)
|
|
243
248
|
pyperclip.copy(content)
|
|
244
249
|
```
|
|
245
250
|
|
|
246
|
-
###
|
|
251
|
+
### Get Statistics
|
|
247
252
|
|
|
248
253
|
```python
|
|
249
254
|
from code_to_txt import CodeToText
|
|
250
255
|
|
|
251
|
-
|
|
256
|
+
code_to_txt = CodeToText(
|
|
252
257
|
root_path="./my-project",
|
|
253
|
-
output_file=
|
|
254
|
-
|
|
258
|
+
output_file=None,
|
|
259
|
+
max_file_size_kb=500,
|
|
255
260
|
)
|
|
256
261
|
|
|
257
|
-
|
|
262
|
+
stats = code_to_txt.calculate_statistics()
|
|
263
|
+
print(f"Total files: {stats['total_files']}")
|
|
264
|
+
print(f"Total size: {stats['total_size_bytes'] / 1024 / 1024:.2f} MB")
|
|
265
|
+
print(f"Total lines: {stats['total_lines']:,}")
|
|
258
266
|
```
|
|
259
267
|
|
|
260
|
-
###
|
|
268
|
+
### Using Glob Patterns
|
|
261
269
|
|
|
262
270
|
```python
|
|
263
271
|
from code_to_txt import CodeToText
|
|
264
272
|
|
|
265
|
-
|
|
273
|
+
code_to_txt = CodeToText(
|
|
266
274
|
root_path="./my-project",
|
|
267
|
-
output_file="
|
|
268
|
-
|
|
269
|
-
exclude_patterns=["tests/*", "*.test.js", "node_modules/*"],
|
|
270
|
-
gitignore=True, # Respect .gitignore (default)
|
|
275
|
+
output_file="output.txt",
|
|
276
|
+
glob_patterns=["*.py", "src/**/*.js", "**/*.md"],
|
|
271
277
|
)
|
|
272
278
|
|
|
273
|
-
num_files =
|
|
274
|
-
add_tree=True,
|
|
275
|
-
separator="=" * 100,
|
|
276
|
-
)
|
|
279
|
+
num_files = code_to_txt.convert()
|
|
277
280
|
```
|
|
278
281
|
|
|
279
282
|
## Default File Extensions
|
|
@@ -301,7 +304,7 @@ CodeToTxt automatically ignores common build artifacts and dependencies:
|
|
|
301
304
|
- `.pytest_cache`, `.mypy_cache`, `.ruff_cache`
|
|
302
305
|
- `*.so`, `*.dylib`, `*.dll`
|
|
303
306
|
|
|
304
|
-
Plus any patterns in your `.gitignore` file.
|
|
307
|
+
Plus any patterns in your `.gitignore` files (including those in parent directories).
|
|
305
308
|
|
|
306
309
|
## Output Format
|
|
307
310
|
|
|
@@ -353,34 +356,43 @@ if __name__ == "__main__":
|
|
|
353
356
|
|
|
354
357
|
## Tips & Tricks
|
|
355
358
|
|
|
356
|
-
### For
|
|
359
|
+
### For LLM Consumption
|
|
357
360
|
|
|
358
361
|
```bash
|
|
359
|
-
#
|
|
360
|
-
code-to-txt
|
|
362
|
+
# Step 1: Check what you're working with
|
|
363
|
+
code-to-txt --stats
|
|
361
364
|
|
|
362
|
-
#
|
|
363
|
-
code-to-txt -
|
|
365
|
+
# Step 2: Preview files
|
|
366
|
+
code-to-txt --dry-run --max-file-size 200
|
|
367
|
+
|
|
368
|
+
# Step 3: Copy to clipboard with size limit
|
|
369
|
+
code-to-txt --clipboard-only --max-file-size 200 -e ".py .md"
|
|
370
|
+
|
|
371
|
+
# The output includes a token estimate, for example:
|
|
372
|
+
# Estimated tokens: ~95,000
|
|
364
373
|
```
|
|
365
374
|
|
|
366
|
-
### For
|
|
375
|
+
### For Large Projects
|
|
367
376
|
|
|
368
377
|
```bash
|
|
369
|
-
#
|
|
370
|
-
code-to-txt
|
|
378
|
+
# Use specific extensions to reduce size
|
|
379
|
+
code-to-txt -e ".py" -t --max-file-size 500
|
|
371
380
|
|
|
372
|
-
#
|
|
373
|
-
code-to-txt -
|
|
381
|
+
# Exclude heavy directories
|
|
382
|
+
code-to-txt -x "node_modules/*" -x "venv/*" -x "dist/*"
|
|
383
|
+
|
|
384
|
+
# Get statistics first
|
|
385
|
+
code-to-txt --stats --max-file-size 300
|
|
374
386
|
```
|
|
375
387
|
|
|
376
|
-
###
|
|
388
|
+
### Debug Ignore Patterns
|
|
377
389
|
|
|
378
390
|
```bash
|
|
379
|
-
#
|
|
380
|
-
code-to-txt -
|
|
391
|
+
# See which files are being skipped and why
|
|
392
|
+
code-to-txt --dry-run
|
|
381
393
|
|
|
382
|
-
#
|
|
383
|
-
code-to-txt -
|
|
394
|
+
# Compare with and without gitignore
|
|
395
|
+
code-to-txt --dry-run --no-gitignore
|
|
384
396
|
```
|
|
385
397
|
|
|
386
398
|
## Requirements
|
|
@@ -416,6 +428,20 @@ MIT License - see LICENSE file for details.
|
|
|
416
428
|
|
|
417
429
|
## Changelog
|
|
418
430
|
|
|
431
|
+
### v0.3.0
|
|
432
|
+
|
|
433
|
+
- 🔧 Refactored codebase for better maintainability
|
|
434
|
+
- 📁 Externalized default extensions and ignore patterns to separate files
|
|
435
|
+
- 🐛 Fixed critical gitignore bug (now checks parent directories)
|
|
436
|
+
- 🔍 Improved cross-platform path handling
|
|
437
|
+
- 📊 Added `--stats` flag for detailed codebase statistics
|
|
438
|
+
- 🎯 Added `--dry-run` mode to preview without processing
|
|
439
|
+
- 📏 Added `--max-file-size` to skip large files
|
|
440
|
+
- 🔢 Added token estimation for LLM consumption
|
|
441
|
+
- 📝 Added skip tracking to see which files were excluded
|
|
442
|
+
- 🚀 Improved method naming and code structure
|
|
443
|
+
- ✅ Enhanced test coverage
|
|
444
|
+
|
|
419
445
|
### v0.2.0
|
|
420
446
|
|
|
421
447
|
- ✨ Added automatic timestamp generation for output files
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
code_to_txt/.extensions,sha256=wmqH99IE9LSVPBQjOlmFH7e32aBhW-Gyx5pUk_aHTTw,164
|
|
2
|
+
code_to_txt/.ignore,sha256=h-2N-vrqYosVthADpYPSMwvHmZJXhdr9sUutlPtoEyw,151
|
|
3
|
+
code_to_txt/__init__.py,sha256=0BlnuJBBoiatWYgynf7iIw8LnMl-vyksXOwiSxLg7AI,84
|
|
4
|
+
code_to_txt/cli.py,sha256=Gg45vpewnQWZcQmbWgArMaXa6HYovCL7BK_nDdgQKqg,9633
|
|
5
|
+
code_to_txt/code_to_txt.py,sha256=h9UXYUdbXbPd4vaGn-EhgxSUlCGtN-JdTLw86lIakYE,11607
|
|
6
|
+
code_to_txt/config.py,sha256=DRjZ5uLXYbSwfTu36dGDbVUagYSMDhiw6TKgjAQkMU8,4292
|
|
7
|
+
code_to_txt/utils.py,sha256=K-eKT05eTCgkWuRDwSzPdmcmMZECRB4gubabO2vOgVE,434
|
|
8
|
+
code_to_txt-0.3.0.dist-info/METADATA,sha256=CTNP9Yjp9F7FeG7gakv2Lk1jO_da5jaKmgSNWMXAtNQ,11160
|
|
9
|
+
code_to_txt-0.3.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
|
|
10
|
+
code_to_txt-0.3.0.dist-info/entry_points.txt,sha256=jPT0g_nryiuAd0E496deFZAhdscNLXiUmUdD3KGN3iA,52
|
|
11
|
+
code_to_txt-0.3.0.dist-info/licenses/LICENSE,sha256=-K4fNS51V7AiwILLB_InW4EECFSbFrrOBd66OqVVyh4,1068
|
|
12
|
+
code_to_txt-0.3.0.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
code_to_txt/__init__.py,sha256=0_iks7Uz24B1pc-Na1n8C97vgMms6haaFNqIRkpq_Cg,62
|
|
2
|
-
code_to_txt/cli.py,sha256=5NEXWGts1JBSXpAWsgrAfz9O0YFHzu6uRPf4NUePOj0,6662
|
|
3
|
-
code_to_txt/code_to_txt.py,sha256=ZCqc-Yk-hc5AexDIGaphCe_2Ck3LhfVeQP8-pDVRCec,8417
|
|
4
|
-
code_to_txt/config.py,sha256=KMlpeKO0F8YRbEmlXMnCs_PrR3iYQNTYOgZISZfCzVU,4148
|
|
5
|
-
code_to_txt-0.2.0.dist-info/METADATA,sha256=AS-XxI1i8Au96Y1_y04nhgY2U6A8whMEshnGHUfHNgc,10519
|
|
6
|
-
code_to_txt-0.2.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
|
|
7
|
-
code_to_txt-0.2.0.dist-info/entry_points.txt,sha256=jPT0g_nryiuAd0E496deFZAhdscNLXiUmUdD3KGN3iA,52
|
|
8
|
-
code_to_txt-0.2.0.dist-info/licenses/LICENSE,sha256=-K4fNS51V7AiwILLB_InW4EECFSbFrrOBd66OqVVyh4,1068
|
|
9
|
-
code_to_txt-0.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|