code-to-txt 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_to_txt/.extensions +34 -0
- code_to_txt/.ignore +20 -0
- code_to_txt/__init__.py +1 -0
- code_to_txt/cli.py +208 -58
- code_to_txt/code_to_txt.py +249 -100
- code_to_txt/config.py +167 -0
- code_to_txt/utils.py +13 -0
- code_to_txt-0.3.0.dist-info/METADATA +466 -0
- code_to_txt-0.3.0.dist-info/RECORD +12 -0
- {code_to_txt-0.1.0.dist-info → code_to_txt-0.3.0.dist-info}/WHEEL +1 -1
- code_to_txt-0.3.0.dist-info/licenses/LICENSE +21 -0
- code_to_txt-0.1.0.dist-info/METADATA +0 -24
- code_to_txt-0.1.0.dist-info/RECORD +0 -7
- {code_to_txt-0.1.0.dist-info → code_to_txt-0.3.0.dist-info}/entry_points.txt +0 -0
code_to_txt/.extensions
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
.py
|
|
2
|
+
.js
|
|
3
|
+
.ts
|
|
4
|
+
.jsx
|
|
5
|
+
.tsx
|
|
6
|
+
.java
|
|
7
|
+
.c
|
|
8
|
+
.cpp
|
|
9
|
+
.h
|
|
10
|
+
.hpp
|
|
11
|
+
.cs
|
|
12
|
+
.go
|
|
13
|
+
.rs
|
|
14
|
+
.rb
|
|
15
|
+
.php
|
|
16
|
+
.swift
|
|
17
|
+
.kt
|
|
18
|
+
.scala
|
|
19
|
+
.r
|
|
20
|
+
.sql
|
|
21
|
+
.sh
|
|
22
|
+
.bash
|
|
23
|
+
.zsh
|
|
24
|
+
.yaml
|
|
25
|
+
.yml
|
|
26
|
+
.json
|
|
27
|
+
.toml
|
|
28
|
+
.xml
|
|
29
|
+
.html
|
|
30
|
+
.css
|
|
31
|
+
.scss
|
|
32
|
+
.md
|
|
33
|
+
.txt
|
|
34
|
+
.rst
|
code_to_txt/.ignore
ADDED
code_to_txt/__init__.py
CHANGED
code_to_txt/cli.py
CHANGED
|
@@ -1,91 +1,241 @@
|
|
|
1
|
-
import
|
|
1
|
+
from datetime import datetime
|
|
2
2
|
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
import pyperclip
|
|
6
|
+
|
|
7
|
+
from . import __version__
|
|
3
8
|
from .code_to_txt import CodeToText
|
|
9
|
+
from .config import create_default_config, load_config
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def display_statistics(stats: dict) -> None:
    """Print a summary report of codebase statistics with ``click.echo``.

    Args:
        stats: Mapping that is read with the keys ``total_files``,
            ``total_size_bytes``, ``total_lines``, ``skipped_files``,
            ``by_extension`` (extension -> mapping with ``count`` and
            ``size``) and ``largest_files`` (sequence of mappings with
            ``size_kb`` and ``path``).
    """
    max_extensions = 10
    max_largest = 5
    rule = "=" * 60

    # Header and overall totals.
    click.echo(f"\n{rule}")
    click.echo("CODEBASE STATISTICS")
    click.echo(rule)
    click.echo(f"Total files: {stats['total_files']}")
    click.echo(f"Total size: {stats['total_size_bytes'] / 1024 / 1024:.2f} MB")
    click.echo(f"Total lines: {stats['total_lines']:,}")

    skipped_count = stats["skipped_files"]
    if skipped_count > 0:
        click.echo(f"Skipped files: {skipped_count}")

    # Per-extension breakdown, most numerous extensions first.
    click.echo("\nFiles by extension:")
    ranked = sorted(
        stats["by_extension"].items(),
        key=lambda entry: entry[1]["count"],
        reverse=True,
    )
    for ext, data in ranked[:max_extensions]:
        size_mb = data["size"] / 1024 / 1024
        click.echo(f" {ext:15} {data['count']:5} files {size_mb:8.2f} MB")

    remaining = len(ranked) - max_extensions
    if remaining > 0:
        click.echo(f" ... and {remaining} more extensions")

    # Only the first few entries of the largest-files list are shown.
    largest = stats["largest_files"]
    if largest:
        click.echo("\nLargest files:")
        for file_info in largest[:max_largest]:
            click.echo(f" {file_info['size_kb']:8.2f} KB {file_info['path']}")

    click.echo(f"{rule}\n")
|
|
4
40
|
|
|
5
41
|
|
|
6
42
|
# Default value of --separator; used to tell whether the user overrode it.
_DEFAULT_SEPARATOR = "=" * 80


def _parse_extensions(extensions: "str | list[str] | tuple[str, ...] | None") -> "set[str] | None":
    """Normalize an extension spec into a set of dot-prefixed extensions.

    Returns ``None`` when no spec was given (meaning "use the defaults").
    """
    if not extensions:
        return None
    if isinstance(extensions, str):
        # Commas and whitespace are interchangeable separators, so mixed
        # input such as ".py, .c .h" splits into three extensions.
        candidates = extensions.replace(",", " ").split()
    else:
        # NOTE(review): a config file may supply a YAML list here; the schema
        # lives in config.py, which is not visible from this module.
        candidates = [str(item).strip() for item in extensions]
    return {ext if ext.startswith(".") else f".{ext}" for ext in candidates if ext}


def _timestamped_output(output: "str | None") -> str:
    """Return the output path with a ``YYYYMMDD_HHMMSS`` stamp applied."""
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    if not output:
        return f"codetotxt_{stamp}.txt"
    target = Path(output)
    suffix = target.suffix or ".txt"
    return str(target.parent / f"{target.stem}_{stamp}{suffix}")


def _echo_dry_run_report(converter, root: Path) -> None:
    """List the files a real run would process and up to 20 skipped files."""
    click.echo("Files that would be processed:")
    for index, file_path in enumerate(converter._collect_files(), 1):
        size_kb = file_path.stat().st_size / 1024
        click.echo(f" {index:4}. {file_path.relative_to(root)} ({size_kb:.1f} KB)")

    skipped = converter.skipped_files
    if skipped:
        click.echo(f"\nSkipped {len(skipped)} files:")
        for file_path, reason in skipped[:20]:
            click.echo(f" - {file_path.relative_to(root)} ({reason})")
        if len(skipped) > 20:
            click.echo(f" ... and {len(skipped) - 20} more")


# NOTE: the docstring of ``main`` is rendered by click as the --help text, so
# developer-facing documentation lives in comments rather than the docstring.
#
# Flow: handle --version / --init-config and exit; merge CLI options with the
# optional config file (CLI wins); build the output filename; then either
# report (--stats / --dry-run), copy to the clipboard, or write the file.
# Any error during processing is reported on stderr and exits with status 1.
@click.command()
@click.argument("path", type=click.Path(exists=True), default=".")
@click.option("-o", "--output", default=None, type=click.Path(),
              help="Output file path (default: codetotxt_YYYYMMDD_HHMMSS.txt)")
@click.option("-e", "--extensions", default=None, help="File extensions to include (space or comma separated)")
@click.option("-x", "--exclude", multiple=True, help="Patterns to exclude (can be used multiple times)")
@click.option("-g", "--glob", multiple=True, help="Glob patterns to include (can be used multiple times)")
@click.option("--no-gitignore", is_flag=True, help="Don't respect .gitignore files")
@click.option("--no-tree", is_flag=True, help="Don't include directory tree in output")
@click.option("--separator", default="=" * 80, help="Separator between files")
@click.option("-c", "--clipboard", is_flag=True, help="Copy output to clipboard in addition to file")
@click.option("--clipboard-only", is_flag=True, help="Copy to clipboard only (don't save file)")
@click.option("--config", type=click.Path(exists=True), help="Path to config file (.yml or .yaml)")
@click.option("--init-config", is_flag=True, help="Create default configuration file")
@click.option("-t", "--timestamp", is_flag=True, help="Add timestamp to output filename")
@click.option("-v", "--version", is_flag=True, help="Show version and exit")
@click.option("--dry-run", is_flag=True, help="Show which files would be processed without creating output")
@click.option("--stats", is_flag=True, help="Show detailed statistics about the codebase")
@click.option("--max-file-size", type=int, default=None, help="Skip files larger than N KB")
def main(
    path: str,
    output: str | None,
    extensions: str | None,
    exclude: tuple[str, ...],
    glob: tuple[str, ...],
    no_gitignore: bool,
    no_tree: bool,
    separator: str,
    clipboard: bool,
    clipboard_only: bool,
    config: str | None,
    init_config: bool,
    timestamp: bool,
    version: bool,
    dry_run: bool,
    stats: bool,
    max_file_size: int | None,
) -> None:
    """
    Convert code files to a single text file for LLM consumption.

    PATH: Directory to scan (default: current directory)

    Examples:

        # Convert all code files in current directory with timestamp
        code-to-txt -t

        # Convert specific directory to custom output
        code-to-txt ./my-project -o project.txt

        # Include Python, C, and header files (space or comma separated)
        code-to-txt -e ".py .c .h"
        code-to-txt -e ".py,.c,.h"

        # Use glob patterns
        code-to-txt -g "*.py" -g "src/**/*.js"

        # Exclude test files
        code-to-txt -x "tests/*" -x "*.test.js"

        # Copy to clipboard
        code-to-txt --clipboard

        # Copy to clipboard only (no file)
        code-to-txt --clipboard-only

        # Create default config file
        code-to-txt --init-config

        # Use config file
        code-to-txt --config .code-to-txt.yml
    """
    if version:
        click.echo(f"v{__version__}")
        return

    if init_config:
        config_path = Path(".code-to-txt.yml")
        if config_path.exists():
            # abort=True makes click exit if the user declines to overwrite.
            click.confirm(f"Config file {config_path} already exists. Overwrite?", abort=True)
        create_default_config(config_path)
        click.echo(f"Created default config file: {config_path}")
        click.echo("You can now edit this file and use it with --config flag")
        return

    config_data = {}
    if config:
        # NOTE(review): `or {}` guards against load_config returning None for
        # an empty file; confirm against config.py.
        config_data = load_config(config) or {}
        click.echo(f"Using config file: {config}")

    # Command-line values take precedence; the config file fills the gaps.
    output = output or config_data.get("output")
    extensions = extensions or config_data.get("extensions")
    exclude = exclude or config_data.get("exclude") or []
    glob_patterns = glob or config_data.get("glob") or []
    no_gitignore = no_gitignore or config_data.get("no_gitignore", False)
    no_tree = no_tree or config_data.get("no_tree", False)
    # Only fall back to the config separator when the CLI value is still the
    # default; previously the config value always replaced an explicit
    # --separator. (Passing the default explicitly is indistinguishable.)
    if separator == _DEFAULT_SEPARATOR:
        separator = config_data.get("separator", separator)
    clipboard = clipboard or config_data.get("clipboard", False)
    clipboard_only = clipboard_only or config_data.get("clipboard_only", False)
    timestamp = timestamp or config_data.get("timestamp", False)
    # `is None` rather than `or`, so an explicit --max-file-size 0 is honoured.
    if max_file_size is None:
        max_file_size = config_data.get("max_file_size")

    if not output or timestamp:
        output = _timestamped_output(output)

    click.echo(f"Scanning: {path}")

    include_extensions = _parse_extensions(extensions)

    # Reporting and clipboard-only modes must not create an output file.
    writes_file = not (clipboard_only or dry_run or stats)
    code_to_txt = CodeToText(
        root_path=path,
        output_file=output if writes_file else None,
        include_extensions=include_extensions,
        exclude_patterns=list(exclude),
        glob_patterns=list(glob_patterns),
        gitignore=not no_gitignore,
        max_file_size_kb=max_file_size,
    )

    try:
        if stats or dry_run:
            display_statistics(code_to_txt.calculate_statistics())
            # --dry-run adds the per-file listing, including when it is
            # combined with --stats (previously the listing was skipped).
            if dry_run:
                _echo_dry_run_report(code_to_txt, Path(path).resolve())
            return

        if clipboard_only:
            content = code_to_txt.generate_content(add_tree=not no_tree, separator=separator)
            pyperclip.copy(content)
            click.echo("Content copied to clipboard")
            click.echo(f"Processed {code_to_txt.file_count} files")
            click.echo(f"Content size: {len(content) / 1024:.2f} KB")

            # Rough heuristic: about four characters per token.
            estimated_tokens = len(content) / 4
            click.echo(f"Estimated tokens: ~{estimated_tokens:,.0f}")
        else:
            num_files = code_to_txt.convert(add_tree=not no_tree, separator=separator)

            output_path = Path(output).resolve()
            click.echo(f"Successfully processed {num_files} files")
            click.echo(f"Output saved to: {output_path}")

            size_kb = output_path.stat().st_size / 1024
            click.echo(f"File size: {size_kb:.2f} KB")

            estimated_tokens = size_kb * 1024 / 4
            click.echo(f"Estimated tokens: ~{estimated_tokens:,.0f}")

            if clipboard:
                content = output_path.read_text(encoding="utf-8")
                pyperclip.copy(content)
                click.echo("Content also copied to clipboard")

        if code_to_txt.skipped_files:
            click.echo(f"\nNote: Skipped {len(code_to_txt.skipped_files)} files (use --dry-run to see details)")

    except Exception as e:
        # Top-level CLI boundary: report the error and exit non-zero so that
        # shells and CI can detect the failure (previously exited with 0).
        click.echo(f"Error: {e}", err=True)
        raise SystemExit(1) from e
|