code-to-txt 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: code-to-txt
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Convert code files to a single text file for LLM consumption
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Andrii Sonsiadlo
|
|
7
|
+
Author-email: andrii.sonsiadlo@gmail.com
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Requires-Dist: click (>=8.3.1,<9.0.0)
|
|
17
|
+
Requires-Dist: gitpython (>=3.1.46,<4.0.0)
|
|
18
|
+
Requires-Dist: pathspec (>=1.0.4,<2.0.0)
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# CodeToTxt
|
|
22
|
+
|
|
23
|
+
A small Python package to convert code to text.
|
|
24
|
+
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "code-to-txt"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Convert code files to a single text file for LLM consumption"
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Andrii Sonsiadlo",email = "andrii.sonsiadlo@gmail.com"}
|
|
7
|
+
]
|
|
8
|
+
license = {text = "MIT"}
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"gitpython (>=3.1.46,<4.0.0)",
|
|
13
|
+
"click (>=8.3.1,<9.0.0)",
|
|
14
|
+
"pathspec (>=1.0.4,<2.0.0)"
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[tool.poetry]
|
|
18
|
+
packages = [{include = "code_to_txt", from = "src"}]
|
|
19
|
+
|
|
20
|
+
[tool.poetry.group.dev.dependencies]
|
|
21
|
+
pytest = "^9.0.2"
|
|
22
|
+
ruff = "^0.14.14"
|
|
23
|
+
mypy = "^1.19.1"
|
|
24
|
+
|
|
25
|
+
[build-system]
|
|
26
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
27
|
+
build-backend = "poetry.core.masonry.api"
|
|
28
|
+
|
|
29
|
+
[tool.poetry.scripts]
|
|
30
|
+
code-to-txt = "code_to_txt.cli:main"
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from .code_to_txt import CodeToText
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@click.command()
@click.argument(
    "path",
    # PATH is documented as a directory; reject regular files up front
    # instead of silently reporting "0 files".
    type=click.Path(exists=True, file_okay=False),
    default=".",
)
@click.option(
    "-o",
    "--output",
    default="code_output.txt",
    help="Output file path",
    # The output is opened for writing as a file, so a directory is invalid.
    type=click.Path(dir_okay=False),
)
@click.option(
    "-e",
    "--extensions",
    multiple=True,
    help="File extensions to include (e.g., .py .js). Can be specified multiple times.",
)
@click.option(
    "-x",
    "--exclude",
    multiple=True,
    help="Patterns to exclude (gitignore style). Can be specified multiple times.",
)
@click.option(
    "--no-gitignore",
    is_flag=True,
    help="Don't respect .gitignore files",
)
@click.option(
    "--no-tree",
    is_flag=True,
    help="Don't include directory tree in output",
)
@click.option(
    "--separator",
    default="=" * 80,
    help="Separator between files",
)
def main(path, output, extensions, exclude, no_gitignore, no_tree, separator):
    """
    Convert code files to a single text file for easy LLM consumption.

    PATH: Directory to scan (default: current directory)

    Examples:

    \b
    # Convert all code files in current directory
    code-to-txt

    \b
    # Convert specific directory to custom output
    code-to-txt ./my-project -o project.txt

    \b
    # Only include Python and JavaScript files
    code-to-txt -e .py -e .js

    \b
    # Exclude test files
    code-to-txt -x "tests/*" -x "*.test.js"

    \b
    # Don't use .gitignore and don't show tree
    code-to-txt --no-gitignore --no-tree
    """
    # NOTE: the "\b" markers above tell Click not to re-wrap the following
    # paragraph, so each example keeps its comment and command on separate
    # lines in ``--help`` output.
    click.echo(f"Scanning: {path}")

    # Extensions are compared against ``Path.suffix`` (which always carries a
    # leading dot), so accept both ".py" and "py" from the command line.
    normalized_extensions = set()
    for ext in extensions:
        ext = ext.strip()
        if ext:
            normalized_extensions.add(ext if ext.startswith(".") else f".{ext}")
    # ``None`` lets CodeToText fall back to its default extension set.
    include_extensions = normalized_extensions or None

    try:
        # Construction reads .gitignore, so it belongs inside the error
        # boundary together with the conversion itself.
        codetotxt = CodeToText(
            root_path=path,
            output_file=output,
            include_extensions=include_extensions,
            exclude_patterns=list(exclude),
            gitignore=not no_gitignore,
        )

        num_files = codetotxt.convert(
            add_tree=not no_tree,
            separator=separator,
        )

        output_path = Path(output).resolve()
        click.echo(f"Successfully processed {num_files} files")
        click.echo(f"Output saved to: {output_path}")

        size_kb = output_path.stat().st_size / 1024
        click.echo(f"File size: {size_kb:.2f} KB")

    except Exception as e:
        # Top-level CLI boundary: report the problem on stderr and abort,
        # keeping the original exception attached as the cause.
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
if __name__ == "__main__":
|
|
96
|
+
main()
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import List, Optional, Set
|
|
4
|
+
import pathspec
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CodeToText:
    """Collect source files below a directory and dump them into one text file.

    Files are selected by extension and filtered through gitignore-style
    patterns (built-in defaults, the root ``.gitignore`` when enabled, and
    caller-supplied exclude patterns).
    """

    # Gitignore-style patterns that are always excluded.
    DEFAULT_IGNORE = {
        "__pycache__",
        "*.pyc",
        "*.pyo",
        "*.pyd",
    }

    # Extensions (with leading dot) used when the caller does not pass any.
    DEFAULT_EXTENSIONS = {
        ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".c", ".cpp", ".h",
    }

    def __init__(
        self,
        root_path: str,
        output_file: str = "output.txt",
        include_extensions: Optional[Set[str]] = None,
        exclude_patterns: Optional[List[str]] = None,
        gitignore: bool = True,
    ):
        """
        Initialize the instance of CodeToText.

        Args:
            root_path: Root directory to scan
            output_file: Output file path
            include_extensions: Set of file extensions to include (with dots)
            exclude_patterns: List of patterns to exclude (gitignore style)
            gitignore: Whether to respect .gitignore files
        """
        self.root_path = Path(root_path).resolve()
        self.output_file = output_file
        self.include_extensions = include_extensions or self.DEFAULT_EXTENSIONS
        self.exclude_patterns = exclude_patterns or []
        self.gitignore = gitignore
        self.spec = None

        # Always build the spec: the default ignores and the explicit exclude
        # patterns must apply even when .gitignore itself is not honoured.
        self._load_gitignore()

    def _load_gitignore(self):
        """Build ``self.spec`` from defaults, .gitignore (if enabled) and excludes."""
        # Sorted only to make the pattern order deterministic.
        patterns = sorted(self.DEFAULT_IGNORE)

        gitignore_path = self.root_path / ".gitignore"
        if self.gitignore and gitignore_path.is_file():
            # Explicit encoding so the result does not depend on the locale;
            # undecodable bytes are replaced rather than aborting the run.
            with open(gitignore_path, "r", encoding="utf-8", errors="replace") as f:
                for line in f:
                    line = line.strip()
                    if line and not line.startswith("#"):
                        patterns.append(line)

        patterns.extend(self.exclude_patterns)
        self.spec = pathspec.PathSpec.from_lines("gitignore", patterns)

    def _should_include_file(self, file_path: Path) -> bool:
        """Return True if *file_path* has a wanted extension and is not ignored."""
        if file_path.suffix not in self.include_extensions:
            return False

        if self.spec is not None:
            relative_path = file_path.relative_to(self.root_path)
            if self.spec.match_file(relative_path.as_posix()):
                return False

        return True

    def _get_files(self) -> List[Path]:
        """Walk the root directory and return the sorted list of files to export."""
        # The export itself must never be treated as an input, otherwise a
        # previous (or the currently open) output would be embedded in itself.
        output_path = Path(self.output_file).resolve()

        files = []
        for root, dirs, filenames in os.walk(self.root_path):
            current_dir = Path(root)

            if self.spec is not None:
                relative_root = current_dir.relative_to(self.root_path)
                # Prune ignored directories in place so os.walk skips them.
                # The trailing slash marks the path as a directory, which is
                # required for directory-only patterns such as "build/".
                dirs[:] = [
                    d for d in dirs
                    if not self.spec.match_file((relative_root / d).as_posix() + "/")
                ]

            for filename in filenames:
                file_path = current_dir / filename
                if file_path == output_path:
                    continue
                if self._should_include_file(file_path):
                    files.append(file_path)

        return sorted(files)

    def convert(self, add_tree: bool = True, separator: str = "=" * 80) -> int:
        """
        Convert files to single text file.

        Args:
            add_tree: Whether to add directory tree at the beginning
            separator: Separator between files

        Returns:
            Number of files processed
        """
        # Collect the files before the output is opened, and reuse the same
        # list for the tree so the filesystem is only walked once.
        files = self._get_files()
        total = len(files)

        with open(self.output_file, "w", encoding="utf-8") as out:
            out.write(f"Code Export from: {self.root_path}\n")
            out.write(f"Total files: {total}\n")
            out.write(f"{separator}\n\n")

            if add_tree:
                out.write("DIRECTORY TREE:\n")
                out.write(separator + "\n")
                out.write(self._generate_tree(files))
                out.write(f"\n{separator}\n\n")

            for i, file_path in enumerate(files, 1):
                relative_path = file_path.relative_to(self.root_path)

                out.write(f"FILE {i}/{total}: {relative_path}\n")
                out.write(separator + "\n")

                # Read first, write afterwards: a failing read then leaves
                # only the marker line, never a partially copied file.
                try:
                    with open(file_path, "r", encoding="utf-8") as f:
                        content = f.read()
                except UnicodeDecodeError:
                    out.write("[Binary file - skipped]\n")
                except OSError as e:
                    out.write(f"[Error reading file: {e}]\n")
                else:
                    out.write(content)

                out.write(f"\n{separator}\n\n")

        return total

    def _generate_tree(self, files: Optional[List[Path]] = None) -> str:
        """Generate a directory tree representation.

        Args:
            files: Files to display. When omitted, the root directory is
                scanned with ``_get_files()``.

        Returns:
            The tree as a newline-joined string; the first line is the root
            directory name, directories are listed before files.
        """
        if files is None:
            files = self._get_files()

        # Each node is a pair (subdirectories, file names). Keeping the two
        # apart avoids reserving any directory name as a sentinel key.
        root_node = ({}, [])
        for file_path in files:
            *dir_parts, file_name = file_path.relative_to(self.root_path).parts
            subdirs, names = root_node
            for part in dir_parts:
                subdirs, names = subdirs.setdefault(part, ({}, []))
            names.append(file_name)

        tree_lines = [f"{self.root_path.name}/"]

        def render(node, prefix):
            subdirs, names = node
            entries = [(name, True) for name in sorted(subdirs)]
            entries.extend((name, False) for name in sorted(names))

            last_index = len(entries) - 1
            for index, (name, is_dir) in enumerate(entries):
                is_last_item = index == last_index
                connector = "└── " if is_last_item else "├── "
                tree_lines.append(f"{prefix}{connector}{name}{'/' if is_dir else ''}")

                if is_dir:
                    # Child prefix is as wide as the connector (4 columns)
                    # so nested entries line up under their parent.
                    extension = "    " if is_last_item else "│   "
                    render(subdirs[name], prefix + extension)

        render(root_node, "")

        return "\n".join(tree_lines)
|