code-to-txt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ from .code_to_txt import CodeToText
2
+
3
+ __all__ = ["CodeToText"]
code_to_txt/cli.py ADDED
@@ -0,0 +1,96 @@
1
+ import click
2
+ from pathlib import Path
3
+ from .code_to_txt import CodeToText
4
+
5
+
6
@click.command()
@click.argument(
    "path",
    # PATH is walked as a directory tree, so reject plain files up front
    # instead of silently producing an empty export.
    type=click.Path(exists=True, file_okay=False),
    default=".",
)
@click.option(
    "-o",
    "--output",
    default="code_output.txt",
    help="Output file path",
    # The output is opened for writing as a file; an existing directory
    # cannot be used.
    type=click.Path(dir_okay=False),
)
@click.option(
    "-e",
    "--extensions",
    multiple=True,
    help="File extensions to include (e.g., .py .js). Can be specified multiple times.",
)
@click.option(
    "-x",
    "--exclude",
    multiple=True,
    help="Patterns to exclude (gitignore style). Can be specified multiple times.",
)
@click.option(
    "--no-gitignore",
    is_flag=True,
    help="Don't respect .gitignore files",
)
@click.option(
    "--no-tree",
    is_flag=True,
    help="Don't include directory tree in output",
)
@click.option(
    "--separator",
    default="=" * 80,
    help="Separator between files",
)
def main(path, output, extensions, exclude, no_gitignore, no_tree, separator):
    """
    Convert code files to a single text file for easy LLM consumption.

    PATH: Directory to scan (default: current directory)

    Examples:

    \b
    # Convert all code files in current directory
    code-to-txt

    \b
    # Convert specific directory to custom output
    code-to-txt ./my-project -o project.txt

    \b
    # Only include Python and JavaScript files
    code-to-txt -e .py -e .js

    \b
    # Exclude test files
    code-to-txt -x "tests/*" -x "*.test.js"

    \b
    # Don't use .gitignore and don't show tree
    code-to-txt --no-gitignore --no-tree
    """
    # NOTE: the "\b" lines above are click's marker for "do not rewrap the
    # following paragraph"; without them each example is folded into one line
    # in --help output.
    click.echo(f"Scanning: {path}")

    # An empty tuple means "no -e given": pass None so CodeToText falls back
    # to its default extension set.
    include_extensions = set(extensions) if extensions else None

    try:
        # Construction is inside the try because it may read .gitignore.
        codetotxt = CodeToText(
            root_path=path,
            output_file=output,
            include_extensions=include_extensions,
            exclude_patterns=list(exclude),
            gitignore=not no_gitignore,
        )
        num_files = codetotxt.convert(
            add_tree=not no_tree,
            separator=separator,
        )
        output_path = Path(output).resolve()
        size_kb = output_path.stat().st_size / 1024
    except Exception as e:
        # Top-level CLI boundary: report the failure on stderr and exit
        # non-zero, keeping the original exception as the cause.
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e

    click.echo(f"Successfully processed {num_files} files")
    click.echo(f"Output saved to: {output_path}")
    click.echo(f"File size: {size_kb:.2f} KB")


if __name__ == "__main__":
    main()
@@ -0,0 +1,180 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import List, Optional, Set
4
+ import pathspec
5
+
6
+
7
class CodeToText:
    """Collect source files under a directory and write them to one text file.

    Files are selected by extension and filtered through gitignore-style
    patterns: the built-in ``DEFAULT_IGNORE`` set, the root ``.gitignore``
    (when enabled) and any user-supplied exclude patterns.
    """

    # Patterns that are always excluded, regardless of .gitignore handling.
    DEFAULT_IGNORE = {
        "__pycache__",
        "*.pyc",
        "*.pyo",
        "*.pyd",
    }

    # Extensions used when the caller does not supply ``include_extensions``.
    DEFAULT_EXTENSIONS = {
        ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".c", ".cpp", ".h"
    }

    def __init__(
        self,
        root_path: str,
        output_file: str = "output.txt",
        include_extensions: Optional[Set[str]] = None,
        exclude_patterns: Optional[List[str]] = None,
        gitignore: bool = True,
    ):
        """
        Initialize the instance of CodeToText.

        Args:
            root_path: Root directory to scan
            output_file: Output file path
            include_extensions: Set of file extensions to include (with dots)
            exclude_patterns: List of patterns to exclude (gitignore style)
            gitignore: Whether to respect .gitignore files
        """
        self.root_path = Path(root_path).resolve()
        self.output_file = output_file
        self.include_extensions = include_extensions or self.DEFAULT_EXTENSIONS
        self.exclude_patterns = exclude_patterns or []
        self.gitignore = gitignore
        self.spec = None

        # Always build the spec: exclude patterns and the default ignores
        # must apply even when .gitignore handling is switched off.
        self._build_spec()

    def _read_gitignore_patterns(self) -> List[str]:
        """Return the non-blank, non-comment lines of the root .gitignore."""
        gitignore_path = self.root_path / ".gitignore"
        if not gitignore_path.exists():
            return []

        # Explicit encoding so the result does not depend on the platform
        # default.
        with open(gitignore_path, "r", encoding="utf-8") as f:
            stripped = (line.strip() for line in f)
            return [line for line in stripped if line and not line.startswith("#")]

    def _build_spec(self) -> None:
        """Compile all active ignore patterns into ``self.spec``.

        Order matters for gitignore-style matching (later patterns win), so
        the defaults come first, then .gitignore, then user excludes.
        """
        patterns = list(self.DEFAULT_IGNORE)
        if self.gitignore:
            patterns.extend(self._read_gitignore_patterns())
        patterns.extend(self.exclude_patterns)
        self.spec = pathspec.PathSpec.from_lines("gitignore", patterns)

    def _is_ignored_dir(self, relative_dir: Path) -> bool:
        """Check whether a directory (relative to the root) is ignored.

        The path is tested both bare and with a trailing slash so that
        directory-only patterns such as ``build/`` can prune the walk.
        """
        posix = relative_dir.as_posix()
        return bool(
            self.spec.match_file(posix) or self.spec.match_file(posix + "/")
        )

    def _should_include_file(self, file_path: Path) -> bool:
        """Check if a file should be included."""
        if file_path.suffix not in self.include_extensions:
            return False

        if self.spec is None:
            return True

        relative_path = file_path.relative_to(self.root_path)
        return not self.spec.match_file(relative_path.as_posix())

    def _get_files(self) -> List[Path]:
        """Get all files to process, sorted by path.

        The output file itself is never returned, so a previous export that
        lives inside the scanned tree is not fed back into the next one.
        """
        output_path = Path(self.output_file).resolve()
        files = []

        for root, dirs, filenames in os.walk(self.root_path):
            root_path = Path(root)

            if self.spec is not None:
                relative_root = root_path.relative_to(self.root_path)
                # In-place slice assignment tells os.walk not to descend
                # into the removed directories.
                dirs[:] = [
                    d for d in dirs
                    if not self._is_ignored_dir(relative_root / d)
                ]

            for filename in filenames:
                file_path = root_path / filename
                if file_path == output_path:
                    continue
                if self._should_include_file(file_path):
                    files.append(file_path)

        return sorted(files)

    @staticmethod
    def _read_file(file_path: Path) -> str:
        """Return the file's text, or a bracketed placeholder if unreadable."""
        try:
            return file_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            return "[Binary file - skipped]\n"
        except OSError as e:
            return f"[Error reading file: {e}]\n"

    def convert(self, add_tree: bool = True, separator: str = "=" * 80) -> int:
        """
        Convert files to single text file.

        Args:
            add_tree: Whether to add directory tree at the beginning
            separator: Separator between files

        Returns:
            Number of files processed
        """
        # Collect the files once, before the output file is created, and
        # reuse the same list for the tree so both views always agree.
        files = self._get_files()
        total = len(files)

        with open(self.output_file, "w", encoding="utf-8") as out:
            out.write(f"Code Export from: {self.root_path}\n")
            out.write(f"Total files: {total}\n")
            out.write(f"{separator}\n\n")

            if add_tree:
                out.write("DIRECTORY TREE:\n")
                out.write(separator + "\n")
                out.write(self._generate_tree(files))
                out.write(f"\n{separator}\n\n")

            for i, file_path in enumerate(files, 1):
                relative_path = file_path.relative_to(self.root_path)

                out.write(f"FILE {i}/{total}: {relative_path}\n")
                out.write(separator + "\n")
                out.write(self._read_file(file_path))
                out.write(f"\n{separator}\n\n")

        return total

    def _generate_tree(self, files: Optional[List[Path]] = None) -> str:
        """Generate a directory tree representation.

        Args:
            files: Files to show, as paths under ``self.root_path``. When
                omitted, the directory is scanned with ``_get_files()``.

        Returns:
            The tree as a newline-joined string, root directory name first,
            directories listed before files at every level.
        """
        if files is None:
            files = self._get_files()

        # Each node is a (subdirectories, file names) pair. Keeping files in
        # a separate slot avoids any clash with a real directory name.
        root_node = ({}, [])
        for file_path in files:
            *parents, name = file_path.relative_to(self.root_path).parts
            subdirs, names = root_node
            for part in parents:
                subdirs, names = subdirs.setdefault(part, ({}, []))
            names.append(name)

        tree_lines = [f"{self.root_path.name}/"]

        def render(node, prefix=""):
            subdirs, names = node
            entries = [(d, True) for d in sorted(subdirs)]
            entries += [(n, False) for n in sorted(names)]

            for index, (name, is_dir) in enumerate(entries):
                is_last_item = index == len(entries) - 1
                connector = "└── " if is_last_item else "├── "
                tree_lines.append(f"{prefix}{connector}{name}{'/' if is_dir else ''}")

                if is_dir:
                    # Continuation prefix is as wide as the connector so
                    # nested entries line up under their parent.
                    extension = "    " if is_last_item else "│   "
                    render(subdirs[name], prefix + extension)

        render(root_node)

        return "\n".join(tree_lines)
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.4
2
+ Name: code-to-txt
3
+ Version: 0.1.0
4
+ Summary: Convert code files to a single text file for LLM consumption
5
+ License: MIT
6
+ Author: Andrii Sonsiadlo
7
+ Author-email: andrii.sonsiadlo@gmail.com
8
+ Requires-Python: >=3.10
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Programming Language :: Python :: 3.14
16
+ Requires-Dist: click (>=8.3.1,<9.0.0)
17
+ Requires-Dist: gitpython (>=3.1.46,<4.0.0)
18
+ Requires-Dist: pathspec (>=1.0.4,<2.0.0)
19
+ Description-Content-Type: text/markdown
20
+
21
+ # CodeToTxt
22
+
23
+ A small Python package to convert code to text.
24
+
@@ -0,0 +1,7 @@
1
+ code_to_txt/__init__.py,sha256=0_iks7Uz24B1pc-Na1n8C97vgMms6haaFNqIRkpq_Cg,62
2
+ code_to_txt/cli.py,sha256=EEm87GRTgk1YorSfoLl2IR2NUjG0f_fkgEQ5YnkxpQ8,2409
3
+ code_to_txt/code_to_txt.py,sha256=HKXiTXMUxM612f7SVHcoxsra6uK6SVV6_-UY49jtR2w,6089
4
+ code_to_txt-0.1.0.dist-info/METADATA,sha256=QEbEcYTZonOO00fLkq3EFuIWBgaGWQLsxnGV3UY1O7I,807
5
+ code_to_txt-0.1.0.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
6
+ code_to_txt-0.1.0.dist-info/entry_points.txt,sha256=jPT0g_nryiuAd0E496deFZAhdscNLXiUmUdD3KGN3iA,52
7
+ code_to_txt-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 2.3.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ code-to-txt=code_to_txt.cli:main
3
+