code-to-txt 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ .py
2
+ .js
3
+ .ts
4
+ .jsx
5
+ .tsx
6
+ .java
7
+ .c
8
+ .cpp
9
+ .h
10
+ .hpp
11
+ .cs
12
+ .go
13
+ .rs
14
+ .rb
15
+ .php
16
+ .swift
17
+ .kt
18
+ .scala
19
+ .r
20
+ .sql
21
+ .sh
22
+ .bash
23
+ .zsh
24
+ .yaml
25
+ .yml
26
+ .json
27
+ .toml
28
+ .xml
29
+ .html
30
+ .css
31
+ .scss
32
+ .md
33
+ .txt
34
+ .rst
code_to_txt/.ignore ADDED
@@ -0,0 +1,20 @@
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .git
6
+ .svn
7
+ .hg
8
+ node_modules
9
+ .venv
10
+ venv
11
+ .env
12
+ *.egg-info
13
+ dist
14
+ build
15
+ .pytest_cache
16
+ .mypy_cache
17
+ .ruff_cache
18
+ *.so
19
+ *.dylib
20
+ *.dll
code_to_txt/__init__.py CHANGED
@@ -1,3 +1,4 @@
1
1
  from .code_to_txt import CodeToText
2
2
 
3
+ __version__ = "0.3.0"
3
4
  __all__ = ["CodeToText"]
code_to_txt/cli.py CHANGED
@@ -4,96 +4,81 @@ from pathlib import Path
4
4
  import click
5
5
  import pyperclip
6
6
 
7
+ from . import __version__
7
8
  from .code_to_txt import CodeToText
8
9
  from .config import create_default_config, load_config
9
10
 
10
11
 
12
def display_statistics(stats: dict) -> None:
    """Print a human-readable summary of codebase statistics with click.echo.

    Args:
        stats: Mapping providing the keys read below: ``total_files``,
            ``total_size_bytes``, ``total_lines``, ``skipped_files``,
            ``by_extension`` (extension -> ``{"count", "size"}``) and
            ``largest_files`` (list of ``{"path", "size_kb"}``).
    """
    top_extensions = 10
    top_files = 5
    divider = "=" * 60

    # Header and overall totals.
    for header_line in (f"\n{divider}", "CODEBASE STATISTICS", divider):
        click.echo(header_line)
    click.echo(f"Total files: {stats['total_files']}")
    click.echo(f"Total size: {stats['total_size_bytes'] / 1024 / 1024:.2f} MB")
    click.echo(f"Total lines: {stats['total_lines']:,}")

    skipped = stats["skipped_files"]
    if skipped > 0:
        click.echo(f"Skipped files: {skipped}")

    # Per-extension breakdown, most frequent extensions first.
    click.echo("\nFiles by extension:")
    ranked = sorted(
        stats["by_extension"].items(),
        key=lambda pair: pair[1]["count"],
        reverse=True,
    )
    for ext, data in ranked[:top_extensions]:
        size_mb = data["size"] / 1024 / 1024
        click.echo(f" {ext:15} {data['count']:5} files {size_mb:8.2f} MB")

    remaining = len(ranked) - top_extensions
    if remaining > 0:
        click.echo(f" ... and {remaining} more extensions")

    # Largest files; the list is printed in the order it was supplied.
    largest = stats["largest_files"]
    if largest:
        click.echo("\nLargest files:")
        for file_info in largest[:top_files]:
            click.echo(f" {file_info['size_kb']:8.2f} KB {file_info['path']}")

    click.echo(f"{divider}\n")
40
+
41
+
11
42
  @click.command()
12
43
  @click.argument("path", type=click.Path(exists=True), default=".")
13
- @click.option(
14
- "-o",
15
- "--output",
16
- default=None,
17
- help="Output file path (default: codetotxt_YYYYMMDD_HHMMSS.txt)",
18
- type=click.Path(),
19
- )
20
- @click.option(
21
- "-e",
22
- "--extensions",
23
- default=None,
24
- help="File extensions to include. Space-separated list (e.g., '.py .js .ts') or comma-separated (e.g., '.py,.js,.ts')",
25
- )
26
- @click.option(
27
- "-x",
28
- "--exclude",
29
- multiple=True,
30
- help="Patterns to exclude (gitignore style). Can be specified multiple times.",
31
- )
32
- @click.option(
33
- "-g",
34
- "--glob",
35
- multiple=True,
36
- help="Glob patterns to include (e.g., '*.py' 'src/**/*.js'). Can be specified multiple times.",
37
- )
38
- @click.option(
39
- "--no-gitignore",
40
- is_flag=True,
41
- help="Don't respect .gitignore files",
42
- )
43
- @click.option(
44
- "--no-tree",
45
- is_flag=True,
46
- help="Don't include directory tree in output",
47
- )
48
- @click.option(
49
- "--separator",
50
- default="=" * 80,
51
- help="Separator between files",
52
- )
53
- @click.option(
54
- "--clipboard",
55
- "-c",
56
- is_flag=True,
57
- help="Copy output to clipboard in addition to saving to file",
58
- )
59
- @click.option(
60
- "--clipboard-only",
61
- is_flag=True,
62
- help="Copy output to clipboard only (don't save to file)",
63
- )
64
- @click.option(
65
- "--config",
66
- type=click.Path(exists=True),
67
- help="Path to config file (.yml or .yaml)",
68
- )
69
- @click.option(
70
- "--init-config",
71
- is_flag=True,
72
- help="Create default configuration file (.code-to-txt.yml)",
73
- )
74
- @click.option(
75
- "--timestamp",
76
- "-t",
77
- is_flag=True,
78
- help="Add timestamp to output filename",
79
- )
44
+ @click.option("-o", "--output", default=None, type=click.Path(),
45
+ help="Output file path (default: codetotxt_YYYYMMDD_HHMMSS.txt)")
46
+ @click.option("-e", "--extensions", default=None, help="File extensions to include (space or comma separated)")
47
+ @click.option("-x", "--exclude", multiple=True, help="Patterns to exclude (can be used multiple times)")
48
+ @click.option("-g", "--glob", multiple=True, help="Glob patterns to include (can be used multiple times)")
49
+ @click.option("--no-gitignore", is_flag=True, help="Don't respect .gitignore files")
50
+ @click.option("--no-tree", is_flag=True, help="Don't include directory tree in output")
51
+ @click.option("--separator", default="=" * 80, help="Separator between files")
52
+ @click.option("-c", "--clipboard", is_flag=True, help="Copy output to clipboard in addition to file")
53
+ @click.option("--clipboard-only", is_flag=True, help="Copy to clipboard only (don't save file)")
54
+ @click.option("--config", type=click.Path(exists=True), help="Path to config file (.yml or .yaml)")
55
+ @click.option("--init-config", is_flag=True, help="Create default configuration file")
56
+ @click.option("-t", "--timestamp", is_flag=True, help="Add timestamp to output filename")
57
+ @click.option("-v", "--version", is_flag=True, help="Show version and exit")
58
+ @click.option("--dry-run", is_flag=True, help="Show which files would be processed without creating output")
59
+ @click.option("--stats", is_flag=True, help="Show detailed statistics about the codebase")
60
+ @click.option("--max-file-size", type=int, default=None, help="Skip files larger than N KB")
80
61
  def main(
81
- path: str,
82
- output: str | None,
83
- extensions: str | None,
84
- exclude: tuple[str, ...],
85
- glob: tuple[str, ...],
86
- no_gitignore: bool,
87
- no_tree: bool,
88
- separator: str,
89
- clipboard: bool,
90
- clipboard_only: bool,
91
- config: str | None,
92
- init_config: bool,
93
- timestamp: bool,
62
+ path: str,
63
+ output: str | None,
64
+ extensions: str | None,
65
+ exclude: tuple[str, ...],
66
+ glob: tuple[str, ...],
67
+ no_gitignore: bool,
68
+ no_tree: bool,
69
+ separator: str,
70
+ clipboard: bool,
71
+ clipboard_only: bool,
72
+ config: str | None,
73
+ init_config: bool,
74
+ timestamp: bool,
75
+ version: bool,
76
+ dry_run: bool,
77
+ stats: bool,
78
+ max_file_size: int | None,
94
79
  ) -> None:
95
80
  """
96
- Convert code files to a single text file for easy LLM consumption.
81
+ Convert code files to a single text file for LLM consumption.
97
82
 
98
83
  PATH: Directory to scan (default: current directory)
99
84
 
@@ -127,13 +112,14 @@ def main(
127
112
  # Use config file
128
113
  code-to-txt --config .code-to-txt.yml
129
114
  """
115
+ if version:
116
+ click.echo(f"v{__version__}")
117
+ return
118
+
130
119
  if init_config:
131
120
  config_path = Path(".code-to-txt.yml")
132
121
  if config_path.exists():
133
- click.confirm(
134
- f"Config file {config_path} already exists. Overwrite?",
135
- abort=True,
136
- )
122
+ click.confirm(f"Config file {config_path} already exists. Overwrite?", abort=True)
137
123
  create_default_config(config_path)
138
124
  click.echo(f"Created default config file: {config_path}")
139
125
  click.echo("You can now edit this file and use it with --config flag")
@@ -155,6 +141,7 @@ def main(
155
141
  clipboard = clipboard or config_data.get("clipboard", False)
156
142
  clipboard_only = clipboard_only or config_data.get("clipboard_only", False)
157
143
  timestamp = timestamp or config_data.get("timestamp", False)
144
+ max_file_size = max_file_size or config_data.get("max_file_size")
158
145
 
159
146
  if not output or timestamp:
160
147
  timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -183,30 +170,54 @@ def main(
183
170
  if ext:
184
171
  include_extensions.add(ext)
185
172
 
186
- codetotxt = CodeToText(
173
+ code_to_txt = CodeToText(
187
174
  root_path=path,
188
- output_file=output if not clipboard_only else None,
175
+ output_file=output if not clipboard_only and not dry_run and not stats else None,
189
176
  include_extensions=include_extensions,
190
177
  exclude_patterns=list(exclude),
191
178
  glob_patterns=list(glob_patterns),
192
179
  gitignore=not no_gitignore,
180
+ max_file_size_kb=max_file_size,
193
181
  )
194
182
 
195
183
  try:
184
+ if stats or dry_run:
185
+ statistics = code_to_txt.calculate_statistics()
186
+ display_statistics(statistics)
187
+
188
+ if stats:
189
+ return
190
+
191
+ if dry_run:
192
+ files = code_to_txt._collect_files()
193
+ click.echo("Files that would be processed:")
194
+ for i, file_path in enumerate(files, 1):
195
+ relative_path = file_path.relative_to(Path(path).resolve())
196
+ size_kb = file_path.stat().st_size / 1024
197
+ click.echo(f" {i:4}. {relative_path} ({size_kb:.1f} KB)")
198
+
199
+ if code_to_txt.skipped_files:
200
+ click.echo(f"\nSkipped {len(code_to_txt.skipped_files)} files:")
201
+ for file_path, reason in code_to_txt.skipped_files[:20]:
202
+ relative_path = file_path.relative_to(Path(path).resolve())
203
+ click.echo(f" - {relative_path} ({reason})")
204
+ if len(code_to_txt.skipped_files) > 20:
205
+ click.echo(f" ... and {len(code_to_txt.skipped_files) - 20} more")
206
+
207
+ if not stats or dry_run:
208
+ return
209
+
196
210
  if clipboard_only:
197
- content = codetotxt.generate_content(
198
- add_tree=not no_tree,
199
- separator=separator,
200
- )
211
+ content = code_to_txt.generate_content(add_tree=not no_tree, separator=separator)
201
212
  pyperclip.copy(content)
202
213
  click.echo("Content copied to clipboard")
203
- click.echo(f"Processed {codetotxt.file_count} files")
214
+ click.echo(f"Processed {code_to_txt.file_count} files")
204
215
  click.echo(f"Content size: {len(content) / 1024:.2f} KB")
216
+
217
+ estimated_tokens = len(content) / 4
218
+ click.echo(f"Estimated tokens: ~{estimated_tokens:,.0f}")
205
219
  else:
206
- num_files = codetotxt.convert(
207
- add_tree=not no_tree,
208
- separator=separator,
209
- )
220
+ num_files = code_to_txt.convert(add_tree=not no_tree, separator=separator)
210
221
 
211
222
  output_path = Path(output).resolve()
212
223
  click.echo(f"Successfully processed {num_files} files")
@@ -215,11 +226,17 @@ def main(
215
226
  size_kb = output_path.stat().st_size / 1024
216
227
  click.echo(f"File size: {size_kb:.2f} KB")
217
228
 
229
+ estimated_tokens = size_kb * 1024 / 4
230
+ click.echo(f"Estimated tokens: ~{estimated_tokens:,.0f}")
231
+
218
232
  if clipboard:
219
233
  content = output_path.read_text(encoding="utf-8")
220
234
  pyperclip.copy(content)
221
235
  click.echo("Content also copied to clipboard")
222
236
 
237
+ if code_to_txt.skipped_files:
238
+ click.echo(f"\nNote: Skipped {len(code_to_txt.skipped_files)} files (use --dry-run to see details)")
239
+
223
240
  except Exception as e:
224
241
  click.echo(f"Error: {e}", err=True)
225
242
  raise click.Abort()
@@ -6,41 +6,11 @@ from typing import Any
6
6
  import pathspec
7
7
  from pathspec import PathSpec
8
8
 
9
+ from .utils import load_patterns_from_file
10
+
9
11
 
10
12
  class CodeToText:
11
- DEFAULT_IGNORE = {
12
- "__pycache__",
13
- "*.pyc",
14
- "*.pyo",
15
- "*.pyd",
16
- ".git",
17
- ".svn",
18
- ".hg",
19
- "node_modules",
20
- ".venv",
21
- "venv",
22
- ".env",
23
- "*.egg-info",
24
- "dist",
25
- "build",
26
- ".pytest_cache",
27
- ".mypy_cache",
28
- ".ruff_cache",
29
- "*.so",
30
- "*.dylib",
31
- "*.dll",
32
- }
33
-
34
- DEFAULT_EXTENSIONS = {
35
- ".py", ".js", ".ts", ".jsx", ".tsx",
36
- ".java", ".c", ".cpp", ".h", ".hpp",
37
- ".cs", ".go", ".rs", ".rb", ".php",
38
- ".swift", ".kt", ".scala", ".r",
39
- ".sql", ".sh", ".bash", ".zsh",
40
- ".yaml", ".yml", ".json", ".toml",
41
- ".xml", ".html", ".css", ".scss",
42
- ".md", ".txt", ".rst",
43
- }
13
+ """Convert code files to a single text file for LLM consumption."""
44
14
 
45
15
  def __init__(
46
16
  self,
@@ -50,9 +20,10 @@ class CodeToText:
50
20
  exclude_patterns: list[str] | None = None,
51
21
  glob_patterns: list[str] | None = None,
52
22
  gitignore: bool = True,
23
+ max_file_size_kb: int | None = None,
53
24
  ):
54
25
  """
55
- Initialize the instance of CodeToText.
26
+ Initialize CodeToText instance.
56
27
 
57
28
  Args:
58
29
  root_path: Root directory to scan
@@ -61,89 +32,188 @@ class CodeToText:
61
32
  exclude_patterns: List of patterns to exclude (gitignore style)
62
33
  glob_patterns: List of glob patterns to include (e.g., '*.py', 'src/**/*.js')
63
34
  gitignore: Whether to respect .gitignore files
35
+ max_file_size_kb: Skip files larger than this size in KB
64
36
  """
65
37
  self.root_path = Path(root_path).resolve()
66
38
  self.output_file = output_file
67
- self.include_extensions = include_extensions or self.DEFAULT_EXTENSIONS
68
- self.exclude_patterns = exclude_patterns or []
69
39
  self.glob_patterns = glob_patterns or []
70
40
  self.gitignore = gitignore
41
+ self.max_file_size_kb = max_file_size_kb
71
42
  self.spec: PathSpec | None = None
72
43
  self.file_count = 0
44
+ self.skipped_files: list[tuple[Path, str]] = []
73
45
 
74
- if self.gitignore:
75
- self._load_gitignore()
46
+ config_dir = Path(__file__).parent
47
+ default_extensions = load_patterns_from_file(config_dir / ".extensions")
48
+ default_ignore = load_patterns_from_file(config_dir / ".ignore")
76
49
 
77
- def _load_gitignore(self) -> None:
78
- """Load .gitignore patterns if present."""
79
- gitignore_path = self.root_path / ".gitignore"
80
- patterns = list(self.DEFAULT_IGNORE)
50
+ self.include_extensions = include_extensions or default_extensions
51
+ self.exclude_patterns = exclude_patterns or []
52
+ self.default_ignore = default_ignore
81
53
 
82
- if gitignore_path.exists():
83
- with open(gitignore_path, encoding="utf-8") as f:
84
- for line in f:
85
- line = line.strip()
86
- if line and not line.startswith("#"):
87
- patterns.append(line)
54
+ if self.gitignore:
55
+ self._init_pathspec()
56
+
57
+ def _init_pathspec(self) -> None:
58
+ """Initialize pathspec from .gitignore files and default patterns."""
59
+ patterns = list(self.default_ignore)
60
+ current_path = self.root_path
61
+
62
+ for _ in range(5):
63
+ gitignore_path = current_path / ".gitignore"
64
+ if gitignore_path.exists():
65
+ try:
66
+ with open(gitignore_path, encoding="utf-8") as f:
67
+ for line in f:
68
+ clean_line = line.strip()
69
+ if clean_line and not clean_line.startswith("#"):
70
+ patterns.append(clean_line)
71
+ except Exception:
72
+ pass
73
+
74
+ parent = current_path.parent
75
+ if parent == current_path:
76
+ break
77
+ current_path = parent
88
78
 
89
79
  patterns.extend(self.exclude_patterns)
90
80
  self.spec = pathspec.PathSpec.from_lines("gitignore", patterns)
91
81
 
92
- def _matches_glob_pattern(self, file_path: Path) -> bool:
82
+ def _check_glob_match(self, file_path: Path) -> bool:
93
83
  """Check if file matches any glob pattern."""
94
84
  if not self.glob_patterns:
95
85
  return False
96
86
 
97
87
  relative_path = file_path.relative_to(self.root_path)
98
- relative_str = str(relative_path)
88
+ path_str = str(relative_path)
99
89
 
100
90
  for pattern in self.glob_patterns:
101
- if fnmatch(relative_str, pattern):
91
+ if fnmatch(path_str, pattern):
102
92
  return True
103
93
  if fnmatch(file_path.name, pattern):
104
94
  return True
95
+ if fnmatch(path_str.replace(os.sep, "/"), pattern):
96
+ return True
105
97
 
106
98
  return False
107
99
 
108
- def _should_include_file(self, file_path: Path) -> bool:
109
- """Check if a file should be included."""
100
+ def _check_file_inclusion(self, file_path: Path) -> bool:
101
+ """Determine if a file should be included in the output."""
102
+ if self.max_file_size_kb is not None:
103
+ try:
104
+ file_size_kb = file_path.stat().st_size / 1024
105
+ if file_size_kb > self.max_file_size_kb:
106
+ self.skipped_files.append(
107
+ (file_path, f"exceeds size limit ({file_size_kb:.1f}KB)")
108
+ )
109
+ return False
110
+ except Exception:
111
+ pass
112
+
110
113
  if self.glob_patterns:
111
- if not self._matches_glob_pattern(file_path):
112
- return False
113
- else:
114
- if file_path.suffix not in self.include_extensions:
114
+ if not self._check_glob_match(file_path):
115
115
  return False
116
116
 
117
117
  if self.spec:
118
- relative_path = file_path.relative_to(self.root_path)
119
- if self.spec.match_file(str(relative_path)):
118
+ try:
119
+ relative_path = file_path.relative_to(self.root_path)
120
+ relative_str = str(relative_path).replace(os.sep, "/")
121
+
122
+ if self.spec.match_file(relative_str):
123
+ self.skipped_files.append((file_path, "matches ignore pattern"))
124
+ return False
125
+ except ValueError:
120
126
  return False
121
127
 
128
+ if file_path.suffix not in self.include_extensions:
129
+ return False
130
+
122
131
  return True
123
132
 
124
- def _get_files(self) -> list[Path]:
125
- """Get all files to process."""
133
+ def _collect_files(self) -> list[Path]:
134
+ """Collect all files to process based on filters."""
126
135
  files = []
136
+ self.skipped_files = []
137
+
127
138
  for root, dirs, filenames in os.walk(self.root_path):
128
139
  root_path = Path(root)
129
140
 
130
141
  if self.spec:
131
- relative_root = root_path.relative_to(self.root_path)
132
- dirs[:] = [
133
- d for d in dirs
134
- if not self.spec.match_file(str(relative_root / d))
135
- ]
142
+ try:
143
+ relative_root = root_path.relative_to(self.root_path)
144
+ root_str = str(relative_root).replace(os.sep, "/") if str(relative_root) != "." else ""
145
+
146
+ filtered_dirs = []
147
+ for d in dirs:
148
+ dir_path = f"{root_str}/{d}" if root_str else d
149
+
150
+ if not self.spec.match_file(dir_path) and not self.spec.match_file(f"{dir_path}/"):
151
+ filtered_dirs.append(d)
152
+
153
+ dirs[:] = filtered_dirs
154
+ except ValueError:
155
+ pass
136
156
 
137
157
  for filename in filenames:
138
158
  file_path = root_path / filename
139
- if self._should_include_file(file_path):
159
+ if self._check_file_inclusion(file_path):
140
160
  files.append(file_path)
141
161
 
142
162
  return sorted(files)
143
163
 
164
def calculate_statistics(self) -> dict[str, Any]:
    """
    Calculate statistics about the files returned by ``self._collect_files()``.

    Returns:
        Dictionary with the keys:
            total_files: number of collected files
            total_size_bytes: summed ``st_size`` of the files that could be stat'ed
            total_lines: summed line count of the files readable as UTF-8
            by_extension: ``{suffix: {"count": int, "size": int}}``; files
                without a suffix are grouped under "(no extension)"
            skipped_files: ``len(self.skipped_files)`` after collection
            largest_files: up to 10 ``{"path", "size_kb"}`` entries, largest
                first, with paths relative to ``self.root_path``
    """
    files = self._collect_files()

    stats: dict[str, Any] = {
        "total_files": len(files),
        "total_size_bytes": 0,
        "total_lines": 0,
        "by_extension": {},
        "skipped_files": len(self.skipped_files),
        "largest_files": [],
    }

    file_sizes = []

    for file_path in files:
        try:
            size = file_path.stat().st_size
        except OSError:
            # The file cannot be stat'ed (e.g. removed since collection):
            # leave it out of the size and extension totals.
            continue

        stats["total_size_bytes"] += size
        file_sizes.append((file_path, size))

        try:
            with open(file_path, encoding="utf-8") as f:
                stats["total_lines"] += sum(1 for _ in f)
        except (OSError, UnicodeDecodeError):
            # Best effort: files that cannot be read as UTF-8 still count
            # toward the size totals, just not toward the line total.
            pass

        ext = file_path.suffix or "(no extension)"
        bucket = stats["by_extension"].setdefault(ext, {"count": 0, "size": 0})
        bucket["count"] += 1
        bucket["size"] += size

    file_sizes.sort(key=lambda entry: entry[1], reverse=True)
    stats["largest_files"] = [
        {"path": str(path.relative_to(self.root_path)), "size_kb": size / 1024}
        for path, size in file_sizes[:10]
    ]

    return stats
213
+
144
214
  def generate_content(self, add_tree: bool = True, separator: str = "=" * 80) -> str:
145
215
  """
146
- Generate content as string (for clipboard).
216
+ Generate content as string without writing to file.
147
217
 
148
218
  Args:
149
219
  add_tree: Whether to add directory tree at the beginning
@@ -152,43 +222,43 @@ class CodeToText:
152
222
  Returns:
153
223
  Generated content as string
154
224
  """
155
- files = self._get_files()
225
+ files = self._collect_files()
156
226
  self.file_count = len(files)
157
227
 
158
- lines = []
159
- lines.append(f"Code Export from: {self.root_path}")
160
- lines.append(f"Total files: {len(files)}")
161
- lines.append(separator)
162
- lines.append("")
228
+ output_lines = []
229
+ output_lines.append(f"Code Export from: {self.root_path}")
230
+ output_lines.append(f"Total files: {len(files)}")
231
+ output_lines.append(separator)
232
+ output_lines.append("")
163
233
 
164
234
  if add_tree:
165
- lines.append("DIRECTORY TREE:")
166
- lines.append(separator)
167
- lines.append(self._generate_tree())
168
- lines.append("")
169
- lines.append(separator)
170
- lines.append("")
171
-
172
- for i, file_path in enumerate(files, 1):
235
+ output_lines.append("DIRECTORY TREE:")
236
+ output_lines.append(separator)
237
+ output_lines.append(self._build_tree_structure())
238
+ output_lines.append("")
239
+ output_lines.append(separator)
240
+ output_lines.append("")
241
+
242
+ for idx, file_path in enumerate(files, 1):
173
243
  relative_path = file_path.relative_to(self.root_path)
174
244
 
175
- lines.append(f"FILE {i}/{len(files)}: {relative_path}")
176
- lines.append(separator)
245
+ output_lines.append(f"FILE {idx}/{len(files)}: {relative_path}")
246
+ output_lines.append(separator)
177
247
 
178
248
  try:
179
249
  with open(file_path, encoding="utf-8") as f:
180
250
  content = f.read()
181
- lines.append(content)
251
+ output_lines.append(content)
182
252
  except UnicodeDecodeError:
183
- lines.append("[Binary file - skipped]")
253
+ output_lines.append("[Binary file - skipped]")
184
254
  except Exception as e:
185
- lines.append(f"[Error reading file: {e}]")
255
+ output_lines.append(f"[Error reading file: {e}]")
186
256
 
187
- lines.append("")
188
- lines.append(separator)
189
- lines.append("")
257
+ output_lines.append("")
258
+ output_lines.append(separator)
259
+ output_lines.append("")
190
260
 
191
- return "\n".join(lines)
261
+ return "\n".join(output_lines)
192
262
 
193
263
  def convert(self, add_tree: bool = True, separator: str = "=" * 80) -> int:
194
264
  """
@@ -211,49 +281,49 @@ class CodeToText:
211
281
 
212
282
  return self.file_count
213
283
 
214
- def _generate_tree(self) -> str:
215
- """Generate a directory tree representation."""
216
- tree_lines = []
217
- files = self._get_files()
284
+ def _build_tree_structure(self) -> str:
285
+ """Build a directory tree representation of included files."""
286
+ tree_output = []
287
+ files = self._collect_files()
218
288
 
219
289
  if not files:
220
290
  return "(no files to display)"
221
291
 
222
- dir_structure: dict[str, Any] = {}
292
+ structure: dict[str, Any] = {}
223
293
  for file_path in files:
224
294
  relative_path = file_path.relative_to(self.root_path)
225
295
  parts = relative_path.parts
226
296
 
227
- current = dir_structure
297
+ current_level = structure
228
298
  for part in parts[:-1]:
229
- if part not in current:
230
- current[part] = {}
231
- current = current[part]
299
+ if part not in current_level:
300
+ current_level[part] = {}
301
+ current_level = current_level[part]
232
302
 
233
- if "__files__" not in current:
234
- current["__files__"] = []
235
- current["__files__"].append(parts[-1])
303
+ if "__files__" not in current_level:
304
+ current_level["__files__"] = []
305
+ current_level["__files__"].append(parts[-1])
236
306
 
237
- def print_tree(structure: dict[str, Any], prefix: str = "", is_last: bool = True) -> None:
238
- items = []
239
- for key in sorted(structure.keys()):
307
+ def render_tree(node: dict[str, Any], prefix: str = "", is_final: bool = True) -> None:
308
+ entries = []
309
+ for key in sorted(node.keys()):
240
310
  if key != "__files__":
241
- items.append((key, True)) # directory
311
+ entries.append((key, True))
242
312
 
243
- if "__files__" in structure:
244
- for file in sorted(structure["__files__"]):
245
- items.append((file, False)) # file
313
+ if "__files__" in node:
314
+ for file in sorted(node["__files__"]):
315
+ entries.append((file, False))
246
316
 
247
- for i, (name, is_dir) in enumerate(items):
248
- is_last_item = i == len(items) - 1
249
- connector = "└── " if is_last_item else "├── "
250
- tree_lines.append(f"{prefix}{connector}{name}{'/' if is_dir else ''}")
317
+ for i, (name, is_directory) in enumerate(entries):
318
+ is_last_entry = i == len(entries) - 1
319
+ connector = "└── " if is_last_entry else "├── "
320
+ tree_output.append(f"{prefix}{connector}{name}{'/' if is_directory else ''}")
251
321
 
252
- if is_dir:
253
- extension = " " if is_last_item else "│ "
254
- print_tree(structure[name], prefix + extension, is_last_item)
322
+ if is_directory:
323
+ extension = " " if is_last_entry else "│ "
324
+ render_tree(node[name], prefix + extension, is_last_entry)
255
325
 
256
- tree_lines.append(f"{self.root_path.name}/")
257
- print_tree(dir_structure)
326
+ tree_output.append(f"{self.root_path.name}/")
327
+ render_tree(structure)
258
328
 
259
- return "\n".join(tree_lines)
329
+ return "\n".join(tree_output)
code_to_txt/config.py CHANGED
@@ -4,8 +4,8 @@ from typing import Any
4
4
  import yaml
5
5
 
6
6
  DEFAULT_CONFIG = {
7
- "output": "code_output.txt",
8
- "extensions": None, # None means use defaults
7
+ "output": "code-to-txt.txt",
8
+ "extensions": None,
9
9
  "exclude": [
10
10
  "tests/*",
11
11
  "*.test.js",
@@ -13,13 +13,14 @@ DEFAULT_CONFIG = {
13
13
  "*.spec.js",
14
14
  "*.spec.ts",
15
15
  ],
16
- "glob": [], # e.g., ["*.py", "src/**/*.js"]
16
+ "glob": [],
17
17
  "no_gitignore": False,
18
18
  "no_tree": False,
19
19
  "separator": "=" * 80,
20
20
  "clipboard": False,
21
21
  "clipboard_only": False,
22
- "timestamp": False,
22
+ "timestamp": True,
23
+ "max_file_size": None,
23
24
  }
24
25
 
25
26
 
@@ -31,7 +32,7 @@ def load_config(config_path: str) -> dict[str, Any]:
31
32
  config_path: Path to the configuration file
32
33
 
33
34
  Returns:
34
- Dictionary with configuration values
35
+ Dictionary with validated configuration values
35
36
  """
36
37
  path = Path(config_path)
37
38
 
@@ -75,6 +76,9 @@ def load_config(config_path: str) -> dict[str, Any]:
75
76
  if field in config:
76
77
  validated_config[field] = bool(config[field])
77
78
 
79
+ if "max_file_size" in config and config["max_file_size"] is not None:
80
+ validated_config["max_file_size"] = int(config["max_file_size"])
81
+
78
82
  return validated_config
79
83
 
80
84
 
@@ -91,7 +95,7 @@ def create_default_config(config_path: Path) -> None:
91
95
 
92
96
  # Output file name (supports strftime formatting)
93
97
  # Use timestamp: true to automatically add timestamp
94
- output: code_output.txt
98
+ output: code-to-txt.txt
95
99
 
96
100
  # File extensions to include
97
101
  # Can be a list or space/comma-separated string
@@ -136,7 +140,9 @@ clipboard: false
136
140
  clipboard_only: false
137
141
 
138
142
  # Add timestamp to output filename
139
- timestamp: false
143
+ timestamp: true
144
+
145
+ max_file_size: null
140
146
 
141
147
  # Example configurations:
142
148
  #
code_to_txt/utils.py ADDED
@@ -0,0 +1,13 @@
1
+ from pathlib import Path
2
+
3
+
4
def load_patterns_from_file(file_path: Path) -> set[str]:
    """Load patterns from a text file, one per line.

    Surrounding whitespace is stripped from every line; blank lines and lines
    whose first non-whitespace character is ``#`` are ignored.

    Args:
        file_path: Path of the UTF-8 pattern file to read.

    Returns:
        The set of patterns found, or an empty set when the file does not
        exist. Because the result is a set, duplicate lines collapse and the
        original line order is not preserved.
    """
    try:
        with open(file_path, encoding="utf-8") as f:
            stripped_lines = (line.strip() for line in f)
            return {
                entry
                for entry in stripped_lines
                if entry and not entry.startswith("#")
            }
    except (FileNotFoundError, NotADirectoryError):
        # Opening directly (instead of exists() followed by open()) avoids a
        # crash if the file disappears between the check and the read.
        return set()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-to-txt
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Convert code files to a single text file for LLM consumption
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -28,15 +28,6 @@ Models (LLMs) or for easy code review and documentation.
28
28
 
29
29
  ## Features
30
30
 
31
- ✨ **New in v0.2.0:**
32
-
33
- - 🕐 **Automatic timestamps** in output filenames
34
- - 📋 **Clipboard support** - copy output directly to clipboard
35
- - 🎯 **Better extension handling** - specify multiple extensions without repeating `-e` flag
36
- - 🔍 **Glob pattern support** - use patterns like `*.py` or `src/**/*.js`
37
- - ⚙️ **Configuration file support** - save your preferences in `.code-to-txt.yml`
38
- - 🚀 **Enhanced defaults** - more file types and ignore patterns out of the box
39
-
40
31
  **Core Features:**
41
32
 
42
33
  - 📁 Convert entire directories of code into a single text file
@@ -63,9 +54,18 @@ poetry add code-to-txt
63
54
  ### Basic Usage
64
55
 
65
56
  ```bash
66
- # Convert all code files in current directory with timestamp
57
+ # Show version
58
+ code-to-txt --version
59
+
60
+ # Convert all code files with timestamp
67
61
  code-to-txt -t
68
62
 
63
+ # Preview what would be processed
64
+ code-to-txt --dry-run
65
+
66
+ # Get codebase statistics
67
+ code-to-txt --stats
68
+
69
69
  # Convert specific directory
70
70
  code-to-txt ./my-project -o project.txt
71
71
 
@@ -88,6 +88,9 @@ code-to-txt -g "*.py" -g "*.md"
88
88
  ### Advanced Usage
89
89
 
90
90
  ```bash
91
+ # Limit file sizes (useful for LLM token limits)
92
+ code-to-txt --max-file-size 500
93
+
91
94
  # Exclude patterns
92
95
  code-to-txt -x "tests/*" -x "*.test.js"
93
96
 
@@ -116,7 +119,7 @@ This creates `.code-to-txt.yml` with default settings:
116
119
 
117
120
  ```yaml
118
121
  # Output file name
119
- output: codetotxt.txt
122
+ output: code-to-txt.txt
120
123
 
121
124
  # File extensions to include (null = use defaults)
122
125
  extensions: null
@@ -125,7 +128,12 @@ extensions: null
125
128
  exclude:
126
129
  - "tests/*"
127
130
  - "*.test.js"
131
+ - "*.test.ts"
132
+ - "*.spec.js"
133
+ - "*.spec.ts"
128
134
  - "node_modules/*"
135
+ - "__pycache__/*"
136
+ - "*.pyc"
129
137
 
130
138
  # Glob patterns (alternative to extensions)
131
139
  glob: [ ]
@@ -137,6 +145,7 @@ separator: "================"
137
145
  clipboard: false
138
146
  clipboard_only: false
139
147
  timestamp: true
148
+ max_file_size: null
140
149
  ```
141
150
 
142
151
  Use the config file:
@@ -155,6 +164,7 @@ code-to-txt --config .code-to-txt.yml
155
164
  extensions: [ .py ]
156
165
  exclude: [ "tests/*", "*.pyc", "__pycache__/*", "venv/*", ".venv/*" ]
157
166
  timestamp: true
167
+ max_file_size: 500
158
168
  ```
159
169
 
160
170
  **JavaScript/TypeScript Project:**
@@ -163,20 +173,18 @@ timestamp: true
163
173
  extensions: [ .js, .ts, .jsx, .tsx ]
164
174
  exclude: [ "node_modules/*", "dist/*", "build/*", "*.test.js", "*.spec.ts" ]
165
175
  no_tree: false
176
+ max_file_size: 1000
166
177
  ```
167
178
 
168
- **C/C++ Project:**
169
-
170
- ```yaml
171
- extensions: [ .c, .cpp, .h, .hpp ]
172
- exclude: [ "build/*", "*.o", "*.a", "cmake-build-*" ]
173
- ```
174
-
175
- **Using Glob Patterns:**
179
+ **LLM-Optimized:**
176
180
 
177
181
  ```yaml
178
- glob: [ "src/**/*.py", "lib/**/*.py", "*.md" ]
179
- extensions: null # Ignore extensions when using glob
182
+ extensions: [ .py, .js, .md ]
183
+ exclude: [ "tests/*", "*.test.*", "node_modules/*", "dist/*", "build/*" ]
184
+ timestamp: true
185
+ clipboard: true
186
+ max_file_size: 200
187
+ no_tree: false
180
188
  ```
181
189
 
182
190
  ## Command Line Options
@@ -194,12 +202,16 @@ Options:
194
202
  -g, --glob TEXT Glob patterns to include (can be used multiple times)
195
203
  --no-gitignore Don't respect .gitignore files
196
204
  --no-tree Don't include directory tree in output
197
- --separator TEXT Separator between files (default: ====...)
205
+ --separator TEXT Separator between files
198
206
  -c, --clipboard Copy output to clipboard in addition to file
199
207
  --clipboard-only Copy to clipboard only (don't save file)
200
208
  --config PATH Path to config file (.yml or .yaml)
201
209
  --init-config Create default configuration file
202
210
  -t, --timestamp Add timestamp to output filename
211
+ -v, --version Show version and exit
212
+ --dry-run Show which files would be processed
213
+ --stats Show detailed statistics
214
+ --max-file-size INT Skip files larger than N KB
203
215
  --help Show this message and exit
204
216
  ```
205
217
 
@@ -210,15 +222,13 @@ Options:
210
222
  ```python
211
223
  from code_to_txt import CodeToText
212
224
 
213
- # Create instance
214
- code_to_text = CodeToText(
225
+ code_to_txt = CodeToText(
215
226
  root_path="./my-project",
216
227
  output_file="output.txt",
217
228
  include_extensions={".py", ".js"},
218
229
  )
219
230
 
220
- # Convert to file
221
- num_files = code_to_text.convert(add_tree=True)
231
+ num_files = code_to_txt.convert(add_tree=True)
222
232
  print(f"Processed {num_files} files")
223
233
  ```
224
234
 
@@ -226,54 +236,47 @@ print(f"Processed {num_files} files")
226
236
 
227
237
  ```python
228
238
  from code_to_txt import CodeToText
239
+ import pyperclip
229
240
 
230
- # Generate content without writing to file
231
- code_to_text = CodeToText(
241
+ code_to_txt = CodeToText(
232
242
  root_path="./my-project",
233
- output_file=None, # No file needed
243
+ output_file=None,
234
244
  include_extensions={".py"},
235
245
  )
236
246
 
237
- content = code_to_text.generate_content(add_tree=True)
238
- print(f"Generated {len(content)} characters")
239
-
240
- # Copy to clipboard using pyperclip
241
- import pyperclip
242
-
247
+ content = code_to_txt.generate_content(add_tree=True)
243
248
  pyperclip.copy(content)
244
249
  ```
245
250
 
246
- ### Using Glob Patterns
251
+ ### Get Statistics
247
252
 
248
253
  ```python
249
254
  from code_to_txt import CodeToText
250
255
 
251
- code_to_text = CodeToText(
256
+ code_to_txt = CodeToText(
252
257
  root_path="./my-project",
253
- output_file="output.txt",
254
- glob_patterns=["*.py", "src/**/*.js", "**/*.md"],
258
+ output_file=None,
259
+ max_file_size_kb=500,
255
260
  )
256
261
 
257
- num_files = code_to_text.convert()
262
+ stats = code_to_txt.calculate_statistics()
263
+ print(f"Total files: {stats['total_files']}")
264
+ print(f"Total size: {stats['total_size_bytes'] / 1024 / 1024:.2f} MB")
265
+ print(f"Total lines: {stats['total_lines']:,}")
258
266
  ```
259
267
 
260
- ### Advanced Configuration
268
+ ### Using Glob Patterns
261
269
 
262
270
  ```python
263
271
  from code_to_txt import CodeToText
264
272
 
265
- code_to_text = CodeToText(
273
+ code_to_txt = CodeToText(
266
274
  root_path="./my-project",
267
- output_file="detailed_output.txt",
268
- include_extensions={".py", ".js", ".ts"},
269
- exclude_patterns=["tests/*", "*.test.js", "node_modules/*"],
270
- gitignore=True, # Respect .gitignore (default)
275
+ output_file="output.txt",
276
+ glob_patterns=["*.py", "src/**/*.js", "**/*.md"],
271
277
  )
272
278
 
273
- num_files = code_to_text.convert(
274
- add_tree=True,
275
- separator="=" * 100,
276
- )
279
+ num_files = code_to_txt.convert()
277
280
  ```
278
281
 
279
282
  ## Default File Extensions
@@ -301,7 +304,7 @@ CodeToTxt automatically ignores common build artifacts and dependencies:
301
304
  - `.pytest_cache`, `.mypy_cache`, `.ruff_cache`
302
305
  - `*.so`, `*.dylib`, `*.dll`
303
306
 
304
- Plus any patterns in your `.gitignore` file.
307
+ Plus any patterns from your `.gitignore` files (including those in parent directories).
305
308
 
306
309
  ## Output Format
307
310
 
@@ -353,34 +356,43 @@ if __name__ == "__main__":
353
356
 
354
357
  ## Tips & Tricks
355
358
 
356
- ### For Large Projects
359
+ ### For LLM Consumption
357
360
 
358
361
  ```bash
359
- # Use specific extensions to reduce size
360
- code-to-txt -e ".py" -t
362
+ # Step 1: Check what you're working with
363
+ code-to-txt --stats
361
364
 
362
- # Exclude heavy directories
363
- code-to-txt -x "node_modules/*" -x "venv/*" -x "dist/*"
365
+ # Step 2: Preview files
366
+ code-to-txt --dry-run --max-file-size 200
367
+
368
+ # Step 3: Copy to clipboard with size limit
369
+ code-to-txt --clipboard-only --max-file-size 200 -e ".py .md"
370
+
371
+ # The output includes a token estimate, for example:
372
+ # Estimated tokens: ~95,000
364
373
  ```
365
374
 
366
- ### For LLM Consumption
375
+ ### For Large Projects
367
376
 
368
377
  ```bash
369
- # Copy directly to clipboard for pasting into ChatGPT/Claude
370
- code-to-txt --clipboard-only -e ".py .md"
378
+ # Use specific extensions to reduce size
379
+ code-to-txt -e ".py" -t --max-file-size 500
371
380
 
372
- # Or save and copy
373
- code-to-txt -t -c -e ".py .js"
381
+ # Exclude heavy directories
382
+ code-to-txt -x "node_modules/*" -x "venv/*" -x "dist/*"
383
+
384
+ # Get statistics first
385
+ code-to-txt --stats --max-file-size 300
374
386
  ```
375
387
 
376
- ### For Specific Features
388
+ ### Debug Ignore Patterns
377
389
 
378
390
  ```bash
379
- # Only include source files, exclude tests
380
- code-to-txt -g "src/**/*.py" -g "lib/**/*.py"
391
+ # See which files are being skipped and why
392
+ code-to-txt --dry-run
381
393
 
382
- # Only documentation
383
- code-to-txt -e ".md .rst .txt"
394
+ # Compare with and without gitignore
395
+ code-to-txt --dry-run --no-gitignore
384
396
  ```
385
397
 
386
398
  ## Requirements
@@ -416,6 +428,20 @@ MIT License - see LICENSE file for details.
416
428
 
417
429
  ## Changelog
418
430
 
431
+ ### v0.3.0
432
+
433
+ - 🔧 Refactored codebase for better maintainability
434
+ - 📁 Externalized default extensions and ignore patterns to separate files
435
+ - 🐛 Fixed critical gitignore bug (`.gitignore` files in parent directories are now checked)
436
+ - 🔍 Improved cross-platform path handling
437
+ - 📊 Added `--stats` flag for detailed codebase statistics
438
+ - 🎯 Added `--dry-run` mode to preview without processing
439
+ - 📏 Added `--max-file-size` to skip large files
440
+ - 🔢 Added token estimation for LLM consumption
441
+ - 📝 Added skip tracking to see which files were excluded
442
+ - 🚀 Improved method naming and code structure
443
+ - ✅ Enhanced test coverage
444
+
419
445
  ### v0.2.0
420
446
 
421
447
  - ✨ Added automatic timestamp generation for output files
@@ -0,0 +1,12 @@
1
+ code_to_txt/.extensions,sha256=wmqH99IE9LSVPBQjOlmFH7e32aBhW-Gyx5pUk_aHTTw,164
2
+ code_to_txt/.ignore,sha256=h-2N-vrqYosVthADpYPSMwvHmZJXhdr9sUutlPtoEyw,151
3
+ code_to_txt/__init__.py,sha256=0BlnuJBBoiatWYgynf7iIw8LnMl-vyksXOwiSxLg7AI,84
4
+ code_to_txt/cli.py,sha256=Gg45vpewnQWZcQmbWgArMaXa6HYovCL7BK_nDdgQKqg,9633
5
+ code_to_txt/code_to_txt.py,sha256=h9UXYUdbXbPd4vaGn-EhgxSUlCGtN-JdTLw86lIakYE,11607
6
+ code_to_txt/config.py,sha256=DRjZ5uLXYbSwfTu36dGDbVUagYSMDhiw6TKgjAQkMU8,4292
7
+ code_to_txt/utils.py,sha256=K-eKT05eTCgkWuRDwSzPdmcmMZECRB4gubabO2vOgVE,434
8
+ code_to_txt-0.3.0.dist-info/METADATA,sha256=CTNP9Yjp9F7FeG7gakv2Lk1jO_da5jaKmgSNWMXAtNQ,11160
9
+ code_to_txt-0.3.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
10
+ code_to_txt-0.3.0.dist-info/entry_points.txt,sha256=jPT0g_nryiuAd0E496deFZAhdscNLXiUmUdD3KGN3iA,52
11
+ code_to_txt-0.3.0.dist-info/licenses/LICENSE,sha256=-K4fNS51V7AiwILLB_InW4EECFSbFrrOBd66OqVVyh4,1068
12
+ code_to_txt-0.3.0.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- code_to_txt/__init__.py,sha256=0_iks7Uz24B1pc-Na1n8C97vgMms6haaFNqIRkpq_Cg,62
2
- code_to_txt/cli.py,sha256=5NEXWGts1JBSXpAWsgrAfz9O0YFHzu6uRPf4NUePOj0,6662
3
- code_to_txt/code_to_txt.py,sha256=ZCqc-Yk-hc5AexDIGaphCe_2Ck3LhfVeQP8-pDVRCec,8417
4
- code_to_txt/config.py,sha256=KMlpeKO0F8YRbEmlXMnCs_PrR3iYQNTYOgZISZfCzVU,4148
5
- code_to_txt-0.2.0.dist-info/METADATA,sha256=AS-XxI1i8Au96Y1_y04nhgY2U6A8whMEshnGHUfHNgc,10519
6
- code_to_txt-0.2.0.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
7
- code_to_txt-0.2.0.dist-info/entry_points.txt,sha256=jPT0g_nryiuAd0E496deFZAhdscNLXiUmUdD3KGN3iA,52
8
- code_to_txt-0.2.0.dist-info/licenses/LICENSE,sha256=-K4fNS51V7AiwILLB_InW4EECFSbFrrOBd66OqVVyh4,1068
9
- code_to_txt-0.2.0.dist-info/RECORD,,