dir2text 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.1
2
+ Name: dir2text
3
+ Version: 0.2.0
4
+ Summary:
5
+ Author: Nima Shoghi
6
+ Author-email: nimashoghi@gmail.com
7
+ Requires-Python: >=3.7,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.7
10
+ Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Dist: gitignore-parser (>=0.1.11,<0.2.0)
16
+ Description-Content-Type: text/markdown
17
+
18
+
File without changes
@@ -0,0 +1,24 @@
1
+ [tool.poetry]
2
+ name = "dir2text"
3
+ version = "0.2.0"
4
+ description = ""
5
+ authors = ["Nima Shoghi <nimashoghi@gmail.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.7"
10
+ gitignore-parser = "^0.1.11"
11
+
12
+ [tool.poetry.group.dev.dependencies]
13
+ ipykernel = "*"
14
+ rich = "*"
15
+ ruff = "*"
16
+ pyright = "*"
17
+
18
+ [build-system]
19
+ requires = ["poetry-core"]
20
+ build-backend = "poetry.core.masonry.api"
21
+
22
+ [tool.poetry.scripts]
23
+ dir2text = "dir2text.text:main"
24
+ dir2md = "dir2text.markdown:main"
File without changes
@@ -0,0 +1,30 @@
1
+ import argparse
2
+
3
+ from . import markdown, text
4
+ from ._util import create_common_parser
5
+
6
+
7
def main():
    """Dispatch the ``text`` / ``markdown`` subcommands of the package CLI.

    Each subcommand inherits the shared arguments from
    ``create_common_parser()`` and stores its handler in ``func``; the parsed
    namespace is then handed to that handler.
    """
    root_parser = argparse.ArgumentParser(
        description="Convert project files to a structured string representation."
    )
    commands = root_parser.add_subparsers(dest="format", required=True)

    # (subcommand name, help text, handler) for every supported output format.
    formats = (
        ("text", "Output in plain text format", text.main),
        ("markdown", "Output in Markdown format", markdown.main),
    )
    for name, help_text, handler in formats:
        # A fresh parent parser per subcommand, as argparse copies its actions.
        subcommand = commands.add_parser(
            name, parents=[create_common_parser()], help=help_text
        )
        subcommand.set_defaults(func=handler)

    namespace = root_parser.parse_args()
    namespace.func(namespace)


if __name__ == "__main__":
    main()
@@ -0,0 +1,33 @@
1
+ import argparse
2
+ from pathlib import Path
3
+
4
+
5
def create_common_parser():
    """Build the argument parser shared by the text and Markdown front ends.

    The parser is created with ``add_help=False`` so that it can be passed in
    the ``parents=`` list of another parser without a clashing ``-h`` option.

    Returns:
        An ``argparse.ArgumentParser`` defining ``directory``, ``--extension``,
        ``--include``, ``--exclude`` and ``--gitignore`` / ``--no-gitignore``.
    """
    common = argparse.ArgumentParser(add_help=False)

    common.add_argument(
        "directory", type=Path, help="The directory to search for files"
    )
    common.add_argument(
        "--extension", "-e", help="The file extension to search for (e.g., '.py')"
    )

    # --include and --exclude are symmetric repeatable options.
    for long_flag, short_flag, verb in (
        ("--include", "-i", "include"),
        ("--exclude", "-x", "exclude"),
    ):
        common.add_argument(
            long_flag,
            short_flag,
            action="append",
            default=[],
            help=f"Patterns to {verb} (can be used multiple times)",
        )

    common.add_argument(
        "--gitignore",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Respect .gitignore files",
    )
    return common
@@ -0,0 +1,87 @@
1
+ import argparse
2
+ from pathlib import Path
3
+
4
+ from ._util import create_common_parser
5
+ from .text import find_files_bfs, read_file_content
6
+
7
# Maps a file extension to the language tag placed after the opening code
# fence in the Markdown output.
# NOTE: keys are bare extensions WITHOUT the leading dot ("py", not ".py"),
# whereas ``Path.suffix`` includes the dot, so a lookup keyed on a suffix has
# to strip it first.
EXTENSION_TO_LANGUAGE = {
    "py": "python",
    "js": "javascript",
    "ts": "typescript",
    "html": "html",
    "css": "css",
    "md": "markdown",
    "json": "json",
    "yml": "yaml",
    "yaml": "yaml",
    "sh": "bash",
    "bat": "batch",
    "ps1": "powershell",
    "sql": "sql",
    "r": "r",
    "cpp": "cpp",
    "c": "c",
    "java": "java",
    "go": "go",
    "rb": "ruby",
    "php": "php",
    "swift": "swift",
    "kt": "kotlin",
    "rs": "rust",
    "scala": "scala",
    "m": "matlab",
    "tex": "latex",
}
35
+
36
+
37
def print_directory_tree_md(files: list[Path], base_dir: Path):
    """Render the matched files as a nested Markdown bullet list.

    Despite the name, nothing is printed: the lines are returned.

    The tree is produced by walking the file system from ``base_dir``
    (``is_dir`` / ``iterdir``), listing only paths that are one of ``files``
    or an ancestor directory of one. Within a directory, files come before
    sub-directories and each group is sorted by name. Directories are shown
    with a trailing ``/``.

    Args:
        files: The files to show. They must be spelled the same way as the
            paths produced by iterating ``base_dir`` (e.g. both relative or
            both absolute), since membership is decided by path equality.
        base_dir: Root of the tree.

    Returns:
        A list of lines: the ``## Directory structure`` heading followed by
        one bullet per listed path. Only the heading is returned when no
        file lies under ``base_dir``.
    """
    # Every matched file plus each of its ancestor directories. A path is
    # part of the tree exactly when it is in this set, which replaces a scan
    # over all files for every visited path.
    visible = set(files)
    for file in files:
        visible.update(file.parents)

    def format_tree(path: Path, prefix: str = "") -> list[str]:
        if path not in visible:
            return []
        is_dir = path.is_dir()
        lines = [f"{prefix}- `{path.name}{'/' if is_dir else ''}`"]
        if is_dir:
            children = sorted(
                (child for child in path.iterdir() if child in visible),
                key=lambda child: (child.is_dir(), child.name),
            )
            for child in children:
                # Two spaces line the nested bullet up with the parent's
                # text, which is what Markdown needs to treat it as a child.
                lines.extend(format_tree(child, prefix + "  "))
        return lines

    tree_lines = ["## Directory structure\n"]
    tree_lines.extend(format_tree(base_dir))
    return tree_lines
55
+
56
+
57
def main(args: argparse.Namespace | None = None):
    """Print the selected files of a directory as one Markdown document.

    The document consists of a top-level title, the directory tree, and then
    one section per file containing the file's content in a fenced code
    block tagged with the language looked up from the file extension.

    Args:
        args: Parsed arguments providing ``directory``, ``extension``,
            ``include``, ``exclude`` and ``gitignore``. When ``None``, the
            arguments are parsed from the command line.
    """
    import re

    if args is None:
        # The shared parser is built with add_help=False so it can act as a
        # parent; wrapping it gives the standalone command a working --help.
        parser = argparse.ArgumentParser(
            description="Convert project files to a Markdown document.",
            parents=[create_common_parser()],
        )
        args = parser.parse_args()

    matching_files = find_files_bfs(
        args.directory,
        args.extension,
        args.include,
        args.exclude,
        args.gitignore,
    )

    # Title first, so the H1 heading precedes every H2 section.
    document = ["# Project Structure and Contents\n"]
    document.extend(print_directory_tree_md(matching_files, args.directory))
    document.append("")  # blank line between the tree and the first file

    for file_path in matching_files:
        relative_path = file_path.relative_to(args.directory)
        document.append(f"## {relative_path}\n")

        # Path.suffix includes the leading dot, but the table keys do not.
        language = EXTENSION_TO_LANGUAGE.get(
            file_path.suffix.lstrip(".").lower(), ""
        )

        content = read_file_content(file_path)
        # Use a fence longer than any backtick run inside the file, so that
        # content containing its own ``` fences cannot close the block early.
        longest_run = max((len(run) for run in re.findall(r"`+", content)), default=0)
        fence = "`" * max(3, longest_run + 1)

        document.append(f"{fence}{language}")
        document.append(content)
        document.append(f"{fence}\n")

    print("\n".join(document))


if __name__ == "__main__":
    main()
@@ -0,0 +1,169 @@
1
+ import argparse
2
+ import fnmatch
3
+ from collections import defaultdict, deque
4
+ from collections.abc import Callable
5
+ from pathlib import Path
6
+
7
+ from gitignore_parser import parse_gitignore
8
+
9
+ from ._util import create_common_parser
10
+
11
+
12
def print_directory_tree(files: list[Path], base_dir: Path):
    """Render the matched files as a box-drawing directory tree.

    Despite the name, nothing is printed: the lines are returned. The tree is
    built purely from the given paths; the file system is not consulted.
    Entries at each level are sorted by name.

    Args:
        files: Files to show; each must be located under ``base_dir``.
        base_dir: Root of the tree; its ``name`` is used as the root label.

    Returns:
        A list of lines: ``"Directory structure:"``, the root label, then one
        line per directory or file.

    Raises:
        ValueError: If a file is not relative to ``base_dir``.
    """

    def format_tree(tree: dict, prefix: str = "") -> list[str]:
        lines = []
        names = sorted(tree)
        for index, name in enumerate(names):
            is_last = index == len(names) - 1
            lines.append(f"{prefix}{'└── ' if is_last else '├── '}{name}")
            # The continuation must be as wide as the 4-character connector
            # above it, otherwise nested levels drift out of alignment.
            continuation = "    " if is_last else "│   "
            # Leaves are empty dicts, so recursing into them adds nothing.
            lines.extend(format_tree(tree[name], prefix + continuation))
        return lines

    # Nested dicts keyed by path component; a file is a key with an empty dict.
    file_tree: dict = {}
    for file in files:
        node = file_tree
        for part in file.relative_to(base_dir).parts:
            node = node.setdefault(part, {})

    tree_lines = ["Directory structure:", base_dir.name]
    tree_lines.extend(format_tree(file_tree))
    return tree_lines
42
+
43
+
44
def find_parent_gitignores(directory: Path) -> list[Path]:
    """Collect the ``.gitignore`` files that apply to ``directory``.

    The search covers ``directory`` itself (made absolute) and each of its
    ancestors. The file system root is the one location that is never
    checked.

    Returns:
        Paths of the existing ``.gitignore`` files, ordered from the
        outermost ancestor down to ``directory``.
    """
    start = Path(directory).absolute()
    candidates = [
        ancestor / ".gitignore"
        for ancestor in (start, *start.parents)
        # Only the root is its own parent; it is excluded from the search.
        if ancestor != ancestor.parent
    ]
    # Candidates run innermost-first; flip them to respect override order.
    return [candidate for candidate in reversed(candidates) if candidate.is_file()]
53
+
54
+
55
def should_ignore(path: str, gitignore_matchers: list[Callable[..., bool]]) -> bool:
    """Decide whether ``path`` is to be left out of the output.

    A path is ignored when its final component is one of a fixed set of
    version-control directories, OS metadata files and dependency lock
    files, or when any of the given matchers accepts it.

    Args:
        path: Path of the entry, as a string. It may include directories.
        gitignore_matchers: Callables that take ``path`` and return True when
            the path is to be ignored.

    Returns:
        True if the entry should be skipped, False otherwise.
    """
    # Compare the final component only: callers pass the full path, which
    # would never equal a bare name such as "poetry.lock".
    if Path(path).name in (
        ".gitignore",
        ".git",
        ".hg",
        ".svn",
        ".DS_Store",
        "package-lock.json",
        "yarn.lock",
        "poetry.lock",
        "Pipfile.lock",
        "pixi.lock",
    ):
        return True

    return any(matcher(path) for matcher in gitignore_matchers)
72
+
73
+
74
+ def find_files_bfs(
75
+ directory: Path,
76
+ extension: str | None,
77
+ include_patterns: list[str],
78
+ exclude_patterns: list[str],
79
+ respect_gitignore: bool,
80
+ ) -> list[Path]:
81
+ result = []
82
+ queue = deque([(directory, [])])
83
+
84
+ if respect_gitignore:
85
+ parent_gitignores = find_parent_gitignores(directory)
86
+ parent_matchers = [
87
+ parse_gitignore(gitignore) for gitignore in parent_gitignores
88
+ ]
89
+ else:
90
+ parent_matchers = []
91
+
92
+ while queue:
93
+ current_dir, current_matchers = queue.popleft()
94
+
95
+ # Skip the .git directory
96
+ if current_dir.name == ".git":
97
+ continue
98
+
99
+ # Check for a .gitignore in the current directory
100
+ if respect_gitignore:
101
+ current_gitignore = current_dir / ".gitignore"
102
+ if current_gitignore.is_file():
103
+ current_matchers = (
104
+ parent_matchers
105
+ + current_matchers
106
+ + [parse_gitignore(current_gitignore)]
107
+ )
108
+ else:
109
+ current_matchers = parent_matchers + current_matchers
110
+
111
+ for item in current_dir.iterdir():
112
+ # Check if the item should be ignored based on accumulated gitignore rules
113
+ if respect_gitignore and should_ignore(str(item), current_matchers):
114
+ continue
115
+
116
+ if item.is_file():
117
+ if extension and item.suffix != extension:
118
+ continue
119
+
120
+ # Check include patterns
121
+ if include_patterns and not any(
122
+ fnmatch.fnmatch(item.name, pattern) for pattern in include_patterns
123
+ ):
124
+ continue
125
+
126
+ # Check exclude patterns
127
+ if any(
128
+ fnmatch.fnmatch(item.name, pattern) for pattern in exclude_patterns
129
+ ):
130
+ continue
131
+
132
+ result.append(item)
133
+ elif item.is_dir():
134
+ queue.append((item, current_matchers))
135
+
136
+ return sorted(result)
137
+
138
+
139
def read_file_content(file_path: Path) -> str:
    """Return the content of ``file_path`` decoded as UTF-8.

    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    raising ``UnicodeDecodeError``, so a single binary or differently encoded
    file does not abort the whole run.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    return file_path.read_text(encoding="utf-8", errors="replace")
141
+
142
+
143
def main(args: argparse.Namespace | None = None):
    """Print the directory tree and the content of every selected file.

    The tree is printed first. Each file then follows, wrapped between a
    ``# BEGIN <relative path>`` and a ``# END <relative path>`` line.

    Args:
        args: Parsed arguments providing ``directory``, ``extension``,
            ``include``, ``exclude`` and ``gitignore``. When ``None``, the
            arguments are parsed from the command line.
    """
    if args is None:
        # The shared parser is built with add_help=False so it can act as a
        # parent; wrapping it gives the standalone command a working --help.
        parser = argparse.ArgumentParser(
            description="Convert project files to a plain-text representation.",
            parents=[create_common_parser()],
        )
        args = parser.parse_args()

    matching_files = find_files_bfs(
        args.directory,
        args.extension,
        args.include,
        args.exclude,
        args.gitignore,
    )

    tree_lines = print_directory_tree(matching_files, args.directory)

    file_contents = []
    for file_path in matching_files:
        relative_path = file_path.relative_to(args.directory)
        file_contents.extend(
            (
                f"# BEGIN {relative_path}",
                read_file_content(file_path),
                f"# END {relative_path}\n",
            )
        )

    print("\n".join(tree_lines))
    print("\n".join(file_contents))


if __name__ == "__main__":
    main()