dir2text 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dir2text-0.2.0/PKG-INFO +18 -0
- dir2text-0.2.0/README.md +0 -0
- dir2text-0.2.0/pyproject.toml +24 -0
- dir2text-0.2.0/src/dir2text/__init__.py +0 -0
- dir2text-0.2.0/src/dir2text/__main__.py +30 -0
- dir2text-0.2.0/src/dir2text/_util.py +33 -0
- dir2text-0.2.0/src/dir2text/markdown.py +87 -0
- dir2text-0.2.0/src/dir2text/text.py +169 -0
dir2text-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: dir2text
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary:
|
|
5
|
+
Author: Nima Shoghi
|
|
6
|
+
Author-email: nimashoghi@gmail.com
|
|
7
|
+
Requires-Python: >=3.7,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Dist: gitignore-parser (>=0.1.11,<0.2.0)
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
|
dir2text-0.2.0/README.md
ADDED
|
File without changes
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "dir2text"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = ""
|
|
5
|
+
authors = ["Nima Shoghi <nimashoghi@gmail.com>"]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
|
|
8
|
+
[tool.poetry.dependencies]
|
|
9
|
+
python = "^3.7"
|
|
10
|
+
gitignore-parser = "^0.1.11"
|
|
11
|
+
|
|
12
|
+
[tool.poetry.group.dev.dependencies]
|
|
13
|
+
ipykernel = "*"
|
|
14
|
+
rich = "*"
|
|
15
|
+
ruff = "*"
|
|
16
|
+
pyright = "*"
|
|
17
|
+
|
|
18
|
+
[build-system]
|
|
19
|
+
requires = ["poetry-core"]
|
|
20
|
+
build-backend = "poetry.core.masonry.api"
|
|
21
|
+
|
|
22
|
+
[tool.poetry.scripts]
|
|
23
|
+
dir2text = "dir2text.text:main"
|
|
24
|
+
dir2md = "dir2text.markdown:main"
|
|
File without changes
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
|
|
3
|
+
from . import markdown, text
|
|
4
|
+
from ._util import create_common_parser
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def main():
    """Dispatch ``python -m dir2text <format> ...`` to the matching renderer.

    Two subcommands are registered, ``text`` and ``markdown``. Each one
    inherits the shared options from ``create_common_parser()`` and records
    its renderer's ``main`` under ``func``; after parsing, the namespace is
    handed to that function.
    """
    subcommands = (
        ("text", "Output in plain text format", text.main),
        ("markdown", "Output in Markdown format", markdown.main),
    )

    parser = argparse.ArgumentParser(
        description="Convert project files to a structured string representation."
    )
    subparsers = parser.add_subparsers(dest="format", required=True)

    for name, help_text, handler in subcommands:
        # A fresh parent parser per subcommand, exactly as many as subcommands.
        subparser = subparsers.add_parser(
            name, parents=[create_common_parser()], help=help_text
        )
        subparser.set_defaults(func=handler)

    namespace = parser.parse_args()
    namespace.func(namespace)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def create_common_parser():
    """Build the argument parser shared by every dir2text entry point.

    The parser is created with ``add_help=False`` so that it can be used
    either on its own or as a ``parents=`` entry of another parser.

    Returns:
        An ``argparse.ArgumentParser`` defining the positional ``directory``
        plus the ``--extension``, ``--include``, ``--exclude`` and
        ``--gitignore`` / ``--no-gitignore`` options.
    """
    common = argparse.ArgumentParser(add_help=False)
    add = common.add_argument

    add("directory", type=Path, help="The directory to search for files")
    add("--extension", "-e", help="The file extension to search for (e.g., '.py')")

    # --include and --exclude are both repeatable and differ only in naming.
    for long_flag, short_flag, verb in (
        ("--include", "-i", "include"),
        ("--exclude", "-x", "exclude"),
    ):
        add(
            long_flag,
            short_flag,
            action="append",
            default=[],  # a new list per option, never shared
            help=f"Patterns to {verb} (can be used multiple times)",
        )

    add(
        "--gitignore",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Respect .gitignore files",
    )
    return common
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from ._util import create_common_parser
|
|
5
|
+
from .text import find_files_bfs, read_file_content
|
|
6
|
+
|
|
7
|
+
# Maps a file extension to the language tag written after the opening fence of
# a Markdown code block. Keys are bare extensions WITHOUT the leading dot
# ("py", not ".py"), so a ``Path.suffix`` value must have its dot removed
# before it is looked up here.
EXTENSION_TO_LANGUAGE = {
    "py": "python",
    "js": "javascript",
    "ts": "typescript",
    "html": "html",
    "css": "css",
    "md": "markdown",
    "json": "json",
    "yml": "yaml",
    "yaml": "yaml",
    "sh": "bash",
    "bat": "batch",
    "ps1": "powershell",
    "sql": "sql",
    "r": "r",
    "cpp": "cpp",
    "c": "c",
    "java": "java",
    "go": "go",
    "rb": "ruby",
    "php": "php",
    "swift": "swift",
    "kt": "kotlin",
    "rs": "rust",
    "scala": "scala",
    "m": "matlab",
    "tex": "latex",
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def print_directory_tree_md(files: list[Path], base_dir: Path):
    """Render the selected files as a nested Markdown bullet list.

    Despite the name nothing is printed; the lines are returned so the caller
    can decide how to emit them. The function walks the real filesystem
    below ``base_dir`` (``is_dir`` / ``iterdir``) and keeps only entries that
    are one of ``files`` or a directory containing one of them.

    Args:
        files: Paths of the files to show, expressed the same way as
            ``base_dir`` (both relative or both absolute).
        base_dir: Directory that becomes the root bullet of the list.

    Returns:
        A list of lines starting with the ``## Directory structure`` heading.
        Each nesting level is indented by two further spaces, the minimum
        Markdown needs to treat a ``- `` item as a child of the item above.
        Within a directory, files are listed before sub-directories, each
        group sorted by name.
    """
    # Every selected file plus all of its ancestor directories. One set lookup
    # then answers "is this path a file or on the way to one?", replacing a
    # scan over all files for every visited node.
    wanted = set()
    for file in files:
        wanted.add(file)
        wanted.update(file.parents)

    def format_tree(path: Path, prefix: str = "") -> list[str]:
        if path not in wanted:
            return []
        is_dir = path.is_dir()
        result = [f"{prefix}- `{path.name}{'/' if is_dir else ''}`"]
        if is_dir:
            children = sorted(path.iterdir(), key=lambda x: (x.is_dir(), x.name))
            for child in children:
                result.extend(format_tree(child, prefix + "  "))
        return result

    tree_lines = ["## Directory structure\n"]
    tree_lines.extend(format_tree(base_dir))
    return tree_lines
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _language_for(file_path: Path) -> str:
    """Return the fence language tag for *file_path*, or "" when unknown."""
    # Path.suffix keeps the leading dot (".py") whereas the table is keyed by
    # the bare extension ("py"); without stripping it no lookup ever matches.
    return EXTENSION_TO_LANGUAGE.get(file_path.suffix.lstrip(".").lower(), "")


def _fence_for(content: str) -> str:
    """Return a backtick fence longer than any backtick run inside *content*.

    A fenced block is closed by the first fence at least as long as the
    opening one, so a file that itself contains "```" would otherwise end the
    block early. Never returns fewer than three backticks.
    """
    longest = run = 0
    for char in content:
        run = run + 1 if char == "`" else 0
        longest = max(longest, run)
    return "`" * max(3, longest + 1)


def main(args: argparse.Namespace | None = None):
    """Print a Markdown document describing a directory and its files.

    Args:
        args: Parsed options providing ``directory``, ``extension``,
            ``include``, ``exclude`` and ``gitignore``. When ``None`` they are
            parsed from the command line with ``create_common_parser()``.

    The document written to stdout consists of the top-level title, the
    directory tree section, and then one ``##`` section per matching file
    with the file's text inside a fenced code block.
    """
    if args is None:
        args = create_common_parser().parse_args()

    matching_files = find_files_bfs(
        args.directory,
        args.extension,
        args.include,
        args.exclude,
        args.gitignore,
    )

    # The title goes first so the document's only H1 precedes its H2 sections.
    lines = ["# Project Structure and Contents\n"]
    lines.extend(print_directory_tree_md(matching_files, args.directory))
    lines.append("")  # blank line between the tree list and the first file

    for file_path in matching_files:
        content = read_file_content(file_path)
        fence = _fence_for(content)
        lines.append(f"## {file_path.relative_to(args.directory)}\n")
        lines.append(f"{fence}{_language_for(file_path)}")
        lines.append(content)
        lines.append(f"{fence}\n")

    print("\n".join(lines))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import fnmatch
|
|
3
|
+
from collections import defaultdict, deque
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from gitignore_parser import parse_gitignore
|
|
8
|
+
|
|
9
|
+
from ._util import create_common_parser
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def print_directory_tree(files: list[Path], base_dir: Path):
    """Render the selected files as a ``tree``-style text listing.

    Despite the name nothing is printed; the lines are returned to the
    caller. The tree is built purely from the path components of ``files``,
    so the filesystem is not touched.

    Args:
        files: Paths of the files to show; each must be located under
            ``base_dir`` (``Path.relative_to`` raises ``ValueError`` if not).
        base_dir: Directory the paths are made relative to. Its name is the
            root line of the listing.

    Returns:
        A list of lines: ``"Directory structure:"``, the name of
        ``base_dir``, then one line per directory or file. Entries on the
        same level are sorted by name.
    """
    # Nested plain dicts: name -> children. A file is simply an entry whose
    # child mapping stays empty.
    root: dict = {}
    for file in files:
        node = root
        for part in file.relative_to(base_dir).parts:
            node = node.setdefault(part, {})

    def format_tree(tree, prefix=""):
        result = []
        entries = sorted(tree.items(), key=lambda entry: entry[0])
        last_index = len(entries) - 1
        for i, (name, subtree) in enumerate(entries):
            is_last = i == last_index
            result.append(f"{prefix}{'└── ' if is_last else '├── '}{name}")
            # Continuation prefixes are four columns wide, matching the
            # width of the connectors above so nested levels line up.
            extension = "    " if is_last else "│   "
            result.extend(format_tree(subtree, prefix + extension))
        return result

    tree_lines = ["Directory structure:", base_dir.name]
    tree_lines.extend(format_tree(root))
    return tree_lines
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def find_parent_gitignores(directory: Path) -> list[Path]:
    """Collect the ``.gitignore`` files that apply to *directory*.

    Starting at ``directory`` itself (made absolute) the function climbs
    towards the filesystem root and records every ``.gitignore`` file it
    meets. The climb stops after the first directory that contains a ``.git``
    entry, because Git does not read ignore files from outside the working
    tree. When no ``.git`` entry is found the climb continues up to, but not
    including, the filesystem root.

    Args:
        directory: Directory whose own and inherited ignore files are wanted.

    Returns:
        Absolute paths of the ``.gitignore`` files found, outermost first and
        the one in ``directory`` (if any) last.
    """
    gitignores = []
    current = Path(directory).absolute()
    while current != current.parent:
        gitignore = current / ".gitignore"
        if gitignore.is_file():
            gitignores.append(gitignore)
        # ``.git`` is a directory in a normal checkout and a file in linked
        # worktrees and submodules, hence exists() rather than is_dir().
        if (current / ".git").exists():
            break
        current = current.parent
    gitignores.reverse()  # outermost first, to respect override order
    return gitignores
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# File and directory names that are skipped wherever they occur.
_ALWAYS_IGNORED_NAMES = frozenset(
    {
        ".gitignore",
        ".git",
        ".hg",
        ".svn",
        ".DS_Store",
        "package-lock.json",
        "yarn.lock",
        "poetry.lock",
        "Pipfile.lock",
        "pixi.lock",
    }
)


def should_ignore(path: str, gitignore_matchers: list[Callable[..., bool]]) -> bool:
    """Decide whether *path* should be left out of the listing.

    Args:
        path: Path of the candidate file or directory, with or without
            leading directory components.
        gitignore_matchers: Callables that take the path and return a truthy
            value when it is ignored.

    Returns:
        ``True`` when the last component of ``path`` is one of the built-in
        names (VCS metadata, ``.DS_Store``, lock files) or when any matcher
        accepts the path; ``False`` otherwise.
    """
    # Compare the final component only: callers pass paths such as
    # "project/sub/.gitignore", which never equal a bare name.
    if Path(path).name in _ALWAYS_IGNORED_NAMES:
        return True

    # Matchers receive the path exactly as given.
    return any(matcher(path) for matcher in gitignore_matchers)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def find_files_bfs(
|
|
75
|
+
directory: Path,
|
|
76
|
+
extension: str | None,
|
|
77
|
+
include_patterns: list[str],
|
|
78
|
+
exclude_patterns: list[str],
|
|
79
|
+
respect_gitignore: bool,
|
|
80
|
+
) -> list[Path]:
|
|
81
|
+
result = []
|
|
82
|
+
queue = deque([(directory, [])])
|
|
83
|
+
|
|
84
|
+
if respect_gitignore:
|
|
85
|
+
parent_gitignores = find_parent_gitignores(directory)
|
|
86
|
+
parent_matchers = [
|
|
87
|
+
parse_gitignore(gitignore) for gitignore in parent_gitignores
|
|
88
|
+
]
|
|
89
|
+
else:
|
|
90
|
+
parent_matchers = []
|
|
91
|
+
|
|
92
|
+
while queue:
|
|
93
|
+
current_dir, current_matchers = queue.popleft()
|
|
94
|
+
|
|
95
|
+
# Skip the .git directory
|
|
96
|
+
if current_dir.name == ".git":
|
|
97
|
+
continue
|
|
98
|
+
|
|
99
|
+
# Check for a .gitignore in the current directory
|
|
100
|
+
if respect_gitignore:
|
|
101
|
+
current_gitignore = current_dir / ".gitignore"
|
|
102
|
+
if current_gitignore.is_file():
|
|
103
|
+
current_matchers = (
|
|
104
|
+
parent_matchers
|
|
105
|
+
+ current_matchers
|
|
106
|
+
+ [parse_gitignore(current_gitignore)]
|
|
107
|
+
)
|
|
108
|
+
else:
|
|
109
|
+
current_matchers = parent_matchers + current_matchers
|
|
110
|
+
|
|
111
|
+
for item in current_dir.iterdir():
|
|
112
|
+
# Check if the item should be ignored based on accumulated gitignore rules
|
|
113
|
+
if respect_gitignore and should_ignore(str(item), current_matchers):
|
|
114
|
+
continue
|
|
115
|
+
|
|
116
|
+
if item.is_file():
|
|
117
|
+
if extension and item.suffix != extension:
|
|
118
|
+
continue
|
|
119
|
+
|
|
120
|
+
# Check include patterns
|
|
121
|
+
if include_patterns and not any(
|
|
122
|
+
fnmatch.fnmatch(item.name, pattern) for pattern in include_patterns
|
|
123
|
+
):
|
|
124
|
+
continue
|
|
125
|
+
|
|
126
|
+
# Check exclude patterns
|
|
127
|
+
if any(
|
|
128
|
+
fnmatch.fnmatch(item.name, pattern) for pattern in exclude_patterns
|
|
129
|
+
):
|
|
130
|
+
continue
|
|
131
|
+
|
|
132
|
+
result.append(item)
|
|
133
|
+
elif item.is_dir():
|
|
134
|
+
queue.append((item, current_matchers))
|
|
135
|
+
|
|
136
|
+
return sorted(result)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def read_file_content(file_path: Path) -> str:
    """Return the text of *file_path*, decoded as UTF-8.

    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    raising ``UnicodeDecodeError``, so one binary or legacy-encoded file does
    not abort the dump of an entire directory.

    Args:
        file_path: File to read.

    Returns:
        The decoded file content.
    """
    return file_path.read_text(encoding="utf-8", errors="replace")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def main(args: argparse.Namespace | None = None):
    """Print a plain-text dump of a directory: its tree, then every file.

    Args:
        args: Parsed options providing ``directory``, ``extension``,
            ``include``, ``exclude`` and ``gitignore``. When ``None`` they are
            parsed from the command line with ``create_common_parser()``.

    The tree listing is written to stdout first. Each matching file follows,
    wrapped between ``# BEGIN <relative path>`` and ``# END <relative path>``
    marker lines.
    """
    if args is None:
        args = create_common_parser().parse_args()

    root = args.directory
    selected = find_files_bfs(
        root, args.extension, args.include, args.exclude, args.gitignore
    )
    tree_lines = print_directory_tree(selected, root)

    # Everything is gathered before anything is printed.
    sections = []
    for path in selected:
        label = path.relative_to(root)
        sections += [
            f"# BEGIN {label}",
            read_file_content(path),
            f"# END {label}\n",
        ]

    print("\n".join(tree_lines))
    print("\n".join(sections))


if __name__ == "__main__":
    main()
|