merger-cli 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- merger_cli-1.0.0/LICENSE +21 -0
- merger_cli-1.0.0/MANIFEST.in +6 -0
- merger_cli-1.0.0/PKG-INFO +194 -0
- merger_cli-1.0.0/README.md +174 -0
- merger_cli-1.0.0/examples/__init__.py +0 -0
- merger_cli-1.0.0/examples/custom_readers/__init__.py +0 -0
- merger_cli-1.0.0/examples/custom_readers/ipynb.py +82 -0
- merger_cli-1.0.0/examples/main.example.py +29 -0
- merger_cli-1.0.0/merger/__init__.py +0 -0
- merger_cli-1.0.0/merger/cli.py +143 -0
- merger_cli-1.0.0/merger/files.py +139 -0
- merger_cli-1.0.0/merger/filtering.py +47 -0
- merger_cli-1.0.0/merger/logger.py +22 -0
- merger_cli-1.0.0/merger/registry.py +136 -0
- merger_cli-1.0.0/merger/tree.py +59 -0
- merger_cli-1.0.0/merger_cli.egg-info/PKG-INFO +194 -0
- merger_cli-1.0.0/merger_cli.egg-info/SOURCES.txt +22 -0
- merger_cli-1.0.0/merger_cli.egg-info/dependency_links.txt +1 -0
- merger_cli-1.0.0/merger_cli.egg-info/entry_points.txt +2 -0
- merger_cli-1.0.0/merger_cli.egg-info/requires.txt +1 -0
- merger_cli-1.0.0/merger_cli.egg-info/top_level.txt +1 -0
- merger_cli-1.0.0/pyproject.toml +33 -0
- merger_cli-1.0.0/setup.cfg +7 -0
merger_cli-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Diogo Losacco Toporcov
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: merger-cli
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Merger is a tool that scans a directory, filters files using customizable patterns, and merges readable content into a single output file.
|
|
5
|
+
Author-email: Diogo Toporcov <diogotoporcov@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/diogotoporcov/merger-cli
|
|
8
|
+
Project-URL: Documentation, https://github.com/diogotoporcov/merger-cli
|
|
9
|
+
Keywords: merger,file system,concatenation,automation,development
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
15
|
+
Requires-Python: >=3.8
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: chardet>=5.2.0
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# Merger CLI
|
|
22
|
+
|
|
23
|
+
[Python](https://www.python.org/)
|
|
24
|
+
[MIT License](LICENSE)
|
|
25
|
+
|
|
26
|
+
Merger is a command-line utility for developers that scans a directory, filters files using customizable ignore patterns, and merges all readable content into a single structured output file. It supports custom file readers and validators, making it easily extendable for formats such as `.ipynb`, `.pdf`, or any specific format.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Summary
|
|
31
|
+
|
|
32
|
+
1. [Core Features](#core-features)
|
|
33
|
+
2. [Dependencies](#dependencies)
|
|
34
|
+
3. [Installation](#installation)
|
|
35
|
+
4. [Usage](#usage)
|
|
36
|
+
5. [Custom Readers](#custom-readers)
|
|
37
|
+
6. [CLI Options](#cli-options)
|
|
38
|
+
7. [License](#license)
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Core Features
|
|
43
|
+
|
|
44
|
+
* **Recursive merge** of all readable text files under a root directory.
|
|
45
|
+
* **Glob-based ignore patterns** using `.gitignore`-style syntax.
|
|
46
|
+
* **Automatic encoding detection**.
|
|
47
|
+
* **Custom file readers and validators** for non-text formats.
|
|
48
|
+
* **CLI support** for installation, removal, and listing of custom readers.
|
|
49
|
+
* **Human-readable merged output**, including a directory tree header and file delimiters.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Dependencies
|
|
54
|
+
|
|
55
|
+
| Component | Version / Type | Notes |
|
|
56
|
+
|-------------|----------------|-----------------------------|
|
|
57
|
+
| **Python** | ≥ 3.8 | Required |
|
|
58
|
+
|
|
59
|
+
All dependencies are listed in [`requirements.txt`](requirements.txt).
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
### 1. Clone the repository
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
git clone https://github.com/diogotoporcov/merger-cli.git
|
|
69
|
+
cd merger-cli
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### 2. Create and activate a virtual environment
|
|
73
|
+
|
|
74
|
+
**Linux / macOS**
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
python -m venv .venv
|
|
78
|
+
source .venv/bin/activate
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**Windows (PowerShell)**
|
|
82
|
+
|
|
83
|
+
```powershell
|
|
84
|
+
python -m venv .venv
|
|
85
|
+
.venv\Scripts\Activate.ps1
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### 3. Install dependencies
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pip install -r requirements.txt
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### 4. Install as CLI tool
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
pip install .
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
This registers the `merger` command globally.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Usage
|
|
105
|
+
|
|
106
|
+
### Basic merge
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
merger ./src ./merged.txt
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Custom ignore patterns
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
merger "C:\Users\USER\Desktop\project" "C:\Users\USER\Desktop\project\output.txt" --ignore "*.log" "__pycache__" "*.tmp"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Custom ignore file
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
merger . ./output.txt -f ./merger.ignore
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Include empty files
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
merger ./data ./output.txt --empty
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Verbose output
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
merger ./src ./merged.txt --log-level DEBUG
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Custom Readers
|
|
139
|
+
|
|
140
|
+
You can extend Merger to handle new file formats.
|
|
141
|
+
|
|
142
|
+
### Installing a custom reader
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
merger --install .ipynb path/to/ipynb.py
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Where `ipynb.py` must define:
|
|
149
|
+
|
|
150
|
+
* ```python
|
|
151
|
+
validator: Callable[[Path], bool]
|
|
152
|
+
```
|
|
153
|
+
* ```python
|
|
154
|
+
reader: Callable[[Path], str]
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
To uninstall:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
merger --uninstall .ipynb
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
List installed readers:
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
merger --list-installed
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
An example `.ipynb` reader can be found in
|
|
170
|
+
[`examples/custom_readers/ipynb.py`](examples/custom_readers/ipynb.py).
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## CLI Options
|
|
175
|
+
|
|
176
|
+
| Option | Description |
|
|
177
|
+
|-------------------------|--------------------------------------------------------------------------------|
|
|
178
|
+
| `--ignore` | List of glob-style ignore patterns. |
|
|
179
|
+
| `-f, --ignore-file`     | Path to file containing ignore patterns (default: `<input_dir>/merger.ignore`). |
|
|
180
|
+
| `-i, --install` | Install a custom reader for an extension. |
|
|
181
|
+
| `-u, --uninstall` | Remove a custom reader (`*` removes all). |
|
|
182
|
+
| `--list-installed` | Show installed readers. |
|
|
183
|
+
| `-l, --log-level` | Set logging verbosity (`DEBUG`, `INFO`, etc.). |
|
|
184
|
+
| `--empty` | Include empty files in merged output. |
|
|
185
|
+
| `--prefix` / `--suffix` | Customize file delimiters in output. |
|
|
186
|
+
| `--overrides` | Load override reader definitions from a Python module. |
|
|
187
|
+
| `--no-tree` | Do not include the generated directory tree in the output file. |
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## License
|
|
193
|
+
|
|
194
|
+
This project is licensed under the MIT License — see [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# Merger CLI
|
|
2
|
+
|
|
3
|
+
[Python](https://www.python.org/)
|
|
4
|
+
[MIT License](LICENSE)
|
|
5
|
+
|
|
6
|
+
Merger is a command-line utility for developers that scans a directory, filters files using customizable ignore patterns, and merges all readable content into a single structured output file. It supports custom file readers and validators, making it easily extendable for formats such as `.ipynb`, `.pdf`, or any specific format.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## Summary
|
|
11
|
+
|
|
12
|
+
1. [Core Features](#core-features)
|
|
13
|
+
2. [Dependencies](#dependencies)
|
|
14
|
+
3. [Installation](#installation)
|
|
15
|
+
4. [Usage](#usage)
|
|
16
|
+
5. [Custom Readers](#custom-readers)
|
|
17
|
+
6. [CLI Options](#cli-options)
|
|
18
|
+
7. [License](#license)
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Core Features
|
|
23
|
+
|
|
24
|
+
* **Recursive merge** of all readable text files under a root directory.
|
|
25
|
+
* **Glob-based ignore patterns** using `.gitignore`-style syntax.
|
|
26
|
+
* **Automatic encoding detection**.
|
|
27
|
+
* **Custom file readers and validators** for non-text formats.
|
|
28
|
+
* **CLI support** for installation, removal, and listing of custom readers.
|
|
29
|
+
* **Human-readable merged output**, including a directory tree header and file delimiters.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Dependencies
|
|
34
|
+
|
|
35
|
+
| Component | Version / Type | Notes |
|
|
36
|
+
|-------------|----------------|-----------------------------|
|
|
37
|
+
| **Python** | ≥ 3.8 | Required |
|
|
38
|
+
|
|
39
|
+
All dependencies are listed in [`requirements.txt`](requirements.txt).
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
### 1. Clone the repository
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
git clone https://github.com/diogotoporcov/merger-cli.git
|
|
49
|
+
cd merger-cli
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### 2. Create and activate a virtual environment
|
|
53
|
+
|
|
54
|
+
**Linux / macOS**
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
python -m venv .venv
|
|
58
|
+
source .venv/bin/activate
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**Windows (PowerShell)**
|
|
62
|
+
|
|
63
|
+
```powershell
|
|
64
|
+
python -m venv .venv
|
|
65
|
+
.venv\Scripts\Activate.ps1
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### 3. Install dependencies
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install -r requirements.txt
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### 4. Install as CLI tool
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install .
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
This registers the `merger` command globally.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Usage
|
|
85
|
+
|
|
86
|
+
### Basic merge
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
merger ./src ./merged.txt
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Custom ignore patterns
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
merger "C:\Users\USER\Desktop\project" "C:\Users\USER\Desktop\project\output.txt" --ignore "*.log" "__pycache__" "*.tmp"
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Custom ignore file
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
merger . ./output.txt -f ./merger.ignore
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Include empty files
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
merger ./data ./output.txt --empty
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Verbose output
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
merger ./src ./merged.txt --log-level DEBUG
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## Custom Readers
|
|
119
|
+
|
|
120
|
+
You can extend Merger to handle new file formats.
|
|
121
|
+
|
|
122
|
+
### Installing a custom reader
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
merger --install .ipynb path/to/ipynb.py
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Where `ipynb.py` must define:
|
|
129
|
+
|
|
130
|
+
* ```python
|
|
131
|
+
validator: Callable[[Path], bool]
|
|
132
|
+
```
|
|
133
|
+
* ```python
|
|
134
|
+
reader: Callable[[Path], str]
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
To uninstall:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
merger --uninstall .ipynb
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
List installed readers:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
merger --list-installed
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
An example `.ipynb` reader can be found in
|
|
150
|
+
[`examples/custom_readers/ipynb.py`](examples/custom_readers/ipynb.py).
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## CLI Options
|
|
155
|
+
|
|
156
|
+
| Option | Description |
|
|
157
|
+
|-------------------------|--------------------------------------------------------------------------------|
|
|
158
|
+
| `--ignore` | List of glob-style ignore patterns. |
|
|
159
|
+
| `-f, --ignore-file`     | Path to file containing ignore patterns (default: `<input_dir>/merger.ignore`). |
|
|
160
|
+
| `-i, --install` | Install a custom reader for an extension. |
|
|
161
|
+
| `-u, --uninstall` | Remove a custom reader (`*` removes all). |
|
|
162
|
+
| `--list-installed` | Show installed readers. |
|
|
163
|
+
| `-l, --log-level` | Set logging verbosity (`DEBUG`, `INFO`, etc.). |
|
|
164
|
+
| `--empty` | Include empty files in merged output. |
|
|
165
|
+
| `--prefix` / `--suffix` | Customize file delimiters in output. |
|
|
166
|
+
| `--overrides` | Load override reader definitions from a Python module. |
|
|
167
|
+
| `--no-tree` | Do not include the generated directory tree in the output file. |
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## License
|
|
173
|
+
|
|
174
|
+
This project is licensed under the MIT License — see [LICENSE](LICENSE) for details.
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Final, Callable
|
|
4
|
+
|
|
5
|
+
import chardet
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def is_ipynb_file(file_path: Path) -> bool:
    """
    Checks whether the given file looks like a Jupyter Notebook (.ipynb) file.

    The file must carry the ``.ipynb`` suffix and contain a JSON object whose
    ``"cells"`` entry is a list. The list may be empty: no minimum number of
    cells is required.

    Args:
        file_path (Path): Path to the file.

    Returns:
        bool: True if the file is a readable notebook, False otherwise
            (including when it cannot be read, decoded or parsed as JSON).
    """
    try:
        if file_path.suffix != ".ipynb":
            return False

        # Read the file once; the same bytes serve detection and parsing.
        raw = file_path.read_bytes()

        # Only the first 1024 bytes are sampled for encoding detection.
        detection = chardet.detect(raw[:1024])
        encoding = detection.get("encoding")
        confidence = detection.get("confidence", 0)

        # Fall back to UTF-8 when the detector is unsure.
        if not encoding or confidence < 0.8:
            encoding = "utf-8"

        notebook = json.loads(raw.decode(encoding))

        return isinstance(notebook, dict) and isinstance(notebook.get("cells"), list)

    except Exception:
        # Deliberate best-effort: a validator reports any failure as
        # "not a notebook" instead of propagating it to the caller.
        return False
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def extract_ipynb_content(file_path: Path, include_markdown: bool = True, include_code: bool = True) -> str:
    """
    Extracts code and markdown content from a Jupyter Notebook (.ipynb) file.

    Code cells are emitted verbatim; markdown cells are wrapped in a
    ```` ```markdown ```` fence. Trailing whitespace is stripped from every
    line. Cells of any other type, cells with an empty source, and malformed
    cell entries (anything that is not a JSON object, or whose ``source`` is
    neither a string nor a list) are skipped.

    Args:
        file_path (Path): Path to the notebook file (read as UTF-8).
        include_markdown (bool): Whether to include markdown cells.
        include_code (bool): Whether to include code cells.

    Returns:
        str: Extracted content with cells separated by double newlines.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid UTF-8 encoded JSON.
    """
    with file_path.open(encoding="utf-8") as f:
        notebook = json.load(f)

    blocks = []

    for cell in notebook.get("cells", []):
        # The validator only guarantees that "cells" is a list, so individual
        # entries may still be malformed; skip them instead of crashing.
        if not isinstance(cell, dict):
            continue

        cell_type = cell.get("cell_type")
        is_code = cell_type == "code" and include_code
        is_markdown = cell_type == "markdown" and include_markdown
        if not (is_code or is_markdown):
            continue

        # "source" is either a single string or a list of lines.
        lines = cell.get("source", [])
        if isinstance(lines, str):
            lines = lines.splitlines()
        if not isinstance(lines, list) or not lines:
            continue

        text = "\n".join(line.rstrip() for line in lines)
        blocks.append(text if is_code else "```markdown\n" + text + "\n```")

    return "\n\n".join(blocks)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Module-level hooks that a Merger custom-reader module must define
# (see the README section "Custom Readers"):
#   validator -- decides whether a path is handled by this reader
#   reader    -- returns the text content to merge for that path
validator: Final[Callable[[Path], bool]] = is_ipynb_file
reader: Final[Callable[[Path], str]] = extract_ipynb_content


# Only the two hooks are part of this module's public API.
__all__ = ["validator", "reader"]
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from merger.files import merge
|
|
4
|
+
import examples.custom_readers.ipynb as ipynb
|
|
5
|
+
|
|
6
|
+
# Example: merge a directory programmatically, using the .ipynb custom reader.
if __name__ == "__main__":
    # Placeholder root directory to scan; replace with a real path.
    root = Path("path/to/dir")
    # NOTE(review): filter_files_by_patterns matches patterns against the entry
    # name and the path relative to root (without a leading './'); confirm that
    # the './...' and absolute forms below are actually matched.
    ignore_patterns = [
        "README.md",
        ".idea",
        "__pycache__",
        ".env",
        "./example/path",  # File or folder named 'path' relative to ./example/, where '.' is the root dir
        "C:/Users/User/Desktop/path/to/dir/2",
        "output.txt",
        ".venv",
        "*.docx",  # Any file with extension .docx
        "*cache*",  # Any file or folder that contains 'cache' in its name or path
        "__*__"  # Any file or folder that starts with '__' and ends with '__'
    ]
    output_path = Path("./output.txt")

    # Route .ipynb files through the example validator/reader pair.
    merge(
        root,
        ignore_patterns,
        output_path,
        validation_func_override={".ipynb": ipynb.validator},
        read_func_override={".ipynb": ipynb.reader}
    )
|
|
File without changes
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from .files import merge, read_ignore_file
|
|
6
|
+
from .logger import logger, setup_logger
|
|
7
|
+
from .registry import register_reader, unregister_reader, list_readers, load_installed_readers, load_custom_readers
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser describing every ``merger`` command-line option."""
    parser = argparse.ArgumentParser(
        description="Merge readable files in a directory with support for ignore patterns and custom file readers."
    )

    # Positional args are optional at parser level because the reader
    # management flags work without them; main() enforces input_dir for merges.
    parser.add_argument("input_dir", type=Path, nargs="?", help="Root directory to scan for files")
    parser.add_argument("output_file", type=Path, nargs="?",
                        help="File to save merged output (default: <input_dir>/merger.txt)")

    # Reader management
    parser.add_argument("-i", "--install", nargs=2, metavar=("EXT", "MODULE_PATH"),
                        help="Install a custom reader for a given extension (e.g., .pdf)")

    parser.add_argument("-u", "--uninstall", metavar="EXT",
                        help="Uninstall a custom reader by extension (use '*' to remove all)")

    parser.add_argument("--list-installed", action="store_true",
                        help="List all installed custom readers")

    # Logging
    parser.add_argument(
        "-l", "--log-level",
        type=str,
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default="INFO",
        help="Set the logging level (default: INFO)"
    )

    # Ignore logic
    parser.add_argument("--ignore", nargs="*", default=[],
                        help="Glob-style patterns to ignore (e.g., '*.log', '__pycache__')")

    parser.add_argument(
        "-f", "--ignore-file", type=Path,
        help="File containing glob-style patterns to ignore (default: <input_dir>/merger.ignore)")

    # Custom reader overrides
    parser.add_argument("--overrides", type=Path,
                        help="Path to Python module with `validators` and `readers` dictionaries for custom overrides")

    parser.add_argument("--empty", action="store_true", default=False,
                        help="Include empty files in the merged output")

    parser.add_argument("--prefix", type=str, default="<<FILE_START: {path}>>\n",
                        help="Format string for file start marker (set empty string to disable)")

    parser.add_argument("--suffix", type=str, default="\n<<FILE_END: {path}>>\n\n",
                        help="Format string for file end marker (set empty string to disable)")

    # Tree
    parser.add_argument(
        "--no-tree",
        action="store_true",
        default=False,
        help="Do not include the generated directory tree in the output file"
    )

    return parser


def main(argv=None):
    """
    Entry point of the ``merger`` command-line tool.

    Exactly one action is performed per invocation, checked in this order:
    install a reader, uninstall reader(s), list installed readers, or merge
    ``input_dir`` into ``output_file``.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]`` (argparse's default when None). Useful for
            calling the CLI programmatically.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, when ``input_dir``
            is missing or not a directory for a merge, or when the ignore file
            cannot be found.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    setup_logger(level=getattr(logging, args.log_level.upper()))

    if args.install:
        ext, path = args.install
        register_reader(ext, path)
        logger.info(f"Installed reader for '{ext}' from '{path}'")
        return

    if args.uninstall:
        if args.uninstall == "*":
            installed = list_readers()
            if not installed:
                logger.info("No custom readers to uninstall.")
            else:
                # Iterate over a snapshot of the keys: unregistering may
                # modify the mapping returned by list_readers().
                for ext in list(installed.keys()):
                    unregister_reader(ext)
                    logger.info(f"Uninstalled reader for '{ext}'")
        else:
            unregister_reader(args.uninstall)
            logger.info(f"Uninstalled reader for '{args.uninstall}'")
        return

    if args.list_installed:
        installed = list_readers()
        if not installed:
            logger.info("No custom readers installed.")
        else:
            logger.info("Installed Custom Readers:")
            for ext, mod_path in installed.items():
                logger.info(f"  {ext}: {mod_path}")
        return

    # From here on we are merging, so input_dir becomes mandatory.
    if not args.input_dir:
        parser.error("input_dir is required unless installing/uninstalling/listing readers.")

    # Fail early: otherwise the scan only logs a warning and the run would
    # still finish with a misleading "Saved to ..." message.
    if not args.input_dir.is_dir():
        parser.error(f"input_dir is not an existing directory: {args.input_dir}")

    # Handle default output file
    if not args.output_file:
        args.output_file = args.input_dir / "merger.txt"

    # Pick up <input_dir>/merger.ignore automatically when no file was given.
    if not args.ignore_file:
        default_ignore = args.input_dir / "merger.ignore"
        if default_ignore.exists():
            args.ignore_file = default_ignore
            logger.info("Found default ignore file 'merger.ignore' in input directory. Using it for ignore patterns.")

    ignore_patterns = args.ignore.copy()
    if args.ignore_file:
        try:
            ignore_patterns.extend(read_ignore_file(args.ignore_file))
        except FileNotFoundError as exc:
            # Report a usage error instead of dumping a traceback.
            parser.error(str(exc))

    readers, validators = load_installed_readers()

    # Overrides given on the command line take precedence over installed readers.
    if args.overrides:
        r, v = load_custom_readers(args.overrides)
        readers.update(r)
        validators.update(v)

    merge(
        dir_path=args.input_dir,
        ignore_patterns=ignore_patterns,
        output_path=args.output_file,
        validation_func_override=validators,
        read_func_override=readers,
        write_if_empty=args.empty,
        prefix_format=args.prefix,
        suffix_format=args.suffix,
        include_tree=not args.no_tree
    )

    logger.info(f"Saved to {args.output_file}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Callable, Dict, Optional, List
|
|
3
|
+
|
|
4
|
+
import chardet
|
|
5
|
+
|
|
6
|
+
from .filtering import filter_files_by_patterns
|
|
7
|
+
from .tree import generate_tree_visualizer
|
|
8
|
+
from .logger import logger
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def is_text_file(
        file_path: Path,
        chunk_size: int = 1024,
        *,
        validation_func_override: Optional[Dict[str, Callable[[Path], bool]]] = None,
        min_encoding_detection_confidence: float = 0.8
) -> bool:
    """
    Decide whether a file should be treated as readable text.

    If ``validation_func_override`` holds a callable for the file's suffix,
    that callable alone decides. Otherwise the first ``chunk_size`` bytes are
    sampled, their encoding is detected with chardet (falling back to UTF-8
    when detection fails or is below the confidence threshold) and the sample
    must decode cleanly.

    Args:
        file_path: File to inspect.
        chunk_size: Number of leading bytes to sample.
        validation_func_override: Mapping of file suffix to custom validator.
        min_encoding_detection_confidence: Minimum chardet confidence needed
            to trust the detected encoding.

    Returns:
        bool: True if the file is considered text, False otherwise. Any
            exception raised along the way is logged and reported as False.
    """
    import codecs  # local import: only needed by this function

    logger.debug(f"Checking if file is text: {file_path}")
    try:
        if validation_func_override and callable(validation_override_func := validation_func_override.get(file_path.suffix)):
            result = validation_override_func(file_path)
            logger.debug(f"Custom validator used for {file_path.suffix}: {result}")
            return result

        with open(file_path, "rb") as file:
            chunk = file.read(chunk_size)
            # Probe one more byte to learn whether the sample was cut short.
            truncated = bool(file.read(1))

        result = chardet.detect(chunk)
        encoding = result.get("encoding")
        confidence = result.get("confidence", 0)
        logger.debug(f"Detected encoding for {file_path}: {encoding} (confidence: {confidence})")

        if not encoding or confidence < min_encoding_detection_confidence:
            encoding = "utf-8"
            logger.debug(f"Low confidence for {file_path}. Falling back to utf-8")

        # Decode incrementally: a truncated sample may end in the middle of a
        # multi-byte character, which a plain bytes.decode() would reject even
        # though the file itself is valid text.
        codecs.getincrementaldecoder(encoding)().decode(chunk, final=not truncated)
        return True

    except Exception as e:
        logger.error(f"Failed to decode {file_path}: {e}")
        return False
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def append_content(
        root: Path,
        file_path: Path,
        output_path: Path,
        *,
        prefix: Optional[str] = "<<FILE_START: {path}>>\n",
        suffix: Optional[str] = "\n<<FILE_END: {path}>>\n\n",
        read_func_override: Optional[Dict[str, Callable[[Path], str]]] = None,
        write_if_empty: bool = False
) -> None:
    """
    Append one file's content, wrapped in start/end markers, to the output file.

    Args:
        root: Directory that ``file_path`` is reported relative to.
        file_path: File whose content is appended.
        output_path: Destination file, opened in append mode as UTF-8.
        prefix: Format string written before the content; ``{path}`` is
            replaced with the relative path. Empty/None disables it.
        suffix: Format string written after the content; same rules as prefix.
        read_func_override: Mapping of file suffix to a custom reader that
            returns the text to merge.
        write_if_empty: When False, files with empty content are skipped.

    Read and write failures are logged and swallowed so that one bad file
    does not abort the whole merge.
    """
    # NOTE(review): the leading '.\' is Windows-style on every platform; kept
    # unchanged so existing output markers stay byte-compatible.
    relative_path = ".\\" + str(file_path.relative_to(root))
    formatted_prefix = prefix.format(path=relative_path) if prefix else ""
    formatted_suffix = suffix.format(path=relative_path) if suffix else ""

    try:
        if read_func_override and callable(read_override_func := read_func_override.get(file_path.suffix)):
            content = read_override_func(file_path)
            logger.debug(f"Used custom reader for {file_path}")
        else:
            try:
                content = file_path.read_text(encoding="utf-8")
                logger.debug(f"Read text from {file_path} using utf-8")
            except UnicodeDecodeError:
                # is_text_file() may have accepted this file under a non-UTF-8
                # encoding; detect it from a bounded sample instead of
                # dropping the file.
                with file_path.open("rb") as raw:
                    detected = chardet.detect(raw.read(65536)).get("encoding")
                if not detected:
                    raise
                content = file_path.read_text(encoding=detected)
                logger.debug(f"Read text from {file_path} using {detected}")

        if not write_if_empty and not content:
            logger.debug(f"Skipping empty file: {file_path}")
            return

        with output_path.open("a", encoding="utf-8") as f:
            f.write(formatted_prefix + content + formatted_suffix)
            logger.debug(f"Merged: {file_path}")

    except Exception as e:
        logger.error(f"Failed to append content from {file_path}: {e}")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def merge(
        dir_path: Path,
        ignore_patterns: List[str],
        output_path: Path,
        *,
        read_func_override: Optional[Dict[str, Callable[[Path], str]]] = None,
        validation_func_override: Optional[Dict[str, Callable[[Path], bool]]] = None,
        min_encoding_detection_confidence: float = 0.8,
        write_if_empty: bool = False,
        prefix_format: Optional[str] = "<<FILE_START: {path}>>\n",
        suffix_format: Optional[str] = "\n<<FILE_END: {path}>>\n\n",
        include_tree: bool = True
) -> None:
    """
    Merge every readable file under ``dir_path`` into ``output_path``.

    The output file is overwritten. When ``include_tree`` is True it starts
    with a directory tree of the non-ignored entries, followed by each text
    file's content wrapped in the prefix/suffix markers. The output file
    itself is never merged into its own content.

    Args:
        dir_path: Root directory to scan recursively.
        ignore_patterns: Glob-style patterns of entries to exclude.
        output_path: File receiving the merged output.
        read_func_override: Mapping of file suffix to custom reader.
        validation_func_override: Mapping of file suffix to custom validator.
        min_encoding_detection_confidence: Threshold forwarded to is_text_file.
        write_if_empty: Whether files with empty content are still written.
        prefix_format: Format string written before each file's content.
        suffix_format: Format string written after each file's content.
        include_tree: Whether to write the directory tree header.

    If writing the tree header fails, the error is logged and the merge is
    abandoned without raising.
    """
    logger.debug(f"Starting merge from: {dir_path}")

    # Resolve the output path once instead of once per scanned entry.
    resolved_output = output_path.resolve()
    paths = [
        path
        for path in filter_files_by_patterns(dir_path, ignore_patterns, True)
        if path.resolve() != resolved_output
    ]
    logger.debug(f"Filtered paths: {len(paths)} files to process")

    if include_tree:
        try:
            # Mode "w" also truncates any previous output before appending.
            with open(output_path, "w", encoding="utf-8") as f:
                tree = generate_tree_visualizer(dir_path, paths)
                f.write(f"{tree}\n")
                logger.debug(f"Directory tree written to {output_path}")

        except Exception as e:
            logger.error(f"Failed to write tree header to {output_path}: {e}")
            return

    else:
        # No header: just truncate so append_content starts from an empty file.
        output_path.write_text("", encoding="utf-8")

    # Directories were only needed for the tree; merge regular entries only.
    for path in paths:
        if path.is_dir():
            continue

        if not is_text_file(
                path,
                validation_func_override=validation_func_override,
                min_encoding_detection_confidence=min_encoding_detection_confidence
        ):
            logger.debug(f"Skipped non-text file: {path}")
            continue

        append_content(
            root=dir_path,
            file_path=path,
            output_path=output_path,
            prefix=prefix_format,
            suffix=suffix_format,
            read_func_override=read_func_override,
            write_if_empty=write_if_empty
        )
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def read_ignore_file(file_path: Path) -> List[str]:
    """
    Read ignore patterns from a UTF-8 text file, one pattern per line.

    Surrounding whitespace is stripped from each line and blank lines are
    dropped; every remaining line is returned as a pattern.

    Args:
        file_path: Path to the ignore file.

    Returns:
        List[str]: The non-empty, stripped lines in file order.

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing file.
    """
    # typing.List (not the builtin list[str]) keeps this module importable on
    # Python 3.8, which the package metadata declares as supported.
    if not file_path.is_file():
        logger.error(f"Ignore file not found: {file_path}")
        raise FileNotFoundError(f"Ignore file not found: {file_path}")

    logger.debug(f"Reading ignore patterns from: {file_path}")
    stripped = (line.strip() for line in file_path.read_text(encoding="utf-8").splitlines())
    return [pattern for pattern in stripped if pattern]
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import fnmatch
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from .logger import logger  # Imports the custom logger
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def filter_files_by_patterns(dir_path: Path, ignore_patterns: List[str], recursive: bool) -> List[Path]:
    """
    Filters files and directories under a root directory, excluding any that match ignore patterns.

    Patterns use ``fnmatch`` semantics (``*`` also matches ``/``). Trailing
    ``/`` and leading ``./`` are stripped from each pattern. A relative
    pattern is tested against both the entry's path relative to ``dir_path``
    (POSIX form) and the entry's bare name; an absolute pattern is tested
    against the entry's absolute path. Ignored directories are not descended
    into.

    Args:
        dir_path (Path): Directory to scan files from.
        ignore_patterns (List[str]): List of glob-style patterns to exclude files and directories.
        recursive (bool): Whether to scan directories recursively.

    Returns:
        List[Path]: The files and directories that were not ignored. A
            directory that cannot be scanned is logged and skipped.
    """

    def normalize(pattern: str) -> str:
        # Relative paths never start with "./", so such a prefix could never
        # match; strip it so "./a/b" means "a/b" under the root.
        pat = pattern.rstrip("/")
        while pat.startswith("./"):
            pat = pat[2:]
        return pat

    # Normalise once up front instead of once per scanned entry.
    patterns = [(pat, Path(pat).is_absolute()) for pat in map(normalize, ignore_patterns)]

    def matches_any_pattern(path: Path) -> bool:
        rel_path = path.relative_to(dir_path).as_posix()
        for pat, is_absolute in patterns:
            if is_absolute:
                # Absolute patterns can only match the absolute location.
                matched = fnmatch.fnmatch(path.absolute().as_posix(), pat)
            else:
                matched = fnmatch.fnmatch(rel_path, pat) or fnmatch.fnmatch(path.name, pat)

            if matched:
                logger.debug(f"Ignoring path '{rel_path}' matched by pattern '{pat}'")
                return True
        return False

    results = []

    def scan_dir(directory: Path):
        logger.debug(f"Scanning directory: {directory}")
        try:
            for entry in directory.iterdir():
                if matches_any_pattern(entry):
                    continue

                results.append(entry)
                if recursive and entry.is_dir():
                    scan_dir(entry)
        except Exception as e:
            # Best-effort scan: an unreadable directory is reported and skipped.
            logger.warning(f"Failed to scan directory {directory}: {e}")

    scan_dir(dir_path)
    logger.debug(f"Total matched files and folders: {len(results)}")
    return results
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def setup_logger(name: str = "merger", level: int = logging.INFO) -> logging.Logger:
    """
    Returns the named logger set to the given level, attaching a stdout handler on first use.

    Args:
        name (str): Name of the logger to configure.
        level (int): Logging level applied to the logger.

    Returns:
        logging.Logger: The configured logger. If it already had handlers, no handler
        is added and the level is applied to each existing handler instead.
    """
    configured = logging.getLogger(name)
    configured.setLevel(level)

    if configured.handlers:
        for existing in configured.handlers:
            existing.setLevel(level)
        return configured

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
    configured.addHandler(stdout_handler)
    return configured


# Package-wide logger shared by the other modules.
logger = setup_logger()
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
import os
|
|
3
|
+
import platform
|
|
4
|
+
import shutil
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Tuple, Dict, Callable
|
|
7
|
+
|
|
8
|
+
from .logger import logger  # Add the logger
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_readers_folder() -> Path:
    """
    Resolves the per-user directory that holds installed reader modules, creating it if needed.

    The location depends on platform.system():
      * "Windows": <LOCALAPPDATA, else APPDATA, else home>/Merger/installed_readers
      * "Darwin":  ~/Library/Application Support/Merger/installed_readers
      * otherwise: ~/.local/share/Merger/installed_readers

    Returns:
        Path: The readers directory, which exists when this function returns.
    """
    os_name = platform.system()
    if os_name == "Windows":
        # The home directory is only consulted when neither variable is set.
        app_root = Path(os.getenv("LOCALAPPDATA") or os.getenv("APPDATA") or str(Path.home()))
    elif os_name == "Darwin":
        app_root = Path.home() / "Library" / "Application Support"
    else:
        app_root = Path.home() / ".local" / "share"

    readers_dir = app_root / "Merger" / "installed_readers"
    readers_dir.mkdir(parents=True, exist_ok=True)
    logger.debug(f"Readers directory resolved to: {readers_dir}")
    return readers_dir
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def register_reader(extension: str, module_path: str):
    """
    Validates a custom reader module and installs it for a file extension.

    The module at module_path is imported (its top-level code is executed), checked,
    and then copied into the installed-readers directory as '<extension without dot>.py'.

    Args:
        extension (str): File extension including the leading dot, e.g. '.pdf'.
        module_path (str): Path to the Python module that implements the reader.

    Raises:
        ValueError: If the extension does not start with a dot or contains a path
            separator, if the module's __all__ is not exactly ["reader"], or if the
            module lacks a callable 'validator' or 'reader'.
        ImportError: If no import spec/loader can be built for module_path
            (for example, the file is not a Python source file).
        Exception: Any other error raised while importing or copying the module is
            logged and re-raised unchanged.
    """
    if not extension.startswith("."):
        raise ValueError("Extension must start with a dot, e.g. '.pdf'")

    # The extension becomes a file name inside the readers directory; a separator
    # would let the destination point somewhere else.
    if "/" in extension or "\\" in extension:
        raise ValueError(f"Extension must not contain path separators: '{extension}'")

    source_path = Path(module_path).resolve()
    logger.debug(f"Registering reader for {extension} from {source_path}")

    try:
        spec = importlib.util.spec_from_file_location("temp_custom_reader", source_path)
        # spec_from_file_location returns None when no loader handles the file;
        # fail with a clear message instead of an AttributeError on None.
        if spec is None or spec.loader is None:
            raise ImportError(f"Cannot import reader module from '{source_path}'")

        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        expected_all = ["reader"]
        actual_all = getattr(module, "__all__", None)

        if actual_all != expected_all:
            raise ValueError(f"Invalid module: __all__ must be exactly {expected_all}, got {actual_all}")

        if not callable(getattr(module, "validator", None)):
            raise ValueError("Module must define a callable 'validator'")

        if not callable(getattr(module, "reader", None)):
            raise ValueError("Module must define a callable 'reader'")

        readers_dir = get_readers_folder()
        dest_path = readers_dir / f"{extension[1:]}.py"
        shutil.copy(source_path, dest_path)

        logger.info(f"Reader for '{extension}' registered successfully at '{dest_path}'")

    except Exception as e:
        logger.error(f"Failed to register reader for '{extension}': {e}")
        raise
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def unregister_reader(extension: str):
    """
    Removes the installed reader module for a file extension, if one exists.

    Args:
        extension (str): File extension including the leading dot, e.g. '.pdf'.

    Raises:
        ValueError: If the extension does not start with a dot or contains a
            path separator.
    """
    if not extension.startswith("."):
        raise ValueError("Extension must start with a dot, e.g. '.pdf'")

    # The extension is turned into a file name inside the readers directory; a
    # separator would let the deletion target a .py file somewhere else.
    if "/" in extension or "\\" in extension:
        raise ValueError(f"Extension must not contain path separators: '{extension}'")

    readers_dir = get_readers_folder()
    target = readers_dir / f"{extension[1:]}.py"

    if target.exists():
        target.unlink()
        logger.info(f"Reader for '{extension}' unregistered (removed): {target}")
    else:
        logger.warning(f"No reader found to unregister for extension: '{extension}'")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def list_readers() -> Dict[str, str]:
    """
    Lists the reader modules currently present in the installed-readers directory.

    Returns:
        Dict[str, str]: Maps each extension (a dot plus the module file's stem) to
        the resolved path of its '.py' file.
    """
    installed: Dict[str, str] = {}
    for reader_file in get_readers_folder().glob("*.py"):
        installed[f".{reader_file.stem}"] = str(reader_file.resolve())

    logger.debug(f"Listing installed readers: {installed}")
    return installed
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def load_installed_readers() -> Tuple[Dict[str, Callable], Dict[str, Callable]]:
    """
    Imports every '.py' module in the installed-readers directory and collects its callables.

    A module is skipped with a warning when 'reader' is not listed in its __all__ or is
    not callable, or when 'validator' is listed in __all__ but is not callable. A module
    that fails to import is logged as an error and skipped; nothing is raised for it.

    Returns:
        Tuple[Dict[str, Callable], Dict[str, Callable]]: (readers, validators), both keyed
        by extension (a dot plus the module file's stem). The validators entry is None
        for a module that defines no 'validator' attribute.
    """
    loaded_readers = {}
    loaded_validators = {}

    for module_file in get_readers_folder().glob("*.py"):
        ext = f".{module_file.stem}"
        logger.debug(f"Loading reader module: {module_file}")

        try:
            spec = importlib.util.spec_from_file_location(f"reader_{module_file.stem}", module_file)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)

            exported = getattr(module, "__all__", [])
            reader = getattr(module, "reader", None)
            validator = getattr(module, "validator", None)

            reader_is_usable = "reader" in exported and callable(reader)
            if not reader_is_usable:
                logger.warning(f"Skipping invalid reader module '{module_file.name}': missing or invalid 'reader'")
                continue

            if "validator" in exported and not callable(validator):
                logger.warning(f"Skipping invalid validator in '{module_file.name}'")
                continue

            loaded_readers[ext] = reader
            loaded_validators[ext] = validator
            logger.debug(f"Loaded reader for {ext}")

        except Exception as e:
            # Best effort: one broken module must not prevent the others from loading.
            logger.error(f"Failed to load reader from '{module_file}': {e}")

    return loaded_readers, loaded_validators
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def load_custom_readers(module_path: Path) -> Tuple[Dict[str, Callable], Dict[str, Callable]]:
    """
    Imports a Python module and returns the reader and validator mappings it defines.

    Args:
        module_path (Path): Path to the module; its top-level code is executed.

    Returns:
        Tuple[Dict[str, Callable], Dict[str, Callable]]: The module's 'readers' and
        'validators' attributes, each replaced by an empty dict when absent.

    Raises:
        ImportError: If no import spec/loader can be built for module_path
            (for example, the file is not a Python source file).
        Exception: Any error raised while importing the module is logged and
            re-raised unchanged.
    """
    logger.debug(f"Loading custom readers from: {module_path}")
    try:
        spec = importlib.util.spec_from_file_location("custom_readers_module", module_path)
        # spec_from_file_location returns None when no loader handles the file;
        # fail with a clear message instead of an AttributeError on None.
        if spec is None or spec.loader is None:
            raise ImportError(f"Cannot import custom readers module from '{module_path}'")

        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        readers = getattr(module, "readers", {})
        validators = getattr(module, "validators", {})
        logger.info(f"Custom readers loaded successfully from: {module_path}")
        return readers, validators

    except Exception as e:
        logger.error(f"Failed to load custom readers from {module_path}: {e}")
        raise
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from collections import defaultdict
|
|
3
|
+
from typing import Dict, Union, List
|
|
4
|
+
|
|
5
|
+
# Nested mapping of path component -> sub-tree, used to build the directory tree.
_FileTree = Dict[str, Union["_FileTree", None]]


def generate_tree_visualizer(root_path: Path, paths: List[Path]) -> str:
    """
    Generates a visual tree structure of the provided paths below a root directory.

    Args:
        root_path (Path): The root directory; it is resolved before use.
        paths (List[Path]): File and directory paths to include. Each one is resolved,
            and any path that is not located under the root is left out.

    Returns:
        str: The root directory name followed by one line per entry, sorted by name
        within each level. Entries that are directories on disk get a trailing '/'.
    """
    root_path = root_path.resolve()

    def tree() -> _FileTree:
        return defaultdict(tree)

    file_tree: _FileTree = tree()

    for path in paths:
        try:
            relative_parts = path.resolve().relative_to(root_path).parts
        except ValueError:
            # relative_to() rejects paths outside the root; they are skipped.
            continue

        # Walking the defaultdict creates every intermediate node on demand.
        current = file_tree
        for part in relative_parts:
            current = current[part]

    def build_tree_str(d: _FileTree, prefix: str = "", current_path: Path = root_path) -> str:
        lines = []
        entries = sorted(d)
        last_index = len(entries) - 1
        for index, key in enumerate(entries):
            is_last = index == last_index
            connector = "└── " if is_last else "├── "

            full_path = current_path / key
            display_name = f"{key}/" if full_path.is_dir() else key
            lines.append(f"{prefix}{connector}{display_name}\n")

            # The continuation prefix must be as wide as the connector (4 columns),
            # otherwise nested levels do not line up under their parent.
            extension = "    " if is_last else "│   "
            lines.append(build_tree_str(d[key], prefix + extension, full_path))

        return "".join(lines)

    return f"{root_path.name}/\n{build_tree_str(file_tree)}"
|
|
59
|
+
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: merger-cli
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Merger is a tool that scans a directory, filters files using customizable patterns, and merges readable content into a single output file.
|
|
5
|
+
Author-email: Diogo Toporcov <diogotoporcov@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/diogotoporcov/merger-cli
|
|
8
|
+
Project-URL: Documentation, https://github.com/diogotoporcov/merger-cli
|
|
9
|
+
Keywords: merger,file system,concatenation,automation,development
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
15
|
+
Requires-Python: >=3.8
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: chardet>=5.2.0
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# Merger CLI
|
|
22
|
+
|
|
23
|
+
[](https://www.python.org/)
|
|
24
|
+
[](LICENSE)
|
|
25
|
+
|
|
26
|
+
Merger is a command-line utility for developers that scans a directory, filters files using customizable ignore patterns, and merges all readable content into a single structured output file. It supports custom file readers and validators, making it easily extendable for formats such as `.ipynb`, `.pdf`, or any specific format.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Summary
|
|
31
|
+
|
|
32
|
+
1. [Core Features](#core-features)
|
|
33
|
+
2. [Dependencies](#dependencies)
|
|
34
|
+
3. [Installation](#installation)
|
|
35
|
+
4. [Usage](#usage)
|
|
36
|
+
5. [Custom Readers](#custom-readers)
|
|
37
|
+
6. [CLI Options](#cli-options)
|
|
38
|
+
7. [License](#license)
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Core Features
|
|
43
|
+
|
|
44
|
+
* **Recursive merge** of all readable text files under a root directory.
|
|
45
|
+
* **Glob-based ignore patterns** using `.gitignore`-style syntax.
|
|
46
|
+
* **Automatic encoding detection**.
|
|
47
|
+
* **Custom file readers and validators** for non-text formats.
|
|
48
|
+
* **CLI support** for installation, removal, and listing of custom readers.
|
|
49
|
+
* **Human-readable merged output**, including a directory tree header and file delimiters.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Dependencies
|
|
54
|
+
|
|
55
|
+
| Component | Version / Type | Notes |
|
|
56
|
+
|-------------|----------------|-----------------------------|
|
|
57
|
+
| **Python** | ≥ 3.8 | Required |
|
|
58
|
+
|
|
59
|
+
All dependencies are listed in [`requirements.txt`](requirements.txt).
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
### 1. Clone the repository
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
git clone https://github.com/diogotoporcov/merger-cli.git
|
|
69
|
+
cd merger-cli
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### 2. Create and activate a virtual environment
|
|
73
|
+
|
|
74
|
+
**Linux / macOS**
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
python -m venv .venv
|
|
78
|
+
source .venv/bin/activate
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**Windows (PowerShell)**
|
|
82
|
+
|
|
83
|
+
```powershell
|
|
84
|
+
python -m venv .venv
|
|
85
|
+
.venv\Scripts\Activate.ps1
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### 3. Install dependencies
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pip install -r requirements.txt
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### 4. Install as CLI tool
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
pip install .
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
This registers the `merger` command globally.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Usage
|
|
105
|
+
|
|
106
|
+
### Basic merge
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
merger ./src ./merged.txt
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Custom ignore patterns
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
merger "C:\Users\USER\Desktop\project" "C:\Users\USER\Desktop\project\output.txt" --ignore "*.log" "__pycache__" "*.tmp"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Custom ignore file
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
merger . ./output.txt -p ./merger.ignore
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Include empty files
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
merger ./data ./output.txt --empty
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Verbose output
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
merger ./src ./merged.txt --log-level DEBUG
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Custom Readers
|
|
139
|
+
|
|
140
|
+
You can extend Merger to handle new file formats.
|
|
141
|
+
|
|
142
|
+
### Installing a custom reader
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
merger --install .ipynb path/to/ipynb.py
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Where `ipynb.py` must define:
|
|
149
|
+
|
|
150
|
+
* ```python
|
|
151
|
+
validator: Callable[[Path], bool]
|
|
152
|
+
```
|
|
153
|
+
* ```python
|
|
154
|
+
reader: Callable[[Path], str]
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
To uninstall:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
merger --uninstall .ipynb
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
List installed readers:
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
merger --list-installed
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
An example `.ipynb` reader can be found in
|
|
170
|
+
[`examples/custom_readers/ipynb.py`](examples/custom_readers/ipynb.py).
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## CLI Options
|
|
175
|
+
|
|
176
|
+
| Option | Description |
|
|
177
|
+
|-------------------------|--------------------------------------------------------------------------------|
|
|
178
|
+
| `--ignore` | List of glob-style ignore patterns. |
|
|
179
|
+
| `-f, --ignore-file` | Path to file containing ignore patterns (Default: `<input_dir>/merger.ignore`). |
|
|
180
|
+
| `-i, --install` | Install a custom reader for an extension. |
|
|
181
|
+
| `-u, --uninstall` | Remove a custom reader (`*` removes all). |
|
|
182
|
+
| `--list-installed` | Show installed readers. |
|
|
183
|
+
| `-l, --log-level` | Set logging verbosity (`DEBUG`, `INFO`, etc.). |
|
|
184
|
+
| `--empty` | Include empty files in merged output. |
|
|
185
|
+
| `--prefix` / `--suffix` | Customize file delimiters in output. |
|
|
186
|
+
| `--overrides` | Load override reader definitions from a Python module. |
|
|
187
|
+
| `--no-tree` | Do not include the generated directory tree in the output file. |
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## License
|
|
193
|
+
|
|
194
|
+
This project is licensed under the MIT License — see [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
MANIFEST.in
|
|
3
|
+
README.md
|
|
4
|
+
pyproject.toml
|
|
5
|
+
setup.cfg
|
|
6
|
+
examples/__init__.py
|
|
7
|
+
examples/main.example.py
|
|
8
|
+
examples/custom_readers/__init__.py
|
|
9
|
+
examples/custom_readers/ipynb.py
|
|
10
|
+
merger/__init__.py
|
|
11
|
+
merger/cli.py
|
|
12
|
+
merger/files.py
|
|
13
|
+
merger/filtering.py
|
|
14
|
+
merger/logger.py
|
|
15
|
+
merger/registry.py
|
|
16
|
+
merger/tree.py
|
|
17
|
+
merger_cli.egg-info/PKG-INFO
|
|
18
|
+
merger_cli.egg-info/SOURCES.txt
|
|
19
|
+
merger_cli.egg-info/dependency_links.txt
|
|
20
|
+
merger_cli.egg-info/entry_points.txt
|
|
21
|
+
merger_cli.egg-info/requires.txt
|
|
22
|
+
merger_cli.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
chardet>=5.2.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
merger
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "merger-cli"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "Merger is a tool that scans a directory, filters files using customizable patterns, and merges readable content into a single output file."
|
|
5
|
+
keywords = ["merger", "file system", "concatenation", "automation", "development"]
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Diogo Toporcov", email = "diogotoporcov@gmail.com" }
|
|
8
|
+
]
|
|
9
|
+
license = { text = "MIT" }
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"chardet>=5.2.0"
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Topic :: Software Development :: Libraries :: Python Modules"
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
[project.urls]
|
|
25
|
+
Homepage = "https://github.com/diogotoporcov/merger-cli"
|
|
26
|
+
Documentation = "https://github.com/diogotoporcov/merger-cli"
|
|
27
|
+
|
|
28
|
+
[build-system]
|
|
29
|
+
requires = ["setuptools>=80.9.0", "wheel"]
|
|
30
|
+
build-backend = "setuptools.build_meta"
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
merger = "merger.cli:main"
|