metadata-cleaner 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metadata_cleaner-1.0.0/LICENSE +0 -0
- metadata_cleaner-1.0.0/PKG-INFO +221 -0
- metadata_cleaner-1.0.0/README.md +191 -0
- metadata_cleaner-1.0.0/metadata_cleaner.egg-info/PKG-INFO +221 -0
- metadata_cleaner-1.0.0/metadata_cleaner.egg-info/SOURCES.txt +28 -0
- metadata_cleaner-1.0.0/metadata_cleaner.egg-info/dependency_links.txt +1 -0
- metadata_cleaner-1.0.0/metadata_cleaner.egg-info/entry_points.txt +2 -0
- metadata_cleaner-1.0.0/metadata_cleaner.egg-info/requires.txt +7 -0
- metadata_cleaner-1.0.0/metadata_cleaner.egg-info/top_level.txt +2 -0
- metadata_cleaner-1.0.0/setup.cfg +4 -0
- metadata_cleaner-1.0.0/setup.py +34 -0
- metadata_cleaner-1.0.0/src/__init__.py +0 -0
- metadata_cleaner-1.0.0/src/cli.py +49 -0
- metadata_cleaner-1.0.0/src/config/__init__.py +0 -0
- metadata_cleaner-1.0.0/src/config/settings.py +19 -0
- metadata_cleaner-1.0.0/src/core/__init__.py +0 -0
- metadata_cleaner-1.0.0/src/core/metadata_utils.py +20 -0
- metadata_cleaner-1.0.0/src/file_handlers/__init__.py +0 -0
- metadata_cleaner-1.0.0/src/file_handlers/audio_handler.py +19 -0
- metadata_cleaner-1.0.0/src/file_handlers/docx_handler.py +17 -0
- metadata_cleaner-1.0.0/src/file_handlers/image_handler.py +22 -0
- metadata_cleaner-1.0.0/src/file_handlers/pdf_handler.py +26 -0
- metadata_cleaner-1.0.0/src/file_handlers/video_handler.py +21 -0
- metadata_cleaner-1.0.0/src/logs/__init__.py +0 -0
- metadata_cleaner-1.0.0/src/logs/logger.py +22 -0
- metadata_cleaner-1.0.0/src/remover.py +111 -0
- metadata_cleaner-1.0.0/tests/__init__.py +0 -0
- metadata_cleaner-1.0.0/tests/test_file_handlers.py +80 -0
- metadata_cleaner-1.0.0/tests/test_remover.py +60 -0
- metadata_cleaner-1.0.0/tests/test_settings_utils.py +57 -0
|
File without changes
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: metadata-cleaner
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A CLI tool to remove metadata from images, documents, audio, and video files.
|
|
5
|
+
Home-page: https://github.com/sandy-sp/metadata-cleaner
|
|
6
|
+
Author: Sandeep Paidipati
|
|
7
|
+
Author-email: sandeep.paidipati@gmail.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: click
|
|
15
|
+
Requires-Dist: pillow
|
|
16
|
+
Requires-Dist: pypdf
|
|
17
|
+
Requires-Dist: python-docx
|
|
18
|
+
Requires-Dist: mutagen
|
|
19
|
+
Requires-Dist: pymediainfo
|
|
20
|
+
Requires-Dist: tqdm
|
|
21
|
+
Dynamic: author
|
|
22
|
+
Dynamic: author-email
|
|
23
|
+
Dynamic: classifier
|
|
24
|
+
Dynamic: description
|
|
25
|
+
Dynamic: description-content-type
|
|
26
|
+
Dynamic: home-page
|
|
27
|
+
Dynamic: requires-dist
|
|
28
|
+
Dynamic: requires-python
|
|
29
|
+
Dynamic: summary
|
|
30
|
+
|
|
31
|
+
# ๐ README.md
|
|
32
|
+
---
|
|
33
|
+
# Metadata Cleaner ๐งน๐
|
|
34
|
+
*A powerful CLI tool to remove metadata from images, PDFs, DOCX, audio, and video files.*
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## ๐ Overview
|
|
39
|
+
**Metadata Cleaner** is a fast and efficient **command-line tool** that removes metadata from various file formats, including **images, PDFs, documents, audio, and videos**.
|
|
40
|
+
This tool is designed for **privacy protection, security compliance, and data sanitization**.
|
|
41
|
+
|
|
42
|
+
๐ **Why use Metadata Cleaner?**
|
|
43
|
+
- **Protect your privacy** by stripping hidden metadata from files.
|
|
44
|
+
- **Sanitize sensitive documents** before sharing.
|
|
45
|
+
- **Reduce file size** by removing unnecessary metadata.
|
|
46
|
+
- **Batch process multiple files or entire folders** for efficiency.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## ๐ Features
|
|
51
|
+
โ
**Supports Metadata Removal for:**
|
|
52
|
+
- ๐ท **Images**: `JPG, PNG, TIFF`
|
|
53
|
+
- ๐ **Documents**: `PDF, DOCX`
|
|
54
|
+
- ๐ต **Audio**: `MP3, WAV, FLAC`
|
|
55
|
+
- ๐ฅ **Videos**: `MP4, MKV, MOV`
|
|
56
|
+
|
|
57
|
+
โ
**Batch Processing**
|
|
58
|
+
- Remove metadata **from individual files or entire folders**.
|
|
59
|
+
|
|
60
|
+
โ
**Parallel Processing**
|
|
61
|
+
- **Speed up processing** with **multi-file parallel execution**.
|
|
62
|
+
|
|
63
|
+
โ
**Interactive & User-Friendly CLI**
|
|
64
|
+
- Features **progress bars, confirmation prompts, and summary reports**.
|
|
65
|
+
|
|
66
|
+
โ
**Safe Metadata Removal**
|
|
67
|
+
- **Original files remain untouched**, and cleaned versions are saved in a separate folder.
|
|
68
|
+
|
|
69
|
+
โ
**Cross-Platform Compatibility**
|
|
70
|
+
- Works on **Linux, macOS, and Windows**.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## ๐ ๏ธ Installation
|
|
75
|
+
|
|
76
|
+
### **1๏ธโฃ Install via `pip` (Recommended)**
|
|
77
|
+
To install the latest version from **PyPI**, run:
|
|
78
|
+
```bash
|
|
79
|
+
pip install metadata-cleaner
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### **2๏ธโฃ Install from Source**
|
|
83
|
+
If you cloned this repository, install it manually:
|
|
84
|
+
```bash
|
|
85
|
+
git clone https://github.com/sandy-sp/metadata-cleaner.git
|
|
86
|
+
cd metadata-cleaner
|
|
87
|
+
pip install .
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## ๐ Usage
|
|
93
|
+
|
|
94
|
+
### **1๏ธโฃ Remove Metadata from a Single File**
|
|
95
|
+
```bash
|
|
96
|
+
metadata-cleaner --file path/to/file.jpg
|
|
97
|
+
```
|
|
98
|
+
โ
**Example Output:**
|
|
99
|
+
```
|
|
100
|
+
Do you want to process file.jpg? [Y/n]: Y
|
|
101
|
+
โ
Metadata removed. Cleaned file saved at: path/to/cleaned/file.jpg
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### **2๏ธโฃ Remove Metadata from All Files in a Folder**
|
|
105
|
+
```bash
|
|
106
|
+
metadata-cleaner --folder test_folder
|
|
107
|
+
```
|
|
108
|
+
โ
**Example Output:**
|
|
109
|
+
```
|
|
110
|
+
Do you want to process all files in test_folder? [Y/n]: Y
|
|
111
|
+
Processing Files: 100% |โโโโโโโโโโโโโโโโโโโโโโโ| 5/5 [00:10s]
|
|
112
|
+
|
|
113
|
+
๐ **Summary Report:**
|
|
114
|
+
โ
Successfully processed: 5 files
|
|
115
|
+
โ Failed to process: 0 files
|
|
116
|
+
Cleaned files saved in: test_folder/cleaned
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### **3๏ธโฃ Remove Metadata Without Confirmation Prompt**
|
|
120
|
+
```bash
|
|
121
|
+
metadata-cleaner --folder test_folder --yes
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### **4๏ธโฃ Display Help**
|
|
125
|
+
```bash
|
|
126
|
+
metadata-cleaner --help
|
|
127
|
+
```
|
|
128
|
+
โ
**Example Output:**
|
|
129
|
+
```
|
|
130
|
+
Usage: metadata-cleaner [OPTIONS]
|
|
131
|
+
|
|
132
|
+
Options:
|
|
133
|
+
--file TEXT Path to the file to clean metadata from.
|
|
134
|
+
--folder TEXT Path to a folder to clean metadata from all supported files.
|
|
135
|
+
--output TEXT Path to save the cleaned file(s).
|
|
136
|
+
--yes Skip confirmation prompts.
|
|
137
|
+
--help Show this message and exit.
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## ๐ง How It Works
|
|
143
|
+
1๏ธโฃ **Detects file type** and selects the appropriate metadata removal method.
|
|
144
|
+
2๏ธโฃ **Processes the file** by removing metadata safely.
|
|
145
|
+
3๏ธโฃ **Saves the cleaned version** in the `cleaned/` subfolder.
|
|
146
|
+
4๏ธโฃ **Generates logs and a summary report** for easy tracking.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## ๐ป Supported File Formats & Methods
|
|
151
|
+
|
|
152
|
+
| File Type | Supported Formats | Metadata Removal Method |
|
|
153
|
+
|-----------|------------------|------------------------|
|
|
154
|
+
| ๐ท **Images** | `JPG, PNG, TIFF` | Pillow (`PIL`) |
|
|
155
|
+
| ๐ **Documents** | `PDF, DOCX` | PyPDF2, python-docx |
|
|
156
|
+
| ๐ต **Audio** | `MP3, WAV, FLAC` | Mutagen |
|
|
157
|
+
| ๐ฅ **Videos** | `MP4, MKV, MOV` | FFmpeg |
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## ๐ Project Structure
|
|
162
|
+
```
|
|
163
|
+
metadata-cleaner/
|
|
164
|
+
โโโ docs/ # Documentation
|
|
165
|
+
โโโ scripts/ # Setup and installation scripts
|
|
166
|
+
โโโ src/ # Source code
|
|
167
|
+
โ โโโ cli.py # CLI entry point
|
|
168
|
+
โ โโโ remover.py # Core metadata remover
|
|
169
|
+
โ โโโ file_handlers/ # File-specific handlers
|
|
170
|
+
โโโ tests/ # Unit tests
|
|
171
|
+
โโโ test_folder/ # Sample test files
|
|
172
|
+
โโโ setup.py # Package setup
|
|
173
|
+
โโโ requirements.txt # Dependencies
|
|
174
|
+
โโโ LICENSE # License information
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## ๐ก Contributing
|
|
180
|
+
We welcome contributions! To contribute:
|
|
181
|
+
|
|
182
|
+
1๏ธโฃ **Fork** the repository.
|
|
183
|
+
2๏ธโฃ **Clone** your forked repo:
|
|
184
|
+
```bash
|
|
185
|
+
git clone https://github.com/sandy-sp/metadata-cleaner.git
|
|
186
|
+
```
|
|
187
|
+
3๏ธโฃ **Create a new branch** for your feature:
|
|
188
|
+
```bash
|
|
189
|
+
git checkout -b feature-name
|
|
190
|
+
```
|
|
191
|
+
4๏ธโฃ **Make changes & test**:
|
|
192
|
+
```bash
|
|
193
|
+
pytest tests/
|
|
194
|
+
```
|
|
195
|
+
5๏ธโฃ **Commit and push**:
|
|
196
|
+
```bash
|
|
197
|
+
git commit -m "Added new feature"
|
|
198
|
+
git push origin feature-name
|
|
199
|
+
```
|
|
200
|
+
6๏ธโฃ **Submit a Pull Request (PR).**
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## ๐ License
|
|
205
|
+
This project is licensed under the **MIT License**.
|
|
206
|
+
See the full license in [`LICENSE`](LICENSE).
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## ๐ Links & Resources
|
|
211
|
+
- ๐ **Documentation**: [API Reference](docs/API_REFERENCE.md)
|
|
212
|
+
- ๐ **PyPI Package**: [metadata-cleaner](https://pypi.org/project/metadata-cleaner/)
|
|
213
|
+
- ๐ **GitHub Repository**: [metadata-cleaner](https://github.com/sandy-sp/metadata-cleaner)
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
## โค๏ธ Support
|
|
218
|
+
If you found this tool useful, give it a โญ on GitHub!
|
|
219
|
+
For issues or questions, [open an issue](https://github.com/sandy-sp/metadata-cleaner/issues).
|
|
220
|
+
|
|
221
|
+
---
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# ๐ README.md
|
|
2
|
+
---
|
|
3
|
+
# Metadata Cleaner ๐งน๐
|
|
4
|
+
*A powerful CLI tool to remove metadata from images, PDFs, DOCX, audio, and video files.*
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## ๐ Overview
|
|
9
|
+
**Metadata Cleaner** is a fast and efficient **command-line tool** that removes metadata from various file formats, including **images, PDFs, documents, audio, and videos**.
|
|
10
|
+
This tool is designed for **privacy protection, security compliance, and data sanitization**.
|
|
11
|
+
|
|
12
|
+
๐ **Why use Metadata Cleaner?**
|
|
13
|
+
- **Protect your privacy** by stripping hidden metadata from files.
|
|
14
|
+
- **Sanitize sensitive documents** before sharing.
|
|
15
|
+
- **Reduce file size** by removing unnecessary metadata.
|
|
16
|
+
- **Batch process multiple files or entire folders** for efficiency.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## ๐ Features
|
|
21
|
+
โ
**Supports Metadata Removal for:**
|
|
22
|
+
- ๐ท **Images**: `JPG, PNG, TIFF`
|
|
23
|
+
- ๐ **Documents**: `PDF, DOCX`
|
|
24
|
+
- ๐ต **Audio**: `MP3, WAV, FLAC`
|
|
25
|
+
- ๐ฅ **Videos**: `MP4, MKV, MOV`
|
|
26
|
+
|
|
27
|
+
โ
**Batch Processing**
|
|
28
|
+
- Remove metadata **from individual files or entire folders**.
|
|
29
|
+
|
|
30
|
+
โ
**Parallel Processing**
|
|
31
|
+
- **Speed up processing** with **multi-file parallel execution**.
|
|
32
|
+
|
|
33
|
+
โ
**Interactive & User-Friendly CLI**
|
|
34
|
+
- Features **progress bars, confirmation prompts, and summary reports**.
|
|
35
|
+
|
|
36
|
+
โ
**Safe Metadata Removal**
|
|
37
|
+
- **Original files remain untouched**, and cleaned versions are saved in a separate folder.
|
|
38
|
+
|
|
39
|
+
โ
**Cross-Platform Compatibility**
|
|
40
|
+
- Works on **Linux, macOS, and Windows**.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## ๐ ๏ธ Installation
|
|
45
|
+
|
|
46
|
+
### **1๏ธโฃ Install via `pip` (Recommended)**
|
|
47
|
+
To install the latest version from **PyPI**, run:
|
|
48
|
+
```bash
|
|
49
|
+
pip install metadata-cleaner
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### **2๏ธโฃ Install from Source**
|
|
53
|
+
If you cloned this repository, install it manually:
|
|
54
|
+
```bash
|
|
55
|
+
git clone https://github.com/sandy-sp/metadata-cleaner.git
|
|
56
|
+
cd metadata-cleaner
|
|
57
|
+
pip install .
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## ๐ Usage
|
|
63
|
+
|
|
64
|
+
### **1๏ธโฃ Remove Metadata from a Single File**
|
|
65
|
+
```bash
|
|
66
|
+
metadata-cleaner --file path/to/file.jpg
|
|
67
|
+
```
|
|
68
|
+
โ
**Example Output:**
|
|
69
|
+
```
|
|
70
|
+
Do you want to process file.jpg? [Y/n]: Y
|
|
71
|
+
โ
Metadata removed. Cleaned file saved at: path/to/cleaned/file.jpg
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### **2๏ธโฃ Remove Metadata from All Files in a Folder**
|
|
75
|
+
```bash
|
|
76
|
+
metadata-cleaner --folder test_folder
|
|
77
|
+
```
|
|
78
|
+
โ
**Example Output:**
|
|
79
|
+
```
|
|
80
|
+
Do you want to process all files in test_folder? [Y/n]: Y
|
|
81
|
+
Processing Files: 100% |โโโโโโโโโโโโโโโโโโโโโโโ| 5/5 [00:10s]
|
|
82
|
+
|
|
83
|
+
๐ **Summary Report:**
|
|
84
|
+
โ
Successfully processed: 5 files
|
|
85
|
+
โ Failed to process: 0 files
|
|
86
|
+
Cleaned files saved in: test_folder/cleaned
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### **3๏ธโฃ Remove Metadata Without Confirmation Prompt**
|
|
90
|
+
```bash
|
|
91
|
+
metadata-cleaner --folder test_folder --yes
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### **4๏ธโฃ Display Help**
|
|
95
|
+
```bash
|
|
96
|
+
metadata-cleaner --help
|
|
97
|
+
```
|
|
98
|
+
โ
**Example Output:**
|
|
99
|
+
```
|
|
100
|
+
Usage: metadata-cleaner [OPTIONS]
|
|
101
|
+
|
|
102
|
+
Options:
|
|
103
|
+
-f, --file PATH    Path to the file to clean metadata from.
|
|
104
|
+
-d, --folder PATH  Path to a folder to clean metadata from all supported files.
|
|
105
|
+
-o, --output PATH  Path to save the cleaned file(s).
|
|
106
|
+
-y, --yes          Skip confirmation prompts.
|
|
107
|
+
--help Show this message and exit.
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## ๐ง How It Works
|
|
113
|
+
1๏ธโฃ **Detects file type** and selects the appropriate metadata removal method.
|
|
114
|
+
2๏ธโฃ **Processes the file** by removing metadata safely.
|
|
115
|
+
3๏ธโฃ **Saves the cleaned version** in the `cleaned/` subfolder.
|
|
116
|
+
4๏ธโฃ **Generates logs and a summary report** for easy tracking.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## ๐ป Supported File Formats & Methods
|
|
121
|
+
|
|
122
|
+
| File Type | Supported Formats | Metadata Removal Method |
|
|
123
|
+
|-----------|------------------|------------------------|
|
|
124
|
+
| ๐ท **Images** | `JPG, PNG, TIFF` | Pillow (`PIL`) |
|
|
125
|
+
| ๐ **Documents** | `PDF, DOCX` | pypdf, python-docx |
|
|
126
|
+
| ๐ต **Audio** | `MP3, WAV, FLAC` | Mutagen |
|
|
127
|
+
| ๐ฅ **Videos** | `MP4, MKV, MOV` | FFmpeg |
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## ๐ Project Structure
|
|
132
|
+
```
|
|
133
|
+
metadata-cleaner/
├── docs/                # Documentation
├── scripts/             # Setup and installation scripts
├── src/                 # Source code
│   ├── cli.py           # CLI entry point
│   ├── remover.py       # Core metadata remover
│   └── file_handlers/   # File-specific handlers
├── tests/               # Unit tests
├── test_folder/         # Sample test files
├── setup.py             # Package setup
├── requirements.txt     # Dependencies
└── LICENSE              # License information
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## ๐ก Contributing
|
|
150
|
+
We welcome contributions! To contribute:
|
|
151
|
+
|
|
152
|
+
1๏ธโฃ **Fork** the repository.
|
|
153
|
+
2๏ธโฃ **Clone** your forked repo:
|
|
154
|
+
```bash
|
|
155
|
+
git clone https://github.com/<your-username>/metadata-cleaner.git
|
|
156
|
+
```
|
|
157
|
+
3๏ธโฃ **Create a new branch** for your feature:
|
|
158
|
+
```bash
|
|
159
|
+
git checkout -b feature-name
|
|
160
|
+
```
|
|
161
|
+
4๏ธโฃ **Make changes & test**:
|
|
162
|
+
```bash
|
|
163
|
+
pytest tests/
|
|
164
|
+
```
|
|
165
|
+
5๏ธโฃ **Commit and push**:
|
|
166
|
+
```bash
|
|
167
|
+
git commit -m "Added new feature"
|
|
168
|
+
git push origin feature-name
|
|
169
|
+
```
|
|
170
|
+
6๏ธโฃ **Submit a Pull Request (PR).**
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## ๐ License
|
|
175
|
+
This project is licensed under the **MIT License**.
|
|
176
|
+
See the full license in [`LICENSE`](LICENSE).
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## ๐ Links & Resources
|
|
181
|
+
- ๐ **Documentation**: [API Reference](docs/API_REFERENCE.md)
|
|
182
|
+
- ๐ **PyPI Package**: [metadata-cleaner](https://pypi.org/project/metadata-cleaner/)
|
|
183
|
+
- ๐ **GitHub Repository**: [metadata-cleaner](https://github.com/sandy-sp/metadata-cleaner)
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## โค๏ธ Support
|
|
188
|
+
If you found this tool useful, give it a โญ on GitHub!
|
|
189
|
+
For issues or questions, [open an issue](https://github.com/sandy-sp/metadata-cleaner/issues).
|
|
190
|
+
|
|
191
|
+
---
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: metadata-cleaner
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A CLI tool to remove metadata from images, documents, audio, and video files.
|
|
5
|
+
Home-page: https://github.com/sandy-sp/metadata-cleaner
|
|
6
|
+
Author: Sandeep Paidipati
|
|
7
|
+
Author-email: sandeep.paidipati@gmail.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: click
|
|
15
|
+
Requires-Dist: pillow
|
|
16
|
+
Requires-Dist: pypdf
|
|
17
|
+
Requires-Dist: python-docx
|
|
18
|
+
Requires-Dist: mutagen
|
|
19
|
+
Requires-Dist: pymediainfo
|
|
20
|
+
Requires-Dist: tqdm
|
|
21
|
+
Dynamic: author
|
|
22
|
+
Dynamic: author-email
|
|
23
|
+
Dynamic: classifier
|
|
24
|
+
Dynamic: description
|
|
25
|
+
Dynamic: description-content-type
|
|
26
|
+
Dynamic: home-page
|
|
27
|
+
Dynamic: requires-dist
|
|
28
|
+
Dynamic: requires-python
|
|
29
|
+
Dynamic: summary
|
|
30
|
+
|
|
31
|
+
# ๐ README.md
|
|
32
|
+
---
|
|
33
|
+
# Metadata Cleaner ๐งน๐
|
|
34
|
+
*A powerful CLI tool to remove metadata from images, PDFs, DOCX, audio, and video files.*
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## ๐ Overview
|
|
39
|
+
**Metadata Cleaner** is a fast and efficient **command-line tool** that removes metadata from various file formats, including **images, PDFs, documents, audio, and videos**.
|
|
40
|
+
This tool is designed for **privacy protection, security compliance, and data sanitization**.
|
|
41
|
+
|
|
42
|
+
๐ **Why use Metadata Cleaner?**
|
|
43
|
+
- **Protect your privacy** by stripping hidden metadata from files.
|
|
44
|
+
- **Sanitize sensitive documents** before sharing.
|
|
45
|
+
- **Reduce file size** by removing unnecessary metadata.
|
|
46
|
+
- **Batch process multiple files or entire folders** for efficiency.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## ๐ Features
|
|
51
|
+
โ
**Supports Metadata Removal for:**
|
|
52
|
+
- ๐ท **Images**: `JPG, PNG, TIFF`
|
|
53
|
+
- ๐ **Documents**: `PDF, DOCX`
|
|
54
|
+
- ๐ต **Audio**: `MP3, WAV, FLAC`
|
|
55
|
+
- ๐ฅ **Videos**: `MP4, MKV, MOV`
|
|
56
|
+
|
|
57
|
+
โ
**Batch Processing**
|
|
58
|
+
- Remove metadata **from individual files or entire folders**.
|
|
59
|
+
|
|
60
|
+
โ
**Parallel Processing**
|
|
61
|
+
- **Speed up processing** with **multi-file parallel execution**.
|
|
62
|
+
|
|
63
|
+
โ
**Interactive & User-Friendly CLI**
|
|
64
|
+
- Features **progress bars, confirmation prompts, and summary reports**.
|
|
65
|
+
|
|
66
|
+
โ
**Safe Metadata Removal**
|
|
67
|
+
- **Original files remain untouched**, and cleaned versions are saved in a separate folder.
|
|
68
|
+
|
|
69
|
+
โ
**Cross-Platform Compatibility**
|
|
70
|
+
- Works on **Linux, macOS, and Windows**.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## ๐ ๏ธ Installation
|
|
75
|
+
|
|
76
|
+
### **1๏ธโฃ Install via `pip` (Recommended)**
|
|
77
|
+
To install the latest version from **PyPI**, run:
|
|
78
|
+
```bash
|
|
79
|
+
pip install metadata-cleaner
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### **2๏ธโฃ Install from Source**
|
|
83
|
+
If you cloned this repository, install it manually:
|
|
84
|
+
```bash
|
|
85
|
+
git clone https://github.com/sandy-sp/metadata-cleaner.git
|
|
86
|
+
cd metadata-cleaner
|
|
87
|
+
pip install .
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## ๐ Usage
|
|
93
|
+
|
|
94
|
+
### **1๏ธโฃ Remove Metadata from a Single File**
|
|
95
|
+
```bash
|
|
96
|
+
metadata-cleaner --file path/to/file.jpg
|
|
97
|
+
```
|
|
98
|
+
โ
**Example Output:**
|
|
99
|
+
```
|
|
100
|
+
Do you want to process file.jpg? [Y/n]: Y
|
|
101
|
+
โ
Metadata removed. Cleaned file saved at: path/to/cleaned/file.jpg
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### **2๏ธโฃ Remove Metadata from All Files in a Folder**
|
|
105
|
+
```bash
|
|
106
|
+
metadata-cleaner --folder test_folder
|
|
107
|
+
```
|
|
108
|
+
โ
**Example Output:**
|
|
109
|
+
```
|
|
110
|
+
Do you want to process all files in test_folder? [Y/n]: Y
|
|
111
|
+
Processing Files: 100% |โโโโโโโโโโโโโโโโโโโโโโโ| 5/5 [00:10s]
|
|
112
|
+
|
|
113
|
+
๐ **Summary Report:**
|
|
114
|
+
โ
Successfully processed: 5 files
|
|
115
|
+
โ Failed to process: 0 files
|
|
116
|
+
Cleaned files saved in: test_folder/cleaned
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### **3๏ธโฃ Remove Metadata Without Confirmation Prompt**
|
|
120
|
+
```bash
|
|
121
|
+
metadata-cleaner --folder test_folder --yes
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### **4๏ธโฃ Display Help**
|
|
125
|
+
```bash
|
|
126
|
+
metadata-cleaner --help
|
|
127
|
+
```
|
|
128
|
+
โ
**Example Output:**
|
|
129
|
+
```
|
|
130
|
+
Usage: metadata-cleaner [OPTIONS]
|
|
131
|
+
|
|
132
|
+
Options:
|
|
133
|
+
--file TEXT Path to the file to clean metadata from.
|
|
134
|
+
--folder TEXT Path to a folder to clean metadata from all supported files.
|
|
135
|
+
--output TEXT Path to save the cleaned file(s).
|
|
136
|
+
--yes Skip confirmation prompts.
|
|
137
|
+
--help Show this message and exit.
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## ๐ง How It Works
|
|
143
|
+
1๏ธโฃ **Detects file type** and selects the appropriate metadata removal method.
|
|
144
|
+
2๏ธโฃ **Processes the file** by removing metadata safely.
|
|
145
|
+
3๏ธโฃ **Saves the cleaned version** in the `cleaned/` subfolder.
|
|
146
|
+
4๏ธโฃ **Generates logs and a summary report** for easy tracking.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## ๐ป Supported File Formats & Methods
|
|
151
|
+
|
|
152
|
+
| File Type | Supported Formats | Metadata Removal Method |
|
|
153
|
+
|-----------|------------------|------------------------|
|
|
154
|
+
| ๐ท **Images** | `JPG, PNG, TIFF` | Pillow (`PIL`) |
|
|
155
|
+
| ๐ **Documents** | `PDF, DOCX` | PyPDF2, python-docx |
|
|
156
|
+
| ๐ต **Audio** | `MP3, WAV, FLAC` | Mutagen |
|
|
157
|
+
| ๐ฅ **Videos** | `MP4, MKV, MOV` | FFmpeg |
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## ๐ Project Structure
|
|
162
|
+
```
|
|
163
|
+
metadata-cleaner/
|
|
164
|
+
โโโ docs/ # Documentation
|
|
165
|
+
โโโ scripts/ # Setup and installation scripts
|
|
166
|
+
โโโ src/ # Source code
|
|
167
|
+
โ โโโ cli.py # CLI entry point
|
|
168
|
+
โ โโโ remover.py # Core metadata remover
|
|
169
|
+
โ โโโ file_handlers/ # File-specific handlers
|
|
170
|
+
โโโ tests/ # Unit tests
|
|
171
|
+
โโโ test_folder/ # Sample test files
|
|
172
|
+
โโโ setup.py # Package setup
|
|
173
|
+
โโโ requirements.txt # Dependencies
|
|
174
|
+
โโโ LICENSE # License information
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## ๐ก Contributing
|
|
180
|
+
We welcome contributions! To contribute:
|
|
181
|
+
|
|
182
|
+
1๏ธโฃ **Fork** the repository.
|
|
183
|
+
2๏ธโฃ **Clone** your forked repo:
|
|
184
|
+
```bash
|
|
185
|
+
git clone https://github.com/sandy-sp/metadata-cleaner.git
|
|
186
|
+
```
|
|
187
|
+
3๏ธโฃ **Create a new branch** for your feature:
|
|
188
|
+
```bash
|
|
189
|
+
git checkout -b feature-name
|
|
190
|
+
```
|
|
191
|
+
4๏ธโฃ **Make changes & test**:
|
|
192
|
+
```bash
|
|
193
|
+
pytest tests/
|
|
194
|
+
```
|
|
195
|
+
5๏ธโฃ **Commit and push**:
|
|
196
|
+
```bash
|
|
197
|
+
git commit -m "Added new feature"
|
|
198
|
+
git push origin feature-name
|
|
199
|
+
```
|
|
200
|
+
6๏ธโฃ **Submit a Pull Request (PR).**
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## ๐ License
|
|
205
|
+
This project is licensed under the **MIT License**.
|
|
206
|
+
See the full license in [`LICENSE`](LICENSE).
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## ๐ Links & Resources
|
|
211
|
+
- ๐ **Documentation**: [API Reference](docs/API_REFERENCE.md)
|
|
212
|
+
- ๐ **PyPI Package**: [metadata-cleaner](https://pypi.org/project/metadata-cleaner/)
|
|
213
|
+
- ๐ **GitHub Repository**: [metadata-cleaner](https://github.com/sandy-sp/metadata-cleaner)
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
## โค๏ธ Support
|
|
218
|
+
If you found this tool useful, give it a โญ on GitHub!
|
|
219
|
+
For issues or questions, [open an issue](https://github.com/sandy-sp/metadata-cleaner/issues).
|
|
220
|
+
|
|
221
|
+
---
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
metadata_cleaner.egg-info/PKG-INFO
|
|
5
|
+
metadata_cleaner.egg-info/SOURCES.txt
|
|
6
|
+
metadata_cleaner.egg-info/dependency_links.txt
|
|
7
|
+
metadata_cleaner.egg-info/entry_points.txt
|
|
8
|
+
metadata_cleaner.egg-info/requires.txt
|
|
9
|
+
metadata_cleaner.egg-info/top_level.txt
|
|
10
|
+
src/__init__.py
|
|
11
|
+
src/cli.py
|
|
12
|
+
src/remover.py
|
|
13
|
+
src/config/__init__.py
|
|
14
|
+
src/config/settings.py
|
|
15
|
+
src/core/__init__.py
|
|
16
|
+
src/core/metadata_utils.py
|
|
17
|
+
src/file_handlers/__init__.py
|
|
18
|
+
src/file_handlers/audio_handler.py
|
|
19
|
+
src/file_handlers/docx_handler.py
|
|
20
|
+
src/file_handlers/image_handler.py
|
|
21
|
+
src/file_handlers/pdf_handler.py
|
|
22
|
+
src/file_handlers/video_handler.py
|
|
23
|
+
src/logs/__init__.py
|
|
24
|
+
src/logs/logger.py
|
|
25
|
+
tests/__init__.py
|
|
26
|
+
tests/test_file_handlers.py
|
|
27
|
+
tests/test_remover.py
|
|
28
|
+
tests/test_settings_utils.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
# Read the long description explicitly as UTF-8 and close the handle promptly.
# README.md contains non-ASCII characters, so relying on the platform default
# encoding (for example cp1252 on Windows) can fail at build/install time.
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name="metadata-cleaner",
    version="1.0.0",
    author="Sandeep Paidipati",
    author_email="sandeep.paidipati@gmail.com",
    description="A CLI tool to remove metadata from images, documents, audio, and video files.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/sandy-sp/metadata-cleaner",
    # Keep the test suite out of the installed distribution; otherwise a
    # generic top-level ``tests`` package lands in site-packages.
    packages=find_packages(exclude=["tests", "tests.*"]),
    include_package_data=True,
    install_requires=[
        "click",
        "pillow",
        "pypdf",
        "python-docx",
        "mutagen",
        "pymediainfo",
        "tqdm",
    ],
    entry_points={
        "console_scripts": [
            # The console command resolves to ``main`` in src/cli.py.
            "metadata-cleaner = src.cli:main",
        ],
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.7',
)
|
|
File without changes
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import click
|
|
3
|
+
from src.logs.logger import logger
|
|
4
|
+
from src.remover import remove_metadata, remove_metadata_from_folder
|
|
5
|
+
|
|
6
|
+
# Command-line entry point.
#
# Options:
#   --file / -f    existing path of a single file to clean.
#   --folder / -d  existing path of a folder to clean in bulk.
#   --output / -o  optional destination passed through to the remover.
#   --yes / -y     skip the interactive confirmation prompt.
#
# ``--file`` takes precedence: when both --file and --folder are given, only
# the file branch runs. Any exception raised while processing is logged and
# echoed rather than propagated.
#
# NOTE: the function docstring is left exactly as-is because click renders it
# as the command's ``--help`` text.
# NOTE(review): the status glyphs in the messages below were restored from
# mis-encoded text; the bar-chart and cross-mark glyphs are inferred from
# context - confirm against the upstream source.
@click.command()
@click.option('--file', '-f', type=click.Path(exists=True), help="Path to the file to clean metadata from.")
@click.option('--folder', '-d', type=click.Path(exists=True), help="Path to a folder to clean metadata from all supported files.")
@click.option('--output', '-o', type=click.Path(), help="Path to save the cleaned file(s).")
@click.option('--yes', '-y', is_flag=True, help="Skip confirmation prompts.")
def main(file, folder, output, yes):
    """CLI for metadata removal. Supports single file or batch processing with interactivity."""
    try:
        if file:
            # Ask before touching anything unless --yes was passed.
            if not yes and not click.confirm(f"Do you want to process {file}?", default=True):
                click.echo("❌ Operation cancelled.")
                return

            logger.info(f"Processing single file: {file}")
            cleaned_file = remove_metadata(file, output)
            # A falsy result from remove_metadata is reported as a failure.
            if cleaned_file:
                click.echo(f"✅ Metadata removed. Cleaned file saved at: {cleaned_file}")
            else:
                click.echo(f"⚠️ Failed to process file: {file}")

        elif folder:
            if not yes and not click.confirm(f"Do you want to process all files in {folder}?", default=True):
                click.echo("❌ Operation cancelled.")
                return

            logger.info(f"Processing folder: {folder}")
            cleaned_files = remove_metadata_from_folder(folder, output)

            # Display summary report
            click.echo("\n📊 **Summary Report:**")
            click.echo(f"✅ Successfully processed: {len(cleaned_files)} files")

            if cleaned_files:
                click.echo(f"Cleaned files saved in: {output if output else folder}")

        else:
            click.echo("❌ Please specify either --file or --folder to process.")

    except Exception as e:
        # Top-level boundary: report the failure instead of showing a traceback.
        logger.error(f"CLI Error: {e}")
        click.echo(f"❌ Error: {e}")


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
# Name of the default output directory for cleaned files.
DEFAULT_OUTPUT_FOLDER = "cleaned"

# Switch for parallel processing (enabled by default).
# NOTE(review): the code that reads this flag is not visible here.
ENABLE_PARALLEL_PROCESSING = True

# Logging configuration: a relative log-file path and the log level name.
LOG_FILE_PATH = os.path.join("logs", "metadata_cleaner.log")
LOG_LEVEL = "INFO"  # Options: DEBUG, INFO, WARNING, ERROR

# Supported file formats: category name -> lowercase, dot-prefixed extensions.
# NOTE(review): listing an extension here presumably routes it to the matching
# handler; confirm each handler can actually read every listed format.
SUPPORTED_FORMATS = {
    "images": [".jpg", ".jpeg", ".png", ".tiff"],
    "documents": [".pdf", ".docx", ".doc"],
    "audio": [".mp3", ".wav", ".flac", ".ogg"],
    "videos": [".mp4", ".mkv", ".mov", ".avi"]
}
|
|
File without changes
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
|
|
4
|
+
def ensure_output_folder(output_folder):
    """Create ``output_folder`` (including parents) if it does not exist yet.

    Args:
        output_folder: Path of the directory that should exist afterwards.

    Returns:
        None. If the path already exists nothing is changed.
    """
    if os.path.exists(output_folder):
        return
    # exist_ok=True closes the gap between the check above and the creation:
    # another worker may create the same directory in between, and that must
    # not raise FileExistsError.
    os.makedirs(output_folder, exist_ok=True)
|
|
8
|
+
|
|
9
|
+
def copy_file_without_metadata(original_path, output_path):
    """Copy ``original_path`` to ``output_path`` using ``shutil.copy``.

    NOTE(review): despite the name, nothing here removes metadata embedded
    in the file content; this is a plain file copy -- confirm intent.

    Returns:
        ``output_path`` when the copy succeeds; ``None`` when any exception
        occurs (the error is printed to stdout).
    """
    try:
        shutil.copy(original_path, output_path)
    except Exception as e:
        print(f"โ Error copying file: {e}")
        return None
    else:
        return output_path
|
|
17
|
+
|
|
18
|
+
def get_file_extension(file_path):
    """Return the extension of ``file_path`` (leading dot included), lowercased.

    A path without an extension yields an empty string.
    """
    _root, extension = os.path.splitext(file_path)
    return extension.lower()
|
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from mutagen.mp3 import MP3
|
|
2
|
+
from mutagen.easyid3 import EasyID3
|
|
3
|
+
import shutil
|
|
4
|
+
|
|
5
|
+
def remove_audio_metadata(file_path, output_path=None):
    """Remove tag metadata from an audio file.

    When ``output_path`` names a different file, the source is copied there
    first and the tags are removed from the copy, so the input file keeps
    its tags. Without ``output_path`` (or when it is the same file) the tags
    are removed from ``file_path`` in place.

    The container format is detected by ``mutagen.File`` rather than being
    assumed to be MP3.

    Args:
        file_path: Path of the audio file to clean.
        output_path: Optional destination for the cleaned copy.

    Returns:
        ``output_path`` if it was given, otherwise ``file_path``; ``None``
        if the file could not be processed (the error is printed).
    """
    # Function-scope imports: this module imports only mutagen submodules
    # and shutil at top level.
    import os
    import mutagen

    try:
        target = file_path
        if output_path and os.path.abspath(output_path) != os.path.abspath(file_path):
            # Copy first so the tags are stripped from the copy, not the input.
            shutil.copy(file_path, output_path)
            target = output_path

        audio = mutagen.File(target)
        if audio is None:
            raise ValueError("unrecognized audio format")

        # FileType.delete() removes the tags and writes the change to disk.
        audio.delete()

        return output_path if output_path else file_path

    except Exception as e:
        print(f"Error removing metadata from {file_path}: {e}")
        return None
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from docx import Document
|
|
2
|
+
|
|
3
|
+
def remove_docx_metadata(file_path, output_path=None):
    """Blank the textual core properties of a DOCX file and save the result.

    Args:
        file_path: Path of the DOCX file to clean.
        output_path: Destination path. When omitted, ``_cleaned`` is inserted
            before the file extension of ``file_path``.

    Returns:
        The path the cleaned document was saved to.

    Raises:
        Any exception raised by ``Document(...)`` or ``doc.save(...)``
        propagates to the caller.
    """
    # Function-scope import: this module imports only `docx` at top level.
    import os

    doc = Document(file_path)

    # Blank every string-valued core property, not just the author/title set.
    props = doc.core_properties
    for field in (
        "author",
        "title",
        "subject",
        "keywords",
        "comments",
        "category",
        "last_modified_by",
        "content_status",
        "identifier",
        "language",
        "version",
    ):
        setattr(props, field, "")

    if not output_path:
        # splitext touches only the final extension; str.replace(".") would
        # also rewrite dots in directory names and in the file stem.
        root, ext = os.path.splitext(file_path)
        output_path = f"{root}_cleaned{ext}"

    doc.save(output_path)
    return output_path
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from PIL import Image
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
def remove_image_metadata(file_path, output_path=None):
    """Write a copy of an image that carries only its pixel data.

    The pixels are copied into a freshly created image, so ancillary
    information attached to the opened image is not written to the output.

    Args:
        file_path: Path of the image to clean.
        output_path: Destination path. When omitted, ``_cleaned`` is inserted
            before the file extension of ``file_path``.

    Returns:
        The output path on success, ``None`` on failure (the error is
        printed).
    """
    try:
        # The context manager closes the source file handle once the pixel
        # data has been copied out.
        with Image.open(file_path) as img:
            img_no_metadata = Image.new(img.mode, img.size)
            img_no_metadata.putdata(list(img.getdata()))

            # Palette images store colour indices; without the palette the
            # copy would be rendered with the wrong colours.
            if img.mode == "P":
                palette = img.getpalette()
                if palette:
                    img_no_metadata.putpalette(palette)

        if not output_path:
            # splitext touches only the final extension; str.replace(".")
            # would also rewrite dots elsewhere in the path.
            root, ext = os.path.splitext(file_path)
            output_path = f"{root}_cleaned{ext}"

        img_no_metadata.save(output_path)
        return output_path

    except Exception as e:
        print(f"Error removing metadata from {file_path}: {e}")
        return None
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from PyPDF2 import PdfReader, PdfWriter
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
def remove_pdf_metadata(file_path, output_path=None):
    """Write a copy of a PDF built from its pages only.

    The pages are added to a new ``PdfWriter``; the source document's
    information dictionary is not copied to it.

    Args:
        file_path: Path of the PDF to clean.
        output_path: Destination path. When omitted, ``_cleaned`` is inserted
            before the file extension of ``file_path``.

    Returns:
        The output path on success, ``None`` on failure (the error is
        printed).
    """
    try:
        reader = PdfReader(file_path)
        writer = PdfWriter()

        for page in reader.pages:
            writer.add_page(page)

        # An empty mapping adds no entries to the new document's metadata.
        writer.add_metadata({})

        if not output_path:
            # splitext touches only the final extension; str.replace(".")
            # would also rewrite dots elsewhere in the path.
            root, ext = os.path.splitext(file_path)
            output_path = f"{root}_cleaned{ext}"

        with open(output_path, "wb") as f:
            writer.write(f)

        return output_path

    except Exception as e:
        print(f"Error removing metadata from {file_path}: {e}")
        return None
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import shutil
|
|
3
|
+
|
|
4
|
+
def remove_video_metadata(file_path, output_path=None):
    """Re-encode a video with FFmpeg, dropping its container metadata.

    Runs ``ffmpeg`` with ``-map_metadata -1`` and re-encodes video with
    libx264 and audio with AAC.

    Args:
        file_path: Path of the video to clean.
        output_path: Destination path. When omitted, ``_cleaned`` is inserted
            before the file extension of ``file_path``.

    Returns:
        The output path on success; ``None`` if FFmpeg exits with an error
        or cannot be started (the error is printed).
    """
    # Function-scope import: this module imports only subprocess and shutil.
    import os

    try:
        if not output_path:
            # splitext touches only the final extension; str.replace(".")
            # would also rewrite dots elsewhere in the path.
            root, ext = os.path.splitext(file_path)
            output_path = f"{root}_cleaned{ext}"

        # Global options such as -y belong before the input/output files.
        command = [
            "ffmpeg", "-y", "-i", file_path, "-map_metadata", "-1",
            "-c:v", "libx264", "-c:a", "aac", output_path,
        ]
        subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)

        return output_path

    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable ffmpeg binary.
        print(f"Error removing metadata from {file_path}: {e}")
        return None
|
|
File without changes
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
LOG_FILE = "metadata_cleaner.log"

# The log directory sits next to this package: <parent of this file's dir>/logs.
LOG_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "logs"))
LOG_PATH = os.path.join(LOG_DIR, LOG_FILE)

# Console logging is always available. File logging is added only when the
# log directory can be created and the file opened, so importing this module
# does not fail when the install location is not writable.
_handlers = [logging.StreamHandler()]
_file_log_error = None
try:
    os.makedirs(LOG_DIR, exist_ok=True)
    _handlers.insert(0, logging.FileHandler(LOG_PATH))
except OSError as exc:
    _file_log_error = exc

# Configure the root logger.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=_handlers,
)

logger = logging.getLogger("metadata_cleaner")

if _file_log_error is not None:
    logger.warning("File logging disabled (%s); logging to console only.", _file_log_error)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import concurrent.futures
|
|
3
|
+
from tqdm import tqdm
|
|
4
|
+
from src.logs.logger import logger
|
|
5
|
+
from src.file_handlers.image_handler import remove_image_metadata
|
|
6
|
+
from src.file_handlers.pdf_handler import remove_pdf_metadata
|
|
7
|
+
from src.file_handlers.docx_handler import remove_docx_metadata
|
|
8
|
+
from src.file_handlers.audio_handler import remove_audio_metadata
|
|
9
|
+
from src.file_handlers.video_handler import remove_video_metadata
|
|
10
|
+
|
|
11
|
+
# Dispatch table: lowercase file extension -> handler that cleans that format.
SUPPORTED_EXTENSIONS = {
    **dict.fromkeys((".jpg", ".jpeg", ".png", ".tiff"), remove_image_metadata),
    **dict.fromkeys((".pdf",), remove_pdf_metadata),
    **dict.fromkeys((".docx", ".doc"), remove_docx_metadata),
    **dict.fromkeys((".mp3", ".wav", ".flac", ".ogg"), remove_audio_metadata),
    **dict.fromkeys((".mp4", ".mkv", ".mov", ".avi"), remove_video_metadata),
}
|
|
18
|
+
|
|
19
|
+
def remove_metadata(file_path, output_path=None):
    """Remove metadata from a single file using the handler for its extension.

    Args:
        file_path: Path of the file to clean.
        output_path: Optional destination, passed through to the handler.

    Returns:
        The path of the cleaned file, or ``None`` when the file is missing,
        its extension is unsupported, or the handler fails. Every failure is
        logged once; no exception is propagated.
    """
    try:
        if not os.path.exists(file_path):
            logger.error(f"File not found: {file_path}")
            return None

        ext = os.path.splitext(file_path)[1].lower()
        remover_function = SUPPORTED_EXTENSIONS.get(ext)
        if remover_function is None:
            logger.warning(f"Unsupported file type: {ext}")
            return None

        logger.info(f"Processing file: {file_path}")
        cleaned_file = remover_function(file_path, output_path)

        # Handlers signal failure by returning None; also confirm that the
        # reported output actually exists on disk.
        if cleaned_file and os.path.exists(cleaned_file):
            logger.info(f"Metadata removed successfully: {cleaned_file}")
            return cleaned_file

        logger.error(f"Failed to process file: {file_path}")
        return None

    except Exception as e:
        logger.error(f"Error processing file {file_path}: {e}")
        return None
|
|
46
|
+
|
|
47
|
+
def process_file(file_path, output_folder):
    """Clean one file; intended to be run as a worker task.

    Args:
        file_path: Path of the file to clean.
        output_folder: Directory that receives the cleaned file under the
            same base name. When falsy, the handler chooses its own default
            output name instead of writing over the input.

    Returns:
        The path of the cleaned file, or ``None`` when the extension is
        unsupported or processing fails. Failures are logged, not raised.
    """
    try:
        ext = os.path.splitext(file_path)[1].lower()
        handler = SUPPORTED_EXTENSIONS.get(ext)
        if handler is None:
            logger.warning(f"⚠️ Unsupported file type: {file_path}")
            return None

        # Never hand the input path to the handler as its output: a handler
        # must not write to the file it is still reading.
        output_path = (
            os.path.join(output_folder, os.path.basename(file_path))
            if output_folder
            else None
        )

        cleaned_file = handler(file_path, output_path)
        if cleaned_file and os.path.exists(cleaned_file):
            logger.info(f"✅ Metadata removed: {cleaned_file}")
            return cleaned_file

        logger.error(f"❌ Failed to process: {file_path}")
        return None

    except Exception as e:
        logger.error(f"Error processing {file_path}: {e}")
        return None
|
|
66
|
+
|
|
67
|
+
def remove_metadata_from_folder(folder_path, output_folder=None):
    """Clean every supported file under a folder using a process pool.

    The folder is walked recursively. The output folder itself is excluded
    from the walk so that files cleaned by an earlier run are not picked up
    as inputs again.

    Args:
        folder_path: Root folder to scan.
        output_folder: Directory for the cleaned files. Defaults to a
            ``cleaned`` subfolder of ``folder_path``. It is created if
            missing.

    Returns:
        List of paths of the successfully cleaned files.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist.
    """
    if not os.path.exists(folder_path):
        logger.error(f"❌ Folder not found: {folder_path}")
        raise FileNotFoundError(f"Folder not found: {folder_path}")

    if not output_folder:
        output_folder = os.path.join(folder_path, "cleaned")
    os.makedirs(output_folder, exist_ok=True)
    output_root = os.path.abspath(output_folder)

    files_to_process = []
    for root, dirs, files in os.walk(folder_path):
        # Prune the output directory in place so os.walk does not descend
        # into it.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != output_root
        ]
        for file in files:
            if os.path.splitext(file)[1].lower() in SUPPORTED_EXTENSIONS:
                files_to_process.append(os.path.join(root, file))

    processed_files = []
    failed_files = []

    with tqdm(total=len(files_to_process), desc="Processing Files", unit="file") as pbar:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            future_to_file = {
                executor.submit(process_file, file_path, output_folder): file_path
                for file_path in files_to_process
            }

            for future in concurrent.futures.as_completed(future_to_file):
                source = future_to_file[future]
                try:
                    result = future.result()
                except Exception as e:
                    # A crashed worker counts as one failed file instead of
                    # aborting the whole batch.
                    logger.error(f"Error processing {source}: {e}")
                    result = None

                if result:
                    processed_files.append(result)
                else:
                    failed_files.append(source)
                pbar.update(1)

    # Summary report
    logger.info("\n📊 **Summary Report:**")
    logger.info(f"✅ Successfully processed: {len(processed_files)} files")
    logger.info(f"❌ Failed to process: {len(failed_files)} files")

    if failed_files:
        logger.info("\n⚠️ Failed Files:")
        for file in failed_files:
            logger.info(f" - {file}")

    return processed_files
|
|
File without changes
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import os
|
|
3
|
+
import subprocess
|
|
4
|
+
from PIL import Image
|
|
5
|
+
from docx import Document
|
|
6
|
+
from PyPDF2 import PdfWriter
|
|
7
|
+
from src.file_handlers.image_handler import remove_image_metadata
|
|
8
|
+
from src.file_handlers.pdf_handler import remove_pdf_metadata
|
|
9
|
+
from src.file_handlers.docx_handler import remove_docx_metadata
|
|
10
|
+
from src.file_handlers.audio_handler import remove_audio_metadata
|
|
11
|
+
from src.file_handlers.video_handler import remove_video_metadata
|
|
12
|
+
|
|
13
|
+
class TestFileHandlers(unittest.TestCase):
    """Smoke tests: each handler must report an output file that exists."""

    def setUp(self):
        """Create one small sample file per handler in the working directory."""
        self.test_image = "test_image.jpg"
        self.test_pdf = "test_document.pdf"
        self.test_docx = "test_document.docx"
        self.test_audio = "test_audio.mp3"
        self.test_video = "test_video.mp4"

        # JPEG sample
        img = Image.new("RGB", (100, 100), color="blue")
        img.save(self.test_image, "JPEG")

        # PDF sample carrying an author entry
        writer = PdfWriter()
        writer.add_metadata({"/Author": "Test"})
        with open(self.test_pdf, "wb") as f:
            writer.write(f)

        # DOCX sample
        doc = Document()
        doc.add_paragraph("This is a test document.")
        doc.save(self.test_docx)

        # MP3 sample: three seconds of silence generated by FFmpeg
        subprocess.run(["ffmpeg", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
                        "-t", "3", "-q:a", "9", "-acodec", "libmp3lame", self.test_audio, "-y"],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        # MP4 sample: three seconds of solid colour generated by FFmpeg
        subprocess.run(["ffmpeg", "-f", "lavfi", "-i", "color=c=blue:s=320x240:d=3",
                        "-vf", "format=yuv420p", self.test_video, "-y"],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    def test_image_handler(self):
        """Test image metadata removal."""
        output_file = remove_image_metadata(self.test_image)
        self.assertTrue(os.path.exists(output_file))

    def test_pdf_handler(self):
        """Test PDF metadata removal."""
        output_file = remove_pdf_metadata(self.test_pdf)
        self.assertTrue(os.path.exists(output_file))

    def test_docx_handler(self):
        """Test DOCX metadata removal."""
        output_file = remove_docx_metadata(self.test_docx)
        self.assertTrue(os.path.exists(output_file))

    def test_audio_handler(self):
        """Test audio metadata removal."""
        output_file = remove_audio_metadata(self.test_audio)
        self.assertTrue(os.path.exists(output_file))

    def test_video_handler(self):
        """Test video metadata removal."""
        output_file = remove_video_metadata(self.test_video)
        self.assertTrue(os.path.exists(output_file))

    def tearDown(self):
        """Remove the sample inputs and any ``*_cleaned`` outputs next to them."""
        for source in [self.test_image, self.test_pdf, self.test_docx, self.test_audio, self.test_video]:
            root, ext = os.path.splitext(source)
            # Handlers called without an output path write "<stem>_cleaned<ext>".
            for path in (source, f"{root}_cleaned{ext}"):
                if os.path.exists(path):
                    os.remove(path)


if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from PIL import Image
|
|
2
|
+
import subprocess
|
|
3
|
+
import unittest
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
from docx import Document
|
|
7
|
+
from PyPDF2 import PdfWriter
|
|
8
|
+
from src.remover import remove_metadata, remove_metadata_from_folder
|
|
9
|
+
|
|
10
|
+
class TestMetadataRemover(unittest.TestCase):
    """Batch-mode check for ``remove_metadata_from_folder``."""

    def setUp(self):
        """Fill a scratch folder with one sample file per supported family."""
        self.test_folder = "test_batch"
        self.output_folder = "test_batch_output"
        os.makedirs(self.test_folder, exist_ok=True)

        def in_folder(name):
            return os.path.join(self.test_folder, name)

        quiet = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}

        # JPEG sample
        Image.new("RGB", (100, 100), color="red").save(in_folder("test_image.jpg"), "JPEG")

        # PDF sample carrying an author entry
        pdf = PdfWriter()
        pdf.add_metadata({"/Author": "Test"})
        with open(in_folder("test_document.pdf"), "wb") as handle:
            pdf.write(handle)

        # DOCX sample
        document = Document()
        document.add_paragraph("This is a test document.")
        document.save(in_folder("test_document.docx"))

        # MP3 sample: three seconds of silence generated by FFmpeg
        subprocess.run(
            ["ffmpeg", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
             "-t", "3", "-q:a", "9", "-acodec", "libmp3lame",
             in_folder("test_audio.mp3"), "-y"],
            **quiet,
        )

        # MP4 sample: three seconds of solid colour generated by FFmpeg
        subprocess.run(
            ["ffmpeg", "-f", "lavfi", "-i", "color=c=blue:s=320x240:d=3",
             "-vf", "format=yuv420p", in_folder("test_video.mp4"), "-y"],
            **quiet,
        )

    def test_batch_processing(self):
        """All five samples are cleaned and every reported output exists."""
        outputs = remove_metadata_from_folder(self.test_folder, self.output_folder)
        self.assertEqual(len(outputs), 5)  # one result per sample file

        for path in outputs:
            self.assertTrue(os.path.exists(path))

    def tearDown(self):
        """Delete the scratch input and output folders."""
        for folder in (self.test_folder, self.output_folder):
            shutil.rmtree(folder, ignore_errors=True)


if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import unittest
|
|
3
|
+
from src.config.settings import DEFAULT_OUTPUT_FOLDER, ENABLE_PARALLEL_PROCESSING, LOG_LEVEL, SUPPORTED_FORMATS
|
|
4
|
+
from src.core.metadata_utils import ensure_output_folder, copy_file_without_metadata, get_file_extension
|
|
5
|
+
|
|
6
|
+
class TestSettingsAndUtils(unittest.TestCase):
    """Checks for the settings constants and the small path/file helpers."""

    @staticmethod
    def _remove_if_exists(path):
        """Delete ``path`` if present; used as a cleanup callback."""
        if os.path.exists(path):
            os.remove(path)

    def test_default_output_folder(self):
        """Test if the default output folder is set correctly."""
        self.assertEqual(DEFAULT_OUTPUT_FOLDER, "cleaned")

    def test_parallel_processing_flag(self):
        """Test if the parallel processing flag is a boolean."""
        self.assertIsInstance(ENABLE_PARALLEL_PROCESSING, bool)

    def test_log_level(self):
        """Test if the log level is set to a valid value."""
        valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR"]
        self.assertIn(LOG_LEVEL, valid_levels)

    def test_supported_formats(self):
        """Test if supported formats include key categories."""
        for category in ("images", "documents", "audio", "videos"):
            self.assertIn(category, SUPPORTED_FORMATS)

    def test_ensure_output_folder(self):
        """Test if ensure_output_folder creates the directory."""
        test_folder = "test_output_folder"
        ensure_output_folder(test_folder)
        # Registered before the assertion so the folder is removed even if
        # the assertion fails.
        self.addCleanup(os.rmdir, test_folder)
        self.assertTrue(os.path.isdir(test_folder))

    def test_copy_file_without_metadata(self):
        """Test if copy_file_without_metadata copies a file and returns its path."""
        test_file = "test_original.txt"
        copied_file = "test_copied.txt"

        with open(test_file, "w") as f:
            f.write("Test file content")
        # Cleanups run even when an assertion below fails.
        self.addCleanup(self._remove_if_exists, test_file)
        self.addCleanup(self._remove_if_exists, copied_file)

        result = copy_file_without_metadata(test_file, copied_file)
        self.assertEqual(result, copied_file)
        self.assertTrue(os.path.exists(copied_file))
        with open(copied_file) as f:
            self.assertEqual(f.read(), "Test file content")

    def test_get_file_extension(self):
        """Test if get_file_extension extracts lowercase file extensions."""
        self.assertEqual(get_file_extension("image.JPG"), ".jpg")
        self.assertEqual(get_file_extension("document.PDF"), ".pdf")
        self.assertEqual(get_file_extension("music.mp3"), ".mp3")


if __name__ == "__main__":
    unittest.main()
|