metadata-cleaner 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. metadata_cleaner-1.0.0/LICENSE +0 -0
  2. metadata_cleaner-1.0.0/PKG-INFO +221 -0
  3. metadata_cleaner-1.0.0/README.md +191 -0
  4. metadata_cleaner-1.0.0/metadata_cleaner.egg-info/PKG-INFO +221 -0
  5. metadata_cleaner-1.0.0/metadata_cleaner.egg-info/SOURCES.txt +28 -0
  6. metadata_cleaner-1.0.0/metadata_cleaner.egg-info/dependency_links.txt +1 -0
  7. metadata_cleaner-1.0.0/metadata_cleaner.egg-info/entry_points.txt +2 -0
  8. metadata_cleaner-1.0.0/metadata_cleaner.egg-info/requires.txt +7 -0
  9. metadata_cleaner-1.0.0/metadata_cleaner.egg-info/top_level.txt +2 -0
  10. metadata_cleaner-1.0.0/setup.cfg +4 -0
  11. metadata_cleaner-1.0.0/setup.py +34 -0
  12. metadata_cleaner-1.0.0/src/__init__.py +0 -0
  13. metadata_cleaner-1.0.0/src/cli.py +49 -0
  14. metadata_cleaner-1.0.0/src/config/__init__.py +0 -0
  15. metadata_cleaner-1.0.0/src/config/settings.py +19 -0
  16. metadata_cleaner-1.0.0/src/core/__init__.py +0 -0
  17. metadata_cleaner-1.0.0/src/core/metadata_utils.py +20 -0
  18. metadata_cleaner-1.0.0/src/file_handlers/__init__.py +0 -0
  19. metadata_cleaner-1.0.0/src/file_handlers/audio_handler.py +19 -0
  20. metadata_cleaner-1.0.0/src/file_handlers/docx_handler.py +17 -0
  21. metadata_cleaner-1.0.0/src/file_handlers/image_handler.py +22 -0
  22. metadata_cleaner-1.0.0/src/file_handlers/pdf_handler.py +26 -0
  23. metadata_cleaner-1.0.0/src/file_handlers/video_handler.py +21 -0
  24. metadata_cleaner-1.0.0/src/logs/__init__.py +0 -0
  25. metadata_cleaner-1.0.0/src/logs/logger.py +22 -0
  26. metadata_cleaner-1.0.0/src/remover.py +111 -0
  27. metadata_cleaner-1.0.0/tests/__init__.py +0 -0
  28. metadata_cleaner-1.0.0/tests/test_file_handlers.py +80 -0
  29. metadata_cleaner-1.0.0/tests/test_remover.py +60 -0
  30. metadata_cleaner-1.0.0/tests/test_settings_utils.py +57 -0
File without changes
@@ -0,0 +1,221 @@
1
+ Metadata-Version: 2.2
2
+ Name: metadata-cleaner
3
+ Version: 1.0.0
4
+ Summary: A CLI tool to remove metadata from images, documents, audio, and video files.
5
+ Home-page: https://github.com/sandy-sp/metadata-cleaner
6
+ Author: Sandeep Paidipati
7
+ Author-email: sandeep.paidipati@gmail.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.7
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: click
15
+ Requires-Dist: pillow
16
+ Requires-Dist: pypdf
17
+ Requires-Dist: python-docx
18
+ Requires-Dist: mutagen
19
+ Requires-Dist: pymediainfo
20
+ Requires-Dist: tqdm
21
+ Dynamic: author
22
+ Dynamic: author-email
23
+ Dynamic: classifier
24
+ Dynamic: description
25
+ Dynamic: description-content-type
26
+ Dynamic: home-page
27
+ Dynamic: requires-dist
28
+ Dynamic: requires-python
29
+ Dynamic: summary
30
+
31
+ # ๐Ÿ“„ README.md
32
+ ---
33
+ # Metadata Cleaner ๐Ÿงน๐Ÿ”
34
+ *A powerful CLI tool to remove metadata from images, PDFs, DOCX, audio, and video files.*
35
+
36
+ ---
37
+
38
+ ## ๐Ÿ“Œ Overview
39
+ **Metadata Cleaner** is a fast and efficient **command-line tool** that removes metadata from various file formats, including **images, PDFs, documents, audio, and videos**.
40
+ This tool is designed for **privacy protection, security compliance, and data sanitization**.
41
+
42
+ ๐Ÿ” **Why use Metadata Cleaner?**
43
+ - **Protect your privacy** by stripping hidden metadata from files.
44
+ - **Sanitize sensitive documents** before sharing.
45
+ - **Reduce file size** by removing unnecessary metadata.
46
+ - **Batch process multiple files or entire folders** for efficiency.
47
+
48
+ ---
49
+
50
+ ## ๐Ÿš€ Features
51
+ โœ… **Supports Metadata Removal for:**
52
+ - ๐Ÿ“ท **Images**: `JPG, PNG, TIFF`
53
+ - ๐Ÿ“„ **Documents**: `PDF, DOCX`
54
+ - ๐ŸŽต **Audio**: `MP3, WAV, FLAC`
55
+ - ๐ŸŽฅ **Videos**: `MP4, MKV, MOV`
56
+
57
+ โœ… **Batch Processing**
58
+ - Remove metadata **from individual files or entire folders**.
59
+
60
+ โœ… **Parallel Processing**
61
+ - **Speed up processing** with **multi-file parallel execution**.
62
+
63
+ โœ… **Interactive & User-Friendly CLI**
64
+ - Features **progress bars, confirmation prompts, and summary reports**.
65
+
66
+ โœ… **Safe Metadata Removal**
67
+ - **Original files remain untouched**, and cleaned versions are saved in a separate folder.
68
+
69
+ โœ… **Cross-Platform Compatibility**
70
+ - Works on **Linux, macOS, and Windows**.
71
+
72
+ ---
73
+
74
+ ## ๐Ÿ› ๏ธ Installation
75
+
76
+ ### **1๏ธโƒฃ Install via `pip` (Recommended)**
77
+ To install the latest version from **PyPI**, run:
78
+ ```bash
79
+ pip install metadata-cleaner
80
+ ```
81
+
82
+ ### **2๏ธโƒฃ Install from Source**
83
+ If you cloned this repository, install it manually:
84
+ ```bash
85
+ git clone https://github.com/sandy-sp/metadata-cleaner.git
86
+ cd metadata-cleaner
87
+ pip install .
88
+ ```
89
+
90
+ ---
91
+
92
+ ## ๐Ÿ“– Usage
93
+
94
+ ### **1๏ธโƒฃ Remove Metadata from a Single File**
95
+ ```bash
96
+ metadata-cleaner --file path/to/file.jpg
97
+ ```
98
+ โœ… **Example Output:**
99
+ ```
100
+ Do you want to process file.jpg? [Y/n]: Y
101
+ โœ… Metadata removed. Cleaned file saved at: path/to/cleaned/file.jpg
102
+ ```
103
+
104
+ ### **2๏ธโƒฃ Remove Metadata from All Files in a Folder**
105
+ ```bash
106
+ metadata-cleaner --folder test_folder
107
+ ```
108
+ โœ… **Example Output:**
109
+ ```
110
+ Do you want to process all files in test_folder? [Y/n]: Y
111
+ Processing Files: 100% |โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:10s]
112
+
113
+ ๐Ÿ“Š **Summary Report:**
114
+ โœ… Successfully processed: 5 files
115
+ โŒ Failed to process: 0 files
116
+ Cleaned files saved in: test_folder/cleaned
117
+ ```
118
+
119
+ ### **3๏ธโƒฃ Remove Metadata Without Confirmation Prompt**
120
+ ```bash
121
+ metadata-cleaner --folder test_folder --yes
122
+ ```
123
+
124
+ ### **4๏ธโƒฃ Display Help**
125
+ ```bash
126
+ metadata-cleaner --help
127
+ ```
128
+ โœ… **Example Output:**
129
+ ```
130
+ Usage: metadata-cleaner [OPTIONS]
131
+
132
+ Options:
133
+ --file TEXT Path to the file to clean metadata from.
134
+ --folder TEXT Path to a folder to clean metadata from all supported files.
135
+ --output TEXT Path to save the cleaned file(s).
136
+ --yes Skip confirmation prompts.
137
+ --help Show this message and exit.
138
+ ```
139
+
140
+ ---
141
+
142
+ ## ๐Ÿ”ง How It Works
143
+ 1๏ธโƒฃ **Detects file type** and selects the appropriate metadata removal method.
144
+ 2๏ธโƒฃ **Processes the file** by removing metadata safely.
145
+ 3๏ธโƒฃ **Saves the cleaned version** in the `cleaned/` subfolder.
146
+ 4๏ธโƒฃ **Generates logs and a summary report** for easy tracking.
147
+
148
+ ---
149
+
150
+ ## ๐Ÿ’ป Supported File Formats & Methods
151
+
152
+ | File Type | Supported Formats | Metadata Removal Method |
153
+ |-----------|------------------|------------------------|
154
+ | ๐Ÿ“ท **Images** | `JPG, PNG, TIFF` | Pillow (`PIL`) |
155
+ | ๐Ÿ“„ **Documents** | `PDF, DOCX` | PyPDF2, python-docx |
156
+ | ๐ŸŽต **Audio** | `MP3, WAV, FLAC` | Mutagen |
157
+ | ๐ŸŽฅ **Videos** | `MP4, MKV, MOV` | FFmpeg |
158
+
159
+ ---
160
+
161
+ ## ๐Ÿ— Project Structure
162
+ ```
163
+ metadata-cleaner/
164
+ โ”‚โ”€โ”€ docs/ # Documentation
165
+ โ”‚โ”€โ”€ scripts/ # Setup and installation scripts
166
+ โ”‚โ”€โ”€ src/ # Source code
167
+ โ”‚ โ”‚โ”€โ”€ cli.py # CLI entry point
168
+ โ”‚ โ”‚โ”€โ”€ remover.py # Core metadata remover
169
+ โ”‚ โ”‚โ”€โ”€ file_handlers/ # File-specific handlers
170
+ โ”‚โ”€โ”€ tests/ # Unit tests
171
+ โ”‚โ”€โ”€ test_folder/ # Sample test files
172
+ โ”‚โ”€โ”€ setup.py # Package setup
173
+ โ”‚โ”€โ”€ requirements.txt # Dependencies
174
+ โ”‚โ”€โ”€ LICENSE # License information
175
+ ```
176
+
177
+ ---
178
+
179
+ ## ๐Ÿ’ก Contributing
180
+ We welcome contributions! To contribute:
181
+
182
+ 1๏ธโƒฃ **Fork** the repository.
183
+ 2๏ธโƒฃ **Clone** your forked repo:
184
+ ```bash
185
+ git clone https://github.com/sandy-sp/metadata-cleaner.git
186
+ ```
187
+ 3๏ธโƒฃ **Create a new branch** for your feature:
188
+ ```bash
189
+ git checkout -b feature-name
190
+ ```
191
+ 4๏ธโƒฃ **Make changes & test**:
192
+ ```bash
193
+ pytest tests/
194
+ ```
195
+ 5๏ธโƒฃ **Commit and push**:
196
+ ```bash
197
+ git commit -m "Added new feature"
198
+ git push origin feature-name
199
+ ```
200
+ 6๏ธโƒฃ **Submit a Pull Request (PR).**
201
+
202
+ ---
203
+
204
+ ## ๐Ÿ”’ License
205
+ This project is licensed under the **MIT License**.
206
+ See the full license in [`LICENSE`](LICENSE).
207
+
208
+ ---
209
+
210
+ ## ๐Ÿ”— Links & Resources
211
+ - ๐Ÿ“– **Documentation**: [API Reference](docs/API_REFERENCE.md)
212
+ - ๐Ÿ **PyPI Package**: [metadata-cleaner](https://pypi.org/project/metadata-cleaner/)
213
+ - ๐Ÿš€ **GitHub Repository**: [metadata-cleaner](https://github.com/sandy-sp/metadata-cleaner)
214
+
215
+ ---
216
+
217
+ ## โค๏ธ Support
218
+ If you found this tool useful, give it a โญ on GitHub!
219
+ For issues or questions, [open an issue](https://github.com/sandy-sp/metadata-cleaner/issues).
220
+
221
+ ---
@@ -0,0 +1,191 @@
1
+ # 📄 README.md
2
+ ---
3
+ # Metadata Cleaner 🧹🔍
4
+ *A powerful CLI tool to remove metadata from images, PDFs, DOCX, audio, and video files.*
5
+
6
+ ---
7
+
8
+ ## 📌 Overview
9
+ **Metadata Cleaner** is a fast and efficient **command-line tool** that removes metadata from various file formats, including **images, PDFs, documents, audio, and videos**.
10
+ This tool is designed for **privacy protection, security compliance, and data sanitization**.
11
+
12
+ 🔍 **Why use Metadata Cleaner?**
13
+ - **Protect your privacy** by stripping hidden metadata from files.
14
+ - **Sanitize sensitive documents** before sharing.
15
+ - **Reduce file size** by removing unnecessary metadata.
16
+ - **Batch process multiple files or entire folders** for efficiency.
17
+
18
+ ---
19
+
20
+ ## 🚀 Features
21
+ ✅ **Supports Metadata Removal for:**
22
+ - 📷 **Images**: `JPG, PNG, TIFF`
23
+ - 📄 **Documents**: `PDF, DOCX`
24
+ - 🎵 **Audio**: `MP3, WAV, FLAC`
25
+ - 🎥 **Videos**: `MP4, MKV, MOV`
26
+
27
+ ✅ **Batch Processing**
28
+ - Remove metadata **from individual files or entire folders**.
29
+
30
+ ✅ **Parallel Processing**
31
+ - **Speed up processing** with **multi-file parallel execution**.
32
+
33
+ ✅ **Interactive & User-Friendly CLI**
34
+ - Features **progress bars, confirmation prompts, and summary reports**.
35
+
36
+ ✅ **Safe Metadata Removal**
37
+ - **Original files remain untouched**, and cleaned versions are saved in a separate folder.
38
+
39
+ ✅ **Cross-Platform Compatibility**
40
+ - Works on **Linux, macOS, and Windows**.
41
+
42
+ ---
43
+
44
+ ## 🛠️ Installation
45
+
46
+ ### **1️⃣ Install via `pip` (Recommended)**
47
+ To install the latest version from **PyPI**, run:
48
+ ```bash
49
+ pip install metadata-cleaner
50
+ ```
51
+
52
+ ### **2️⃣ Install from Source**
53
+ If you cloned this repository, install it manually:
54
+ ```bash
55
+ git clone https://github.com/sandy-sp/metadata-cleaner.git
56
+ cd metadata-cleaner
57
+ pip install .
58
+ ```
59
+
60
+ ---
61
+
62
+ ## 📖 Usage
63
+
64
+ ### **1️⃣ Remove Metadata from a Single File**
65
+ ```bash
66
+ metadata-cleaner --file path/to/file.jpg
67
+ ```
68
+ ✅ **Example Output:**
69
+ ```
70
+ Do you want to process file.jpg? [Y/n]: Y
71
+ ✅ Metadata removed. Cleaned file saved at: path/to/cleaned/file.jpg
72
+ ```
73
+
74
+ ### **2️⃣ Remove Metadata from All Files in a Folder**
75
+ ```bash
76
+ metadata-cleaner --folder test_folder
77
+ ```
78
+ ✅ **Example Output:**
79
+ ```
80
+ Do you want to process all files in test_folder? [Y/n]: Y
81
+ Processing Files: 100% |███████████████████████| 5/5 [00:10s]
82
+
83
+ 📊 **Summary Report:**
84
+ ✅ Successfully processed: 5 files
85
+ ❌ Failed to process: 0 files
86
+ Cleaned files saved in: test_folder/cleaned
87
+ ```
88
+
89
+ ### **3️⃣ Remove Metadata Without Confirmation Prompt**
90
+ ```bash
91
+ metadata-cleaner --folder test_folder --yes
92
+ ```
93
+
94
+ ### **4️⃣ Display Help**
95
+ ```bash
96
+ metadata-cleaner --help
97
+ ```
98
+ ✅ **Example Output:**
99
+ ```
100
+ Usage: metadata-cleaner [OPTIONS]
101
+
102
+ Options:
103
+   -f, --file PATH    Path to the file to clean metadata from.
104
+   -d, --folder PATH  Path to a folder to clean metadata from all supported files.
105
+   -o, --output PATH  Path to save the cleaned file(s).
106
+   -y, --yes          Skip confirmation prompts.
107
+   --help             Show this message and exit.
108
+ ```
109
+
110
+ ---
111
+
112
+ ## 🔧 How It Works
113
+ 1️⃣ **Detects file type** and selects the appropriate metadata removal method.
114
+ 2️⃣ **Processes the file** by removing metadata safely.
115
+ 3️⃣ **Saves the cleaned version** in the `cleaned/` subfolder.
116
+ 4️⃣ **Generates logs and a summary report** for easy tracking.
117
+
118
+ ---
119
+
120
+ ## 💻 Supported File Formats & Methods
121
+
122
+ | File Type | Supported Formats | Metadata Removal Method |
123
+ |-----------|------------------|------------------------|
124
+ | 📷 **Images** | `JPG, PNG, TIFF` | Pillow (`PIL`) |
125
+ | 📄 **Documents** | `PDF, DOCX` | pypdf, python-docx |
126
+ | 🎵 **Audio** | `MP3, WAV, FLAC` | Mutagen |
127
+ | 🎥 **Videos** | `MP4, MKV, MOV` | FFmpeg |
128
+
129
+ ---
130
+
131
+ ## 🏗 Project Structure
132
+ ```
133
+ metadata-cleaner/
134
+ │── docs/ # Documentation
135
+ │── scripts/ # Setup and installation scripts
136
+ │── src/ # Source code
137
+ │   │── cli.py # CLI entry point
138
+ │   │── remover.py # Core metadata remover
139
+ │   │── file_handlers/ # File-specific handlers
140
+ │── tests/ # Unit tests
141
+ │── test_folder/ # Sample test files
142
+ │── setup.py # Package setup
143
+ │── requirements.txt # Dependencies
144
+ │── LICENSE # License information
145
+ ```
146
+
147
+ ---
148
+
149
+ ## 💡 Contributing
150
+ We welcome contributions! To contribute:
151
+
152
+ 1️⃣ **Fork** the repository.
153
+ 2️⃣ **Clone** your forked repo (replace `<your-username>` with your GitHub username):
154
+ ```bash
155
+ git clone https://github.com/<your-username>/metadata-cleaner.git
156
+ ```
157
+ 3️⃣ **Create a new branch** for your feature:
158
+ ```bash
159
+ git checkout -b feature-name
160
+ ```
161
+ 4️⃣ **Make changes & test**:
162
+ ```bash
163
+ pytest tests/
164
+ ```
165
+ 5️⃣ **Commit and push**:
166
+ ```bash
167
+ git commit -m "Added new feature"
168
+ git push origin feature-name
169
+ ```
170
+ 6️⃣ **Submit a Pull Request (PR).**
171
+
172
+ ---
173
+
174
+ ## 🔒 License
175
+ This project is licensed under the **MIT License**.
176
+ See the full license in [`LICENSE`](LICENSE).
177
+
178
+ ---
179
+
180
+ ## 🔗 Links & Resources
181
+ - 📖 **Documentation**: [API Reference](docs/API_REFERENCE.md)
182
+ - 🐍 **PyPI Package**: [metadata-cleaner](https://pypi.org/project/metadata-cleaner/)
183
+ - 🚀 **GitHub Repository**: [metadata-cleaner](https://github.com/sandy-sp/metadata-cleaner)
184
+
185
+ ---
186
+
187
+ ## ❤️ Support
188
+ If you found this tool useful, give it a ⭐ on GitHub!
189
+ For issues or questions, [open an issue](https://github.com/sandy-sp/metadata-cleaner/issues).
190
+
191
+ ---
@@ -0,0 +1,221 @@
1
+ Metadata-Version: 2.2
2
+ Name: metadata-cleaner
3
+ Version: 1.0.0
4
+ Summary: A CLI tool to remove metadata from images, documents, audio, and video files.
5
+ Home-page: https://github.com/sandy-sp/metadata-cleaner
6
+ Author: Sandeep Paidipati
7
+ Author-email: sandeep.paidipati@gmail.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.7
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: click
15
+ Requires-Dist: pillow
16
+ Requires-Dist: pypdf
17
+ Requires-Dist: python-docx
18
+ Requires-Dist: mutagen
19
+ Requires-Dist: pymediainfo
20
+ Requires-Dist: tqdm
21
+ Dynamic: author
22
+ Dynamic: author-email
23
+ Dynamic: classifier
24
+ Dynamic: description
25
+ Dynamic: description-content-type
26
+ Dynamic: home-page
27
+ Dynamic: requires-dist
28
+ Dynamic: requires-python
29
+ Dynamic: summary
30
+
31
+ # ๐Ÿ“„ README.md
32
+ ---
33
+ # Metadata Cleaner ๐Ÿงน๐Ÿ”
34
+ *A powerful CLI tool to remove metadata from images, PDFs, DOCX, audio, and video files.*
35
+
36
+ ---
37
+
38
+ ## ๐Ÿ“Œ Overview
39
+ **Metadata Cleaner** is a fast and efficient **command-line tool** that removes metadata from various file formats, including **images, PDFs, documents, audio, and videos**.
40
+ This tool is designed for **privacy protection, security compliance, and data sanitization**.
41
+
42
+ ๐Ÿ” **Why use Metadata Cleaner?**
43
+ - **Protect your privacy** by stripping hidden metadata from files.
44
+ - **Sanitize sensitive documents** before sharing.
45
+ - **Reduce file size** by removing unnecessary metadata.
46
+ - **Batch process multiple files or entire folders** for efficiency.
47
+
48
+ ---
49
+
50
+ ## ๐Ÿš€ Features
51
+ โœ… **Supports Metadata Removal for:**
52
+ - ๐Ÿ“ท **Images**: `JPG, PNG, TIFF`
53
+ - ๐Ÿ“„ **Documents**: `PDF, DOCX`
54
+ - ๐ŸŽต **Audio**: `MP3, WAV, FLAC`
55
+ - ๐ŸŽฅ **Videos**: `MP4, MKV, MOV`
56
+
57
+ โœ… **Batch Processing**
58
+ - Remove metadata **from individual files or entire folders**.
59
+
60
+ โœ… **Parallel Processing**
61
+ - **Speed up processing** with **multi-file parallel execution**.
62
+
63
+ โœ… **Interactive & User-Friendly CLI**
64
+ - Features **progress bars, confirmation prompts, and summary reports**.
65
+
66
+ โœ… **Safe Metadata Removal**
67
+ - **Original files remain untouched**, and cleaned versions are saved in a separate folder.
68
+
69
+ โœ… **Cross-Platform Compatibility**
70
+ - Works on **Linux, macOS, and Windows**.
71
+
72
+ ---
73
+
74
+ ## ๐Ÿ› ๏ธ Installation
75
+
76
+ ### **1๏ธโƒฃ Install via `pip` (Recommended)**
77
+ To install the latest version from **PyPI**, run:
78
+ ```bash
79
+ pip install metadata-cleaner
80
+ ```
81
+
82
+ ### **2๏ธโƒฃ Install from Source**
83
+ If you cloned this repository, install it manually:
84
+ ```bash
85
+ git clone https://github.com/sandy-sp/metadata-cleaner.git
86
+ cd metadata-cleaner
87
+ pip install .
88
+ ```
89
+
90
+ ---
91
+
92
+ ## ๐Ÿ“– Usage
93
+
94
+ ### **1๏ธโƒฃ Remove Metadata from a Single File**
95
+ ```bash
96
+ metadata-cleaner --file path/to/file.jpg
97
+ ```
98
+ โœ… **Example Output:**
99
+ ```
100
+ Do you want to process file.jpg? [Y/n]: Y
101
+ โœ… Metadata removed. Cleaned file saved at: path/to/cleaned/file.jpg
102
+ ```
103
+
104
+ ### **2๏ธโƒฃ Remove Metadata from All Files in a Folder**
105
+ ```bash
106
+ metadata-cleaner --folder test_folder
107
+ ```
108
+ โœ… **Example Output:**
109
+ ```
110
+ Do you want to process all files in test_folder? [Y/n]: Y
111
+ Processing Files: 100% |โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5/5 [00:10s]
112
+
113
+ ๐Ÿ“Š **Summary Report:**
114
+ โœ… Successfully processed: 5 files
115
+ โŒ Failed to process: 0 files
116
+ Cleaned files saved in: test_folder/cleaned
117
+ ```
118
+
119
+ ### **3๏ธโƒฃ Remove Metadata Without Confirmation Prompt**
120
+ ```bash
121
+ metadata-cleaner --folder test_folder --yes
122
+ ```
123
+
124
+ ### **4๏ธโƒฃ Display Help**
125
+ ```bash
126
+ metadata-cleaner --help
127
+ ```
128
+ โœ… **Example Output:**
129
+ ```
130
+ Usage: metadata-cleaner [OPTIONS]
131
+
132
+ Options:
133
+ --file TEXT Path to the file to clean metadata from.
134
+ --folder TEXT Path to a folder to clean metadata from all supported files.
135
+ --output TEXT Path to save the cleaned file(s).
136
+ --yes Skip confirmation prompts.
137
+ --help Show this message and exit.
138
+ ```
139
+
140
+ ---
141
+
142
+ ## ๐Ÿ”ง How It Works
143
+ 1๏ธโƒฃ **Detects file type** and selects the appropriate metadata removal method.
144
+ 2๏ธโƒฃ **Processes the file** by removing metadata safely.
145
+ 3๏ธโƒฃ **Saves the cleaned version** in the `cleaned/` subfolder.
146
+ 4๏ธโƒฃ **Generates logs and a summary report** for easy tracking.
147
+
148
+ ---
149
+
150
+ ## ๐Ÿ’ป Supported File Formats & Methods
151
+
152
+ | File Type | Supported Formats | Metadata Removal Method |
153
+ |-----------|------------------|------------------------|
154
+ | ๐Ÿ“ท **Images** | `JPG, PNG, TIFF` | Pillow (`PIL`) |
155
+ | ๐Ÿ“„ **Documents** | `PDF, DOCX` | PyPDF2, python-docx |
156
+ | ๐ŸŽต **Audio** | `MP3, WAV, FLAC` | Mutagen |
157
+ | ๐ŸŽฅ **Videos** | `MP4, MKV, MOV` | FFmpeg |
158
+
159
+ ---
160
+
161
+ ## ๐Ÿ— Project Structure
162
+ ```
163
+ metadata-cleaner/
164
+ โ”‚โ”€โ”€ docs/ # Documentation
165
+ โ”‚โ”€โ”€ scripts/ # Setup and installation scripts
166
+ โ”‚โ”€โ”€ src/ # Source code
167
+ โ”‚ โ”‚โ”€โ”€ cli.py # CLI entry point
168
+ โ”‚ โ”‚โ”€โ”€ remover.py # Core metadata remover
169
+ โ”‚ โ”‚โ”€โ”€ file_handlers/ # File-specific handlers
170
+ โ”‚โ”€โ”€ tests/ # Unit tests
171
+ โ”‚โ”€โ”€ test_folder/ # Sample test files
172
+ โ”‚โ”€โ”€ setup.py # Package setup
173
+ โ”‚โ”€โ”€ requirements.txt # Dependencies
174
+ โ”‚โ”€โ”€ LICENSE # License information
175
+ ```
176
+
177
+ ---
178
+
179
+ ## ๐Ÿ’ก Contributing
180
+ We welcome contributions! To contribute:
181
+
182
+ 1๏ธโƒฃ **Fork** the repository.
183
+ 2๏ธโƒฃ **Clone** your forked repo:
184
+ ```bash
185
+ git clone https://github.com/sandy-sp/metadata-cleaner.git
186
+ ```
187
+ 3๏ธโƒฃ **Create a new branch** for your feature:
188
+ ```bash
189
+ git checkout -b feature-name
190
+ ```
191
+ 4๏ธโƒฃ **Make changes & test**:
192
+ ```bash
193
+ pytest tests/
194
+ ```
195
+ 5๏ธโƒฃ **Commit and push**:
196
+ ```bash
197
+ git commit -m "Added new feature"
198
+ git push origin feature-name
199
+ ```
200
+ 6๏ธโƒฃ **Submit a Pull Request (PR).**
201
+
202
+ ---
203
+
204
+ ## ๐Ÿ”’ License
205
+ This project is licensed under the **MIT License**.
206
+ See the full license in [`LICENSE`](LICENSE).
207
+
208
+ ---
209
+
210
+ ## ๐Ÿ”— Links & Resources
211
+ - ๐Ÿ“– **Documentation**: [API Reference](docs/API_REFERENCE.md)
212
+ - ๐Ÿ **PyPI Package**: [metadata-cleaner](https://pypi.org/project/metadata-cleaner/)
213
+ - ๐Ÿš€ **GitHub Repository**: [metadata-cleaner](https://github.com/sandy-sp/metadata-cleaner)
214
+
215
+ ---
216
+
217
+ ## โค๏ธ Support
218
+ If you found this tool useful, give it a โญ on GitHub!
219
+ For issues or questions, [open an issue](https://github.com/sandy-sp/metadata-cleaner/issues).
220
+
221
+ ---
@@ -0,0 +1,28 @@
1
+ LICENSE
2
+ README.md
3
+ setup.py
4
+ metadata_cleaner.egg-info/PKG-INFO
5
+ metadata_cleaner.egg-info/SOURCES.txt
6
+ metadata_cleaner.egg-info/dependency_links.txt
7
+ metadata_cleaner.egg-info/entry_points.txt
8
+ metadata_cleaner.egg-info/requires.txt
9
+ metadata_cleaner.egg-info/top_level.txt
10
+ src/__init__.py
11
+ src/cli.py
12
+ src/remover.py
13
+ src/config/__init__.py
14
+ src/config/settings.py
15
+ src/core/__init__.py
16
+ src/core/metadata_utils.py
17
+ src/file_handlers/__init__.py
18
+ src/file_handlers/audio_handler.py
19
+ src/file_handlers/docx_handler.py
20
+ src/file_handlers/image_handler.py
21
+ src/file_handlers/pdf_handler.py
22
+ src/file_handlers/video_handler.py
23
+ src/logs/__init__.py
24
+ src/logs/logger.py
25
+ tests/__init__.py
26
+ tests/test_file_handlers.py
27
+ tests/test_remover.py
28
+ tests/test_settings_utils.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ metadata-cleaner = src.cli:main
@@ -0,0 +1,7 @@
1
+ click
2
+ pillow
3
+ pypdf
4
+ python-docx
5
+ mutagen
6
+ pymediainfo
7
+ tqdm
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,34 @@
1
+ from setuptools import setup, find_packages
2
+
3
+ setup(
4
+ name="metadata-cleaner",
5
+ version="1.0.0",
6
+ author="Sandeep Paidipati",
7
+ author_email="sandeep.paidipati@gmail.com",
8
+ description="A CLI tool to remove metadata from images, documents, audio, and video files.",
9
+ long_description=open("README.md").read(),
10
+ long_description_content_type="text/markdown",
11
+ url="https://github.com/sandy-sp/metadata-cleaner",
12
+ packages=find_packages(),
13
+ include_package_data=True,
14
+ install_requires=[
15
+ "click",
16
+ "pillow",
17
+ "pypdf",
18
+ "python-docx",
19
+ "mutagen",
20
+ "pymediainfo",
21
+ "tqdm"
22
+ ],
23
+ entry_points={
24
+ "console_scripts": [
25
+ "metadata-cleaner = src.cli:main",
26
+ ],
27
+ },
28
+ classifiers=[
29
+ "Programming Language :: Python :: 3",
30
+ "License :: OSI Approved :: MIT License",
31
+ "Operating System :: OS Independent",
32
+ ],
33
+ python_requires='>=3.7',
34
+ )
File without changes
@@ -0,0 +1,49 @@
1
+ import os
2
+ import click
3
+ from src.logs.logger import logger
4
+ from src.remover import remove_metadata, remove_metadata_from_folder
5
+
6
# Exit status contract:
#   0 - processing succeeded, or the user declined the confirmation prompt
#   1 - no target was given, the single file could not be cleaned, or an
#       unexpected error occurred
# The function docstring is kept as a single line because click shows it
# verbatim in the `--help` output.
@click.command()
@click.option('--file', '-f', type=click.Path(exists=True), help="Path to the file to clean metadata from.")
@click.option('--folder', '-d', type=click.Path(exists=True), help="Path to a folder to clean metadata from all supported files.")
@click.option('--output', '-o', type=click.Path(), help="Path to save the cleaned file(s).")
@click.option('--yes', '-y', is_flag=True, help="Skip confirmation prompts.")
def main(file, folder, output, yes):
    """CLI for metadata removal. Supports single file or batch processing with interactivity."""
    failed = False
    try:
        if file:
            # --file takes precedence when both --file and --folder are given.
            if not yes and not click.confirm(f"Do you want to process {file}?", default=True):
                click.echo("❌ Operation cancelled.")
                return

            logger.info(f"Processing single file: {file}")
            cleaned_file = remove_metadata(file, output)
            if cleaned_file:
                click.echo(f"✅ Metadata removed. Cleaned file saved at: {cleaned_file}")
            else:
                click.echo(f"⚠️ Failed to process file: {file}")
                failed = True

        elif folder:
            if not yes and not click.confirm(f"Do you want to process all files in {folder}?", default=True):
                click.echo("❌ Operation cancelled.")
                return

            logger.info(f"Processing folder: {folder}")
            cleaned_files = remove_metadata_from_folder(folder, output)

            # Display summary report
            click.echo("\n📊 **Summary Report:**")
            click.echo(f"✅ Successfully processed: {len(cleaned_files)} files")

            if cleaned_files:
                # NOTE(review): reports `folder` when no --output is given;
                # confirm this matches where remove_metadata_from_folder
                # actually writes its results.
                click.echo(f"Cleaned files saved in: {output if output else folder}")

        else:
            click.echo("❌ Please specify either --file or --folder to process.")
            failed = True

    except click.Abort:
        # Ctrl-C / EOF at the confirmation prompt: let click print "Aborted!"
        # instead of reporting it as an error with an empty message.
        raise
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("CLI Error: %s", e)
        click.echo(f"❌ Error: {e}")
        failed = True

    if failed:
        # Signal failure to shells and scripts; previously every path exited 0.
        raise SystemExit(1)


if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,19 @@
1
+ import os
2
+
3
+ # 🏠 Default output directory
4
+ DEFAULT_OUTPUT_FOLDER = "cleaned"
5
+
6
+ # 🛠 Enable or Disable Parallel Processing
7
+ ENABLE_PARALLEL_PROCESSING = True
8
+
9
+ # 📝 Logging Configuration (LOG_FILE_PATH is a relative path)
10
+ LOG_FILE_PATH = os.path.join("logs", "metadata_cleaner.log")
11
+ LOG_LEVEL = "INFO" # Options: DEBUG, INFO, WARNING, ERROR
12
+
13
+ # 🔧 Supported File Formats (lowercase extensions grouped by category)
14
+ SUPPORTED_FORMATS = {
15
+ "images": [".jpg", ".jpeg", ".png", ".tiff"],
16
+ "documents": [".pdf", ".docx", ".doc"],
17
+ "audio": [".mp3", ".wav", ".flac", ".ogg"],
18
+ "videos": [".mp4", ".mkv", ".mov", ".avi"]
19
+ }
File without changes
@@ -0,0 +1,20 @@
1
+ import os
2
+ import shutil
3
+
4
def ensure_output_folder(output_folder):
    """Ensure the output folder exists, creating it and any parents if needed.

    Args:
        output_folder: Path of the directory that must exist afterwards.

    Raises:
        OSError: If the directory cannot be created, e.g. because of missing
            permissions or because a non-directory already occupies the path.
    """
    # exist_ok=True replaces the exists()-then-makedirs() sequence, which could
    # raise FileExistsError when another worker created the folder in between.
    os.makedirs(output_folder, exist_ok=True)
8
+
9
def copy_file_without_metadata(original_path, output_path):
    """Copy ``original_path`` to ``output_path`` and return the destination.

    NOTE(review): despite the name, this is a plain ``shutil.copy``; nothing
    in this function edits metadata embedded in the file contents.

    Returns:
        ``output_path`` when the copy succeeds, otherwise ``None`` after the
        error has been printed.
    """
    try:
        shutil.copy(original_path, output_path)
    except Exception as copy_error:
        print(f"โŒ Error copying file: {copy_error}")
        return None
    else:
        return output_path
17
+
18
def get_file_extension(file_path):
    """Return the final extension of ``file_path`` (leading dot included), lowercased.

    A path without an extension yields an empty string.
    """
    _, extension = os.path.splitext(file_path)
    return extension.lower()
File without changes
@@ -0,0 +1,19 @@
1
+ from mutagen.mp3 import MP3
2
+ from mutagen.easyid3 import EasyID3
3
+ import shutil
4
+
5
def remove_audio_metadata(file_path, output_path=None):
    """Remove tag metadata from an audio file.

    When ``output_path`` is given, the file is copied there first and the tags
    are stripped from the copy, so the source file is left untouched.
    Previously the tags were deleted from the original before copying.
    Without ``output_path`` the file is cleaned in place, as before.

    NOTE(review): the file is opened with mutagen's ``MP3`` class, so non-MP3
    inputs are expected to fail and return ``None`` - confirm against the
    formats routed to this handler.

    Args:
        file_path: Path of the audio file to clean.
        output_path: Optional destination for the cleaned copy.

    Returns:
        The path of the cleaned file (``output_path`` if given, otherwise
        ``file_path``), or ``None`` if the file could not be processed.
    """
    import os  # local import: this module only imports mutagen and shutil at the top

    target_path = file_path
    created_copy = False
    try:
        # Same source and destination means "clean in place"; copying a file
        # onto itself would raise shutil.SameFileError.
        if output_path and os.path.abspath(output_path) != os.path.abspath(file_path):
            shutil.copy(file_path, output_path)
            target_path = output_path
            created_copy = True

        audio = MP3(target_path, ID3=EasyID3)
        audio.delete()
        audio.save()

        return output_path if output_path else file_path

    except Exception as e:
        print(f"Error removing metadata from {file_path}: {e}")
        if created_copy:
            # Do not leave an uncleaned copy behind that looks like a result.
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None
@@ -0,0 +1,17 @@
1
+ from docx import Document
2
+
3
def remove_docx_metadata(file_path, output_path=None):
    """Blank the text core properties of a DOCX file and save the result.

    Args:
        file_path: Path of the DOCX document to clean.
        output_path: Destination path. Defaults to the input path with
            ``_cleaned`` inserted before the extension, e.g.
            ``report.docx`` -> ``report_cleaned.docx``.

    Returns:
        The path the cleaned document was saved to.

    Raises:
        Whatever ``Document(...)`` or ``doc.save(...)`` raise for unreadable
        input or an unwritable destination; errors are not caught here.
    """
    import os  # local import: this module only imports `docx` at the top

    doc = Document(file_path)

    # Clear every text-valued core property, not only author/title/keywords/
    # comments: fields such as last_modified_by and subject identify people
    # and content as well. Date properties and the revision number are left
    # as they are.
    core = doc.core_properties
    for prop_name in (
        "author",
        "category",
        "comments",
        "content_status",
        "identifier",
        "keywords",
        "language",
        "last_modified_by",
        "subject",
        "title",
        "version",
    ):
        setattr(core, prop_name, "")

    if not output_path:
        # splitext only touches the final extension; the previous
        # str.replace(".", "_cleaned.") rewrote every dot in the path
        # (e.g. "./docs/v1.2.docx" became "_cleaned./docs/...").
        root, ext = os.path.splitext(file_path)
        output_path = f"{root}_cleaned{ext}"

    doc.save(output_path)
    return output_path
@@ -0,0 +1,22 @@
1
+ from PIL import Image
2
+ import os
3
+
4
def remove_image_metadata(file_path, output_path=None):
    """Remove metadata from an image by rebuilding it from its pixel data.

    A new image with the same mode and size is created and filled with the
    source pixels, so nothing else from the source image object is carried
    over.

    Args:
        file_path: Path of the image to clean.
        output_path: Destination path. Defaults to the input path with
            ``_cleaned`` inserted before the extension.

    Returns:
        The path of the cleaned image, or ``None`` if processing failed
        (the error is printed).
    """
    try:
        # The context manager closes the source file handle, which the
        # previous code left open.
        with Image.open(file_path) as img:
            # Create a new image without metadata
            data = list(img.getdata())
            img_no_metadata = Image.new(img.mode, img.size)
            img_no_metadata.putdata(data)

            # Palette images store colour indices; without the palette the
            # copied indices would map to the wrong colours.
            if img.mode == "P":
                palette = img.getpalette()
                if palette:
                    img_no_metadata.putpalette(palette)

        if not output_path:
            # splitext only touches the final extension; str.replace(".", ...)
            # rewrote every dot in the path (e.g. "./pics/a.b.jpg").
            root, ext = os.path.splitext(file_path)
            output_path = f"{root}_cleaned{ext}"

        img_no_metadata.save(output_path)
        return output_path

    except Exception as e:
        print(f"Error removing metadata from {file_path}: {e}")
        return None
@@ -0,0 +1,26 @@
1
# The package declares `pypdf` as its dependency, so import that first.
# `PyPDF2` (the former name of the same library) is kept as a fallback for
# environments that only have the old distribution installed.
try:
    from pypdf import PdfReader, PdfWriter
except ImportError:
    from PyPDF2 import PdfReader, PdfWriter
import os


def remove_pdf_metadata(file_path, output_path=None):
    """Write a copy of a PDF that contains only the pages of the source file.

    Pages are added one by one to a fresh writer, so the output is built from
    the page objects rather than copied from the source file as a whole.

    Args:
        file_path: Path of the PDF to clean.
        output_path: Destination path. Defaults to the input path with
            ``_cleaned`` inserted before the extension.

    Returns:
        The path of the cleaned PDF, or ``None`` if processing failed
        (the error is printed).
    """
    try:
        reader = PdfReader(file_path)
        writer = PdfWriter()

        # Copy pages and remove metadata
        for page in reader.pages:
            writer.add_page(page)

        writer.add_metadata({})  # Clear metadata

        if not output_path:
            # splitext only touches the final extension; str.replace(".", ...)
            # rewrote every dot in the path (e.g. "./docs/v1.2.pdf").
            root, ext = os.path.splitext(file_path)
            output_path = f"{root}_cleaned{ext}"

        with open(output_path, "wb") as f:
            writer.write(f)

        return output_path

    except Exception as e:
        print(f"Error removing metadata from {file_path}: {e}")
        return None
@@ -0,0 +1,21 @@
1
+ import subprocess
2
+ import shutil
3
+
4
def remove_video_metadata(file_path, output_path=None):
    """Remove metadata from a video file by running the ``ffmpeg`` executable.

    The video is re-encoded (H.264 video, AAC audio) with ``-map_metadata -1``.

    Args:
        file_path: Path of the video to clean.
        output_path: Destination path. Defaults to the input path with
            ``_cleaned`` inserted before the extension.

    Returns:
        The path of the cleaned video, or ``None`` if ffmpeg is not installed
        or exits with an error (the error is printed).
    """
    import os  # local import: this module only imports subprocess/shutil at the top

    try:
        if not output_path:
            # splitext only touches the final extension; str.replace(".", ...)
            # rewrote every dot in the path (e.g. "./clips/a.b.mp4").
            root, ext = os.path.splitext(file_path)
            output_path = f"{root}_cleaned{ext}"

        # Use FFmpeg to re-encode the file and strip metadata
        command = [
            "ffmpeg", "-i", file_path, "-map_metadata", "-1",
            "-c:v", "libx264", "-c:a", "aac", output_path, "-y"
        ]
        subprocess.run(
            command,
            stdin=subprocess.DEVNULL,  # keep ffmpeg from reading the caller's stdin
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )

        return output_path

    except FileNotFoundError:
        # Raised by subprocess when the ffmpeg executable itself is missing;
        # this used to escape as an unhandled exception.
        print(f"Error removing metadata from {file_path}: ffmpeg executable not found")
        return None
    except subprocess.CalledProcessError as e:
        print(f"Error removing metadata from {file_path}: {e}")
        return None
File without changes
@@ -0,0 +1,22 @@
1
+ import logging
2
+ import os
3
+
4
# File name of the log written by the package.
LOG_FILE = "metadata_cleaner.log"

# The log directory is resolved as "<directory of this module>/../logs" and is
# created on import if it does not exist yet.
_MODULE_DIR = os.path.dirname(__file__)
LOG_DIR = os.path.abspath(os.path.join(_MODULE_DIR, "..", "logs"))
os.makedirs(LOG_DIR, exist_ok=True)

# Full path of the log file.
LOG_PATH = os.path.join(LOG_DIR, LOG_FILE)

# Root logging configuration: INFO and above go both to the log file and to
# the console, using one shared line format.
_file_handler = logging.FileHandler(LOG_PATH)
_console_handler = logging.StreamHandler()
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[_file_handler, _console_handler],
)

# Logger object imported by the rest of the package.
logger = logging.getLogger("metadata_cleaner")
@@ -0,0 +1,111 @@
1
+ import os
2
+ import concurrent.futures
3
+ from tqdm import tqdm
4
+ from src.logs.logger import logger
5
+ from src.file_handlers.image_handler import remove_image_metadata
6
+ from src.file_handlers.pdf_handler import remove_pdf_metadata
7
+ from src.file_handlers.docx_handler import remove_docx_metadata
8
+ from src.file_handlers.audio_handler import remove_audio_metadata
9
+ from src.file_handlers.video_handler import remove_video_metadata
10
+
11
# Handler functions grouped with the file extensions they are registered for.
_HANDLERS_BY_EXTENSIONS = (
    ((".jpg", ".jpeg", ".png", ".tiff"), remove_image_metadata),
    ((".pdf",), remove_pdf_metadata),
    ((".docx", ".doc"), remove_docx_metadata),
    ((".mp3", ".wav", ".flac", ".ogg"), remove_audio_metadata),
    ((".mp4", ".mkv", ".mov", ".avi"), remove_video_metadata),
)

# Lookup table: lower-case, dot-prefixed extension -> metadata remover function.
SUPPORTED_EXTENSIONS = {
    extension: handler
    for extensions, handler in _HANDLERS_BY_EXTENSIONS
    for extension in extensions
}
18
+
19
def remove_metadata(file_path, output_path=None):
    """Remove metadata from a single file, logging the outcome.

    The handler is chosen from ``SUPPORTED_EXTENSIONS`` by the file's
    lower-cased extension and called with ``(file_path, output_path)``.

    Args:
        file_path: Path of the file to clean.
        output_path: Destination path passed through to the handler; ``None``
            lets the handler choose its own default.

    Returns:
        The path returned by the handler when that path exists on disk,
        otherwise ``None``. This function does not raise: a missing file, an
        unsupported extension and any handler error are logged and reported
        as ``None``.
    """
    try:
        # Each failure is logged once and returned directly, rather than being
        # raised and then logged a second time by the blanket handler below.
        if not os.path.exists(file_path):
            logger.error(f"File not found: {file_path}")
            return None

        ext = os.path.splitext(file_path)[1].lower()
        remover_function = SUPPORTED_EXTENSIONS.get(ext)
        if remover_function is None:
            logger.warning(f"Unsupported file type: {ext}")
            return None

        logger.info(f"Processing file: {file_path}")
        cleaned_file = remover_function(file_path, output_path)

        # A handler signals failure with a falsy return value; also make sure
        # the reported output really exists.
        if cleaned_file and os.path.exists(cleaned_file):
            logger.info(f"Metadata removed successfully: {cleaned_file}")
            return cleaned_file

        logger.error(f"Failed to process file: {file_path}")
        return None

    except Exception as e:
        logger.error(f"Error processing file {file_path}: {e}")
        return None
46
+
47
def process_file(file_path, output_folder):
    """Clean one file; used as the unit of work for batch processing.

    Args:
        file_path: Path of the file to clean.
        output_folder: Folder that receives the cleaned file under the same
            base name. When falsy, the input path itself is passed to the
            handler as the output path.

    Returns:
        The path returned by the handler when that path exists on disk,
        otherwise ``None`` (unsupported extension, handler failure or any
        exception; all are logged).
    """
    try:
        ext = os.path.splitext(file_path)[1].lower()
        handler = SUPPORTED_EXTENSIONS.get(ext)
        if handler is None:
            logger.warning(f"⚠️ Unsupported file type: {file_path}")
            return None

        if output_folder:
            output_path = os.path.join(output_folder, os.path.basename(file_path))
        else:
            output_path = file_path

        cleaned_file = handler(file_path, output_path)
        if cleaned_file and os.path.exists(cleaned_file):
            logger.info(f"✅ Metadata removed: {cleaned_file}")
            return cleaned_file

        logger.error(f"❌ Failed to process: {file_path}")
        return None

    except Exception as e:
        logger.error(f"Error processing {file_path}: {e}")
        return None
66
+
67
def remove_metadata_from_folder(folder_path, output_folder=None):
    """Remove metadata from every supported file under a folder, in parallel.

    The folder is walked recursively. Every file whose lower-cased extension
    is in ``SUPPORTED_EXTENSIONS`` is submitted to a process pool running
    ``process_file``. A progress bar is shown and a summary is logged.

    Args:
        folder_path: Folder to scan.
        output_folder: Folder that receives the cleaned files. Defaults to a
            ``cleaned`` sub-folder of ``folder_path``. It is created if
            missing.

    Returns:
        List of paths of the successfully cleaned files, in completion order.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist.
    """
    if not os.path.exists(folder_path):
        logger.error(f"❌ Folder not found: {folder_path}")
        raise FileNotFoundError(f"Folder not found: {folder_path}")

    # Default to a "cleaned" sub-folder of the folder being scanned.
    if not output_folder:
        output_folder = os.path.join(folder_path, "cleaned")
    os.makedirs(output_folder, exist_ok=True)

    output_root = os.path.abspath(output_folder)
    files_to_process = []
    for root, dirs, files in os.walk(folder_path):
        # Prune the output folder from the walk: when it lives inside
        # folder_path, a second run would otherwise pick up and re-process
        # the results of the previous one.
        dirs[:] = [
            name for name in dirs
            if os.path.abspath(os.path.join(root, name)) != output_root
        ]
        files_to_process.extend(
            os.path.join(root, name)
            for name in files
            if os.path.splitext(name)[1].lower() in SUPPORTED_EXTENSIONS
        )

    processed_files = []
    failed_files = []

    with tqdm(total=len(files_to_process), desc="Processing Files", unit="file") as pbar:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            future_to_file = {
                executor.submit(process_file, file_path, output_folder): file_path
                for file_path in files_to_process
            }

            for future in concurrent.futures.as_completed(future_to_file):
                source_file = future_to_file[future]
                try:
                    result = future.result()
                except Exception as e:
                    # A crashed worker surfaces here; count the file as failed
                    # instead of aborting the whole batch.
                    logger.error(f"Error processing {source_file}: {e}")
                    result = None

                if result:
                    processed_files.append(result)
                else:
                    failed_files.append(source_file)
                pbar.update(1)

    # Summary report
    logger.info("\n📊 **Summary Report:**")
    logger.info(f"✅ Successfully processed: {len(processed_files)} files")
    logger.info(f"❌ Failed to process: {len(failed_files)} files")

    if failed_files:
        logger.info("\n⚠️ Failed Files:")
        for file in failed_files:
            logger.info(f" - {file}")

    return processed_files
File without changes
@@ -0,0 +1,80 @@
1
+ import unittest
2
+ import os
3
+ import subprocess
4
+ from PIL import Image
5
+ from docx import Document
6
+ from PyPDF2 import PdfWriter
7
+ from src.file_handlers.image_handler import remove_image_metadata
8
+ from src.file_handlers.pdf_handler import remove_pdf_metadata
9
+ from src.file_handlers.docx_handler import remove_docx_metadata
10
+ from src.file_handlers.audio_handler import remove_audio_metadata
11
+ from src.file_handlers.video_handler import remove_video_metadata
12
+
13
class TestFileHandlers(unittest.TestCase):
    """Smoke tests: each handler must produce an output file for a valid input."""

    def setUp(self):
        """Create one small, valid sample file per handler."""
        import shutil  # local import: not among this module's top-level imports

        self.test_image = "test_image.jpg"
        self.test_pdf = "test_document.pdf"
        self.test_docx = "test_document.docx"
        self.test_audio = "test_audio.mp3"
        self.test_video = "test_video.mp4"

        # The audio and video samples are generated with ffmpeg; remember
        # whether it is available so those tests can be skipped cleanly.
        self.ffmpeg_available = shutil.which("ffmpeg") is not None

        # Valid JPG file
        img = Image.new("RGB", (100, 100), color="blue")
        img.save(self.test_image, "JPEG")

        # Valid PDF file
        writer = PdfWriter()
        writer.add_metadata({"/Author": "Test"})
        with open(self.test_pdf, "wb") as f:
            writer.write(f)

        # Valid DOCX file
        doc = Document()
        doc.add_paragraph("This is a test document.")
        doc.save(self.test_docx)

        if self.ffmpeg_available:
            # Valid MP3 file (3 seconds of silence)
            subprocess.run(["ffmpeg", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
                            "-t", "3", "-q:a", "9", "-acodec", "libmp3lame", self.test_audio, "-y"],
                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

            # Valid MP4 file (3 seconds of solid blue)
            subprocess.run(["ffmpeg", "-f", "lavfi", "-i", "color=c=blue:s=320x240:d=3",
                            "-vf", "format=yuv420p", self.test_video, "-y"],
                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    @staticmethod
    def _remove_if_present(path):
        """Delete ``path`` if it exists."""
        if os.path.exists(path):
            os.remove(path)

    def _assert_cleaned(self, output_file):
        """Check a handler result and schedule the produced file for removal."""
        # Handlers report failure with None; fail with a clear message instead
        # of a TypeError from os.path.exists(None).
        self.assertIsNotNone(output_file, "handler returned None")
        self.addCleanup(self._remove_if_present, output_file)
        self.assertTrue(os.path.exists(output_file))

    def _require_ffmpeg(self):
        """Skip the current test when the ffmpeg-generated sample is unavailable."""
        if not self.ffmpeg_available:
            self.skipTest("ffmpeg is required to generate the sample file")

    def test_image_handler(self):
        """Test image metadata removal."""
        self._assert_cleaned(remove_image_metadata(self.test_image))

    def test_pdf_handler(self):
        """Test PDF metadata removal."""
        self._assert_cleaned(remove_pdf_metadata(self.test_pdf))

    def test_docx_handler(self):
        """Test DOCX metadata removal."""
        self._assert_cleaned(remove_docx_metadata(self.test_docx))

    def test_audio_handler(self):
        """Test audio metadata removal."""
        self._require_ffmpeg()
        self._assert_cleaned(remove_audio_metadata(self.test_audio))

    def test_video_handler(self):
        """Test video metadata removal."""
        self._require_ffmpeg()
        self._assert_cleaned(remove_video_metadata(self.test_video))

    def tearDown(self):
        """Remove the sample input files."""
        for file in [self.test_image, self.test_pdf, self.test_docx, self.test_audio, self.test_video]:
            self._remove_if_present(file)


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,60 @@
1
+ from PIL import Image
2
+ import subprocess
3
+ import unittest
4
+ import os
5
+ import shutil
6
+ from docx import Document
7
+ from PyPDF2 import PdfWriter
8
+ from src.remover import remove_metadata, remove_metadata_from_folder
9
+
10
class TestMetadataRemover(unittest.TestCase):
    """Batch-mode test for ``remove_metadata_from_folder``."""

    def setUp(self):
        """Populate the input folder with one valid sample per file category."""
        self.test_folder = "test_batch"
        self.output_folder = "test_batch_output"
        os.makedirs(self.test_folder, exist_ok=True)

        self._make_image("test_image.jpg")
        self._make_pdf("test_document.pdf")
        self._make_docx("test_document.docx")
        self._make_mp3("test_audio.mp3")
        self._make_mp4("test_video.mp4")

    def _sample_path(self, name):
        """Return the path of ``name`` inside the input folder."""
        return os.path.join(self.test_folder, name)

    def _make_image(self, name):
        """Write a 100x100 solid red JPEG."""
        target = self._sample_path(name)
        picture = Image.new("RGB", (100, 100), color="red")
        picture.save(target, "JPEG")

    def _make_pdf(self, name):
        """Write a PDF that carries an author entry."""
        pdf = PdfWriter()
        pdf.add_metadata({"/Author": "Test"})
        with open(self._sample_path(name), "wb") as handle:
            pdf.write(handle)

    def _make_docx(self, name):
        """Write a DOCX file containing a single paragraph."""
        document = Document()
        document.add_paragraph("This is a test document.")
        document.save(self._sample_path(name))

    def _make_mp3(self, name):
        """Generate three seconds of silence as MP3 using ffmpeg."""
        target = self._sample_path(name)
        subprocess.run(["ffmpeg", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono",
                        "-t", "3", "-q:a", "9", "-acodec", "libmp3lame", target, "-y"],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    def _make_mp4(self, name):
        """Generate three seconds of solid blue video as MP4 using ffmpeg."""
        target = self._sample_path(name)
        subprocess.run(["ffmpeg", "-f", "lavfi", "-i", "color=c=blue:s=320x240:d=3",
                        "-vf", "format=yuv420p", target, "-y"],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    def test_batch_processing(self):
        """All five samples must be cleaned and their outputs must exist."""
        results = remove_metadata_from_folder(self.test_folder, self.output_folder)
        self.assertEqual(len(results), 5)  # one result per sample file

        for cleaned_path in results:
            self.assertTrue(os.path.exists(cleaned_path))

    def tearDown(self):
        """Delete the input and output folders."""
        for folder in (self.test_folder, self.output_folder):
            shutil.rmtree(folder, ignore_errors=True)


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,57 @@
1
+ import os
2
+ import unittest
3
+ from src.config.settings import DEFAULT_OUTPUT_FOLDER, ENABLE_PARALLEL_PROCESSING, LOG_LEVEL, SUPPORTED_FORMATS
4
+ from src.core.metadata_utils import ensure_output_folder, copy_file_without_metadata, get_file_extension
5
+
6
class TestSettingsAndUtils(unittest.TestCase):
    """Tests for the configuration constants and the core utility helpers."""

    @staticmethod
    def _remove_file_if_present(path):
        """Delete the file at ``path`` if it exists."""
        if os.path.isfile(path):
            os.remove(path)

    @staticmethod
    def _remove_dir_if_present(path):
        """Delete the (empty) directory at ``path`` if it exists."""
        if os.path.isdir(path):
            os.rmdir(path)

    def test_default_output_folder(self):
        """Test if the default output folder is set correctly."""
        self.assertEqual(DEFAULT_OUTPUT_FOLDER, "cleaned")

    def test_parallel_processing_flag(self):
        """Test if the parallel processing flag is a boolean."""
        self.assertIsInstance(ENABLE_PARALLEL_PROCESSING, bool)

    def test_log_level(self):
        """Test if the log level is set to a valid value."""
        valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR"]
        self.assertIn(LOG_LEVEL, valid_levels)

    def test_supported_formats(self):
        """Test if supported formats include key categories."""
        for category in ("images", "documents", "audio", "videos"):
            self.assertIn(category, SUPPORTED_FORMATS)

    def test_ensure_output_folder(self):
        """Test if ensure_output_folder creates the requested directory."""
        test_folder = "test_output_folder"
        # Registered before the call so the folder is removed even when the
        # assertion below fails.
        self.addCleanup(self._remove_dir_if_present, test_folder)

        ensure_output_folder(test_folder)
        self.assertTrue(os.path.exists(test_folder))

    def test_copy_file_without_metadata(self):
        """Test if copy_file_without_metadata produces the copied file."""
        test_file = "test_original.txt"
        copied_file = "test_copied.txt"
        # Registered up front so both files are removed even when the call or
        # an assertion fails.
        self.addCleanup(self._remove_file_if_present, test_file)
        self.addCleanup(self._remove_file_if_present, copied_file)

        with open(test_file, "w") as f:
            f.write("Test file content")

        result = copy_file_without_metadata(test_file, copied_file)
        self.assertIsNotNone(result)
        self.assertTrue(os.path.exists(result))

    def test_get_file_extension(self):
        """Test if get_file_extension returns the lower-cased extension."""
        self.assertEqual(get_file_extension("image.JPG"), ".jpg")
        self.assertEqual(get_file_extension("document.PDF"), ".pdf")
        self.assertEqual(get_file_extension("music.mp3"), ".mp3")


if __name__ == "__main__":
    unittest.main()