folderops 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
folderops/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ from .splitter import split_dataset
2
+ from .merger import merge_folders
3
+ from .organizer import organize_by_labels
4
+ from .structure import create_structure
5
+
6
# Public API of the package: the four entry points imported above from the
# submodules. This list is what `from folderops import *` exposes.
__all__ = [
    'split_dataset',
    'merge_folders',
    'organize_by_labels',
    'create_structure',
]
folderops/merger.py ADDED
@@ -0,0 +1,43 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Sequence
5
+
6
+ from .utils import ensure_directory, list_image_files, normalize_extensions, transfer_file, unique_destination_path, validate_operation
7
+
8
+
9
def merge_folders(
    folders: Sequence[str | Path],
    output: str | Path,
    mode: str = 'copy',
    extensions: Sequence[str] | None = None,
) -> list[Path]:
    """Merge image files from multiple folders into a single output folder.

    Every input folder is scanned before anything is written, so an invalid
    folder aborts the call without leaving a half-merged output directory.
    Name clashes in the output are resolved by ``unique_destination_path``.

    Args:
        folders: Input directories containing images. A single ``str`` or
            ``Path`` is accepted and treated as a one-element sequence.
        output: Destination directory; created if it does not exist.
        mode: File transfer mode, either 'copy' or 'move'.
        extensions: Iterable of allowed image extensions. ``None`` selects
            the package defaults.

    Returns:
        A list of output file paths created in the merged directory, in
        transfer order (folders in the given order, files sorted per folder).

    Raises:
        ValueError: If no folders are provided, or if ``mode`` or
            ``extensions`` is invalid.
        FileNotFoundError: If an input folder does not exist.
        NotADirectoryError: If an input path exists but is not a directory.
    """
    if not folders:
        raise ValueError('folders must contain at least one directory.')
    # A lone path is itself a sequence of characters; iterating it would treat
    # every character as a folder name.
    if isinstance(folders, (str, Path)):
        folders = [folders]

    mode = validate_operation(mode)
    allowed = normalize_extensions(extensions)

    # Scan all inputs up front: this validates every folder before the first
    # transfer and keeps files written to `output` from being rediscovered when
    # `output` is also one of the inputs.
    pending = [
        image_path
        for folder in folders
        for image_path in list_image_files(folder, allowed)
    ]

    output_dir = ensure_directory(output)
    merged_paths: list[Path] = []
    for image_path in pending:
        destination = unique_destination_path(output_dir, image_path.name)
        merged_paths.append(transfer_file(image_path, destination, operation=mode))

    return merged_paths
folderops/organizer.py ADDED
@@ -0,0 +1,65 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ from pathlib import Path
5
+ from typing import Sequence
6
+
7
+ from .utils import ensure_directory, normalize_extensions, transfer_file, validate_directory, validate_file, validate_operation
8
+
9
+
10
def organize_by_labels(
    image_dir: str | Path,
    label_file: str | Path,
    output: str | Path,
    mode: str = 'copy',
    extensions: Sequence[str] | None = None,
    delimiter: str = ',',
) -> dict[str, list[Path]]:
    """Organize images into class folders based on a CSV mapping file.

    The label file is validated in full before any file is transferred, so a
    bad row never leaves the dataset half-organized (which matters most in
    'move' mode).

    Args:
        image_dir: Directory containing images.
        label_file: CSV file with rows formatted as filename,label. Blank
            rows are skipped; columns after the second are ignored.
        output: Directory where class folders will be created.
        mode: File transfer mode, either 'copy' or 'move'.
        extensions: Iterable of allowed image extensions.
        delimiter: CSV delimiter used in the label file.

    Returns:
        A dictionary mapping class labels to transferred file paths.

    Raises:
        FileNotFoundError: If the image directory or label file does not
            exist, or a listed image is not found.
        NotADirectoryError: If ``image_dir`` exists but is not a directory.
        ValueError: If a row is malformed, has an empty field, names a file
            with an unsupported extension, uses a label that would leave the
            output directory, or (in 'move' mode) repeats a filename.
    """
    mode = validate_operation(mode)
    allowed = set(normalize_extensions(extensions))
    image_root = validate_directory(image_dir, 'Image directory')
    labels_path = validate_file(label_file, 'Label file')
    output_dir = Path(output)

    planned: list[tuple[str, Path]] = []
    claimed: set[Path] = set()

    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM, which
    # spreadsheet exports add and which would otherwise corrupt the first
    # filename.
    with labels_path.open('r', encoding='utf-8-sig', newline='') as handle:
        reader = csv.reader(handle, delimiter=delimiter)
        for row_number, row in enumerate(reader, start=1):
            if not row:
                continue
            if len(row) < 2:
                raise ValueError(f'Malformed row {row_number} in label file: expected filename and label.')
            filename = row[0].strip()
            label = row[1].strip()
            if not filename or not label:
                raise ValueError(f'Row {row_number} contains an empty filename or label.')

            image_path = image_root / filename
            if not image_path.is_file():
                raise FileNotFoundError(f'Image listed in label file not found: {image_path}')
            if image_path.suffix.lower() not in allowed:
                raise ValueError(f'Unsupported file extension for image: {image_path.name}')

            # The label becomes a directory name; an anchored path or a '..'
            # component would place files outside `output`.
            label_path = Path(label)
            if label_path.anchor or '..' in label_path.parts:
                raise ValueError(f'Row {row_number} has a label that escapes the output directory: {label!r}')

            # A moved file no longer exists for a second row, so catch the
            # repeat here instead of failing halfway through the transfers.
            if mode == 'move':
                if image_path in claimed:
                    raise ValueError(f'Row {row_number} repeats a file that can only be moved once: {filename}')
                claimed.add(image_path)

            planned.append((label, image_path))

    ensure_directory(output_dir)
    organized: dict[str, list[Path]] = {}
    for label, image_path in planned:
        class_dir = ensure_directory(output_dir / label)
        destination = class_dir / image_path.name
        organized.setdefault(label, []).append(transfer_file(image_path, destination, operation=mode))

    return organized
folderops/splitter.py ADDED
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+
3
+ import random
4
+ from pathlib import Path
5
+ from typing import Sequence
6
+
7
+ from .utils import ensure_directory, normalize_extensions, transfer_file, validate_operation
8
+
9
+
10
def split_dataset(
    source: str | Path,
    output: str | Path,
    train_ratio: float = 0.7,
    val_ratio: float = 0.15,
    test_ratio: float = 0.15,
    seed: int | None = 42,
    mode: str = "copy",
    extensions: Sequence[str] | None = None,
) -> dict[str, dict[str, list[Path]]]:
    """Split a folder of per-class image subfolders into train/val/test sets.

    Each immediate subdirectory of ``source`` is treated as one class. Its
    matching files are shuffled and transferred to
    ``output/<split>/<class>/``. Class folders containing no matching files
    are skipped.

    Args:
        source: Directory whose subdirectories are the class folders.
        output: Directory that receives the ``train``, ``val`` and ``test``
            folders; created if missing.
        train_ratio: Fraction of each class assigned to ``train``.
        val_ratio: Fraction of each class assigned to ``val``.
        test_ratio: Fraction of each class assigned to ``test``.
        seed: Seed for the shuffle; ``None`` gives a different split per run.
        mode: File transfer mode, either 'copy' or 'move'.
        extensions: Iterable of allowed image extensions. ``None`` selects
            the package defaults.

    Returns:
        A dictionary keyed by split name, mapping each class name to the
        list of transferred file paths.

    Raises:
        ValueError: If ``mode`` or ``extensions`` is invalid, a ratio is
            negative, the ratios do not sum to 1.0, or ``source`` has no
            class folders.
        FileNotFoundError: If ``source`` does not exist.
        NotADirectoryError: If ``source`` is not a directory.
    """
    mode = validate_operation(mode)
    allowed = frozenset(normalize_extensions(extensions))

    if any(r < 0 for r in (train_ratio, val_ratio, test_ratio)):
        raise ValueError("Split ratios must be non-negative.")
    total_ratio = train_ratio + val_ratio + test_ratio
    if abs(total_ratio - 1.0) > 1e-9:
        raise ValueError("train_ratio, val_ratio, and test_ratio must sum to 1.0.")

    source_path = Path(source)
    if not source_path.exists():
        raise FileNotFoundError(f"Source directory not found: {source}")
    if not source_path.is_dir():
        raise NotADirectoryError(f"Source is not a directory: {source}")

    # iterdir() order is filesystem-dependent; sorting makes the seeded shuffle
    # reproducible across machines.
    class_dirs = sorted(d for d in source_path.iterdir() if d.is_dir())
    if not class_dirs:
        raise ValueError("No class folders found inside source directory.")

    rng = random.Random(seed)
    output_path = ensure_directory(output)

    result: dict[str, dict[str, list[Path]]] = {
        "train": {},
        "val": {},
        "test": {},
    }

    for class_dir in class_dirs:
        class_name = class_dir.name

        images = sorted(
            p for p in class_dir.iterdir()
            if p.is_file() and p.suffix.lower() in allowed
        )
        if not images:
            continue

        rng.shuffle(images)

        total = len(images)
        train_end = int(total * train_ratio)
        val_end = train_end + int(total * val_ratio)
        if test_ratio == 0:
            # Truncation leaves a remainder that the final slice would hand to
            # `test`; with the test split disabled it belongs to the last
            # enabled split instead.
            val_end = total
            if val_ratio == 0:
                train_end = total

        split_map = {
            "train": images[:train_end],
            "val": images[train_end:val_end],
            "test": images[val_end:],
        }

        for split_name, files in split_map.items():
            split_class_dir = ensure_directory(output_path / split_name / class_name)
            result[split_name][class_name] = [
                transfer_file(file_path, split_class_dir / file_path.name, operation=mode)
                for file_path in files
            ]

    return result
folderops/structure.py ADDED
@@ -0,0 +1,27 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from .utils import ensure_directory, iter_structure_paths
6
+
7
+
8
def create_structure(structure: dict, root: str | Path | None = None) -> list[Path]:
    """Create nested directories from a dictionary template.

    The template is validated in full before the first directory is created,
    so an invalid entry does not leave a partially built tree behind.

    Args:
        structure: Nested dictionary describing folder names. Each value is
            either another dictionary of sub-folders or ``None`` for a leaf.
        root: Optional base directory under which the structure is created.
            Defaults to the current working directory.

    Returns:
        A list of created or ensured directory paths, parents before children.

    Raises:
        TypeError: If the structure is not a valid nested dictionary.
        ValueError: If any folder name is invalid.
    """
    base = Path() if root is None else Path(root)
    # iter_structure_paths is a generator and validates as it goes; draining it
    # first surfaces template errors before any directory is touched.
    relative_paths = list(iter_structure_paths(structure))
    return [ensure_directory(base / relative_path) for relative_path in relative_paths]
folderops/utils.py ADDED
@@ -0,0 +1,104 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from shutil import copy2, move
5
+ from typing import Iterable, Sequence, Tuple
6
+
7
# Suffixes accepted as images when the caller does not supply a filter.
DEFAULT_IMAGE_EXTENSIONS: Tuple[str, ...] = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff', '.webp')


def normalize_extensions(extensions: Sequence[str] | None) -> Tuple[str, ...]:
    """Return extensions as lowercase, dot-prefixed, de-duplicated suffixes.

    Args:
        extensions: Extensions with or without a leading dot, in any case.
            ``None`` selects ``DEFAULT_IMAGE_EXTENSIONS``. A single string is
            treated as one extension rather than a sequence of characters.

    Returns:
        A tuple of normalized extensions in first-seen order.

    Raises:
        ValueError: If no usable extension remains after normalization.
    """
    if extensions is None:
        return DEFAULT_IMAGE_EXTENSIONS
    # Iterating a bare 'jpg' would produce '.j', '.p', '.g'.
    if isinstance(extensions, str):
        extensions = (extensions,)

    normalized = []
    for ext in extensions:
        if not ext:
            continue
        ext = ext.strip().lower()
        # Strip before the emptiness test: whitespace-only input would
        # otherwise become the meaningless suffix '.'.
        if ext in ('', '.'):
            continue
        if not ext.startswith('.'):
            ext = f'.{ext}'
        normalized.append(ext)

    if not normalized:
        raise ValueError('At least one valid file extension must be provided.')
    # dict.fromkeys removes duplicates while keeping the original order.
    return tuple(dict.fromkeys(normalized))
24
+
25
+
26
def ensure_directory(path: str | Path) -> Path:
    """Create ``path`` (and any missing parents) if needed and return it as a Path."""
    target = Path(path)
    # exist_ok makes the call a no-op when the directory is already there.
    target.mkdir(exist_ok=True, parents=True)
    return target
30
+
31
+
32
def validate_directory(path: str | Path, name: str) -> Path:
    """Return ``path`` as a Path after confirming it is an existing directory.

    Args:
        path: Location to check.
        name: Human-readable label used at the start of the error message.

    Raises:
        FileNotFoundError: If nothing exists at ``path``.
        NotADirectoryError: If ``path`` exists but is not a directory.
    """
    candidate = Path(path)
    if candidate.is_dir():
        return candidate
    # Not a directory: tell "missing" apart from "present but the wrong kind".
    if candidate.exists():
        raise NotADirectoryError(f'{name} is not a directory: {candidate}')
    raise FileNotFoundError(f'{name} does not exist: {candidate}')
39
+
40
+
41
def validate_file(path: str | Path, name: str) -> Path:
    """Return ``path`` as a Path after confirming it is an existing regular file.

    Args:
        path: Location to check.
        name: Human-readable label used at the start of the error message.

    Raises:
        FileNotFoundError: If nothing exists at ``path``, or if what exists
            there is not a file (both cases use this exception type).
    """
    candidate = Path(path)
    if candidate.is_file():
        return candidate
    # Both failures raise the same type; only the message differs.
    if candidate.exists():
        raise FileNotFoundError(f'{name} is not a file: {candidate}')
    raise FileNotFoundError(f'{name} does not exist: {candidate}')
48
+
49
+
50
def list_image_files(directory: str | Path, extensions: Sequence[str] | None = None) -> list[Path]:
    """Return the files directly inside ``directory`` whose suffix is allowed, sorted.

    The directory is validated with ``validate_directory`` and the extension
    filter is normalized with ``normalize_extensions``; suffixes are compared
    case-insensitively. Subdirectories are not descended into.
    """
    folder = validate_directory(directory, 'Source directory')
    allowed = set(normalize_extensions(extensions))
    matches = [
        entry
        for entry in folder.iterdir()
        if entry.is_file() and entry.suffix.lower() in allowed
    ]
    matches.sort()
    return matches
54
+
55
+
56
def unique_destination_path(output_dir: str | Path, filename: str) -> Path:
    """Return a path inside ``output_dir`` for ``filename`` that does not exist yet.

    The output directory is created if missing. When ``filename`` is already
    taken, ``_1``, ``_2``, ... is inserted before the suffix until a free name
    is found. The path is only computed; no file is created here.
    """
    folder = ensure_directory(output_dir)
    candidate = folder / filename
    # Taken from the requested name once, so every retry shares the same base.
    stem, suffix = candidate.stem, candidate.suffix
    index = 0
    while candidate.exists():
        index += 1
        candidate = folder / f'{stem}_{index}{suffix}'
    return candidate
69
+
70
+
71
def transfer_file(source: str | Path, destination: str | Path, operation: str = 'copy') -> Path:
    """Copy or move ``source`` to ``destination`` and return the destination path.

    Args:
        source: File to transfer.
        destination: Full target path, including the file name. Missing
            parent directories are created.
        operation: 'copy' (``shutil.copy2``) or 'move' (``shutil.move``).
            Case and surrounding whitespace are ignored, matching
            ``validate_operation``.

    Returns:
        ``destination`` as a Path.

    Raises:
        ValueError: If ``operation`` is neither 'copy' nor 'move'. Raised
            before anything on disk is touched.
    """
    # Non-string values fall through unchanged and are rejected just below.
    action = operation.lower().strip() if isinstance(operation, str) else operation
    # Validate first so a bad operation cannot leave an empty directory behind.
    if action not in ('copy', 'move'):
        raise ValueError("operation must be either 'copy' or 'move'.")

    src = Path(source)
    dst = Path(destination)
    ensure_directory(dst.parent)
    if action == 'copy':
        copy2(src, dst)
    else:
        move(str(src), str(dst))
    return dst
82
+
83
+
84
def validate_operation(operation: str) -> str:
    """Return ``operation`` lowercased and stripped if it names a supported mode.

    Raises:
        ValueError: If the cleaned value is neither 'copy' nor 'move'.
    """
    cleaned = operation.lower().strip()
    if cleaned in ('copy', 'move'):
        return cleaned
    raise ValueError("mode must be either 'copy' or 'move'.")
89
+
90
+
91
def iter_structure_paths(structure: dict, parent: Path | None = None) -> Iterable[Path]:
    """Yield every directory path described by a nested dictionary template.

    Paths are yielded depth-first, each parent before its children, and are
    relative to ``parent``.

    Args:
        structure: Mapping of folder name to sub-structure. A value is either
            another dictionary or ``None`` for a folder without children.
        parent: Path the yielded entries are joined onto. Defaults to an
            empty relative path.

    Raises:
        TypeError: If ``structure`` or any nested value is not a dictionary
            (nested values may also be ``None``).
        ValueError: If a folder name is not a non-empty string.
    """
    if not isinstance(structure, dict):
        raise TypeError('structure must be a dictionary.')
    parent = Path() if parent is None else parent
    for name, subtree in structure.items():
        if not isinstance(name, str) or not name.strip():
            raise ValueError('Every folder name must be a non-empty string.')
        # Check the value before yielding, so a consumer that creates
        # directories as it iterates never creates one for an invalid entry.
        if subtree is not None and not isinstance(subtree, dict):
            raise TypeError('Nested structure values must be dictionaries or None.')
        current = parent / name
        yield current
        if subtree is not None:
            yield from iter_structure_paths(subtree, current)
@@ -0,0 +1,201 @@
1
+ Metadata-Version: 2.4
2
+ Name: folderops
3
+ Version: 0.1.0
4
+ Summary: Python utilities for dataset organization and preprocessing
5
+ Author: Ahamed
6
+ License: MIT
7
+ Keywords: dataset,images,ml,data-preprocessing
8
+ Requires-Python: >=3.8
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Dynamic: license-file
12
+
13
+ # folderops
14
+
15
+ `folderops` is a lightweight Python package for common dataset organization tasks in machine learning workflows. It is designed for direct use inside notebooks, research code, and training scripts through clean Python imports.
16
+
17
+ ## Features
18
+
19
+ - Split image datasets into train, validation, and test folders
20
+ - Merge multiple image folders into a single directory
21
+ - Organize images into class folders using a CSV label file
22
+ - Create nested folder structures from Python dictionaries
23
+ - Support common image formats such as `.jpg`, `.jpeg`, `.png`, `.bmp`, `.gif`, `.tif`, `.tiff`, and `.webp`
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ pip install folderops
29
+ ```
30
+
31
+ For local development:
32
+
33
+ ```bash
34
+ pip install -e .
35
+ ```
36
+
37
+ ## Quick Start
38
+
39
+ ```python
40
+ from folderops import split_dataset, merge_folders, organize_by_labels, create_structure
41
+
42
+ split_dataset(
43
+ source="images",
44
+ output="dataset",
45
+ train_ratio=0.7,
46
+ val_ratio=0.15,
47
+ test_ratio=0.15,
48
+ seed=42,
49
+ )
50
+
51
+ merge_folders(
52
+ folders=["dataset1/images", "dataset2/images", "dataset3/images"],
53
+ output="merged_images",
54
+ )
55
+
56
+ organize_by_labels(
57
+ image_dir="images",
58
+ label_file="labels.csv",
59
+ output="organized_dataset",
60
+ )
61
+
62
+ structure = {
63
+ "dataset": {
64
+ "train": {},
65
+ "val": {},
66
+ "test": {}
67
+ }
68
+ }
69
+ create_structure(structure)
70
+ ```
71
+
72
+ ## Public API
73
+
74
+ ### `split_dataset`
75
+
76
+ Split a folder of per-class image subfolders into `train`, `val`, and `test` subdirectories, keeping one folder per class inside each split.
77
+
78
+ ```python
79
+ split_dataset(
80
+ source="images",
81
+ output="dataset",
82
+ train_ratio=0.7,
83
+ val_ratio=0.15,
84
+ test_ratio=0.15,
85
+ seed=42,
86
+ mode="copy",
87
+ extensions=(".jpg", ".png"),
88
+ )
89
+ ```
90
+
91
+ Key behavior:
92
+
93
+ - Shuffles files before splitting
94
+ - Supports deterministic splits with a random seed
95
+ - Supports `copy` and `move` modes
96
+ - Validates that split ratios sum to `1.0`
97
+
98
+ ### `merge_folders`
99
+
100
+ Merge images from multiple folders into one output directory.
101
+
102
+ ```python
103
+ merge_folders(
104
+ folders=["dataset1/images", "dataset2/images"],
105
+ output="merged_images",
106
+ mode="copy",
107
+ )
108
+ ```
109
+
110
+ Key behavior:
111
+
112
+ - Avoids overwriting duplicate filenames
113
+ - Automatically renames duplicates like `image_1.jpg`, `image_2.jpg`
114
+ - Supports `copy` and `move` modes
115
+
116
+ ### `organize_by_labels`
117
+
118
+ Organize images into class folders using a CSV file with `filename,label` rows.
119
+
120
+ Example `labels.csv`:
121
+
122
+ ```csv
123
+ img1.jpg,cat
124
+ img2.jpg,dog
125
+ img3.jpg,cat
126
+ ```
127
+
128
+ Usage:
129
+
130
+ ```python
131
+ organize_by_labels(
132
+ image_dir="images",
133
+ label_file="labels.csv",
134
+ output="organized_dataset",
135
+ mode="copy",
136
+ )
137
+ ```
138
+
139
+ Key behavior:
140
+
141
+ - Creates class folders automatically
142
+ - Validates image existence before transfer
143
+ - Supports configurable CSV delimiter
144
+
145
+ ### `create_structure`
146
+
147
+ Create nested directories recursively from a dictionary.
148
+
149
+ ```python
150
+ structure = {
151
+ "dataset": {
152
+ "train": {},
153
+ "val": {},
154
+ "test": {}
155
+ }
156
+ }
157
+
158
+ create_structure(structure)
159
+ create_structure(structure, root="project_data")
160
+ ```
161
+
162
+ ## Project Layout
163
+
164
+ ```text
165
+ folderops/
166
+ ├── folderops/
167
+ │ ├── __init__.py
168
+ │ ├── merger.py
169
+ │ ├── organizer.py
170
+ │ ├── splitter.py
171
+ │ ├── structure.py
172
+ │ └── utils.py
173
+ ├── LICENSE
174
+ ├── pyproject.toml
175
+ └── README.md
176
+ ```
177
+
178
+ ## Build and Publish
179
+
180
+ Build the package:
181
+
182
+ ```bash
183
+ python -m build
184
+ ```
185
+
186
+ Upload to PyPI:
187
+
188
+ ```bash
189
+ twine upload dist/*
190
+ ```
191
+
192
+ ## Development Notes
193
+
194
+ - Python 3.8+
195
+ - No CLI dependency
196
+ - Intended for import-based use only
197
+ - Uses standard library modules only
198
+
199
+ ## License
200
+
201
+ MIT License
@@ -0,0 +1,11 @@
1
+ folderops/__init__.py,sha256=sSC4uuEB5goTK1-MtugUA6om9cQlueQsQwy6GeeTlSo,259
2
+ folderops/merger.py,sha256=zCtVb4o9Q9nKLnVYnRqFyNgHsjXhpr60it6Vw6w-3Uk,1430
3
+ folderops/organizer.py,sha256=9LjiRjLzx_XTkejaxUcSYEXJA879j-9tqfZ0sgbBg9U,2624
4
+ folderops/splitter.py,sha256=GBEnRakb2_6oodNOEFrphfP6sn757drket2IBx3uAZ8,2422
5
+ folderops/structure.py,sha256=iYNmAJlg3qRXKyGAVFrnTtvhvEG6mXKbHY_-YVoGHn8,898
6
+ folderops/utils.py,sha256=oClFh9mCAUXERuQ5glLa5SQk0r3t05kefQ53ssQOuSA,3648
7
+ folderops-0.1.0.dist-info/licenses/LICENSE,sha256=ESYyLizI0WWtxMeS7rGVcX3ivMezm-HOd5WdeOh-9oU,1056
8
+ folderops-0.1.0.dist-info/METADATA,sha256=N3zXoGHSr6kPtR5OashQJqKG4tIp7rPlNQMosc2zU1k,3994
9
+ folderops-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
10
+ folderops-0.1.0.dist-info/top_level.txt,sha256=ar3RNZQUoTwOQJeILal6d35805Iy5Gn2wGvpZxqhY7g,10
11
+ folderops-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ folderops