datasety 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datasety-0.2.0/.gitignore +94 -0
- datasety-0.2.0/PKG-INFO +231 -0
- datasety-0.1.0/PKG-INFO → datasety-0.2.0/README.md +57 -40
- {datasety-0.1.0 → datasety-0.2.0}/pyproject.toml +14 -1
- {datasety-0.1.0 → datasety-0.2.0}/src/datasety/__init__.py +1 -1
- {datasety-0.1.0 → datasety-0.2.0}/src/datasety/cli.py +178 -1
- datasety-0.1.0/.gitignore +0 -36
- datasety-0.1.0/README.md +0 -129
- datasety-0.1.0/src/datasety/py.typed +0 -0
- {datasety-0.1.0 → datasety-0.2.0}/.github/workflows/publish.yml +0 -0
- {datasety-0.1.0 → datasety-0.2.0}/.github/workflows/test.yml +0 -0
- {datasety-0.1.0 → datasety-0.2.0}/LICENSE +0 -0
- {datasety-0.1.0 → datasety-0.2.0}/src/datasety/__main__.py +0 -0
- {datasety-0.1.0 → datasety-0.2.0}/tests/__init__.py +0 -0
- {datasety-0.1.0 → datasety-0.2.0}/tests/test_resize.py +0 -0
datasety-0.2.0/.gitignore
ADDED

@@ -0,0 +1,94 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+build/
+dist/
+*.egg-info/
+*.egg
+wheels/
+pip-wheel-metadata/
+MANIFEST
+
+# Virtual environments
+venv/
+.venv/
+env/
+.env/
+ENV/
+
+# IDE / Editors
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+.project
+.pydevproject
+.settings/
+*.sublime-project
+*.sublime-workspace
+
+# Testing / Coverage
+.pytest_cache/
+.tox/
+.nox/
+.coverage
+.coverage.*
+htmlcov/
+.hypothesis/
+*.cover
+coverage.xml
+nosetests.xml
+
+# Type checking
+.mypy_cache/
+.dmypy.json
+dmypy.json
+.pytype/
+
+# Linting
+.ruff_cache/
+
+# Jupyter
+.ipynb_checkpoints/
+*.ipynb
+
+# OS
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+Thumbs.db
+ehthumbs.db
+Desktop.ini
+
+# Logs
+*.log
+logs/
+
+# ML / AI specific
+*.pt
+*.pth
+*.ckpt
+*.safetensors
+*.bin
+.cache/
+huggingface/
+~/.cache/huggingface/
+
+# Project specific - test data directories
+test_data/
+tests/fixtures/images/
+/in/
+/out/
+/synthetic/
+/captions/
+/resized/
+/dataset/
datasety-0.2.0/PKG-INFO
ADDED

@@ -0,0 +1,231 @@
+Metadata-Version: 2.4
+Name: datasety
+Version: 0.2.0
+Summary: CLI tool for dataset preparation: resize, caption, and synthetic image generation
+Project-URL: Homepage, https://github.com/kontextox/datasety
+Project-URL: Repository, https://github.com/kontextox/datasety
+Project-URL: Issues, https://github.com/kontextox/datasety/issues
+Author: kontextox
+License-Expression: MIT
+License-File: LICENSE
+Keywords: captioning,cli,dataset,diffusers,florence-2,image-editing,image-processing,machine-learning,synthetic
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Scientific/Engineering :: Image Processing
+Requires-Python: >=3.10
+Requires-Dist: pillow>=9.0.0
+Provides-Extra: all
+Requires-Dist: accelerate; extra == 'all'
+Requires-Dist: diffusers>=0.32.0; extra == 'all'
+Requires-Dist: einops; extra == 'all'
+Requires-Dist: sentencepiece; extra == 'all'
+Requires-Dist: timm; extra == 'all'
+Requires-Dist: torch>=2.0.0; extra == 'all'
+Requires-Dist: transformers<4.46.0,>=4.38.0; extra == 'all'
+Requires-Dist: transformers>=4.38.0; extra == 'all'
+Provides-Extra: caption
+Requires-Dist: einops; extra == 'caption'
+Requires-Dist: timm; extra == 'caption'
+Requires-Dist: torch>=2.0.0; extra == 'caption'
+Requires-Dist: transformers<4.46.0,>=4.38.0; extra == 'caption'
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == 'dev'
+Requires-Dist: ruff>=0.1.0; extra == 'dev'
+Provides-Extra: synthetic
+Requires-Dist: accelerate; extra == 'synthetic'
+Requires-Dist: diffusers>=0.32.0; extra == 'synthetic'
+Requires-Dist: sentencepiece; extra == 'synthetic'
+Requires-Dist: torch>=2.0.0; extra == 'synthetic'
+Requires-Dist: transformers>=4.38.0; extra == 'synthetic'
+Description-Content-Type: text/markdown
+
+# datasety
+
+CLI tool for dataset preparation: resize, caption, and synthetic image generation.
+
+## Installation
+
+```bash
+pip install datasety
+```
+
+Install with specific features:
+
+```bash
+pip install datasety[caption]    # Florence-2 captioning
+pip install datasety[synthetic]  # Qwen image editing
+pip install datasety[all]        # All features
+```
+
+## Usage
+
+### Resize Images
+
+Resize and crop images to a target resolution:
+
+```bash
+datasety resize --input ./images --output ./resized --resolution 768x1024
+```
+
+**Options:**
+
+| Option | Description | Default |
+| ----------------------- | --------------------------------------------------------- | ------------------- |
+| `--input`, `-i` | Input directory | (required) |
+| `--output`, `-o` | Output directory | (required) |
+| `--resolution`, `-r` | Target resolution (WIDTHxHEIGHT) | (required) |
+| `--crop-position` | Crop position: `top`, `center`, `bottom`, `left`, `right` | `center` |
+| `--input-format` | Comma-separated formats | `jpg,jpeg,png,webp` |
+| `--output-format` | Output format: `jpg`, `png`, `webp` | `jpg` |
+| `--output-name-numbers` | Rename files to 1.jpg, 2.jpg, ... | `false` |
+
+**Example:**
+
+```bash
+datasety resize \
+  --input ./raw_photos \
+  --output ./dataset \
+  --resolution 1024x1024 \
+  --crop-position top \
+  --output-format jpg \
+  --output-name-numbers
+```
+
+**How it works:**
+
+1. Finds all images matching input formats
+2. Skips images where either dimension is smaller than target
+3. Resizes proportionally so the smaller side matches target
+4. Crops from the specified position to exact dimensions
+5. Saves with high quality (95% for jpg/webp)
+
+### Generate Captions
+
+Generate captions for images using Microsoft's Florence-2 model:
+
+```bash
+datasety caption --input ./images --output ./captions --florence-2-large
+```
+
+**Options:**
+
+| Option | Description | Default |
+| -------------------- | ------------------------------- | ------------------------- |
+| `--input`, `-i` | Input directory | (required) |
+| `--output`, `-o` | Output directory for .txt files | (required) |
+| `--device` | `cpu` or `cuda` | `cpu` |
+| `--trigger-word` | Text to prepend to captions | (none) |
+| `--prompt` | Florence-2 task prompt | `<MORE_DETAILED_CAPTION>` |
+| `--florence-2-base` | Use base model (0.23B, faster) | |
+| `--florence-2-large` | Use large model (0.77B, better) | (default) |
+
+**Available prompts:**
+
+- `<CAPTION>` - Brief caption
+- `<DETAILED_CAPTION>` - Detailed caption
+- `<MORE_DETAILED_CAPTION>` - Most detailed caption (default)
+
+**Example:**
+
+```bash
+datasety caption \
+  --input ./dataset \
+  --output ./dataset \
+  --device cuda \
+  --trigger-word "photo of sks person," \
+  --florence-2-large
+```
+
+This creates a `.txt` file for each image with the generated caption.
+
+### Generate Synthetic Images
+
+Generate synthetic variations of images using Qwen-Image-Edit:
+
+```bash
+datasety synthetic --input ./images --output ./synthetic --prompt "add a winter hat"
+```
+
+**Options:**
+
+| Option | Description | Default |
+| ------------------- | --------------------------------- | -------------------------- |
+| `--input`, `-i` | Input directory | (required) |
+| `--output`, `-o` | Output directory | (required) |
+| `--prompt`, `-p` | Edit prompt | (required) |
+| `--model` | Model to use | `Qwen/Qwen-Image-Edit-2511`|
+| `--device` | `cpu` or `cuda` | `cuda` |
+| `--steps` | Number of inference steps | `40` |
+| `--cfg-scale` | Guidance scale | `1.0` |
+| `--true-cfg-scale` | True CFG scale | `4.0` |
+| `--negative-prompt` | Negative prompt | `" "` |
+| `--num-images` | Images to generate per input | `1` |
+| `--seed` | Random seed for reproducibility | (random) |
+
+**Example:**
+
+```bash
+datasety synthetic \
+  --input ./dataset \
+  --output ./synthetic \
+  --prompt "add sunglasses to the person, keep everything else the same" \
+  --device cuda \
+  --steps 40 \
+  --true-cfg-scale 4.0 \
+  --seed 42
+```
+
+## Common Workflows
+
+### Prepare a LoRA Training Dataset
+
+```bash
+# 1. Resize images to 1024x1024
+datasety resize -i ./raw -o ./dataset -r 1024x1024 --crop-position center
+
+# 2. Generate captions with trigger word
+datasety caption -i ./dataset -o ./dataset --trigger-word "[trigger]" --device cuda
+```
+
+### Augment Dataset with Synthetic Variations
+
+```bash
+# Generate variations with different accessories
+datasety synthetic \
+  -i ./dataset \
+  -o ./synthetic \
+  --prompt "add a red scarf" \
+  --num-images 2 \
+  --device cuda
+```
+
+### Batch Process with Numbered Files
+
+```bash
+datasety resize \
+  -i ./photos \
+  -o ./processed \
+  -r 768x1024 \
+  --output-name-numbers \
+  --crop-position top
+```
+
+## Requirements
+
+- Python 3.10+
+- Pillow (for resize)
+- PyTorch + Transformers (for caption: `pip install datasety[caption]`)
+- PyTorch + Diffusers (for synthetic: `pip install datasety[synthetic]`)
+
+## License
+
+MIT
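The "How it works" list in the new README compresses the resize step into five bullet points. The same behaviour fits in a few lines of Pillow; the following is a rough sketch with made-up names and a fixed center crop, not datasety's actual implementation:

```python
# Sketch of the resize-then-crop flow described under "How it works"
# (assumed center crop, illustrative file paths). Not the package's own code.
from PIL import Image

def resize_and_center_crop(src: str, dst: str, width: int, height: int) -> None:
    img = Image.open(src).convert("RGB")
    if img.width < width or img.height < height:
        return  # step 2: skip images smaller than the target in either dimension

    # Step 3: scale proportionally so the smaller side matches the target.
    scale = max(width / img.width, height / img.height)
    img = img.resize((round(img.width * scale), round(img.height * scale)), Image.LANCZOS)

    # Step 4: crop (here from the center) to the exact target size.
    left = (img.width - width) // 2
    top = (img.height - height) // 2
    # Step 5: save at high JPEG quality.
    img.crop((left, top, left + width, top + height)).save(dst, quality=95)

resize_and_center_crop("raw_photos/example.jpg", "dataset/1.jpg", 1024, 1024)
```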
datasety-0.1.0/PKG-INFO → datasety-0.2.0/README.md

@@ -1,41 +1,6 @@
-Metadata-Version: 2.4
-Name: datasety
-Version: 0.1.0
-Summary: CLI tool for dataset preparation: image resizing and captioning with Florence-2
-Project-URL: Homepage, https://github.com/kontextox/datasety
-Project-URL: Repository, https://github.com/kontextox/datasety
-Project-URL: Issues, https://github.com/kontextox/datasety/issues
-Author: kontextox
-License-Expression: MIT
-License-File: LICENSE
-Keywords: captioning,cli,dataset,florence-2,image-processing,machine-learning
-Classifier: Development Status :: 4 - Beta
-Classifier: Environment :: Console
-Classifier: Intended Audience :: Developers
-Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Classifier: Topic :: Scientific/Engineering :: Image Processing
-Requires-Python: >=3.10
-Requires-Dist: pillow>=9.0.0
-Provides-Extra: caption
-Requires-Dist: einops; extra == 'caption'
-Requires-Dist: timm; extra == 'caption'
-Requires-Dist: torch>=2.0.0; extra == 'caption'
-Requires-Dist: transformers<4.46.0,>=4.38.0; extra == 'caption'
-Provides-Extra: dev
-Requires-Dist: pytest>=7.0.0; extra == 'dev'
-Requires-Dist: ruff>=0.1.0; extra == 'dev'
-Description-Content-Type: text/markdown
-
 # datasety
 
-CLI tool for dataset preparation: image resizing and captioning with Florence-2.
+CLI tool for dataset preparation: resize, caption, and synthetic image generation.
 
 ## Installation
 
@@ -43,10 +8,12 @@ CLI tool for dataset preparation: image resizing and captioning with Florence-2.
 pip install datasety
 ```
 
-For captioning support (requires PyTorch and Transformers):
+Install with specific features:
 
 ```bash
-pip install datasety[caption]
+pip install datasety[caption]    # Florence-2 captioning
+pip install datasety[synthetic]  # Qwen image editing
+pip install datasety[all]        # All features
 ```
 
 ## Usage
@@ -88,7 +55,7 @@ datasety resize \
 1. Finds all images matching input formats
 2. Skips images where either dimension is smaller than target
 3. Resizes proportionally so the smaller side matches target
-4. Crops from the specified area to exact dimensions
+4. Crops from the specified position to exact dimensions
 5. Saves with high quality (95% for jpg/webp)
 
 ### Generate Captions
@@ -130,6 +97,43 @@ datasety caption \
 
 This creates a `.txt` file for each image with the generated caption.
 
+### Generate Synthetic Images
+
+Generate synthetic variations of images using Qwen-Image-Edit:
+
+```bash
+datasety synthetic --input ./images --output ./synthetic --prompt "add a winter hat"
+```
+
+**Options:**
+
+| Option | Description | Default |
+| ------------------- | --------------------------------- | -------------------------- |
+| `--input`, `-i` | Input directory | (required) |
+| `--output`, `-o` | Output directory | (required) |
+| `--prompt`, `-p` | Edit prompt | (required) |
+| `--model` | Model to use | `Qwen/Qwen-Image-Edit-2511`|
+| `--device` | `cpu` or `cuda` | `cuda` |
+| `--steps` | Number of inference steps | `40` |
+| `--cfg-scale` | Guidance scale | `1.0` |
+| `--true-cfg-scale` | True CFG scale | `4.0` |
+| `--negative-prompt` | Negative prompt | `" "` |
+| `--num-images` | Images to generate per input | `1` |
+| `--seed` | Random seed for reproducibility | (random) |
+
+**Example:**
+
+```bash
+datasety synthetic \
+  --input ./dataset \
+  --output ./synthetic \
+  --prompt "add sunglasses to the person, keep everything else the same" \
+  --device cuda \
+  --steps 40 \
+  --true-cfg-scale 4.0 \
+  --seed 42
+```
+
 ## Common Workflows
 
 ### Prepare a LoRA Training Dataset
@@ -142,6 +146,18 @@ datasety resize -i ./raw -o ./dataset -r 1024x1024 --crop-position center
 datasety caption -i ./dataset -o ./dataset --trigger-word "[trigger]" --device cuda
 ```
 
+### Augment Dataset with Synthetic Variations
+
+```bash
+# Generate variations with different accessories
+datasety synthetic \
+  -i ./dataset \
+  -o ./synthetic \
+  --prompt "add a red scarf" \
+  --num-images 2 \
+  --device cuda
+```
+
 ### Batch Process with Numbered Files
 
 ```bash
@@ -157,7 +173,8 @@ datasety resize \
 
 - Python 3.10+
 - Pillow (for resize)
-- PyTorch + Transformers (for caption, install with `pip install datasety[caption]`)
+- PyTorch + Transformers (for caption: `pip install datasety[caption]`)
+- PyTorch + Diffusers (for synthetic: `pip install datasety[synthetic]`)
 
 ## License
 
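The caption command in the README above is built around Florence-2 task prompts plus an optional trigger word. The package's cmd_caption is not part of this diff, but the model is typically driven through transformers roughly like this (a sketch following the public Florence-2 model card; the helper name and defaults are illustrative, not datasety's code):

```python
# Illustrative Florence-2 captioning helper (task prompt + optional trigger word).
# Mirrors the public model-card usage pattern, not datasety's own cmd_caption.
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

def florence2_caption(image_path: str, device: str = "cpu",
                      task: str = "<MORE_DETAILED_CAPTION>", trigger: str = "") -> str:
    model_id = "microsoft/Florence-2-large"  # or microsoft/Florence-2-base for the smaller model
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to(device)

    image = Image.open(image_path).convert("RGB")
    inputs = processor(text=task, images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
    )
    raw = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(raw, task=task, image_size=image.size)
    return f"{trigger} {parsed[task]}".strip()
```

The CLI then writes one such string per image into a `.txt` file in the output directory, as the README describes.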
{datasety-0.1.0 → datasety-0.2.0}/pyproject.toml

@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
 [project]
 name = "datasety"
 dynamic = ["version"]
-description = "CLI tool for dataset preparation: image resizing and captioning with Florence-2"
+description = "CLI tool for dataset preparation: resize, caption, and synthetic image generation"
 readme = "README.md"
 license = "MIT"
 requires-python = ">=3.10"
@@ -17,6 +17,9 @@ keywords = [
     "image-processing",
     "captioning",
     "florence-2",
+    "synthetic",
+    "image-editing",
+    "diffusers",
     "machine-learning",
     "cli",
 ]
@@ -45,6 +48,16 @@ caption = [
     "einops",
     "timm",
 ]
+synthetic = [
+    "torch>=2.0.0",
+    "diffusers>=0.32.0",
+    "transformers>=4.38.0",
+    "accelerate",
+    "sentencepiece",
+]
+all = [
+    "datasety[caption,synthetic]",
+]
 dev = [
     "pytest>=7.0.0",
     "ruff>=0.1.0",
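Note that the new `all` extra is defined recursively as `datasety[caption,synthetic]`. When the metadata is generated this expands to the union of both requirement sets, which is why the PKG-INFO above ends up with two transformers pins under `extra == 'all'` (`<4.46.0,>=4.38.0` from caption, `>=4.38.0` from synthetic). A quick, illustrative way to inspect the expanded requirements on an installed copy:

```python
# List the extras-gated requirements recorded in datasety's installed metadata.
# Illustrative check; requires datasety >= 0.2.0 to be installed.
from importlib.metadata import requires

for req in requires("datasety") or []:
    if "extra ==" in req:
        print(req)
```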
{datasety-0.1.0 → datasety-0.2.0}/src/datasety/cli.py

@@ -1,10 +1,11 @@
 #!/usr/bin/env python3
 """
-datasety - CLI tool for dataset preparation:
+datasety - CLI tool for dataset preparation: resize, caption, and synthetic generation.
 
 Usage:
     datasety resize --input ./in --output ./out --resolution 768x1024 --crop-position top
     datasety caption --input ./in --output ./out --trigger-word "[trigger]" --florence-2-large
+    datasety synthetic --input ./in --output ./out --prompt "add a winter hat"
 """
 
 import argparse
@@ -283,6 +284,114 @@ def cmd_caption(args):
     print(f"Done! Processed: {processed} images")
 
 
+def cmd_synthetic(args):
+    """Execute the synthetic image generation command."""
+    # Lazy import for faster CLI startup
+    try:
+        import torch
+    except ImportError:
+        print("Error: PyTorch not installed.")
+        print("Run: pip install 'datasety[synthetic]'")
+        sys.exit(1)
+
+    input_dir = Path(args.input)
+    output_dir = Path(args.output)
+
+    if not input_dir.exists():
+        print(f"Error: Input directory '{input_dir}' does not exist.")
+        sys.exit(1)
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Determine device
+    if args.device == "cuda" and not torch.cuda.is_available():
+        print("Warning: CUDA not available, falling back to CPU")
+        device = "cpu"
+    else:
+        device = args.device
+
+    # Import the correct pipeline based on model
+    try:
+        from diffusers import QwenImageEditPlusPipeline
+        pipeline_class = QwenImageEditPlusPipeline
+    except ImportError:
+        print("Error: QwenImageEditPlusPipeline not found.")
+        print("Make sure you have the latest diffusers: pip install -U diffusers")
+        sys.exit(1)
+
+    print(f"Loading model: {args.model}")
+    print(f"Device: {device}")
+
+    torch_dtype = torch.bfloat16 if device == "cuda" else torch.float32
+
+    try:
+        pipeline = pipeline_class.from_pretrained(
+            args.model,
+            torch_dtype=torch_dtype
+        )
+        pipeline.to(device)
+        pipeline.set_progress_bar_config(disable=False)
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        sys.exit(1)
+
+    # Find images
+    formats = ["jpg", "jpeg", "png", "webp", "bmp", "tiff"]
+    image_files = get_image_files(input_dir, formats)
+
+    if not image_files:
+        print(f"No images found in '{input_dir}'")
+        sys.exit(0)
+
+    print(f"Found {len(image_files)} images")
+    print(f"Prompt: {args.prompt}")
+    print(f"Steps: {args.steps}, CFG: {args.cfg_scale}, True CFG: {args.true_cfg_scale}")
+    print("-" * 50)
+
+    processed = 0
+
+    for img_path in image_files:
+        try:
+            image = Image.open(img_path).convert("RGB")
+
+            # Set up generation parameters
+            gen_kwargs = {
+                "image": [image],
+                "prompt": args.prompt,
+                "negative_prompt": args.negative_prompt,
+                "num_inference_steps": args.steps,
+                "guidance_scale": args.cfg_scale,
+                "true_cfg_scale": args.true_cfg_scale,
+                "num_images_per_prompt": args.num_images,
+            }
+
+            # Add seed if specified
+            if args.seed is not None:
+                gen_kwargs["generator"] = torch.manual_seed(args.seed)
+
+            with torch.inference_mode():
+                output = pipeline(**gen_kwargs)
+
+            # Save output image(s)
+            for idx, out_img in enumerate(output.images):
+                if args.num_images > 1:
+                    out_name = f"{img_path.stem}_{idx + 1}.png"
+                else:
+                    out_name = f"{img_path.stem}.png"
+
+                out_path = output_dir / out_name
+                out_img.save(out_path)
+
+            print(f"[OK] {img_path.name} -> {len(output.images)} image(s)")
+            processed += 1
+
+        except Exception as e:
+            print(f"[ERROR] {img_path.name}: {e}")
+
+    print("-" * 50)
+    print(f"Done! Processed: {processed} images")
+
+
 def main():
     parser = argparse.ArgumentParser(
         prog="datasety",
@@ -379,6 +488,74 @@ def main():
     )
     caption_parser.set_defaults(func=cmd_caption)
 
+    # === SYNTHETIC command ===
+    synthetic_parser = subparsers.add_parser(
+        "synthetic",
+        help="Generate synthetic images using image editing models"
+    )
+    synthetic_parser.add_argument(
+        "--input", "-i",
+        required=True,
+        help="Input directory containing images"
+    )
+    synthetic_parser.add_argument(
+        "--output", "-o",
+        required=True,
+        help="Output directory for generated images"
+    )
+    synthetic_parser.add_argument(
+        "--prompt", "-p",
+        required=True,
+        help="Edit prompt (e.g., 'add a winter hat to the person')"
+    )
+    synthetic_parser.add_argument(
+        "--model",
+        default="Qwen/Qwen-Image-Edit-2511",
+        help="Model to use (default: Qwen/Qwen-Image-Edit-2511)"
+    )
+    synthetic_parser.add_argument(
+        "--device",
+        choices=["cpu", "cuda"],
+        default="cuda",
+        help="Device to run model on (default: cuda)"
+    )
+    synthetic_parser.add_argument(
+        "--steps",
+        type=int,
+        default=40,
+        help="Number of inference steps (default: 40)"
+    )
+    synthetic_parser.add_argument(
+        "--cfg-scale",
+        type=float,
+        default=1.0,
+        help="Guidance scale (default: 1.0)"
+    )
+    synthetic_parser.add_argument(
+        "--true-cfg-scale",
+        type=float,
+        default=4.0,
+        help="True CFG scale (default: 4.0)"
+    )
+    synthetic_parser.add_argument(
+        "--negative-prompt",
+        default=" ",
+        help="Negative prompt (default: ' ')"
+    )
+    synthetic_parser.add_argument(
+        "--num-images",
+        type=int,
+        default=1,
+        help="Number of images to generate per input (default: 1)"
+    )
+    synthetic_parser.add_argument(
+        "--seed",
+        type=int,
+        default=None,
+        help="Random seed for reproducibility"
+    )
+    synthetic_parser.set_defaults(func=cmd_synthetic)
+
     # Parse and execute
     args = parser.parse_args()
     args.func(args)
datasety-0.1.0/.gitignore
DELETED

@@ -1,36 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# Distribution / packaging
-build/
-dist/
-*.egg-info/
-*.egg
-
-# Virtual environments
-venv/
-.venv/
-env/
-
-# IDE
-.idea/
-.vscode/
-*.swp
-*.swo
-
-# Testing
-.pytest_cache/
-.coverage
-htmlcov/
-
-# OS
-.DS_Store
-Thumbs.db
-
-# Project specific
-*.jpg
-*.jpeg
-*.png
-*.webp
datasety-0.1.0/README.md
DELETED

@@ -1,129 +0,0 @@
-# datasety
-
-CLI tool for dataset preparation: image resizing and captioning with Florence-2.
-
-## Installation
-
-```bash
-pip install datasety
-```
-
-For captioning support (requires PyTorch and Transformers):
-
-```bash
-pip install datasety[caption]
-```
-
-## Usage
-
-### Resize Images
-
-Resize and crop images to a target resolution:
-
-```bash
-datasety resize --input ./images --output ./resized --resolution 768x1024
-```
-
-**Options:**
-
-| Option | Description | Default |
-| ----------------------- | --------------------------------------------------------- | ------------------- |
-| `--input`, `-i` | Input directory | (required) |
-| `--output`, `-o` | Output directory | (required) |
-| `--resolution`, `-r` | Target resolution (WIDTHxHEIGHT) | (required) |
-| `--crop-position` | Crop position: `top`, `center`, `bottom`, `left`, `right` | `center` |
-| `--input-format` | Comma-separated formats | `jpg,jpeg,png,webp` |
-| `--output-format` | Output format: `jpg`, `png`, `webp` | `jpg` |
-| `--output-name-numbers` | Rename files to 1.jpg, 2.jpg, ... | `false` |
-
-**Example:**
-
-```bash
-datasety resize \
-  --input ./raw_photos \
-  --output ./dataset \
-  --resolution 1024x1024 \
-  --crop-position top \
-  --output-format jpg \
-  --output-name-numbers
-```
-
-**How it works:**
-
-1. Finds all images matching input formats
-2. Skips images where either dimension is smaller than target
-3. Resizes proportionally so the smaller side matches target
-4. Crops from the specified area to exact dimensions
-5. Saves with high quality (95% for jpg/webp)
-
-### Generate Captions
-
-Generate captions for images using Microsoft's Florence-2 model:
-
-```bash
-datasety caption --input ./images --output ./captions --florence-2-large
-```
-
-**Options:**
-
-| Option | Description | Default |
-| -------------------- | ------------------------------- | ------------------------- |
-| `--input`, `-i` | Input directory | (required) |
-| `--output`, `-o` | Output directory for .txt files | (required) |
-| `--device` | `cpu` or `cuda` | `cpu` |
-| `--trigger-word` | Text to prepend to captions | (none) |
-| `--prompt` | Florence-2 task prompt | `<MORE_DETAILED_CAPTION>` |
-| `--florence-2-base` | Use base model (0.23B, faster) | |
-| `--florence-2-large` | Use large model (0.77B, better) | (default) |
-
-**Available prompts:**
-
-- `<CAPTION>` - Brief caption
-- `<DETAILED_CAPTION>` - Detailed caption
-- `<MORE_DETAILED_CAPTION>` - Most detailed caption (default)
-
-**Example:**
-
-```bash
-datasety caption \
-  --input ./dataset \
-  --output ./dataset \
-  --device cuda \
-  --trigger-word "photo of sks person," \
-  --florence-2-large
-```
-
-This creates a `.txt` file for each image with the generated caption.
-
-## Common Workflows
-
-### Prepare a LoRA Training Dataset
-
-```bash
-# 1. Resize images to 1024x1024
-datasety resize -i ./raw -o ./dataset -r 1024x1024 --crop-position center
-
-# 2. Generate captions with trigger word
-datasety caption -i ./dataset -o ./dataset --trigger-word "[trigger]" --device cuda
-```
-
-### Batch Process with Numbered Files
-
-```bash
-datasety resize \
-  -i ./photos \
-  -o ./processed \
-  -r 768x1024 \
-  --output-name-numbers \
-  --crop-position top
-```
-
-## Requirements
-
-- Python 3.10+
-- Pillow (for resize)
-- PyTorch + Transformers (for caption, install with `pip install datasety[caption]`)
-
-## License
-
-MIT
File without changes: the seven +0 -0 entries in the file list above.