datasety 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datasety/__init__.py +3 -0
- datasety/__main__.py +6 -0
- datasety/cli.py +388 -0
- datasety/py.typed +0 -0
- datasety-0.1.0.dist-info/METADATA +164 -0
- datasety-0.1.0.dist-info/RECORD +9 -0
- datasety-0.1.0.dist-info/WHEEL +4 -0
- datasety-0.1.0.dist-info/entry_points.txt +2 -0
- datasety-0.1.0.dist-info/licenses/LICENSE +21 -0
datasety/__init__.py
ADDED
datasety/__main__.py
ADDED
datasety/cli.py
ADDED
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
datasety - CLI tool for dataset preparation: image resizing and captioning.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
datasety resize --input ./in --output ./out --resolution 768x1024 --crop-position top
|
|
7
|
+
datasety caption --input ./in --output ./out --trigger-word "[trigger]" --florence-2-large
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from PIL import Image
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_image_files(input_dir: Path, formats: list[str]) -> list[Path]:
    """Find all images in *input_dir* whose extension matches *formats*.

    Matching is case-insensitive (handles ``.jpg``, ``.JPG``, and mixed
    case like ``.Jpg``, which the old per-case globbing missed) and does
    not recurse into subdirectories.

    Args:
        input_dir: Directory to scan (non-recursive).
        formats: Extensions without the leading dot, e.g. ``["jpg", "png"]``.

    Returns:
        Sorted list of unique matching file paths.
    """
    # Normalize the wanted extensions once, then compare suffixes
    # case-insensitively instead of globbing each case variant.
    wanted = {f".{fmt.lower().strip()}" for fmt in formats}
    return sorted(
        p for p in input_dir.iterdir()
        # Skip dotfiles to match the old glob behavior ('*' never matched them).
        if p.is_file() and not p.name.startswith(".") and p.suffix.lower() in wanted
    )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def calculate_resize_and_crop(
    orig_width: int, orig_height: int,
    target_width: int, target_height: int,
    crop_position: str,
) -> tuple[tuple[int, int], tuple[int, int, int, int]]:
    """
    Calculate resize dimensions and crop box.

    The image is scaled proportionally so it fully covers the target
    size, then a target-sized window is cropped from it.

    Args:
        orig_width: Original image width in pixels.
        orig_height: Original image height in pixels.
        target_width: Desired output width in pixels.
        target_height: Desired output height in pixels.
        crop_position: Where to position the crop window (what to keep).
            One of 'top', 'center', 'bottom', 'left', 'right'.

    Returns:
        (new_width, new_height), (left, top, right, bottom)

    Raises:
        ValueError: If crop_position is not a recognized value.
    """
    target_ratio = target_width / target_height
    orig_ratio = orig_width / orig_height

    # Use round() + clamp rather than int() truncation: with equal (or
    # nearly equal) aspect ratios, floor could land one pixel below the
    # target, producing a negative crop offset.
    if orig_ratio > target_ratio:
        # Image is wider than the target - match heights, crop excess width.
        new_height = target_height
        new_width = max(target_width, round(orig_width * target_height / orig_height))
    else:
        # Image is taller (or same ratio) - match widths, crop excess height.
        new_width = target_width
        new_height = max(target_height, round(orig_height * target_width / orig_width))

    extra_w = new_width - target_width
    extra_h = new_height - target_height

    # Position the crop window over the region to keep.
    if crop_position == "center":
        left, top = extra_w // 2, extra_h // 2
    elif crop_position == "top":
        left, top = extra_w // 2, 0
    elif crop_position == "bottom":
        left, top = extra_w // 2, extra_h
    elif crop_position == "left":
        left, top = 0, extra_h // 2
    elif crop_position == "right":
        left, top = extra_w, extra_h // 2
    else:
        raise ValueError(f"Invalid crop position: {crop_position}")

    return (new_width, new_height), (left, top, left + target_width, top + target_height)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _save_kwargs_for(output_format: str) -> dict:
    """Return Pillow ``save()`` keyword options for *output_format*."""
    fmt = output_format.lower()
    if fmt in ("jpg", "jpeg"):
        return {"quality": 95, "optimize": True}
    if fmt == "webp":
        return {"quality": 95}
    if fmt == "png":
        return {"optimize": True}
    return {}


def cmd_resize(args):
    """Execute the resize command: scale and crop every image to the target size.

    Reads options from the parsed argparse namespace: ``input``, ``output``,
    ``resolution`` (WIDTHxHEIGHT), ``crop_position``, ``input_format``,
    ``output_format``, ``output_name_numbers``.  Images smaller than the
    target in either dimension are skipped rather than upscaled.  Exits
    with status 1 on invalid input, 0 otherwise.
    """
    input_dir = Path(args.input)
    output_dir = Path(args.output)

    if not input_dir.exists():
        # Diagnostics go to stderr so stdout stays usable in pipelines.
        print(f"Error: Input directory '{input_dir}' does not exist.", file=sys.stderr)
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)

    # Parse "WIDTHxHEIGHT"; reject malformed strings and non-positive sizes.
    try:
        width, height = map(int, args.resolution.lower().split("x"))
        if width <= 0 or height <= 0:
            raise ValueError
    except ValueError:
        print(f"Error: Invalid resolution '{args.resolution}'. Use WIDTHxHEIGHT (e.g., 768x1024)", file=sys.stderr)
        sys.exit(1)

    # Parse input formats and collect candidate files.
    formats = [f.strip() for f in args.input_format.split(",")]
    image_files = get_image_files(input_dir, formats)

    if not image_files:
        print(f"No images found in '{input_dir}' with formats: {formats}")
        sys.exit(0)

    print(f"Found {len(image_files)} images")
    print(f"Target resolution: {width}x{height}")
    print(f"Crop position: {args.crop_position}")
    print(f"Output format: {args.output_format}")
    print("-" * 50)

    processed = 0
    skipped = 0

    for img_path in image_files:
        try:
            with Image.open(img_path) as img:
                img = img.convert("RGB")
                orig_w, orig_h = img.size

                # Never upscale: skip images smaller than the target.
                if orig_w < width or orig_h < height:
                    print(f"[SKIP] {img_path.name}: {orig_w}x{orig_h} < {width}x{height}")
                    skipped += 1
                    continue

                (new_w, new_h), crop_box = calculate_resize_and_crop(
                    orig_w, orig_h, width, height, args.crop_position
                )

                # Resize proportionally, then crop to the exact target box.
                img_cropped = img.resize((new_w, new_h), Image.LANCZOS).crop(crop_box)

                # Sequential numbering counts only successfully processed files.
                if args.output_name_numbers:
                    out_name = f"{processed + 1}.{args.output_format}"
                else:
                    out_name = f"{img_path.stem}.{args.output_format}"

                out_path = output_dir / out_name
                img_cropped.save(out_path, **_save_kwargs_for(args.output_format))

                print(f"[OK] {img_path.name} ({orig_w}x{orig_h}) -> {out_name} ({width}x{height})")
                processed += 1

        except Exception as e:
            # Per-image failures are reported and counted, not fatal.
            print(f"[ERROR] {img_path.name}: {e}", file=sys.stderr)
            skipped += 1

    print("-" * 50)
    print(f"Done! Processed: {processed}, Skipped: {skipped}")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def cmd_caption(args):
    """Execute the caption command: caption every image with Florence-2.

    Writes one ``<stem>.txt`` file per image (UTF-8) into the output
    directory, optionally prefixed with ``args.trigger_word``.  Reads
    options from the parsed argparse namespace: ``input``, ``output``,
    ``device``, ``trigger_word``, ``prompt``, ``florence_2_base``,
    ``florence_2_large``.  Exits with status 1 when dependencies or the
    model cannot be loaded, or the input directory is missing.
    """
    # Lazy import so the CLI starts fast when captioning isn't used.
    try:
        import torch
        from transformers import AutoModelForCausalLM, AutoProcessor
    except ImportError:
        print("Error: Required packages not installed.", file=sys.stderr)
        print("Run: pip install torch transformers", file=sys.stderr)
        sys.exit(1)

    input_dir = Path(args.input)
    output_dir = Path(args.output)

    if not input_dir.exists():
        print(f"Error: Input directory '{input_dir}' does not exist.", file=sys.stderr)
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)

    # Base flag wins because large is the implicit default.
    if args.florence_2_base:
        model_name = "microsoft/Florence-2-base"
    else:
        model_name = "microsoft/Florence-2-large"

    # Fall back gracefully when CUDA was requested but isn't present.
    if args.device == "cuda" and not torch.cuda.is_available():
        print("Warning: CUDA not available, falling back to CPU", file=sys.stderr)
        device = "cpu"
    else:
        device = args.device

    # fp16 only on GPU; CPU inference needs fp32.
    torch_dtype = torch.float16 if device == "cuda" else torch.float32

    print(f"Loading model: {model_name}")
    print(f"Device: {device}")

    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch_dtype,
            trust_remote_code=True,
        ).to(device).eval()
        processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
    except Exception as e:
        print(f"Error loading model: {e}", file=sys.stderr)
        sys.exit(1)

    # Find images (common formats).
    formats = ["jpg", "jpeg", "png", "webp", "bmp", "tiff"]
    image_files = get_image_files(input_dir, formats)

    if not image_files:
        print(f"No images found in '{input_dir}'")
        sys.exit(0)

    print(f"Found {len(image_files)} images")
    print(f"Prompt: {args.prompt}")
    if args.trigger_word:
        print(f"Trigger word: {args.trigger_word}")
    print("-" * 50)

    processed = 0

    for img_path in image_files:
        try:
            with Image.open(img_path) as img:
                img = img.convert("RGB")

                inputs = processor(
                    text=args.prompt,
                    images=img,
                    return_tensors="pt",
                ).to(device, torch_dtype)

                with torch.no_grad():
                    generated_ids = model.generate(
                        input_ids=inputs["input_ids"],
                        pixel_values=inputs["pixel_values"],
                        max_new_tokens=1024,
                        num_beams=3,
                        do_sample=False,
                    )

                # Keep special tokens: post_process_generation relies on
                # them to recognize the task markers.
                generated_text = processor.batch_decode(
                    generated_ids, skip_special_tokens=False
                )[0]

                parsed = processor.post_process_generation(
                    generated_text,
                    task=args.prompt,
                    image_size=(img.width, img.height),
                )

                caption = parsed.get(args.prompt, "")

                # Prepend trigger word if specified.
                if args.trigger_word:
                    caption = f"{args.trigger_word} {caption}"

                # Explicit UTF-8 so non-ASCII captions don't fail on
                # platforms with a legacy default encoding (e.g. Windows).
                caption_path = output_dir / f"{img_path.stem}.txt"
                caption_path.write_text(caption.strip(), encoding="utf-8")

                print(f"[OK] {img_path.name}")
                print(f"  {caption[:100]}{'...' if len(caption) > 100 else ''}")
                processed += 1

        except Exception as e:
            # Per-image failures are reported, not fatal.
            print(f"[ERROR] {img_path.name}: {e}", file=sys.stderr)

    print("-" * 50)
    print(f"Done! Processed: {processed} images")
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def main():
    """Entry point: build the CLI parser and dispatch to the chosen subcommand."""
    parser = argparse.ArgumentParser(
        prog="datasety",
        description="CLI tool for dataset preparation: image resizing and captioning.",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    # --- resize subcommand ---
    resize = commands.add_parser(
        "resize", help="Resize and crop images to target resolution"
    )
    resize.add_argument(
        "--input", "-i", required=True,
        help="Input directory containing images",
    )
    resize.add_argument(
        "--output", "-o", required=True,
        help="Output directory for processed images",
    )
    resize.add_argument(
        "--resolution", "-r", required=True,
        help="Target resolution as WIDTHxHEIGHT (e.g., 768x1024)",
    )
    resize.add_argument(
        "--crop-position",
        choices=["top", "center", "bottom", "left", "right"], default="center",
        help="Position to keep when cropping (default: center)",
    )
    resize.add_argument(
        "--input-format", default="jpg,jpeg,png,webp",
        help="Comma-separated input formats (default: jpg,jpeg,png,webp)",
    )
    resize.add_argument(
        "--output-format", choices=["jpg", "png", "webp"], default="jpg",
        help="Output image format (default: jpg)",
    )
    resize.add_argument(
        "--output-name-numbers", action="store_true",
        help="Rename output files to sequential numbers (1.jpg, 2.jpg, ...)",
    )
    resize.set_defaults(func=cmd_resize)

    # --- caption subcommand ---
    caption = commands.add_parser(
        "caption", help="Generate captions for images using Florence-2"
    )
    caption.add_argument(
        "--input", "-i", required=True,
        help="Input directory containing images",
    )
    caption.add_argument(
        "--output", "-o", required=True,
        help="Output directory for caption text files",
    )
    caption.add_argument(
        "--device", choices=["cpu", "cuda"], default="cpu",
        help="Device to run model on (default: cpu)",
    )
    caption.add_argument(
        "--trigger-word", default="",
        help="Text to prepend to each caption (e.g., '[trigger]' or 'photo,')",
    )
    caption.add_argument(
        "--prompt", default="<MORE_DETAILED_CAPTION>",
        help="Florence-2 prompt (default: <MORE_DETAILED_CAPTION>)",
    )
    # Model choice flags are mutually exclusive; large is the default.
    model = caption.add_mutually_exclusive_group()
    model.add_argument(
        "--florence-2-base", action="store_true",
        help="Use Florence-2-base model (0.23B params, faster)",
    )
    model.add_argument(
        "--florence-2-large", action="store_true",
        help="Use Florence-2-large model (0.77B params, more accurate) [default]",
    )
    caption.set_defaults(func=cmd_caption)

    # Parse argv and hand off to the selected command handler.
    args = parser.parse_args()
    args.func(args)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
# Allow running the module directly (e.g. `python datasety/cli.py`).
if __name__ == "__main__":
    main()
|
datasety/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datasety
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI tool for dataset preparation: image resizing and captioning with Florence-2
|
|
5
|
+
Project-URL: Homepage, https://github.com/kontextox/datasety
|
|
6
|
+
Project-URL: Repository, https://github.com/kontextox/datasety
|
|
7
|
+
Project-URL: Issues, https://github.com/kontextox/datasety/issues
|
|
8
|
+
Author: kontextox
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: captioning,cli,dataset,florence-2,image-processing,machine-learning
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Image Processing
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: pillow>=9.0.0
|
|
26
|
+
Provides-Extra: caption
|
|
27
|
+
Requires-Dist: einops; extra == 'caption'
|
|
28
|
+
Requires-Dist: timm; extra == 'caption'
|
|
29
|
+
Requires-Dist: torch>=2.0.0; extra == 'caption'
|
|
30
|
+
Requires-Dist: transformers<4.46.0,>=4.38.0; extra == 'caption'
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# datasety
|
|
37
|
+
|
|
38
|
+
CLI tool for dataset preparation: image resizing and captioning with Florence-2.
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install datasety
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
For captioning support (requires PyTorch and Transformers):
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install datasety[caption]
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Usage
|
|
53
|
+
|
|
54
|
+
### Resize Images
|
|
55
|
+
|
|
56
|
+
Resize and crop images to a target resolution:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
datasety resize --input ./images --output ./resized --resolution 768x1024
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**Options:**
|
|
63
|
+
|
|
64
|
+
| Option | Description | Default |
|
|
65
|
+
| ----------------------- | --------------------------------------------------------- | ------------------- |
|
|
66
|
+
| `--input`, `-i` | Input directory | (required) |
|
|
67
|
+
| `--output`, `-o` | Output directory | (required) |
|
|
68
|
+
| `--resolution`, `-r` | Target resolution (WIDTHxHEIGHT) | (required) |
|
|
69
|
+
| `--crop-position` | Crop position: `top`, `center`, `bottom`, `left`, `right` | `center` |
|
|
70
|
+
| `--input-format` | Comma-separated formats | `jpg,jpeg,png,webp` |
|
|
71
|
+
| `--output-format` | Output format: `jpg`, `png`, `webp` | `jpg` |
|
|
72
|
+
| `--output-name-numbers` | Rename files to 1.jpg, 2.jpg, ... | `false` |
|
|
73
|
+
|
|
74
|
+
**Example:**
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
datasety resize \
|
|
78
|
+
--input ./raw_photos \
|
|
79
|
+
--output ./dataset \
|
|
80
|
+
--resolution 1024x1024 \
|
|
81
|
+
--crop-position top \
|
|
82
|
+
--output-format jpg \
|
|
83
|
+
--output-name-numbers
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**How it works:**
|
|
87
|
+
|
|
88
|
+
1. Finds all images matching input formats
|
|
89
|
+
2. Skips images where either dimension is smaller than target
|
|
90
|
+
3. Resizes proportionally so the image fully covers the target, with one side matching it exactly
|
|
91
|
+
4. Crops from the specified area to exact dimensions
|
|
92
|
+
5. Saves with high quality (95% for jpg/webp)
|
|
93
|
+
|
|
94
|
+
### Generate Captions
|
|
95
|
+
|
|
96
|
+
Generate captions for images using Microsoft's Florence-2 model:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
datasety caption --input ./images --output ./captions --florence-2-large
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
**Options:**
|
|
103
|
+
|
|
104
|
+
| Option | Description | Default |
|
|
105
|
+
| -------------------- | ------------------------------- | ------------------------- |
|
|
106
|
+
| `--input`, `-i` | Input directory | (required) |
|
|
107
|
+
| `--output`, `-o` | Output directory for .txt files | (required) |
|
|
108
|
+
| `--device` | `cpu` or `cuda` | `cpu` |
|
|
109
|
+
| `--trigger-word` | Text to prepend to captions | (none) |
|
|
110
|
+
| `--prompt` | Florence-2 task prompt | `<MORE_DETAILED_CAPTION>` |
|
|
111
|
+
| `--florence-2-base` | Use base model (0.23B, faster) | |
|
|
112
|
+
| `--florence-2-large` | Use large model (0.77B, better) | (default) |
|
|
113
|
+
|
|
114
|
+
**Available prompts:**
|
|
115
|
+
|
|
116
|
+
- `<CAPTION>` - Brief caption
|
|
117
|
+
- `<DETAILED_CAPTION>` - Detailed caption
|
|
118
|
+
- `<MORE_DETAILED_CAPTION>` - Most detailed caption (default)
|
|
119
|
+
|
|
120
|
+
**Example:**
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
datasety caption \
|
|
124
|
+
--input ./dataset \
|
|
125
|
+
--output ./dataset \
|
|
126
|
+
--device cuda \
|
|
127
|
+
--trigger-word "photo of sks person," \
|
|
128
|
+
--florence-2-large
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
This creates a `.txt` file for each image with the generated caption.
|
|
132
|
+
|
|
133
|
+
## Common Workflows
|
|
134
|
+
|
|
135
|
+
### Prepare a LoRA Training Dataset
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
# 1. Resize images to 1024x1024
|
|
139
|
+
datasety resize -i ./raw -o ./dataset -r 1024x1024 --crop-position center
|
|
140
|
+
|
|
141
|
+
# 2. Generate captions with trigger word
|
|
142
|
+
datasety caption -i ./dataset -o ./dataset --trigger-word "[trigger]" --device cuda
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Batch Process with Numbered Files
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
datasety resize \
|
|
149
|
+
-i ./photos \
|
|
150
|
+
-o ./processed \
|
|
151
|
+
-r 768x1024 \
|
|
152
|
+
--output-name-numbers \
|
|
153
|
+
--crop-position top
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Requirements
|
|
157
|
+
|
|
158
|
+
- Python 3.10+
|
|
159
|
+
- Pillow (for resize)
|
|
160
|
+
- PyTorch + Transformers (for caption, install with `pip install datasety[caption]`)
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
datasety/__init__.py,sha256=LeNjBou33I9yomQhLmpfZWXEJeW2Io9ZvHnZmYXltQ4,105
|
|
2
|
+
datasety/__main__.py,sha256=rhdW0XGNAX-GC5IqU62ulCVccf3kgelddrGghYMZzn4,115
|
|
3
|
+
datasety/cli.py,sha256=K5doc2QZBLKDS3MjVkTsGe4giz31eo6xMCx8G4wdBEM,12399
|
|
4
|
+
datasety/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
datasety-0.1.0.dist-info/METADATA,sha256=u-9NanmOw3QZ3pWI1VNmrUG8CBAjFwyCnVVY_I6TpSE,5429
|
|
6
|
+
datasety-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
7
|
+
datasety-0.1.0.dist-info/entry_points.txt,sha256=oWbVHN1_qyuWezjxuhsAGAqqwmivRUtG2jCYKm8bVnE,47
|
|
8
|
+
datasety-0.1.0.dist-info/licenses/LICENSE,sha256=dUhuoK-TCRQMpuLEAdfme-qPSJI0TlcH9jlNxeg9_EQ,1056
|
|
9
|
+
datasety-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|