datasety 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datasety/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """datasety - CLI tool for dataset preparation: image resizing and captioning."""
2
+
3
+ __version__ = "0.1.0"
datasety/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Allow running as `python -m datasety`."""
2
+
3
+ from datasety.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
datasety/cli.py ADDED
@@ -0,0 +1,388 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ datasety - CLI tool for dataset preparation: image resizing and captioning.
4
+
5
+ Usage:
6
+ datasety resize --input ./in --output ./out --resolution 768x1024 --crop-position top
7
+ datasety caption --input ./in --output ./out --trigger-word "[trigger]" --florence-2-large
8
+ """
9
+
10
+ import argparse
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ from PIL import Image
15
+
16
+
17
def get_image_files(input_dir: Path, formats: list[str]) -> list[Path]:
    """Find all image files in ``input_dir`` matching the given formats.

    Args:
        input_dir: Directory to scan. The scan is not recursive.
        formats: File extensions to accept, e.g. ``["jpg", "png"]``. Matching
            is case-insensitive, so ``photo.Jpg`` is found for ``"jpg"``.
            Surrounding whitespace and a leading dot are ignored and empty
            entries are dropped.

    Returns:
        The matching regular files, sorted. An empty list is returned when
        ``input_dir`` is not a directory or no usable format was given.
    """
    # Local import keeps this block self-contained (fnmatch is stdlib).
    from fnmatch import fnmatchcase

    patterns = []
    for fmt in formats:
        ext = fmt.strip().lstrip(".").lower()
        if ext:
            patterns.append(f"*.{ext}")

    if not patterns or not input_dir.is_dir():
        return []

    matches = []
    for entry in input_dir.iterdir():
        # Compare against the lower-cased name so any mix of upper/lower case
        # in the extension matches, not just all-lower and all-upper.
        lowered = entry.name.lower()
        if not any(fnmatchcase(lowered, pattern) for pattern in patterns):
            continue
        # Directories that merely look like images (e.g. "album.jpg/") would
        # only fail later when opened, so leave them out here.
        if entry.is_file():
            matches.append(entry)
    return sorted(matches)
25
+
26
+
27
def calculate_resize_and_crop(
    orig_width: int, orig_height: int,
    target_width: int, target_height: int,
    crop_position: str
) -> tuple[tuple[int, int], tuple[int, int, int, int]]:
    """
    Calculate resize dimensions and crop box.

    The image is scaled proportionally until it just covers the target
    rectangle (one side equals the target, the other is equal or larger);
    the crop box then selects a target-sized window inside the scaled image.

    Args:
        orig_width: Width of the source image in pixels.
        orig_height: Height of the source image in pixels.
        target_width: Width of the final image in pixels.
        target_height: Height of the final image in pixels.
        crop_position: Where to position the crop window (what to keep).
            'top' keeps top, 'right' keeps right, etc. One of
            'center', 'top', 'bottom', 'left', 'right'.

    Returns:
        (new_width, new_height), (left, top, right, bottom)

    Raises:
        ValueError: If any dimension is not positive or ``crop_position``
            is not one of the supported values.
    """
    if min(orig_width, orig_height, target_width, target_height) <= 0:
        raise ValueError(
            "Image and target dimensions must be positive, got "
            f"{orig_width}x{orig_height} -> {target_width}x{target_height}"
        )

    # Everything below is exact integer arithmetic. Going through floats
    # (``int(h * (tw / w))``) can land one pixel short of the target, e.g.
    # 4900 * (100 / 4900) evaluates to 99.99999999999999 and truncates to 99,
    # which would push the crop box outside the resized image.
    if orig_width * target_height > target_width * orig_height:
        # Image is wider than the target - resize by height, crop width
        new_height = target_height
        new_width = (orig_width * target_height) // orig_height
    else:
        # Image is taller than (or same shape as) the target -
        # resize by width, crop height
        new_width = target_width
        new_height = (orig_height * target_width) // orig_width

    # Pixels left over on each axis after the resize; one of them is always 0.
    spare_w = new_width - target_width
    spare_h = new_height - target_height

    # Top-left corner of the crop window for each supported position.
    offsets = {
        "center": (spare_w // 2, spare_h // 2),
        "top": (spare_w // 2, 0),
        "bottom": (spare_w // 2, spare_h),
        "left": (0, spare_h // 2),
        "right": (spare_w, spare_h // 2),
    }
    if crop_position not in offsets:
        raise ValueError(f"Invalid crop position: {crop_position}")
    left, top = offsets[crop_position]

    right = left + target_width
    bottom = top + target_height

    return (new_width, new_height), (left, top, right, bottom)
77
+
78
+
79
def cmd_resize(args):
    """Execute the resize command.

    Reads every image in ``args.input`` whose extension is listed in
    ``args.input_format``, scales and crops it to ``args.resolution`` using
    ``args.crop_position``, and saves the result to ``args.output`` in
    ``args.output_format``.

    Images with either side smaller than the target are skipped. An input
    whose output file name was already produced by another input in the same
    run (e.g. ``a.jpg`` and ``a.png`` both mapping to ``a.jpg``) is skipped
    as well instead of silently overwriting the earlier result.

    Args:
        args: Parsed command-line namespace with the attributes ``input``,
            ``output``, ``resolution``, ``crop_position``, ``input_format``,
            ``output_format`` and ``output_name_numbers``.

    Exits with status 1 when the input directory or the resolution is
    invalid, and with status 0 when no matching images are found.
    """
    input_dir = Path(args.input)
    output_dir = Path(args.output)

    if not input_dir.exists():
        print(f"Error: Input directory '{input_dir}' does not exist.")
        sys.exit(1)
    if not input_dir.is_dir():
        print(f"Error: Input path '{input_dir}' is not a directory.")
        sys.exit(1)

    # Parse resolution. Zero or negative sizes are rejected here once rather
    # than failing on every single image later.
    try:
        width, height = map(int, args.resolution.lower().split("x"))
        if width <= 0 or height <= 0:
            raise ValueError("resolution must be positive")
    except ValueError:
        print(f"Error: Invalid resolution '{args.resolution}'. Use WIDTHxHEIGHT (e.g., 768x1024)")
        sys.exit(1)

    # Only touch the filesystem once the arguments are known to be valid.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Parse input formats
    formats = [f.strip() for f in args.input_format.split(",")]

    # Get image files
    image_files = get_image_files(input_dir, formats)

    if not image_files:
        print(f"No images found in '{input_dir}' with formats: {formats}")
        sys.exit(0)

    print(f"Found {len(image_files)} images")
    print(f"Target resolution: {width}x{height}")
    print(f"Crop position: {args.crop_position}")
    print(f"Output format: {args.output_format}")
    print("-" * 50)

    # Save options depend only on the output format, so build them once.
    output_format = args.output_format.lower()
    save_kwargs = {}
    if output_format in ("jpg", "jpeg"):
        save_kwargs["quality"] = 95
        save_kwargs["optimize"] = True
    elif output_format == "webp":
        save_kwargs["quality"] = 95
    elif output_format == "png":
        save_kwargs["optimize"] = True

    processed = 0
    skipped = 0
    written = set()  # output file names produced so far in this run

    for img_path in image_files:
        try:
            with Image.open(img_path) as source:
                img = source.convert("RGB")
                orig_w, orig_h = img.size

                # Skip if image is too small
                if orig_w < width or orig_h < height:
                    print(f"[SKIP] {img_path.name}: {orig_w}x{orig_h} < {width}x{height}")
                    skipped += 1
                    continue

                # Determine output filename
                if args.output_name_numbers:
                    out_name = f"{processed + 1}.{args.output_format}"
                else:
                    out_name = f"{img_path.stem}.{args.output_format}"

                # Inputs that differ only in extension map to the same
                # output name; keep the first result instead of overwriting.
                if out_name in written:
                    print(f"[SKIP] {img_path.name}: output '{out_name}' was already written from another input")
                    skipped += 1
                    continue

                # Calculate resize and crop
                (new_w, new_h), crop_box = calculate_resize_and_crop(
                    orig_w, orig_h, width, height, args.crop_position
                )

                # Resize to cover the target, then crop to the exact size
                img_resized = img.resize((new_w, new_h), Image.LANCZOS)
                img_cropped = img_resized.crop(crop_box)

                out_path = output_dir / out_name
                img_cropped.save(out_path, **save_kwargs)
                written.add(out_name)

                print(f"[OK] {img_path.name} ({orig_w}x{orig_h}) -> {out_name} ({width}x{height})")
                processed += 1

        except Exception as e:
            # Best effort: one unreadable image must not abort the whole batch.
            print(f"[ERROR] {img_path.name}: {e}")
            skipped += 1

    print("-" * 50)
    print(f"Done! Processed: {processed}, Skipped: {skipped}")
168
+
169
+
170
def cmd_caption(args):
    """Execute the caption command.

    Generates one caption per image in ``args.input`` with a Florence-2 model
    and writes it to ``<image stem>.txt`` in ``args.output``. When
    ``args.trigger_word`` is set it is prepended to every caption.

    Args:
        args: Parsed command-line namespace with the attributes ``input``,
            ``output``, ``device``, ``trigger_word``, ``prompt`` and
            ``florence_2_base``.

    Exits with status 1 when the captioning packages are missing, the input
    directory is invalid or the model cannot be loaded, and with status 0
    when no images are found.
    """
    # Lazy import for faster CLI startup when not using caption
    try:
        import torch
        from transformers import AutoModelForCausalLM, AutoProcessor
    except ImportError:
        print("Error: Required packages not installed.")
        # The 'caption' extra declared in the package metadata also pulls in
        # einops and timm and pins a compatible transformers range, which a
        # bare "pip install torch transformers" does not.
        print("Run: pip install 'datasety[caption]'")
        sys.exit(1)

    input_dir = Path(args.input)
    output_dir = Path(args.output)

    if not input_dir.exists():
        print(f"Error: Input directory '{input_dir}' does not exist.")
        sys.exit(1)
    if not input_dir.is_dir():
        print(f"Error: Input path '{input_dir}' is not a directory.")
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)

    # Find images (common formats) before loading the model, so an empty
    # directory does not cost a model load.
    formats = ["jpg", "jpeg", "png", "webp", "bmp", "tiff"]
    image_files = get_image_files(input_dir, formats)

    if not image_files:
        print(f"No images found in '{input_dir}'")
        sys.exit(0)

    # Determine model (base flag takes priority since large is default)
    if args.florence_2_base:
        model_name = "microsoft/Florence-2-base"
    else:
        model_name = "microsoft/Florence-2-large"

    # Determine device
    if args.device == "cuda" and not torch.cuda.is_available():
        print("Warning: CUDA not available, falling back to CPU")
        device = "cpu"
    else:
        device = args.device

    # Half precision only on CUDA; the CPU path stays in float32.
    torch_dtype = torch.float16 if device == "cuda" else torch.float32

    print(f"Loading model: {model_name}")
    print(f"Device: {device}")

    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch_dtype,
            trust_remote_code=True
        ).to(device).eval()
        processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
    except Exception as e:
        print(f"Error loading model: {e}")
        sys.exit(1)

    print(f"Found {len(image_files)} images")
    print(f"Prompt: {args.prompt}")
    if args.trigger_word:
        print(f"Trigger word: {args.trigger_word}")
    print("-" * 50)

    processed = 0

    for img_path in image_files:
        try:
            with Image.open(img_path) as source:
                img = source.convert("RGB")

                inputs = processor(
                    text=args.prompt,
                    images=img,
                    return_tensors="pt"
                ).to(device, torch_dtype)

                with torch.no_grad():
                    generated_ids = model.generate(
                        input_ids=inputs["input_ids"],
                        pixel_values=inputs["pixel_values"],
                        max_new_tokens=1024,
                        num_beams=3,
                        do_sample=False
                    )

                generated_text = processor.batch_decode(
                    generated_ids, skip_special_tokens=False
                )[0]

                parsed = processor.post_process_generation(
                    generated_text,
                    task=args.prompt,
                    image_size=(img.width, img.height)
                )

                caption = parsed.get(args.prompt, "")

                # Prepend trigger word if specified
                if args.trigger_word:
                    caption = f"{args.trigger_word} {caption}"

                # Save caption. UTF-8 is set explicitly so non-ASCII text
                # does not depend on the platform's default encoding.
                caption_path = output_dir / f"{img_path.stem}.txt"
                caption_path.write_text(caption.strip(), encoding="utf-8")

                print(f"[OK] {img_path.name}")
                print(f"     {caption[:100]}{'...' if len(caption) > 100 else ''}")
                processed += 1

        except Exception as e:
            # Best effort: a single failing image must not abort the batch.
            print(f"[ERROR] {img_path.name}: {e}")

    print("-" * 50)
    print(f"Done! Processed: {processed} images")
284
+
285
+
286
def main():
    """Parse the command line and dispatch to the selected sub-command.

    Two sub-commands are registered, ``resize`` and ``caption``; each one
    stores its handler in ``func`` on the parsed namespace, and that handler
    is called with the namespace.
    """
    root = argparse.ArgumentParser(
        prog="datasety",
        description="CLI tool for dataset preparation: image resizing and captioning.",
    )
    commands = root.add_subparsers(dest="command", required=True)

    # === RESIZE command ===
    resize = commands.add_parser(
        "resize", help="Resize and crop images to target resolution"
    )
    resize_options = (
        (("--input", "-i"), {
            "required": True,
            "help": "Input directory containing images",
        }),
        (("--output", "-o"), {
            "required": True,
            "help": "Output directory for processed images",
        }),
        (("--resolution", "-r"), {
            "required": True,
            "help": "Target resolution as WIDTHxHEIGHT (e.g., 768x1024)",
        }),
        (("--crop-position",), {
            "choices": ["top", "center", "bottom", "left", "right"],
            "default": "center",
            "help": "Position to keep when cropping (default: center)",
        }),
        (("--input-format",), {
            "default": "jpg,jpeg,png,webp",
            "help": "Comma-separated input formats (default: jpg,jpeg,png,webp)",
        }),
        (("--output-format",), {
            "choices": ["jpg", "png", "webp"],
            "default": "jpg",
            "help": "Output image format (default: jpg)",
        }),
        (("--output-name-numbers",), {
            "action": "store_true",
            "help": "Rename output files to sequential numbers (1.jpg, 2.jpg, ...)",
        }),
    )
    for flags, settings in resize_options:
        resize.add_argument(*flags, **settings)
    resize.set_defaults(func=cmd_resize)

    # === CAPTION command ===
    caption = commands.add_parser(
        "caption", help="Generate captions for images using Florence-2"
    )
    caption_options = (
        (("--input", "-i"), {
            "required": True,
            "help": "Input directory containing images",
        }),
        (("--output", "-o"), {
            "required": True,
            "help": "Output directory for caption text files",
        }),
        (("--device",), {
            "choices": ["cpu", "cuda"],
            "default": "cpu",
            "help": "Device to run model on (default: cpu)",
        }),
        (("--trigger-word",), {
            "default": "",
            "help": "Text to prepend to each caption (e.g., '[trigger]' or 'photo,')",
        }),
        (("--prompt",), {
            "default": "<MORE_DETAILED_CAPTION>",
            "help": "Florence-2 prompt (default: <MORE_DETAILED_CAPTION>)",
        }),
    )
    for flags, settings in caption_options:
        caption.add_argument(*flags, **settings)

    # The two model flags cannot be combined on one command line.
    model_choice = caption.add_mutually_exclusive_group()
    model_flags = (
        ("--florence-2-base", "Use Florence-2-base model (0.23B params, faster)"),
        ("--florence-2-large", "Use Florence-2-large model (0.77B params, more accurate) [default]"),
    )
    for flag, description in model_flags:
        model_choice.add_argument(flag, action="store_true", help=description)
    caption.set_defaults(func=cmd_caption)

    # Parse and execute
    namespace = root.parse_args()
    namespace.func(namespace)
385
+
386
+
387
+ if __name__ == "__main__":
388
+ main()
datasety/py.typed ADDED
File without changes
@@ -0,0 +1,164 @@
1
+ Metadata-Version: 2.4
2
+ Name: datasety
3
+ Version: 0.1.0
4
+ Summary: CLI tool for dataset preparation: image resizing and captioning with Florence-2
5
+ Project-URL: Homepage, https://github.com/kontextox/datasety
6
+ Project-URL: Repository, https://github.com/kontextox/datasety
7
+ Project-URL: Issues, https://github.com/kontextox/datasety/issues
8
+ Author: kontextox
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: captioning,cli,dataset,florence-2,image-processing,machine-learning
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: pillow>=9.0.0
26
+ Provides-Extra: caption
27
+ Requires-Dist: einops; extra == 'caption'
28
+ Requires-Dist: timm; extra == 'caption'
29
+ Requires-Dist: torch>=2.0.0; extra == 'caption'
30
+ Requires-Dist: transformers<4.46.0,>=4.38.0; extra == 'caption'
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
33
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # datasety
37
+
38
+ CLI tool for dataset preparation: image resizing and captioning with Florence-2.
39
+
40
+ ## Installation
41
+
42
+ ```bash
43
+ pip install datasety
44
+ ```
45
+
46
+ For captioning support (requires PyTorch and Transformers):
47
+
48
+ ```bash
49
+ pip install datasety[caption]
50
+ ```
51
+
52
+ ## Usage
53
+
54
+ ### Resize Images
55
+
56
+ Resize and crop images to a target resolution:
57
+
58
+ ```bash
59
+ datasety resize --input ./images --output ./resized --resolution 768x1024
60
+ ```
61
+
62
+ **Options:**
63
+
64
+ | Option | Description | Default |
65
+ | ----------------------- | --------------------------------------------------------- | ------------------- |
66
+ | `--input`, `-i` | Input directory | (required) |
67
+ | `--output`, `-o` | Output directory | (required) |
68
+ | `--resolution`, `-r` | Target resolution (WIDTHxHEIGHT) | (required) |
69
+ | `--crop-position` | Crop position: `top`, `center`, `bottom`, `left`, `right` | `center` |
70
+ | `--input-format` | Comma-separated formats | `jpg,jpeg,png,webp` |
71
+ | `--output-format` | Output format: `jpg`, `png`, `webp` | `jpg` |
72
+ | `--output-name-numbers` | Rename files to 1.jpg, 2.jpg, ... | `false` |
73
+
74
+ **Example:**
75
+
76
+ ```bash
77
+ datasety resize \
78
+ --input ./raw_photos \
79
+ --output ./dataset \
80
+ --resolution 1024x1024 \
81
+ --crop-position top \
82
+ --output-format jpg \
83
+ --output-name-numbers
84
+ ```
85
+
86
+ **How it works:**
87
+
88
+ 1. Finds all images matching input formats
89
+ 2. Skips images where either dimension is smaller than target
90
+ 3. Resizes proportionally so the smaller side matches target
91
+ 4. Crops from the specified area to exact dimensions
92
+ 5. Saves with high quality (95% for jpg/webp)
93
+
94
+ ### Generate Captions
95
+
96
+ Generate captions for images using Microsoft's Florence-2 model:
97
+
98
+ ```bash
99
+ datasety caption --input ./images --output ./captions --florence-2-large
100
+ ```
101
+
102
+ **Options:**
103
+
104
+ | Option | Description | Default |
105
+ | -------------------- | ------------------------------- | ------------------------- |
106
+ | `--input`, `-i` | Input directory | (required) |
107
+ | `--output`, `-o` | Output directory for .txt files | (required) |
108
+ | `--device` | `cpu` or `cuda` | `cpu` |
109
+ | `--trigger-word` | Text to prepend to captions | (none) |
110
+ | `--prompt` | Florence-2 task prompt | `<MORE_DETAILED_CAPTION>` |
111
+ | `--florence-2-base` | Use base model (0.23B, faster) | |
112
+ | `--florence-2-large` | Use large model (0.77B, better) | (default) |
113
+
114
+ **Available prompts:**
115
+
116
+ - `<CAPTION>` - Brief caption
117
+ - `<DETAILED_CAPTION>` - Detailed caption
118
+ - `<MORE_DETAILED_CAPTION>` - Most detailed caption (default)
119
+
120
+ **Example:**
121
+
122
+ ```bash
123
+ datasety caption \
124
+ --input ./dataset \
125
+ --output ./dataset \
126
+ --device cuda \
127
+ --trigger-word "photo of sks person," \
128
+ --florence-2-large
129
+ ```
130
+
131
+ This creates a `.txt` file for each image with the generated caption.
132
+
133
+ ## Common Workflows
134
+
135
+ ### Prepare a LoRA Training Dataset
136
+
137
+ ```bash
138
+ # 1. Resize images to 1024x1024
139
+ datasety resize -i ./raw -o ./dataset -r 1024x1024 --crop-position center
140
+
141
+ # 2. Generate captions with trigger word
142
+ datasety caption -i ./dataset -o ./dataset --trigger-word "[trigger]" --device cuda
143
+ ```
144
+
145
+ ### Batch Process with Numbered Files
146
+
147
+ ```bash
148
+ datasety resize \
149
+ -i ./photos \
150
+ -o ./processed \
151
+ -r 768x1024 \
152
+ --output-name-numbers \
153
+ --crop-position top
154
+ ```
155
+
156
+ ## Requirements
157
+
158
+ - Python 3.10+
159
+ - Pillow (for resize)
160
+ - PyTorch + Transformers (for caption, install with `pip install datasety[caption]`)
161
+
162
+ ## License
163
+
164
+ MIT
@@ -0,0 +1,9 @@
1
+ datasety/__init__.py,sha256=LeNjBou33I9yomQhLmpfZWXEJeW2Io9ZvHnZmYXltQ4,105
2
+ datasety/__main__.py,sha256=rhdW0XGNAX-GC5IqU62ulCVccf3kgelddrGghYMZzn4,115
3
+ datasety/cli.py,sha256=K5doc2QZBLKDS3MjVkTsGe4giz31eo6xMCx8G4wdBEM,12399
4
+ datasety/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ datasety-0.1.0.dist-info/METADATA,sha256=u-9NanmOw3QZ3pWI1VNmrUG8CBAjFwyCnVVY_I6TpSE,5429
6
+ datasety-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
7
+ datasety-0.1.0.dist-info/entry_points.txt,sha256=oWbVHN1_qyuWezjxuhsAGAqqwmivRUtG2jCYKm8bVnE,47
8
+ datasety-0.1.0.dist-info/licenses/LICENSE,sha256=dUhuoK-TCRQMpuLEAdfme-qPSJI0TlcH9jlNxeg9_EQ,1056
9
+ datasety-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ datasety = datasety.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.