PyPI - datasety - Versions diffs - 0.1.0__tar.gz - Mend

datasety 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

datasety-0.1.0/.github/workflows/publish.yml +45 -0
datasety-0.1.0/.github/workflows/test.yml +46 -0
datasety-0.1.0/.gitignore +36 -0
datasety-0.1.0/LICENSE +21 -0
datasety-0.1.0/PKG-INFO +164 -0
datasety-0.1.0/README.md +129 -0
datasety-0.1.0/pyproject.toml +72 -0
datasety-0.1.0/src/datasety/__init__.py +3 -0
datasety-0.1.0/src/datasety/__main__.py +6 -0
datasety-0.1.0/src/datasety/cli.py +388 -0
datasety-0.1.0/src/datasety/py.typed +0 -0
datasety-0.1.0/tests/__init__.py +0 -0
datasety-0.1.0/tests/test_resize.py +84 -0

datasety-0.1.0/.github/workflows/publish.yml ADDED Viewed

@@ -0,0 +1,45 @@
+# Builds the distribution in one job and uploads it to PyPI in a second job.
+name: Publish to PyPI
+# Runs when a GitHub release is published, or when started manually.
+on:
+  release:
+    types:
+      - published
+  workflow_dispatch:
+jobs:
+  # Produces the contents of dist/ and hands them over as the "dist" artifact.
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install build dependencies
+        run: pip install build
+      - name: Build package
+        run: python -m build
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+  # Downloads the "dist" artifact and publishes it; only starts after build succeeds.
+  publish:
+    needs: build
+    runs-on: ubuntu-latest
+    environment: pypi
+    permissions:
+      # Lets the job request an OIDC token; no API token is configured in this workflow.
+      id-token: write
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

datasety-0.1.0/.github/workflows/test.yml ADDED Viewed

@@ -0,0 +1,46 @@
+# Runs the test suite on every supported Python version and lints src/ with ruff.
+name: Test
+# Triggered by pushes to main and by pull requests that target main.
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Quoted so that "3.10" is not parsed as the float 3.1.
+        python-version:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          pip install -e .
+          pip install pytest
+      - name: Run tests
+        run: pytest -v
+  # Lint runs once, on a single Python version, and only covers src/.
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install ruff
+        run: pip install ruff
+      - name: Run linter
+        run: ruff check src/

datasety-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,36 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# Distribution / packaging
+build/
+dist/
+*.egg-info/
+*.egg
+# Virtual environments
+venv/
+.venv/
+env/
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+# OS
+.DS_Store
+Thumbs.db
+# Project specific
+*.jpg
+*.jpeg
+*.png
+*.webp

datasety-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

datasety-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,164 @@
+Metadata-Version: 2.4
+Name: datasety
+Version: 0.1.0
+Summary: CLI tool for dataset preparation: image resizing and captioning with Florence-2
+Project-URL: Homepage, https://github.com/kontextox/datasety
+Project-URL: Repository, https://github.com/kontextox/datasety
+Project-URL: Issues, https://github.com/kontextox/datasety/issues
+Author: kontextox
+License-Expression: MIT
+License-File: LICENSE
+Keywords: captioning,cli,dataset,florence-2,image-processing,machine-learning
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Scientific/Engineering :: Image Processing
+Requires-Python: >=3.10
+Requires-Dist: pillow>=9.0.0
+Provides-Extra: caption
+Requires-Dist: einops; extra == 'caption'
+Requires-Dist: timm; extra == 'caption'
+Requires-Dist: torch>=2.0.0; extra == 'caption'
+Requires-Dist: transformers<4.46.0,>=4.38.0; extra == 'caption'
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == 'dev'
+Requires-Dist: ruff>=0.1.0; extra == 'dev'
+Description-Content-Type: text/markdown
+# datasety
+CLI tool for dataset preparation: image resizing and captioning with Florence-2.
+## Installation
+```bash
+pip install datasety
+```
+For captioning support (requires PyTorch and Transformers):
+```bash
+pip install datasety[caption]
+```
+## Usage
+### Resize Images
+Resize and crop images to a target resolution:
+```bash
+datasety resize --input ./images --output ./resized --resolution 768x1024
+```
+**Options:**
+| Option                  | Description                                               | Default             |
+| ----------------------- | --------------------------------------------------------- | ------------------- |
+| `--input`, `-i`         | Input directory                                           | (required)          |
+| `--output`, `-o`        | Output directory                                          | (required)          |
+| `--resolution`, `-r`    | Target resolution (WIDTHxHEIGHT)                          | (required)          |
+| `--crop-position`       | Crop position: `top`, `center`, `bottom`, `left`, `right` | `center`            |
+| `--input-format`        | Comma-separated formats                                   | `jpg,jpeg,png,webp` |
+| `--output-format`       | Output format: `jpg`, `png`, `webp`                       | `jpg`               |
+| `--output-name-numbers` | Rename files to 1.jpg, 2.jpg, ...                         | `false`             |
+**Example:**
+```bash
+datasety resize \
+    --input ./raw_photos \
+    --output ./dataset \
+    --resolution 1024x1024 \
+    --crop-position top \
+    --output-format jpg \
+    --output-name-numbers
+```
+**How it works:**
+1. Finds all images matching input formats
+2. Skips images where either dimension is smaller than target
+3. Resizes proportionally until the image just covers the target (one side matches exactly, the other is at least as large)
+4. Crops from the specified area to exact dimensions
+5. Saves with high quality (95% for jpg/webp)
+### Generate Captions
+Generate captions for images using Microsoft's Florence-2 model:
+```bash
+datasety caption --input ./images --output ./captions --florence-2-large
+```
+**Options:**
+| Option               | Description                     | Default                   |
+| -------------------- | ------------------------------- | ------------------------- |
+| `--input`, `-i`      | Input directory                 | (required)                |
+| `--output`, `-o`     | Output directory for .txt files | (required)                |
+| `--device`           | `cpu` or `cuda`                 | `cpu`                     |
+| `--trigger-word`     | Text to prepend to captions     | (none)                    |
+| `--prompt`           | Florence-2 task prompt          | `<MORE_DETAILED_CAPTION>` |
+| `--florence-2-base`  | Use base model (0.23B, faster)  |                           |
+| `--florence-2-large` | Use large model (0.77B, better) | (default)                 |
+**Available prompts:**
+- `<CAPTION>` - Brief caption
+- `<DETAILED_CAPTION>` - Detailed caption
+- `<MORE_DETAILED_CAPTION>` - Most detailed caption (default)
+**Example:**
+```bash
+datasety caption \
+    --input ./dataset \
+    --output ./dataset \
+    --device cuda \
+    --trigger-word "photo of sks person," \
+    --florence-2-large
+```
+This creates a `.txt` file for each image with the generated caption.
+## Common Workflows
+### Prepare a LoRA Training Dataset
+```bash
+# 1. Resize images to 1024x1024
+datasety resize -i ./raw -o ./dataset -r 1024x1024 --crop-position center
+# 2. Generate captions with trigger word
+datasety caption -i ./dataset -o ./dataset --trigger-word "[trigger]" --device cuda
+```
+### Batch Process with Numbered Files
+```bash
+datasety resize \
+    -i ./photos \
+    -o ./processed \
+    -r 768x1024 \
+    --output-name-numbers \
+    --crop-position top
+```
+## Requirements
+- Python 3.10+
+- Pillow (for resize)
+- PyTorch + Transformers (for caption, install with `pip install datasety[caption]`)
+## License
+MIT

datasety-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,129 @@
+# datasety
+CLI tool for dataset preparation: image resizing and captioning with Florence-2.
+## Installation
+```bash
+pip install datasety
+```
+For captioning support (requires PyTorch and Transformers):
+```bash
+pip install datasety[caption]
+```
+## Usage
+### Resize Images
+Resize and crop images to a target resolution:
+```bash
+datasety resize --input ./images --output ./resized --resolution 768x1024
+```
+**Options:**
+| Option                  | Description                                               | Default             |
+| ----------------------- | --------------------------------------------------------- | ------------------- |
+| `--input`, `-i`         | Input directory                                           | (required)          |
+| `--output`, `-o`        | Output directory                                          | (required)          |
+| `--resolution`, `-r`    | Target resolution (WIDTHxHEIGHT)                          | (required)          |
+| `--crop-position`       | Crop position: `top`, `center`, `bottom`, `left`, `right` | `center`            |
+| `--input-format`        | Comma-separated formats                                   | `jpg,jpeg,png,webp` |
+| `--output-format`       | Output format: `jpg`, `png`, `webp`                       | `jpg`               |
+| `--output-name-numbers` | Rename files to 1.jpg, 2.jpg, ...                         | `false`             |
+**Example:**
+```bash
+datasety resize \
+    --input ./raw_photos \
+    --output ./dataset \
+    --resolution 1024x1024 \
+    --crop-position top \
+    --output-format jpg \
+    --output-name-numbers
+```
+**How it works:**
+1. Finds all images matching input formats
+2. Skips images where either dimension is smaller than target
+3. Resizes proportionally until the image just covers the target (one side matches exactly, the other is at least as large)
+4. Crops from the specified area to exact dimensions
+5. Saves with high quality (95% for jpg/webp)
+### Generate Captions
+Generate captions for images using Microsoft's Florence-2 model:
+```bash
+datasety caption --input ./images --output ./captions --florence-2-large
+```
+**Options:**
+| Option               | Description                     | Default                   |
+| -------------------- | ------------------------------- | ------------------------- |
+| `--input`, `-i`      | Input directory                 | (required)                |
+| `--output`, `-o`     | Output directory for .txt files | (required)                |
+| `--device`           | `cpu` or `cuda`                 | `cpu`                     |
+| `--trigger-word`     | Text to prepend to captions     | (none)                    |
+| `--prompt`           | Florence-2 task prompt          | `<MORE_DETAILED_CAPTION>` |
+| `--florence-2-base`  | Use base model (0.23B, faster)  |                           |
+| `--florence-2-large` | Use large model (0.77B, better) | (default)                 |
+**Available prompts:**
+- `<CAPTION>` - Brief caption
+- `<DETAILED_CAPTION>` - Detailed caption
+- `<MORE_DETAILED_CAPTION>` - Most detailed caption (default)
+**Example:**
+```bash
+datasety caption \
+    --input ./dataset \
+    --output ./dataset \
+    --device cuda \
+    --trigger-word "photo of sks person," \
+    --florence-2-large
+```
+This creates a `.txt` file for each image with the generated caption.
+## Common Workflows
+### Prepare a LoRA Training Dataset
+```bash
+# 1. Resize images to 1024x1024
+datasety resize -i ./raw -o ./dataset -r 1024x1024 --crop-position center
+# 2. Generate captions with trigger word
+datasety caption -i ./dataset -o ./dataset --trigger-word "[trigger]" --device cuda
+```
+### Batch Process with Numbered Files
+```bash
+datasety resize \
+    -i ./photos \
+    -o ./processed \
+    -r 768x1024 \
+    --output-name-numbers \
+    --crop-position top
+```
+## Requirements
+- Python 3.10+
+- Pillow (for resize)
+- PyTorch + Transformers (for caption, install with `pip install datasety[caption]`)
+## License
+MIT

datasety-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,72 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "datasety"
+dynamic = ["version"]
+description = "CLI tool for dataset preparation: image resizing and captioning with Florence-2"
+readme = "README.md"
+license = "MIT"
+requires-python = ">=3.10"
+authors = [
+    { name = "kontextox" }
+]
+keywords = [
+    "dataset",
+    "image-processing",
+    "captioning",
+    "florence-2",
+    "machine-learning",
+    "cli",
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Environment :: Console",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Scientific/Engineering :: Image Processing",
+]
+dependencies = [
+    "Pillow>=9.0.0",
+]
+[project.optional-dependencies]
+caption = [
+    "torch>=2.0.0",
+    "transformers>=4.38.0,<4.46.0",
+    "einops",
+    "timm",
+]
+dev = [
+    "pytest>=7.0.0",
+    "ruff>=0.1.0",
+]
+[project.scripts]
+datasety = "datasety.cli:main"
+[project.urls]
+Homepage = "https://github.com/kontextox/datasety"
+Repository = "https://github.com/kontextox/datasety"
+Issues = "https://github.com/kontextox/datasety/issues"
+[tool.hatch.version]
+path = "src/datasety/__init__.py"
+[tool.hatch.build.targets.wheel]
+packages = ["src/datasety"]
+[tool.ruff]
+line-length = 100
+target-version = "py310"
+[tool.ruff.lint]
+select = ["E", "F", "I", "W"]

datasety-0.1.0/src/datasety/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""datasety - CLI tool for dataset preparation: image resizing and captioning."""
+# Package version. pyproject.toml declares the version as dynamic and points
+# [tool.hatch.version] at this file, so the build reads it from this assignment.
+__version__ = "0.1.0"

datasety-0.1.0/src/datasety/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Allow running as `python -m datasety`."""
+from datasety.cli import main
+# Start the CLI only when this module is executed, not when it is imported.
+if __name__ == "__main__":
+    main()

datasety-0.1.0/src/datasety/cli.py ADDED Viewed

@@ -0,0 +1,388 @@
+#!/usr/bin/env python3
+"""
+datasety - CLI tool for dataset preparation: image resizing and captioning.
+Usage:
+    datasety resize --input ./in --output ./out --resolution 768x1024 --crop-position top
+    datasety caption --input ./in --output ./out --trigger-word "[trigger]" --florence-2-large
+"""
+import argparse
+import sys
+from pathlib import Path
+from PIL import Image
+def get_image_files(input_dir: Path, formats: list[str]) -> list[Path]:
+    """Find all images matching the specified formats."""
+    files = []
+    for fmt in formats:
+        fmt = fmt.lower().strip()
+        files.extend(input_dir.glob(f"*.{fmt}"))
+        files.extend(input_dir.glob(f"*.{fmt.upper()}"))
+    return sorted(set(files))
+def calculate_resize_and_crop(
+    orig_width: int, orig_height: int,
+    target_width: int, target_height: int,
+    crop_position: str
+) -> tuple[tuple[int, int], tuple[int, int, int, int]]:
+    """
+    Calculate resize dimensions and crop box.
+    Args:
+        crop_position: Where to position the crop window (what to keep).
+                      'top' keeps top, 'right' keeps right, etc.
+    Returns:
+        (new_width, new_height), (left, top, right, bottom)
+    """
+    target_ratio = target_width / target_height
+    orig_ratio = orig_width / orig_height
+    if orig_ratio > target_ratio:
+        # Image is wider - resize by height, crop width
+        new_height = target_height
+        new_width = int(orig_width * (target_height / orig_height))
+    else:
+        # Image is taller - resize by width, crop height
+        new_width = target_width
+        new_height = int(orig_height * (target_width / orig_width))
+    # Calculate crop box based on position (what to keep)
+    if crop_position == "center":
+        left = (new_width - target_width) // 2
+        top = (new_height - target_height) // 2
+    elif crop_position == "top":
+        left = (new_width - target_width) // 2
+        top = 0
+    elif crop_position == "bottom":
+        left = (new_width - target_width) // 2
+        top = new_height - target_height
+    elif crop_position == "left":
+        left = 0
+        top = (new_height - target_height) // 2
+    elif crop_position == "right":
+        left = new_width - target_width
+        top = (new_height - target_height) // 2
+    else:
+        raise ValueError(f"Invalid crop position: {crop_position}")
+    right = left + target_width
+    bottom = top + target_height
+    return (new_width, new_height), (left, top, right, bottom)
+def cmd_resize(args):
+    """Execute the resize command."""
+    input_dir = Path(args.input)
+    output_dir = Path(args.output)
+    if not input_dir.exists():
+        print(f"Error: Input directory '{input_dir}' does not exist.")
+        sys.exit(1)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # Parse resolution
+    try:
+        width, height = map(int, args.resolution.lower().split("x"))
+    except ValueError:
+        print(f"Error: Invalid resolution '{args.resolution}'. Use WIDTHxHEIGHT (e.g., 768x1024)")
+        sys.exit(1)
+    # Parse input formats
+    formats = [f.strip() for f in args.input_format.split(",")]
+    # Get image files
+    image_files = get_image_files(input_dir, formats)
+    if not image_files:
+        print(f"No images found in '{input_dir}' with formats: {formats}")
+        sys.exit(0)
+    print(f"Found {len(image_files)} images")
+    print(f"Target resolution: {width}x{height}")
+    print(f"Crop position: {args.crop_position}")
+    print(f"Output format: {args.output_format}")
+    print("-" * 50)
+    processed = 0
+    skipped = 0
+    for idx, img_path in enumerate(image_files, start=1):
+        try:
+            with Image.open(img_path) as img:
+                img = img.convert("RGB")
+                orig_w, orig_h = img.size
+                # Skip if image is too small
+                if orig_w < width or orig_h < height:
+                    print(f"[SKIP] {img_path.name}: {orig_w}x{orig_h} < {width}x{height}")
+                    skipped += 1
+                    continue
+                # Calculate resize and crop
+                (new_w, new_h), crop_box = calculate_resize_and_crop(
+                    orig_w, orig_h, width, height, args.crop_position
+                )
+                # Resize
+                img_resized = img.resize((new_w, new_h), Image.LANCZOS)
+                # Crop
+                img_cropped = img_resized.crop(crop_box)
+                # Determine output filename
+                if args.output_name_numbers:
+                    out_name = f"{processed + 1}.{args.output_format}"
+                else:
+                    out_name = f"{img_path.stem}.{args.output_format}"
+                out_path = output_dir / out_name
+                # Save with quality settings
+                save_kwargs = {}
+                if args.output_format.lower() in ("jpg", "jpeg"):
+                    save_kwargs["quality"] = 95
+                    save_kwargs["optimize"] = True
+                elif args.output_format.lower() == "webp":
+                    save_kwargs["quality"] = 95
+                elif args.output_format.lower() == "png":
+                    save_kwargs["optimize"] = True
+                img_cropped.save(out_path, **save_kwargs)
+                print(f"[OK] {img_path.name} ({orig_w}x{orig_h}) -> {out_name} ({width}x{height})")
+                processed += 1
+        except Exception as e:
+            print(f"[ERROR] {img_path.name}: {e}")
+            skipped += 1
+    print("-" * 50)
+    print(f"Done! Processed: {processed}, Skipped: {skipped}")
+def cmd_caption(args):
+    """Execute the caption command."""
+    # Lazy import for faster CLI startup when not using caption
+    try:
+        import torch
+        from transformers import AutoModelForCausalLM, AutoProcessor
+    except ImportError:
+        print("Error: Required packages not installed.")
+        print("Run: pip install torch transformers")
+        sys.exit(1)
+    input_dir = Path(args.input)
+    output_dir = Path(args.output)
+    if not input_dir.exists():
+        print(f"Error: Input directory '{input_dir}' does not exist.")
+        sys.exit(1)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # Determine model (base flag takes priority since large is default)
+    if args.florence_2_base:
+        model_name = "microsoft/Florence-2-base"
+    else:
+        model_name = "microsoft/Florence-2-large"
+    # Determine device
+    if args.device == "cuda" and not torch.cuda.is_available():
+        print("Warning: CUDA not available, falling back to CPU")
+        device = "cpu"
+    else:
+        device = args.device
+    torch_dtype = torch.float16 if device == "cuda" else torch.float32
+    print(f"Loading model: {model_name}")
+    print(f"Device: {device}")
+    try:
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch_dtype,
+            trust_remote_code=True
+        ).to(device).eval()
+        processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        sys.exit(1)
+    # Find images (common formats)
+    formats = ["jpg", "jpeg", "png", "webp", "bmp", "tiff"]
+    image_files = get_image_files(input_dir, formats)
+    if not image_files:
+        print(f"No images found in '{input_dir}'")
+        sys.exit(0)
+    print(f"Found {len(image_files)} images")
+    print(f"Prompt: {args.prompt}")
+    if args.trigger_word:
+        print(f"Trigger word: {args.trigger_word}")
+    print("-" * 50)
+    processed = 0
+    for img_path in image_files:
+        try:
+            with Image.open(img_path) as img:
+                img = img.convert("RGB")
+                inputs = processor(
+                    text=args.prompt,
+                    images=img,
+                    return_tensors="pt"
+                ).to(device, torch_dtype)
+                with torch.no_grad():
+                    generated_ids = model.generate(
+                        input_ids=inputs["input_ids"],
+                        pixel_values=inputs["pixel_values"],
+                        max_new_tokens=1024,
+                        num_beams=3,
+                        do_sample=False
+                    )
+                generated_text = processor.batch_decode(
+                    generated_ids, skip_special_tokens=False
+                )[0]
+                parsed = processor.post_process_generation(
+                    generated_text,
+                    task=args.prompt,
+                    image_size=(img.width, img.height)
+                )
+                caption = parsed.get(args.prompt, "")
+                # Prepend trigger word if specified
+                if args.trigger_word:
+                    caption = f"{args.trigger_word} {caption}"
+                # Save caption
+                caption_path = output_dir / f"{img_path.stem}.txt"
+                caption_path.write_text(caption.strip())
+                print(f"[OK] {img_path.name}")
+                print(f"     {caption[:100]}{'...' if len(caption) > 100 else ''}")
+                processed += 1
+        except Exception as e:
+            print(f"[ERROR] {img_path.name}: {e}")
+    print("-" * 50)
+    print(f"Done! Processed: {processed} images")
+def main():
+    parser = argparse.ArgumentParser(
+        prog="datasety",
+        description="CLI tool for dataset preparation: image resizing and captioning."
+    )
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    # === RESIZE command ===
+    resize_parser = subparsers.add_parser(
+        "resize",
+        help="Resize and crop images to target resolution"
+    )
+    resize_parser.add_argument(
+        "--input", "-i",
+        required=True,
+        help="Input directory containing images"
+    )
+    resize_parser.add_argument(
+        "--output", "-o",
+        required=True,
+        help="Output directory for processed images"
+    )
+    resize_parser.add_argument(
+        "--resolution", "-r",
+        required=True,
+        help="Target resolution as WIDTHxHEIGHT (e.g., 768x1024)"
+    )
+    resize_parser.add_argument(
+        "--crop-position",
+        choices=["top", "center", "bottom", "left", "right"],
+        default="center",
+        help="Position to keep when cropping (default: center)"
+    )
+    resize_parser.add_argument(
+        "--input-format",
+        default="jpg,jpeg,png,webp",
+        help="Comma-separated input formats (default: jpg,jpeg,png,webp)"
+    )
+    resize_parser.add_argument(
+        "--output-format",
+        choices=["jpg", "png", "webp"],
+        default="jpg",
+        help="Output image format (default: jpg)"
+    )
+    resize_parser.add_argument(
+        "--output-name-numbers",
+        action="store_true",
+        help="Rename output files to sequential numbers (1.jpg, 2.jpg, ...)"
+    )
+    resize_parser.set_defaults(func=cmd_resize)
+    # === CAPTION command ===
+    caption_parser = subparsers.add_parser(
+        "caption",
+        help="Generate captions for images using Florence-2"
+    )
+    caption_parser.add_argument(
+        "--input", "-i",
+        required=True,
+        help="Input directory containing images"
+    )
+    caption_parser.add_argument(
+        "--output", "-o",
+        required=True,
+        help="Output directory for caption text files"
+    )
+    caption_parser.add_argument(
+        "--device",
+        choices=["cpu", "cuda"],
+        default="cpu",
+        help="Device to run model on (default: cpu)"
+    )
+    caption_parser.add_argument(
+        "--trigger-word",
+        default="",
+        help="Text to prepend to each caption (e.g., '[trigger]' or 'photo,')"
+    )
+    caption_parser.add_argument(
+        "--prompt",
+        default="<MORE_DETAILED_CAPTION>",
+        help="Florence-2 prompt (default: <MORE_DETAILED_CAPTION>)"
+    )
+    model_group = caption_parser.add_mutually_exclusive_group()
+    model_group.add_argument(
+        "--florence-2-base",
+        action="store_true",
+        help="Use Florence-2-base model (0.23B params, faster)"
+    )
+    model_group.add_argument(
+        "--florence-2-large",
+        action="store_true",
+        help="Use Florence-2-large model (0.77B params, more accurate) [default]"
+    )
+    caption_parser.set_defaults(func=cmd_caption)
+    # Parse and execute
+    args = parser.parse_args()
+    args.func(args)
+if __name__ == "__main__":
+    main()

datasety-0.1.0/src/datasety/py.typed ADDED Viewed

File without changes

datasety-0.1.0/tests/__init__.py ADDED Viewed

File without changes

datasety-0.1.0/tests/test_resize.py ADDED Viewed

@@ -0,0 +1,84 @@
+"""Tests for the resize command."""
+import pytest
+from datasety.cli import calculate_resize_and_crop, get_image_files
+from pathlib import Path
+class TestCalculateResizeAndCrop:
+    """Test resize and crop calculations."""
+    def test_wider_image_center_crop(self):
+        """Test cropping a wider image from center."""
+        # 2000x1000 image -> 1024x1024 target
+        (new_w, new_h), (left, top, right, bottom) = calculate_resize_and_crop(
+            2000, 1000, 1024, 1024, "center"
+        )
+        assert new_h == 1024
+        assert new_w == 2048  # maintains aspect ratio
+        assert top == 0
+        assert bottom == 1024
+        assert left == (2048 - 1024) // 2
+        assert right == left + 1024
+    def test_taller_image_center_crop(self):
+        """Test cropping a taller image from center."""
+        # 1000x2000 image -> 1024x1024 target
+        (new_w, new_h), (left, top, right, bottom) = calculate_resize_and_crop(
+            1000, 2000, 1024, 1024, "center"
+        )
+        assert new_w == 1024
+        assert new_h == 2048
+        assert left == 0
+        assert right == 1024
+        assert top == (2048 - 1024) // 2
+        assert bottom == top + 1024
+    def test_top_crop(self):
+        """Test cropping from top."""
+        (new_w, new_h), (left, top, right, bottom) = calculate_resize_and_crop(
+            1000, 2000, 1024, 1024, "top"
+        )
+        assert top == 0
+        assert bottom == 1024
+    def test_bottom_crop(self):
+        """Test cropping from bottom."""
+        (new_w, new_h), (left, top, right, bottom) = calculate_resize_and_crop(
+            1000, 2000, 1024, 1024, "bottom"
+        )
+        assert bottom == new_h
+        assert top == new_h - 1024
+    def test_left_crop(self):
+        """Test cropping from left."""
+        (new_w, new_h), (left, top, right, bottom) = calculate_resize_and_crop(
+            2000, 1000, 1024, 1024, "left"
+        )
+        assert left == 0
+        assert right == 1024
+    def test_right_crop(self):
+        """Test cropping from right."""
+        (new_w, new_h), (left, top, right, bottom) = calculate_resize_and_crop(
+            2000, 1000, 1024, 1024, "right"
+        )
+        assert right == new_w
+        assert left == new_w - 1024
+    def test_non_square_target(self):
+        """Test with non-square target resolution."""
+        # 2000x1500 image -> 768x1024 target (portrait)
+        # orig_ratio=1.33 > target_ratio=0.75, so resize by height
+        (new_w, new_h), (left, top, right, bottom) = calculate_resize_and_crop(
+            2000, 1500, 768, 1024, "center"
+        )
+        assert new_h == 1024
+        assert new_w == int(2000 * (1024 / 1500))  # 1365
+        assert right - left == 768
+        assert bottom - top == 1024
+    def test_invalid_crop_position(self):
+        """Test that invalid crop position raises error."""
+        with pytest.raises(ValueError):
+            calculate_resize_and_crop(1000, 1000, 512, 512, "invalid")