PyPI - caption-flow - Versions diffs - 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

caption-flow 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

caption_flow/cli.py +2 -1
caption_flow/models.py +108 -1
caption_flow/monitor.py +1 -1
caption_flow/orchestrator.py +423 -1595
caption_flow/processors/__init__.py +11 -0
caption_flow/processors/base.py +219 -0
caption_flow/processors/huggingface.py +832 -0
caption_flow/processors/local_filesystem.py +683 -0
caption_flow/processors/webdataset.py +782 -0
caption_flow/storage.py +415 -406
caption_flow/utils/checkpoint_tracker.py +2 -2
caption_flow/utils/chunk_tracker.py +94 -35
caption_flow/utils/dataset_loader.py +64 -522
caption_flow/utils/dataset_metadata_cache.py +67 -0
caption_flow/utils/image_processor.py +1 -4
caption_flow/utils/shard_processor.py +4 -200
caption_flow/utils/shard_tracker.py +1 -5
caption_flow/workers/base.py +3 -3
caption_flow/workers/caption.py +416 -792
{caption_flow-0.2.1.dist-info → caption_flow-0.2.3.dist-info}/METADATA +29 -27
caption_flow-0.2.3.dist-info/RECORD +35 -0
caption_flow-0.2.1.dist-info/RECORD +0 -29
{caption_flow-0.2.1.dist-info → caption_flow-0.2.3.dist-info}/WHEEL +0 -0
{caption_flow-0.2.1.dist-info → caption_flow-0.2.3.dist-info}/entry_points.txt +0 -0
{caption_flow-0.2.1.dist-info → caption_flow-0.2.3.dist-info}/licenses/LICENSE +0 -0
{caption_flow-0.2.1.dist-info → caption_flow-0.2.3.dist-info}/top_level.txt +0 -0

caption_flow/cli.py CHANGED Viewed

@@ -124,7 +124,7 @@ def setup_logging(verbose: bool = False):
     level = logging.DEBUG if verbose else logging.INFO
     logging.basicConfig(
         level=level,
-        format="%(asctime)s %(message)s",
+        format="%(message)s",
         datefmt="[%Y-%m-%d %H:%M:%S]",
         handlers=[
             RichHandler(
@@ -161,6 +161,7 @@ def main(ctx, verbose: bool):
 @click.option("--key", help="SSL key path")
 @click.option("--no-ssl", is_flag=True, help="Disable SSL (development only)")
 @click.option("--vllm", is_flag=True, help="Use vLLM orchestrator for WebDataset/HF datasets")
+@click.option("--verbose", is_flag=True, help="Enable verbose logging")
 @click.pass_context
 def orchestrator(ctx, config: Optional[str], **kwargs):
     """Start the orchestrator server."""

caption_flow/models.py CHANGED Viewed

@@ -1,9 +1,11 @@
 """Data models for CaptionFlow."""
+import PIL
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
+from PIL import Image
 class JobStatus(Enum):
@@ -38,6 +40,38 @@ class Job:
             self.created_at = datetime.utcnow()
+@dataclass
+class JobId:
+    shard_id: str
+    chunk_id: str
+    sample_id: str
+    def get_shard_str(self):
+        return f"{self.shard_id}"
+    def get_chunk_str(self):
+        return f"{self.shard_id}:chunk:{self.chunk_id}"
+    def get_sample_str(self):
+        return f"{self.shard_id}:chunk:{self.chunk_id}:idx:{self.sample_id}"
+    @staticmethod
+    def from_dict(job: dict) -> "JobId":
+        return JobId(shard_id=job["shard_id"], chunk_id=job["chunk_id"], sample_id=job["sample_id"])
+    @staticmethod
+    def from_values(shard_id: str, chunk_id: str, sample_id: str) -> "JobId":
+        return JobId(shard_id=shard_id, chunk_id=chunk_id, sample_id=sample_id)
+    @staticmethod
+    def from_str(job_id: str):
+        # from data-0000:chunk:0:idx:0
+        parts = job_id.split(":")
+        if len(parts) != 5:
+            raise ValueError(f"Invalid job_id format: {job_id}")
+        return JobId(shard_id=parts[0], chunk_id=parts[2], sample_id=parts[4])
 @dataclass
 class Caption:
     """Generated caption with attribution and image metadata."""
@@ -61,6 +95,8 @@ class Caption:
     image_height: Optional[int] = None
     image_format: Optional[str] = None
     file_size: Optional[int] = None
+    filename: Optional[str] = None
+    url: Optional[str] = None
     # Processing metadata
     caption_index: Optional[int] = None  # Which caption this is (0, 1, 2...)
@@ -82,3 +118,74 @@ class Contributor:
     name: str
     total_captions: int = 0
     trust_level: int = 1
+@dataclass
+class ProcessingStage:
+    """Configuration for a single processing stage."""
+    name: str
+    model: str
+    prompts: List[str]
+    output_field: str
+    requires: List[str] = field(default_factory=list)
+    sampling: Optional[Dict[str, Any]] = None
+    # Model-specific overrides
+    tensor_parallel_size: Optional[int] = None
+    max_model_len: Optional[int] = None
+    dtype: Optional[str] = None
+    gpu_memory_utilization: Optional[float] = None
+@dataclass
+class StageResult:
+    """Results from a single stage."""
+    stage_name: str
+    output_field: str
+    outputs: List[str]  # Multiple outputs from multiple prompts
+    error: Optional[str] = None
+    def is_success(self) -> bool:
+        return self.error is None and bool(self.outputs)
+@dataclass
+class ShardChunk:
+    """Shard chunk assignment with unprocessed ranges."""
+    chunk_id: str
+    shard_url: str
+    shard_name: str
+    start_index: int
+    chunk_size: int
+    unprocessed_ranges: List[Tuple[int, int]] = field(default_factory=list)
+@dataclass
+class ProcessingItem:
+    """Item being processed."""
+    chunk_id: str
+    item_key: str
+    image: Image.Image
+    image_data: bytes
+    metadata: Dict[str, Any] = field(default_factory=dict)
+    stage_results: Dict[str, StageResult] = field(default_factory=dict)  # Accumulated results
+@dataclass
+class ProcessedResult:
+    """Result with multi-stage outputs."""
+    chunk_id: str
+    shard_name: str
+    item_key: str
+    outputs: Dict[str, List[str]]  # field_name -> list of outputs
+    image_width: int
+    image_height: int
+    image_format: str
+    file_size: int
+    processing_time_ms: float
+    metadata: Dict[str, Any] = field(default_factory=dict)

caption_flow/monitor.py CHANGED Viewed

@@ -83,7 +83,7 @@ class Monitor:
                         await self._handle_update(data)
             except Exception as e:
-                logger.error(f"Connection error: {e}")
+                logger.error(f"Connection error: {e}", exc_info=True)
                 await asyncio.sleep(5)
     async def _handle_update(self, data: Dict):

caption-flow 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

caption-flow 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl