caption-flow 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- caption_flow/cli.py +8 -2
- caption_flow/monitor.py +1 -1
- caption_flow/orchestrator.py +522 -129
- caption_flow/storage.py +5 -0
- caption_flow/utils/chunk_tracker.py +22 -4
- caption_flow/utils/dataset_loader.py +99 -142
- caption_flow/utils/shard_processor.py +100 -36
- {caption_flow-0.2.0.dist-info → caption_flow-0.2.2.dist-info}/METADATA +2 -1
- {caption_flow-0.2.0.dist-info → caption_flow-0.2.2.dist-info}/RECORD +13 -13
- {caption_flow-0.2.0.dist-info → caption_flow-0.2.2.dist-info}/WHEEL +0 -0
- {caption_flow-0.2.0.dist-info → caption_flow-0.2.2.dist-info}/entry_points.txt +0 -0
- {caption_flow-0.2.0.dist-info → caption_flow-0.2.2.dist-info}/licenses/LICENSE +0 -0
- {caption_flow-0.2.0.dist-info → caption_flow-0.2.2.dist-info}/top_level.txt +0 -0
caption_flow/storage.py
CHANGED
@@ -386,10 +386,15 @@ class StorageManager:
 
         # Filter new data to exclude duplicates
         new_rows = []
+        duplicate_rows = []
         for row in prepared_buffer:
             if row["job_id"] not in existing_job_ids:
                 new_rows.append(row)
+            elif row not in duplicate_rows:
+                duplicate_rows.append(row)
 
+        if duplicate_rows:
+            logger.info(f"Example duplicate row: {duplicate_rows[0]}")
         if new_rows:
             # Create table from new rows only
             new_table = pa.Table.from_pylist(new_rows, schema=self.caption_schema)
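The added bookkeeping above only collects and logs duplicates; the existing job_id filter is unchanged. As a rough standalone sketch of the filter-then-append pattern (the schema, the `existing_job_ids` set, and the buffer below are made-up stand-ins, not the StorageManager internals):

```python
# Illustrative only: dedup rows by job_id before building a PyArrow table.
import pyarrow as pa

# Hypothetical schema and inputs, not the real caption schema.
caption_schema = pa.schema([("job_id", pa.string()), ("caption", pa.string())])
existing_job_ids = {"job-001"}
prepared_buffer = [
    {"job_id": "job-001", "caption": "a dog on a beach"},  # already stored
    {"job_id": "job-002", "caption": "a cat on a sofa"},   # new
]

new_rows, duplicate_rows = [], []
for row in prepared_buffer:
    if row["job_id"] not in existing_job_ids:
        new_rows.append(row)
    elif row not in duplicate_rows:
        duplicate_rows.append(row)

if duplicate_rows:
    print(f"Example duplicate row: {duplicate_rows[0]}")

if new_rows:
    new_table = pa.Table.from_pylist(new_rows, schema=caption_schema)
    print(new_table.num_rows)  # 1
```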

caption_flow/utils/chunk_tracker.py
CHANGED
@@ -441,9 +441,27 @@ class ChunkTracker(CheckpointTracker):
         )
 
     def get_chunk_with_unprocessed_items(self, chunk_id: str) -> Optional[Dict[str, Any]]:
-        """Get chunk info
-
+        """Get chunk info with unprocessed item ranges."""
+        chunk_state = self.chunks.get(chunk_id)
+        if not chunk_state:
             return None
 
-
-
+        # During startup or if no worker is assigned, treat all unprocessed as available
+        if not hasattr(self, "_startup_complete"):
+            self._startup_complete = False
+
+        if not self._startup_complete or not chunk_state.assigned_to:
+            # Return all unprocessed ranges
+            return {
+                "chunk_id": chunk_id,
+                "unprocessed_ranges": chunk_state.get_unprocessed_ranges(),
+                "status": chunk_state.status,
+            }
+
+        # Normal operation - only return ranges not being worked on
+        # This would need more complex tracking of which ranges each worker is processing
+        return {
+            "chunk_id": chunk_id,
+            "unprocessed_ranges": chunk_state.get_unprocessed_ranges(),
+            "status": chunk_state.status,
+        }
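`get_unprocessed_ranges()` itself is not part of this diff, but the new code above passes its result straight through, so the shape matters: inclusive `(start, end)` pairs of chunk-relative indices. A standalone sketch of how such ranges can be derived from a set of processed indices (illustrative only, not the ChunkTracker implementation):

```python
# Illustrative helper: compute inclusive unprocessed (start, end) ranges for a
# chunk of `chunk_size` items, given the chunk-relative indices already done.
from typing import List, Set, Tuple


def unprocessed_ranges(chunk_size: int, processed: Set[int]) -> List[Tuple[int, int]]:
    ranges: List[Tuple[int, int]] = []
    start = None
    for idx in range(chunk_size):
        if idx not in processed:
            if start is None:
                start = idx  # open a new unprocessed run
        elif start is not None:
            ranges.append((start, idx - 1))  # close the current run
            start = None
    if start is not None:
        ranges.append((start, chunk_size - 1))
    return ranges


print(unprocessed_ranges(10, {0, 1, 2, 5, 6}))  # [(3, 4), (7, 9)]
```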

caption_flow/utils/dataset_loader.py
CHANGED
@@ -217,200 +217,157 @@ class DatasetLoader:
         return dataset_path, start_idx, chunk_size
 
     def iterate_shard(
-        self,
+        self,
+        shard_url: str,
+        processed_keys: Optional[set] = None,
+        unprocessed_ranges: Optional[List[Tuple[int, int]]] = None,
     ) -> Generator[Tuple[str, str, bytes], None, None]:
         """
         Iterate over items in a shard.
 
+        Args:
+            shard_url: URL or identifier of the shard
+            processed_keys: Set of already processed keys to skip
+            unprocessed_ranges: Specific ranges to process (for HF datasets)
+
         Yields:
             Tuple of (key, url, image_bytes)
         """
-        # Check if this is a virtual HuggingFace dataset shard
         if shard_url.startswith("hf_dataset:"):
-
+            raise ValueError(
+                "Virtual HuggingFace dataset shards should use iterate_shard_with_metadata()"
+            )
         else:
             # Regular WebDataset shard
             ds = self.load_shard(shard_url, processed_keys)
             for key, url, image_data in ds:
                 yield key, url, image_data
 
-    def
-
-    ) -> Generator[Tuple[str, str, bytes, Dict[str, Any]], None, None]:
-        """Iterate over a virtual HuggingFace dataset shard with metadata."""
-        if processed_keys is None:
-            processed_keys = set()
-
-        dataset_path, start_idx, chunk_size = self._parse_virtual_shard(shard_url)
-
-        logger.info(
-            f"Loading HuggingFace dataset with metadata: {dataset_path} (split: {self.split})"
-        )
-
+    def _create_dataset_at_position(self, dataset_path: str, split: str, start_idx: int):
+        """Create a dataset iterator positioned at start_idx using state_dict if available."""
         try:
             # Load dataset in streaming mode
             dataset = load_dataset(
                 dataset_path,
-                split=
+                split=split,
                 streaming=True,
                 token=self.token,
             )
 
-            #
-            if
-            dataset
-
-            items_processed = 0
-
-            for item in dataset:
-                # Stop after processing chunk_size items
-                if items_processed >= chunk_size:
-                    break
-
-                # Generate a unique key for this item - CONSISTENT FORMAT
-                key = f"{dataset_path.replace('/', '_')}_{start_idx + items_processed:08d}"
-
-                if key in processed_keys:
-                    items_processed += 1
-                    continue
-
+            # Check if the dataset supports state_dict (newer versions of datasets library)
+            if hasattr(dataset, "load_state_dict") and hasattr(dataset, "state_dict"):
+                # Try to use the dataset's native state management
                 try:
-                    #
-
-
-
-
-
-
-
-
-
+                    # Get current state
+                    state = dataset.state_dict()
+
+                    # Modify the state to skip to start_idx
+                    if "epoch" in state:
+                        state["epoch"] = 0
+                    if "num_examples_since_previous_state" in state:
+                        state["num_examples_since_previous_state"] = start_idx
+
+                    # For newer datasets with examples_iterable state
+                    if "examples_iterable" in state:
+                        if isinstance(state["examples_iterable"], dict):
+                            if "shard_example_idx" in state["examples_iterable"]:
+                                state["examples_iterable"]["shard_example_idx"] = start_idx
+
+                    # Load the modified state
+                    dataset.load_state_dict(state)
+                    logger.info(f"Positioned dataset at index {start_idx} using state_dict")
+                    return dataset
+                except Exception as e:
+                    logger.debug(f"Could not use state_dict approach: {e}")
 
-
-
-
-
-                    else:
-                        logger.warning(
-                            f"Failed to process image for item at index {start_idx + items_processed}"
-                        )
-                        items_processed += 1
-                        continue
-                else:
-                    logger.warning(
-                        f"No image column '{self.image_column}' found in item at index {start_idx + items_processed}. "
-                        f"Available columns: {list(item.keys())}"
-                    )
-                    items_processed += 1
+            # Fall back to skip() for large skips
+            if start_idx > 0:
+                logger.info(f"Using skip() to position dataset at index {start_idx}")
+                dataset = dataset.skip(start_idx)
 
-
-                    logger.error(
-                        f"Error processing item at index {start_idx + items_processed}: {e}"
-                    )
-                    items_processed += 1
-                    continue
+            return dataset
 
         except Exception as e:
-            logger.
-            return
+            logger.warning(f"Error creating positioned dataset: {e}")
+            return None
 
-    def
+    def _iterate_hf_dataset_shard_with_metadata(
         self, shard_url: str, processed_keys: Optional[set] = None
-    ) -> Generator[Tuple[str, str, bytes], None, None]:
-        """Iterate over a virtual HuggingFace dataset shard."""
+    ) -> Generator[Tuple[str, str, bytes, Dict[str, Any]], None, None]:
+        """Iterate over a virtual HuggingFace dataset shard with metadata."""
         if processed_keys is None:
             processed_keys = set()
 
         dataset_path, start_idx, chunk_size = self._parse_virtual_shard(shard_url)
 
-        # IMPORTANT: Check if start_idx is beyond dataset bounds
-        if self._hf_total_items is not None and start_idx >= self._hf_total_items:
-            logger.warning(
-                f"Virtual shard starts at index {start_idx} but dataset only has "
-                f"{self._hf_total_items} items. Skipping this shard."
-            )
-            return
-
         logger.info(
-            f"Loading HuggingFace dataset
-            f"(split: {self.split}, start: {start_idx}, chunk_size: {chunk_size})"
+            f"Loading HuggingFace dataset with metadata: {dataset_path} (split: {self.split})"
         )
 
         try:
-            #
-
-
-
-
-
-
+            # For HF datasets, we iterate through the full chunk range
+            # The actual range filtering happens in the shard processor
+            items_processed = 0
+            current_abs_idx = start_idx
+
+            while items_processed < chunk_size:
+                # Create a fresh dataset iterator for each batch
+                # This avoids issues with stateful iterators
+                batch_size = min(1000, chunk_size - items_processed)  # Process in smaller batches
+
+                dataset = load_dataset(
+                    dataset_path,
+                    split=self.split,
+                    streaming=True,
+                    token=self.token,
+                )
 
-
-
-
-                logger.info(f"Skipped to index {start_idx}")
+                # Skip to current position
+                if current_abs_idx > 0:
+                    dataset = dataset.skip(current_abs_idx)
 
-
+                batch_processed = 0
+                for item in dataset:
+                    if batch_processed >= batch_size or items_processed >= chunk_size:
+                        break
 
-
-
-                # Stop after processing chunk_size items
-                if items_processed >= chunk_size:
-                    logger.info(f"Completed chunk: processed {items_processed} items")
-                    break
-
-                # Also stop if we've reached the dataset end
-                if self._hf_total_items and (start_idx + items_processed) >= self._hf_total_items:
-                    logger.info(
-                        f"Reached dataset end at item {start_idx + items_processed} "
-                        f"(total: {self._hf_total_items})"
-                    )
-                    break
+                    # Generate key
+                    key = f"{dataset_path.replace('/', '_')}_{current_abs_idx:08d}"
 
-
-
+                    if key in processed_keys:
+                        current_abs_idx += 1
+                        batch_processed += 1
+                        items_processed += 1
+                        continue
 
-
-
-
+                    try:
+                        if self.image_column in item:
+                            img_data = item[self.image_column]
+                            image_bytes = ImageProcessor.process_image_data(img_data)
 
-
-
-
-                        img_data = item[self.image_column]
+                            if image_bytes:
+                                metadata = {k: v for k, v in item.items() if k != self.image_column}
+                                url = f"hf://{dataset_path}#{current_abs_idx}"
 
-
-                        image_bytes = ImageProcessor.process_image_data(img_data)
+                                yield key, url, image_bytes, metadata
 
-
-
-                        url = f"hf://{dataset_path}#{start_idx + items_processed}"
+                                current_abs_idx += 1
+                                batch_processed += 1
                                 items_processed += 1
-                        yield key, url, image_bytes
                             else:
                                 logger.warning(
-                            f"
+                                    f"No image column '{self.image_column}' at index {current_abs_idx}"
                                 )
+                                current_abs_idx += 1
+                                batch_processed += 1
                                 items_processed += 1
-                        continue
-                else:
-                    logger.warning(
-                        f"No image column '{self.image_column}' found in item at index {start_idx + items_processed}. "
-                        f"Available columns: {list(item.keys())}"
-                    )
-                    items_processed += 1
 
-
-
-
-
-
-
-
-            logger.info(
-                f"Virtual shard complete: processed {items_processed} items "
-                f"(start_idx: {start_idx})"
-            )
+                    except Exception as e:
+                        logger.error(f"Error processing item at index {current_abs_idx}: {e}")
+                        current_abs_idx += 1
+                        batch_processed += 1
+                        items_processed += 1
+                        continue
 
         except Exception as e:
             logger.error(f"Error loading HuggingFace dataset: {e}")
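The new `_create_dataset_at_position` helper tries the `state_dict()`/`load_state_dict()` checkpointing API that recent releases of the `datasets` library expose on streaming `IterableDataset` objects, and falls back to `skip()` otherwise. A condensed standalone sketch of the same idea, with "user/dataset" as a placeholder path and the state keys taken from the hunk above:

```python
# Sketch only: fast-forward a streaming HF dataset to an absolute index.
from datasets import load_dataset


def dataset_at_position(dataset_path: str, split: str, start_idx: int, token=None):
    ds = load_dataset(dataset_path, split=split, streaming=True, token=token)

    # Prefer the checkpointing API when the installed `datasets` version has it.
    if hasattr(ds, "state_dict") and hasattr(ds, "load_state_dict"):
        try:
            state = ds.state_dict()
            ex = state.get("examples_iterable")
            if isinstance(ex, dict) and "shard_example_idx" in ex:
                ex["shard_example_idx"] = start_idx  # same key the diff rewrites
                ds.load_state_dict(state)
                return ds
        except Exception:
            pass  # fall through to skip()

    # Portable fallback: lazily skip the first start_idx examples.
    return ds.skip(start_idx) if start_idx > 0 else ds


# Usage with a placeholder dataset path:
# ds = dataset_at_position("user/dataset", split="train", start_idx=10_000)
# first_item = next(iter(ds))
```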

caption_flow/utils/shard_processor.py
CHANGED
@@ -7,6 +7,8 @@ from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Generator, Tuple, Optional, Dict, Any
 from dataclasses import dataclass
+from datasets import load_dataset
+from .image_processor import ImageProcessor
 from threading import Event
 import shlex
 
@@ -108,10 +110,7 @@ class HFDatasetShardProcessor(ShardProcessor):
         connected: Event,
     ) -> Generator[Tuple[str, str, bytes, Dict[str, Any]], None, None]:
         """
-        Process HuggingFace virtual shard chunk with metadata.
-
-        Yields:
-            Tuple of (key, url, image_data, metadata)
+        Process HuggingFace virtual shard chunk with metadata, range by range.
         """
         if not dataset_loader:
             logger.error("No dataset loader configured for HuggingFace dataset shard")
@@ -121,49 +120,114 @@ class HFDatasetShardProcessor(ShardProcessor):
         unprocessed_ranges = getattr(chunk, "unprocessed_ranges", [(0, chunk.chunk_size - 1)])
 
         logger.info(
-            f"Processing HF dataset chunk {chunk.chunk_id} with
+            f"Processing HF dataset chunk {chunk.chunk_id} with {len(unprocessed_ranges)} ranges"
         )
 
-
-        current_idx = 0
-
-        # Construct proper virtual shard URL
-        parts = chunk.shard_url.split("_chunk_")
-        if len(parts) == 2:
-            base_path = parts[0]
-            virtual_shard_url = f"{base_path}:chunk:{chunk.start_index}"
-        else:
-            virtual_shard_url = chunk.shard_url
-
-        logger.debug(f"Using virtual shard URL: {virtual_shard_url}")
+        items_yielded = 0
 
-        #
-        for
-            virtual_shard_url
-        ):
-            # Check if we should stop
+        # Process each range independently with its own iterator
+        for range_start, range_end in unprocessed_ranges:
             if should_stop.is_set() or not connected.is_set():
                 logger.info(f"Stopping chunk processing early due to disconnect")
                 break
 
-            #
-
-
-
-            current_idx += 1
-            continue  # Skip already processed items
+            # Calculate absolute indices for this range
+            abs_start = chunk.start_index + range_start
+            abs_end = chunk.start_index + range_end
+            range_size = range_end - range_start + 1
 
-
-
-
+            logger.debug(
+                f"Processing range [{range_start}, {range_end}] "
+                f"(absolute: [{abs_start}, {abs_end}])"
+            )
 
-
-
-
+            try:
+                # Create a fresh dataset iterator for this range
+                dataset = load_dataset(
+                    dataset_loader.dataset_path,
+                    split=dataset_loader.split,
+                    streaming=True,
+                    token=dataset_loader.token,
+                )
+
+                # Use state_dict if available for efficient positioning
+                if hasattr(dataset, "load_state_dict") and hasattr(dataset, "state_dict"):
+                    try:
+                        state = dataset.state_dict()
+                        # Modify state to jump to abs_start
+                        if "num_examples_since_previous_state" in state:
+                            state["num_examples_since_previous_state"] = abs_start
+                        if "examples_iterable" in state and isinstance(
+                            state["examples_iterable"], dict
+                        ):
+                            if "shard_example_idx" in state["examples_iterable"]:
+                                state["examples_iterable"]["shard_example_idx"] = abs_start
+                        dataset.load_state_dict(state)
+                        logger.debug(f"Positioned dataset at index {abs_start} using state_dict")
+                    except Exception as e:
+                        logger.debug(f"Could not use state_dict, falling back to skip: {e}")
+                        dataset = dataset.skip(abs_start)
+                else:
+                    # Fall back to skip
+                    dataset = dataset.skip(abs_start)
+
+                # Process items in this range
+                range_items = 0
+                for item in dataset:
+                    if range_items >= range_size:
+                        break
+
+                    if should_stop.is_set() or not connected.is_set():
+                        break
+
+                    # Generate key for this item
+                    current_abs_idx = abs_start + range_items
+                    key = f"{dataset_loader.dataset_path.replace('/', '_')}_{current_abs_idx:08d}"
+
+                    try:
+                        if dataset_loader.image_column in item:
+                            img_data = item[dataset_loader.image_column]
+                            image_bytes = ImageProcessor.process_image_data(img_data)
+
+                            if image_bytes:
+                                # Extract metadata
+                                metadata = {
+                                    k: v
+                                    for k, v in item.items()
+                                    if k != dataset_loader.image_column
+                                }
+                                # Add chunk-relative index to metadata
+                                metadata["_chunk_relative_index"] = range_start + range_items
+
+                                url = f"hf://{dataset_loader.dataset_path}#{current_abs_idx}"
+
+                                items_yielded += 1
+                                range_items += 1
+
+                                yield key, url, image_bytes, metadata
+                            else:
+                                logger.warning(
+                                    f"Failed to process image at index {current_abs_idx}"
+                                )
+                                range_items += 1
+                        else:
+                            logger.warning(
+                                f"No image column '{dataset_loader.image_column}' at index {current_abs_idx}"
+                            )
+                            range_items += 1
+
+                    except Exception as e:
+                        logger.error(f"Error processing item at index {current_abs_idx}: {e}")
+                        range_items += 1
+                        continue
+
+            except Exception as e:
+                logger.error(f"Error processing range [{range_start}, {range_end}]: {e}")
+                continue
 
         logger.info(
-            f"HF dataset chunk {chunk.chunk_id}: yielded {
-            f"from
+            f"HF dataset chunk {chunk.chunk_id}: yielded {items_yielded} items "
+            f"from {len(unprocessed_ranges)} ranges"
        )
 
 
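The processor above walks each unprocessed range with its own freshly positioned iterator, so the index arithmetic is the part worth keeping straight: ranges are chunk-relative and inclusive, while keys and skip positions use absolute dataset indices. A small illustrative sketch of that mapping (the values and dataset path are made up):

```python
# Illustrative only: expand chunk-relative unprocessed ranges into the
# (relative index, absolute index, item key) triples the processor works with.
from typing import Iterator, List, Tuple


def iter_range_indices(
    chunk_start: int, unprocessed_ranges: List[Tuple[int, int]], dataset_path: str
) -> Iterator[Tuple[int, int, str]]:
    for range_start, range_end in unprocessed_ranges:  # inclusive bounds
        abs_start = chunk_start + range_start
        for offset in range(range_end - range_start + 1):
            rel_idx = range_start + offset
            abs_idx = abs_start + offset
            key = f"{dataset_path.replace('/', '_')}_{abs_idx:08d}"
            yield rel_idx, abs_idx, key


# A chunk starting at absolute index 5000 with two gaps left to process:
for rel, abs_idx, key in iter_range_indices(5000, [(0, 2), (10, 11)], "user/dataset"):
    print(rel, abs_idx, key)
# 0 5000 user_dataset_00005000
# 1 5001 user_dataset_00005001
# 2 5002 user_dataset_00005002
# 10 5010 user_dataset_00005010
# 11 5011 user_dataset_00005011
```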

{caption_flow-0.2.0.dist-info → caption_flow-0.2.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: caption-flow
-Version: 0.2.0
+Version: 0.2.2
 Summary: Self-contained distributed community captioning system
 Author-email: bghira <bghira@users.github.com>
 License: MIT
@@ -32,6 +32,7 @@ Requires-Dist: pandas<3.0.0,>=2.3.1
 Requires-Dist: arrow<2.0.0,>=1.3.0
 Requires-Dist: datasets<5.0.0,>=4.0.0
 Requires-Dist: boto3<2.0.0,>=1.40.11
+Requires-Dist: torchdata<0.12.0,>=0.11.0
 Provides-Extra: dev
 Requires-Dist: pytest>=7.4.0; extra == "dev"
 Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"

{caption_flow-0.2.0.dist-info → caption_flow-0.2.2.dist-info}/RECORD
CHANGED
@@ -1,29 +1,29 @@
 caption_flow/__init__.py,sha256=NLPJ25lRN7xHqncXweINDNwbt0q8lgjZ30G21zlPdRs,303
-caption_flow/cli.py,sha256=
+caption_flow/cli.py,sha256=fkyQHzs5kei6-9ftkbJjko-K67TARxd7yNf7x9e7KSs,28820
 caption_flow/models.py,sha256=qo6lQiO10UISbaBVr6Cs-fSW_pmjwE6kmiTmmU_l3Wk,2140
-caption_flow/monitor.py,sha256=
-caption_flow/orchestrator.py,sha256=
-caption_flow/storage.py,sha256=
+caption_flow/monitor.py,sha256=ZZCSasYLKJ-UzA3-RoAtytv-tbNA-m3h5YjlZg_vukg,7870
+caption_flow/orchestrator.py,sha256=9yWKVcaR-S6naNQSd7Np8AemwV5lNDmB_lCufpvVrS0,96282
+caption_flow/storage.py,sha256=kGv9iQAgxwLLlAIPU6TBrlagdfxA339eBz1xG0yYRsc,40981
 caption_flow/utils/__init__.py,sha256=F1BChVoCsj9zn1GJRBOLHET1kLW6xrAmsbzcR7hHy6Y,202
 caption_flow/utils/auth.py,sha256=UrxX2n8OEEcfMD1Ey27TxGfrJFmUCpC59x-SCrQJoVE,2253
 caption_flow/utils/caption_utils.py,sha256=esUMAdcCkNjRroZ0Bhxv0_yKlLtMf0XeDCTt-5k6bik,5309
 caption_flow/utils/certificates.py,sha256=eu4blQZEkL9NRaY1ynQWg1asvDorRYhGRZea7STonJE,4635
 caption_flow/utils/checkpoint_tracker.py,sha256=8tsTFF-HcygitK92YcS-QWzeg-qRm9AuCpQoQRfC8M0,3335
-caption_flow/utils/chunk_tracker.py,sha256=
-caption_flow/utils/dataset_loader.py,sha256=
+caption_flow/utils/chunk_tracker.py,sha256=SO6ERvEwGXuikGDVaXFota_3Ix8BnePMU7CiZJKBAnQ,18025
+caption_flow/utils/dataset_loader.py,sha256=Bvo-aa5jWtjzqXW0rEisdiWaN7Q-aH02rXXUu9uXqGo,19194
 caption_flow/utils/image_processor.py,sha256=Zl8TAv9gYPdAYat3UiTuuNdIb2fXNfZ35AxsxuovJTs,5650
 caption_flow/utils/job_queue.py,sha256=itdfXcrkvGjmXn4qtpgMF63k1ufRBaejDe4V6WcxzgU,1104
 caption_flow/utils/json_utils.py,sha256=IiZYn8uCM-3pYmyIbX2fmaOIyutArn67SqAyp0ggNpU,5396
 caption_flow/utils/prompt_template.py,sha256=AKp0diSZqNBMwZkpiTNjw8-bbQwHStr7QZTOJ7o1dC4,4345
-caption_flow/utils/shard_processor.py,sha256=
+caption_flow/utils/shard_processor.py,sha256=c6COBKhFzZyUeJqot5uGVR3ANeOReBfs8-DR27mrdcA,14242
 caption_flow/utils/shard_tracker.py,sha256=Wt2oE-O85F2FxSnqIocJiaYeFn00OVVjIiklZIZRGL8,3233
 caption_flow/utils/vllm_config.py,sha256=TC7Rmjk0zRKbBXbWUXrFL4Z58hzax_-4L0pXZn09hdM,6019
 caption_flow/workers/base.py,sha256=jPm_Xw4Lxd0cnrPs-biBqKRQKkTOJLvHLolmp0Gb1CI,7530
 caption_flow/workers/caption.py,sha256=NZ9kTjk2uOoNwyyNSkB_arYk213vLr5mowHN-OjiFkk,54631
 caption_flow/workers/data.py,sha256=0Tg8NE0wdONeMlivYQ4nvbcfWdLuU51O7vR8_YSnJgo,14813
-caption_flow-0.2.
-caption_flow-0.2.
-caption_flow-0.2.
-caption_flow-0.2.
-caption_flow-0.2.
-caption_flow-0.2.
+caption_flow-0.2.2.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+caption_flow-0.2.2.dist-info/METADATA,sha256=h9VN2ZWXVDH935Eavb-1kfsBpuW7m4Oph3tjh9ucc3w,11941
+caption_flow-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+caption_flow-0.2.2.dist-info/entry_points.txt,sha256=KnVlyrGKZj6p2zNyuEnCx4Y6jvJ4V-mcfN0lddPKTlQ,55
+caption_flow-0.2.2.dist-info/top_level.txt,sha256=_bXpKRutqded0FQ80dCChIz26ETV7tL4d4e2E_Y1FXs,13
+caption_flow-0.2.2.dist-info/RECORD,,

File without changes
File without changes
File without changes
File without changes