caption-flow 0.2.3__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- caption_flow/__init__.py +1 -1
- caption_flow/cli.py +307 -0
- caption_flow/models.py +26 -0
- caption_flow/orchestrator.py +9 -9
- caption_flow/processors/huggingface.py +636 -464
- caption_flow/processors/webdataset.py +379 -534
- caption_flow/storage/__init__.py +1 -0
- caption_flow/storage/exporter.py +550 -0
- caption_flow/{storage.py → storage/manager.py} +410 -303
- caption_flow/utils/__init__.py +0 -2
- caption_flow/utils/chunk_tracker.py +196 -164
- caption_flow/utils/image_processor.py +19 -132
- caption_flow/viewer.py +594 -0
- caption_flow/workers/caption.py +164 -129
- {caption_flow-0.2.3.dist-info → caption_flow-0.3.1.dist-info}/METADATA +45 -177
- caption_flow-0.3.1.dist-info/RECORD +33 -0
- caption_flow/utils/dataset_loader.py +0 -222
- caption_flow/utils/dataset_metadata_cache.py +0 -67
- caption_flow/utils/job_queue.py +0 -41
- caption_flow/utils/shard_processor.py +0 -119
- caption_flow/utils/shard_tracker.py +0 -83
- caption_flow-0.2.3.dist-info/RECORD +0 -35
- {caption_flow-0.2.3.dist-info → caption_flow-0.3.1.dist-info}/WHEEL +0 -0
- {caption_flow-0.2.3.dist-info → caption_flow-0.3.1.dist-info}/entry_points.txt +0 -0
- {caption_flow-0.2.3.dist-info → caption_flow-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {caption_flow-0.2.3.dist-info → caption_flow-0.3.1.dist-info}/top_level.txt +0 -0
@@ -10,6 +10,7 @@ from typing import List, Any, Optional, Tuple, Union
|
|
10
10
|
import numpy as np
|
11
11
|
import requests
|
12
12
|
from PIL import Image
|
13
|
+
from ..models import ProcessingItem
|
13
14
|
|
14
15
|
|
15
16
|
logger = logging.getLogger(__name__)
|
@@ -21,104 +22,10 @@ class ImageProcessor:
|
|
21
22
|
def __init__(self, num_workers: int = 4):
|
22
23
|
self.executor = ProcessPoolExecutor(max_workers=num_workers)
|
23
24
|
|
24
|
-
async def process_batch(self, image_paths: List[Path]) -> List[np.ndarray]:
|
25
|
-
"""Process a batch of images in parallel."""
|
26
|
-
loop = asyncio.get_event_loop()
|
27
|
-
|
28
|
-
tasks = []
|
29
|
-
for path in image_paths:
|
30
|
-
task = loop.run_in_executor(self.executor, self._process_image, path)
|
31
|
-
tasks.append(task)
|
32
|
-
|
33
|
-
return await asyncio.gather(*tasks)
|
34
|
-
|
35
|
-
@staticmethod
|
36
|
-
def _process_image(path: Path) -> np.ndarray:
|
37
|
-
"""Process a single image."""
|
38
|
-
img = Image.open(path)
|
39
|
-
|
40
|
-
# Resize to standard size
|
41
|
-
img = img.resize((224, 224), Image.Resampling.LANCZOS)
|
42
|
-
|
43
|
-
# Convert to RGB if needed
|
44
|
-
if img.mode != "RGB":
|
45
|
-
img = img.convert("RGB")
|
46
|
-
|
47
|
-
# Convert to numpy array
|
48
|
-
arr = np.array(img, dtype=np.float32)
|
49
|
-
|
50
|
-
# Normalize
|
51
|
-
arr = arr / 255.0
|
52
|
-
|
53
|
-
return arr
|
54
|
-
|
55
|
-
@staticmethod
|
56
|
-
def process_image_data(img_data: Union[str, bytes, Image.Image]) -> Optional[bytes]:
|
57
|
-
"""
|
58
|
-
Process various types of image data into bytes.
|
59
|
-
|
60
|
-
Args:
|
61
|
-
img_data: Can be a URL string, bytes, or PIL Image
|
62
|
-
|
63
|
-
Returns:
|
64
|
-
Image data as bytes, or None if processing failed
|
65
|
-
"""
|
66
|
-
try:
|
67
|
-
if isinstance(img_data, str):
|
68
|
-
# It's a URL - download the image
|
69
|
-
try:
|
70
|
-
# Download with timeout
|
71
|
-
response = requests.get(
|
72
|
-
img_data,
|
73
|
-
timeout=30,
|
74
|
-
headers={"User-Agent": "Mozilla/5.0 (captionflow-dataset-loader)"},
|
75
|
-
)
|
76
|
-
response.raise_for_status()
|
77
|
-
image_data = response.content
|
78
|
-
|
79
|
-
# Verify it's an image by trying to open it
|
80
|
-
img = Image.open(BytesIO(image_data))
|
81
|
-
img.verify() # Verify it's a valid image
|
82
|
-
|
83
|
-
return image_data
|
84
|
-
|
85
|
-
except Exception as e:
|
86
|
-
logger.error(f"Failed to download image from {img_data}: {e}")
|
87
|
-
return None
|
88
|
-
|
89
|
-
elif hasattr(img_data, "__class__") and "Image" in str(img_data.__class__):
|
90
|
-
# It's a PIL Image object
|
91
|
-
import io
|
92
|
-
|
93
|
-
# Save as PNG bytes
|
94
|
-
img_bytes = io.BytesIO()
|
95
|
-
# Convert to RGB
|
96
|
-
img_data = img_data.convert("RGB")
|
97
|
-
img_data.save(img_bytes, format="PNG")
|
98
|
-
return img_bytes.getvalue()
|
99
|
-
|
100
|
-
elif isinstance(img_data, bytes):
|
101
|
-
# Already bytes - validate it's an image
|
102
|
-
try:
|
103
|
-
img = Image.open(BytesIO(img_data))
|
104
|
-
img.verify()
|
105
|
-
return img_data
|
106
|
-
except Exception as e:
|
107
|
-
logger.error(f"Invalid image data: {e}")
|
108
|
-
return None
|
109
|
-
|
110
|
-
else:
|
111
|
-
logger.warning(f"Unknown image data type: {type(img_data)}")
|
112
|
-
return None
|
113
|
-
|
114
|
-
except Exception as e:
|
115
|
-
logger.error(f"Error processing image data: {e}", exc_info=True)
|
116
|
-
return None
|
117
|
-
|
118
25
|
@staticmethod
|
119
|
-
def prepare_for_inference(
|
26
|
+
def prepare_for_inference(item: ProcessingItem) -> Image.Image:
|
120
27
|
"""
|
121
|
-
Prepare image for inference
|
28
|
+
Prepare image for inference.
|
122
29
|
|
123
30
|
Args:
|
124
31
|
image: PIL Image to prepare
|
@@ -126,42 +33,22 @@ class ImageProcessor:
|
|
126
33
|
Returns:
|
127
34
|
Prepared PIL Image
|
128
35
|
"""
|
129
|
-
#
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
if is_mostly_black or is_mostly_white:
|
147
|
-
# Replace background with opposite color for better contrast
|
148
|
-
bg_color = (255, 255, 255) if is_mostly_black else (0, 0, 0)
|
149
|
-
background = Image.new("RGB", img_rgba.size, bg_color)
|
150
|
-
# Use alpha channel as mask if present
|
151
|
-
if img_rgba.mode == "RGBA":
|
152
|
-
background.paste(img_rgba.convert("RGB"), mask=img_rgba.split()[3])
|
153
|
-
else:
|
154
|
-
background.paste(img_rgba.convert("RGB"))
|
155
|
-
|
156
|
-
color_type = "black" if is_mostly_black else "white"
|
157
|
-
pct = black_pct if is_mostly_black else white_pct
|
158
|
-
logger.debug(
|
159
|
-
f"Image is {pct*100:.1f}% {color_type}; background replaced with {bg_color}"
|
160
|
-
)
|
161
|
-
|
162
|
-
return background
|
163
|
-
else:
|
164
|
-
return rgb_img
|
36
|
+
# We used to do a lot more hand-holding here with transparency, but oh well.
|
37
|
+
|
38
|
+
if item.image is not None:
|
39
|
+
image = item.image
|
40
|
+
item.metadata["image_width"], item.metadata["image_height"] = image.size
|
41
|
+
item.metadata["image_format"] = image.format or "unknown"
|
42
|
+
item.image = None
|
43
|
+
return image
|
44
|
+
|
45
|
+
item.image = None
|
46
|
+
image = Image.open(BytesIO(item.image_data))
|
47
|
+
item.image_data = b""
|
48
|
+
item.metadata["image_format"] = image.format or "unknown"
|
49
|
+
item.metadata["image_width"], item.metadata["image_height"] = image.size
|
50
|
+
|
51
|
+
return image
|
165
52
|
|
166
53
|
def shutdown(self):
|
167
54
|
"""Shutdown the executor."""
|