isa-model 0.4.3__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,359 +0,0 @@
- #!/usr/bin/env python3
- """
- BLIP Vision Service
- Computer vision service using BLIP for image captioning and description
- Based on the notebook implementation
- """
-
- import os
- import logging
- from typing import Dict, List, Any, Optional, Union, BinaryIO
- from PIL import Image
- import io
-
- from .base_vision_service import BaseVisionService
-
- logger = logging.getLogger(__name__)
-
- def _lazy_import_blip_deps():
-     """Lazy import BLIP dependencies (transformers and torch only; BLIP via transformers does not need tensorflow)"""
-     try:
-         import torch
-         from transformers import BlipProcessor, BlipForConditionalGeneration
-
-         return {
-             'torch': torch,
-             'BlipProcessor': BlipProcessor,
-             'BlipForConditionalGeneration': BlipForConditionalGeneration,
-             'available': True
-         }
-     except ImportError as e:
-         logger.warning(f"BLIP dependencies not available: {e}")
-         return {'available': False}
-
- class BLIPVisionService(BaseVisionService):
-     """
-     BLIP-based vision service for image captioning and description
-     Provides an alternative implementation to VLM-based captioning
-     """
-
-     def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"):
-         """
-         Initialize BLIP vision service
-
-         Args:
-             model_name: Hugging Face model name for BLIP
-         """
-         super().__init__()
-
-         self.model_name = model_name
-         self.processor = None
-         self.model = None
-
-         # Lazy load dependencies
-         self.blip_components = _lazy_import_blip_deps()
-
-         if not self.blip_components['available']:
-             raise ImportError("BLIP dependencies (transformers, torch) are required")
-
-         # Load BLIP model
-         self._load_blip_model()
-
-     def _load_blip_model(self):
-         """Load BLIP model and processor"""
-         try:
-             # Load the pretrained BLIP processor and model
-             self.processor = self.blip_components['BlipProcessor'].from_pretrained(self.model_name)
-             self.model = self.blip_components['BlipForConditionalGeneration'].from_pretrained(self.model_name)
-
-             logger.info(f"BLIP model loaded: {self.model_name}")
-
-         except Exception as e:
-             logger.error(f"Error loading BLIP model: {e}")
-             raise
-
-     def _preprocess_image(self, image: Union[str, BinaryIO]) -> Image.Image:
-         """
-         Preprocess image for BLIP input
-
-         Args:
-             image: Image path or binary data
-
-         Returns:
-             PIL Image in RGB format
-         """
-         try:
-             # Handle different image input types
-             if isinstance(image, str):
-                 # File path
-                 pil_image = Image.open(image).convert('RGB')
-             elif hasattr(image, 'read'):
-                 # Binary IO
-                 image_data = image.read()
-                 pil_image = Image.open(io.BytesIO(image_data)).convert('RGB')
-             else:
-                 raise ValueError("Unsupported image format")
-
-             return pil_image
-
-         except Exception as e:
-             logger.error(f"Error preprocessing image: {e}")
-             raise
-
-     def _generate_text(self, image: Image.Image, prompt: str) -> str:
-         """
-         Generate text for image using BLIP
-
-         Args:
-             image: PIL Image
-             prompt: Text prompt for generation
-
-         Returns:
-             Generated text
-         """
-         try:
-             # Prepare inputs for BLIP model
-             inputs = self.processor(images=image, text=prompt, return_tensors="pt")
-
-             # Generate text output
-             output = self.model.generate(**inputs)
-
-             # Decode output
-             result = self.processor.decode(output[0], skip_special_tokens=True)
-
-             return result
-
-         except Exception as e:
-             logger.error(f"Error generating text: {e}")
-             raise
-
-     async def describe_image(self,
-                              image: Union[str, BinaryIO],
-                              detail_level: str = "medium") -> Dict[str, Any]:
-         """
-         Generate description for image using BLIP
-
-         Args:
-             image: Image path or binary data
-             detail_level: Level of detail (not used in BLIP, maintained for compatibility)
-
-         Returns:
-             Description results
-         """
-         try:
-             # Preprocess image
-             pil_image = self._preprocess_image(image)
-
-             # Generate caption using BLIP
-             prompt = "This is a picture of"  # Following notebook implementation
-             caption = self._generate_text(pil_image, prompt)
-
-             return {
-                 "task": "describe",
-                 "service": "BLIPVisionService",
-                 "description": caption,
-                 "detail_level": detail_level,
-                 "model_type": "BLIP",
-                 "prompt_used": prompt,
-                 "success": True
-             }
-
-         except Exception as e:
-             logger.error(f"Error describing image: {e}")
-             return {
-                 "error": str(e),
-                 "service": "BLIPVisionService",
-                 "success": False
-             }
-
-     async def analyze_image(self,
-                             image: Union[str, BinaryIO],
-                             prompt: Optional[str] = None,
-                             max_tokens: int = 1000) -> Dict[str, Any]:
-         """
-         Analyze image using BLIP
-
-         Args:
-             image: Image path or binary data
-             prompt: Optional custom prompt
-             max_tokens: Not used for BLIP
-
-         Returns:
-             Analysis results
-         """
-         try:
-             # Preprocess image
-             pil_image = self._preprocess_image(image)
-
-             # Use custom prompt or default
-             if prompt:
-                 analysis_prompt = prompt
-             else:
-                 analysis_prompt = "This is a detailed photo showing"  # For summary-like analysis
-
-             # Generate analysis using BLIP
-             analysis_text = self._generate_text(pil_image, analysis_prompt)
-
-             return {
-                 "task": "analyze",
-                 "service": "BLIPVisionService",
-                 "text": analysis_text,
-                 "model_type": "BLIP",
-                 "prompt_used": analysis_prompt,
-                 "success": True
-             }
-
-         except Exception as e:
-             logger.error(f"Error analyzing image: {e}")
-             return {
-                 "error": str(e),
-                 "service": "BLIPVisionService",
-                 "success": False
-             }
-
-     async def generate_caption(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
-         """
-         Generate caption for image (Task 9 from notebook)
-
-         Args:
-             image: Image path or binary data
-
-         Returns:
-             Caption results
-         """
-         try:
-             # Preprocess image
-             pil_image = self._preprocess_image(image)
-
-             # Generate caption
-             prompt = "This is a picture of"  # Following notebook
-             caption = self._generate_text(pil_image, prompt)
-
-             return {
-                 "task": "caption",
-                 "service": "BLIPVisionService",
-                 "caption": caption,
-                 "model_type": "BLIP",
-                 "success": True
-             }
-
-         except Exception as e:
-             logger.error(f"Error generating caption: {e}")
-             return {
-                 "error": str(e),
-                 "service": "BLIPVisionService",
-                 "success": False
-             }
-
-     async def generate_summary(self, image: Union[str, BinaryIO]) -> Dict[str, Any]:
-         """
-         Generate summary for image (Task 10 from notebook)
-
-         Args:
-             image: Image path or binary data
-
-         Returns:
-             Summary results
-         """
-         try:
-             # Preprocess image
-             pil_image = self._preprocess_image(image)
-
-             # Generate summary
-             prompt = "This is a detailed photo showing"  # Following notebook
-             summary = self._generate_text(pil_image, prompt)
-
-             return {
-                 "task": "summary",
-                 "service": "BLIPVisionService",
-                 "summary": summary,
-                 "model_type": "BLIP",
-                 "success": True
-             }
-
-         except Exception as e:
-             logger.error(f"Error generating summary: {e}")
-             return {
-                 "error": str(e),
-                 "service": "BLIPVisionService",
-                 "success": False
-             }
-
-     async def batch_generate(self,
-                              images: List[Union[str, BinaryIO]],
-                              task: str = "caption") -> Dict[str, Any]:
-         """
-         Generate captions or summaries for multiple images
-
-         Args:
-             images: List of image paths or binary data
-             task: Task type ("caption" or "summary")
-
-         Returns:
-             Batch generation results
-         """
-         try:
-             results = []
-             errors = []
-
-             for i, image in enumerate(images):
-                 try:
-                     if task == "caption":
-                         result = await self.generate_caption(image)
-                     elif task == "summary":
-                         result = await self.generate_summary(image)
-                     else:
-                         raise ValueError(f"Unsupported task: {task}")
-
-                     if result.get("success"):
-                         results.append({
-                             "index": i,
-                             "image": str(image) if isinstance(image, str) else f"binary_image_{i}",
-                             **result
-                         })
-                     else:
-                         errors.append({
-                             "index": i,
-                             "image": str(image) if isinstance(image, str) else f"binary_image_{i}",
-                             "error": result.get("error", "Unknown error")
-                         })
-
-                 except Exception as e:
-                     errors.append({
-                         "index": i,
-                         "image": str(image) if isinstance(image, str) else f"binary_image_{i}",
-                         "error": str(e)
-                     })
-
-             return {
-                 "task": f"batch_{task}",
-                 "service": "BLIPVisionService",
-                 "total_images": len(images),
-                 "successful": len(results),
-                 "failed": len(errors),
-                 "results": results,
-                 "errors": errors,
-                 "success": True
-             }
-
-         except Exception as e:
-             logger.error(f"Error in batch generation: {e}")
-             return {
-                 "error": str(e),
-                 "service": "BLIPVisionService",
-                 "success": False
-             }
-
-     def get_service_info(self) -> Dict[str, Any]:
-         """Get service information"""
-         return {
-             "service_name": "BLIPVisionService",
-             "model_name": self.model_name,
-             "model_type": "BLIP",
-             "capabilities": ["describe", "analyze", "caption", "summary", "batch_generate"],
-             "model_loaded": self.model is not None,
-             "processor_loaded": self.processor is not None,
-             "dependencies_available": self.blip_components['available']
-         }
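
For context, the module removed here exposed an async captioning API, and 0.4.4 drops it entirely, so any downstream caller needs a replacement. Below is a minimal sketch of how such a caller might have looked under 0.4.3. The import path is an assumption inferred from the module's relative import of `base_vision_service` and may not match the actual package layout; the image filenames are placeholders.

```python
import asyncio

# Hypothetical import path; verify against the installed 0.4.3 layout.
from isa_model.inference.services.vision.blip_vision_service import BLIPVisionService

async def main():
    # Construction loads the Salesforce/blip-image-captioning-base weights,
    # downloading them on first use, so it can take a while.
    service = BLIPVisionService()

    # Single-image caption: returns a dict with "caption" and "success" keys.
    result = await service.generate_caption("photo.jpg")
    if result["success"]:
        print(result["caption"])

    # Batch mode wraps generate_caption/generate_summary per image and
    # reports successes and failures separately.
    batch = await service.batch_generate(["a.jpg", "b.jpg"], task="summary")
    print(f'{batch["successful"]}/{batch["total_images"]} succeeded')

asyncio.run(main())
```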