isa-model 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. isa_model/__init__.py +30 -1
  2. isa_model/client.py +770 -0
  3. isa_model/core/config/__init__.py +16 -0
  4. isa_model/core/config/config_manager.py +514 -0
  5. isa_model/core/config.py +426 -0
  6. isa_model/core/models/model_billing_tracker.py +476 -0
  7. isa_model/core/models/model_manager.py +399 -0
  8. isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
  9. isa_model/core/pricing_manager.py +426 -0
  10. isa_model/core/services/__init__.py +19 -0
  11. isa_model/core/services/intelligent_model_selector.py +547 -0
  12. isa_model/core/types.py +291 -0
  13. isa_model/deployment/__init__.py +2 -0
  14. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
  15. isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
  16. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
  17. isa_model/deployment/cloud/modal/register_models.py +321 -0
  18. isa_model/deployment/runtime/deployed_service.py +338 -0
  19. isa_model/deployment/services/__init__.py +9 -0
  20. isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
  21. isa_model/deployment/services/model_service.py +332 -0
  22. isa_model/deployment/services/service_monitor.py +356 -0
  23. isa_model/deployment/services/service_registry.py +527 -0
  24. isa_model/eval/__init__.py +80 -44
  25. isa_model/eval/config/__init__.py +10 -0
  26. isa_model/eval/config/evaluation_config.py +108 -0
  27. isa_model/eval/evaluators/__init__.py +18 -0
  28. isa_model/eval/evaluators/base_evaluator.py +503 -0
  29. isa_model/eval/evaluators/llm_evaluator.py +472 -0
  30. isa_model/eval/factory.py +417 -709
  31. isa_model/eval/infrastructure/__init__.py +24 -0
  32. isa_model/eval/infrastructure/experiment_tracker.py +466 -0
  33. isa_model/eval/metrics.py +191 -21
  34. isa_model/inference/ai_factory.py +181 -605
  35. isa_model/inference/services/audio/base_stt_service.py +65 -1
  36. isa_model/inference/services/audio/base_tts_service.py +75 -1
  37. isa_model/inference/services/audio/openai_stt_service.py +189 -151
  38. isa_model/inference/services/audio/openai_tts_service.py +12 -10
  39. isa_model/inference/services/audio/replicate_tts_service.py +61 -56
  40. isa_model/inference/services/base_service.py +55 -17
  41. isa_model/inference/services/embedding/base_embed_service.py +65 -1
  42. isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
  43. isa_model/inference/services/embedding/openai_embed_service.py +8 -10
  44. isa_model/inference/services/helpers/stacked_config.py +148 -0
  45. isa_model/inference/services/img/__init__.py +18 -0
  46. isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
  47. isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
  48. isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
  49. isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
  50. isa_model/inference/services/llm/__init__.py +3 -3
  51. isa_model/inference/services/llm/base_llm_service.py +492 -40
  52. isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
  53. isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
  54. isa_model/inference/services/llm/ollama_llm_service.py +51 -17
  55. isa_model/inference/services/llm/openai_llm_service.py +70 -19
  56. isa_model/inference/services/llm/yyds_llm_service.py +24 -23
  57. isa_model/inference/services/vision/__init__.py +38 -4
  58. isa_model/inference/services/vision/base_vision_service.py +218 -117
  59. isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
  60. isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
  61. isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
  62. isa_model/inference/services/vision/helpers/image_utils.py +272 -3
  63. isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
  64. isa_model/inference/services/vision/openai_vision_service.py +104 -307
  65. isa_model/inference/services/vision/replicate_vision_service.py +140 -325
  66. isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
  67. isa_model/scripts/register_models.py +370 -0
  68. isa_model/scripts/register_models_with_embeddings.py +510 -0
  69. isa_model/serving/api/fastapi_server.py +6 -1
  70. isa_model/serving/api/routes/unified.py +202 -0
  71. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
  72. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/RECORD +77 -53
  73. isa_model/config/__init__.py +0 -9
  74. isa_model/config/config_manager.py +0 -213
  75. isa_model/core/model_manager.py +0 -213
  76. isa_model/core/model_registry.py +0 -375
  77. isa_model/core/vision_models_init.py +0 -116
  78. isa_model/inference/billing_tracker.py +0 -406
  79. isa_model/inference/services/llm/triton_llm_service.py +0 -481
  80. isa_model/inference/services/stacked/__init__.py +0 -26
  81. isa_model/inference/services/stacked/config.py +0 -426
  82. isa_model/inference/services/vision/ollama_vision_service.py +0 -194
  83. /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
  84. /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
  85. /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
  86. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
  87. {isa_model-0.3.5.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,297 @@
1
+ """
2
+ 统一的Vision任务提示词服务
3
+ 为不同的Vision模型提供标准化的提示词模板,避免重复代码
4
+ """
5
+
6
+ from typing import List, Optional, Dict, Any
7
+
8
+
9
class VisionPromptService:
    """Unified prompt-generation service for vision tasks.

    Every public method is a static factory that returns the English prompt
    text for one vision task, so that different vision back-ends can share
    the same wording instead of duplicating it.
    """

    @staticmethod
    def _join_labels(labels: Any) -> str:
        """Render *labels* as a comma-separated list for embedding in a prompt.

        A bare string is returned unchanged (joining it would split it into
        characters), and non-string items are converted with ``str`` so that
        the join cannot raise ``TypeError``.
        """
        if isinstance(labels, str):
            return labels
        return ", ".join(str(label) for label in labels)

    @staticmethod
    def get_describe_prompt(detail_level: str = "medium") -> str:
        """Return the image-description prompt for *detail_level*.

        Recognised levels are ``brief``, ``medium`` and ``detailed``; any
        other value falls back to the ``medium`` prompt.
        """
        detail_prompts = {
            "brief": "Please provide a brief, one-sentence description of this image.",
            "medium": "Please provide a detailed description of this image, including main objects, people, setting, and notable details.",
            "detailed": "Please provide a comprehensive and detailed description of this image, including all visible objects, people, setting, colors, composition, style, mood, and any other notable details or context."
        }
        return detail_prompts.get(detail_level, detail_prompts["medium"])

    @staticmethod
    def get_extract_text_prompt() -> str:
        """Return the text-extraction (OCR) prompt, which requests a JSON reply."""
        return """Please extract ALL text content from this image. Requirements:
1. Extract text exactly as it appears
2. Preserve formatting, line breaks, and structure
3. If there are tables, maintain table structure
4. Include headers, captions, and footnotes
5. Return as structured JSON with extracted text and layout information

Format your response as JSON:
{
"extracted_text": "full text content",
"structured_content": {
"headers": [],
"paragraphs": [],
"tables": [],
"other": []
}
}"""

    @staticmethod
    def get_detect_objects_prompt(confidence_threshold: float = 0.5) -> str:
        """Return the object-detection prompt.

        *confidence_threshold* is interpolated verbatim into the prompt text.
        """
        # NOTE(review): the prompt asks for a qualitative confidence
        # (high/medium/low) but filters on a numeric threshold — confirm
        # this is the intended wording before changing it.
        return f"""Please identify and locate all objects in this image. For each object:
1. Object name/type
2. Approximate location (describe position: top-left, center, bottom-right, etc.)
3. Size (small, medium, large)
4. Confidence level (high, medium, low)

Only include objects you're confident about (confidence > {confidence_threshold})

Format as JSON:
{{
"detected_objects": [
{{
"name": "object_name",
"location": "position_description",
"size": "relative_size",
"confidence": "confidence_level"
}}
]
}}"""

    @staticmethod
    def get_detect_ui_elements_prompt(element_types: Optional[List[str]] = None) -> str:
        """Return the UI-element detection prompt.

        When *element_types* is non-empty the prompt asks the model to focus
        on those types; otherwise it asks for all UI elements.
        """
        if element_types:
            element_filter = (
                "Focus on these element types: "
                f"{VisionPromptService._join_labels(element_types)}"
            )
        else:
            element_filter = "Identify all UI elements"

        return f"""Please analyze this user interface image and identify all interactive elements. {element_filter}

For each UI element, provide:
1. Element type (button, input field, dropdown, link, checkbox, radio button, text area, etc.)
2. Text/label content
3. Location description
4. Interactive state (enabled, disabled, selected, etc.)

Format as JSON:
{{
"ui_elements": [
{{
"type": "element_type",
"text": "visible_text",
"location": "position_description",
"state": "element_state",
"confidence": "detection_confidence"
}}
]
}}"""

    @staticmethod
    def get_detect_document_elements_prompt() -> str:
        """Return the document-structure extraction prompt (JSON reply)."""
        return """Please analyze this document image and extract its structure and content.

Identify and extract:
1. Headers and subheaders (with hierarchy level)
2. Paragraphs and body text
3. Tables (with rows and columns)
4. Lists (ordered/unordered)
5. Images and captions
6. Footnotes and references

Format as JSON:
{
"document_structure": {
"title": "document_title",
"headers": [
{"level": 1, "text": "header_text", "position": "location"}
],
"paragraphs": [
{"text": "paragraph_content", "position": "location"}
],
"tables": [
{"rows": [["cell1", "cell2"]], "caption": "table_caption"}
],
"lists": [
{"type": "ordered/unordered", "items": ["item1", "item2"]}
]
}
}"""

    @staticmethod
    def get_extract_table_data_prompt(table_format: str = "json", preserve_formatting: bool = True) -> str:
        """Return the table-data extraction prompt.

        Args:
            table_format: One of ``json``, ``csv``, ``markdown`` or ``html``
                (matched case-insensitively). Anything else falls back to
                ``json``.
            preserve_formatting: Truthy to ask for merged-cell/formatting
                details, falsy to ask for a simplified extraction.

        Returns:
            The prompt text. The ``format`` and ``preserve_formatting``
            values echoed in the example JSON always agree with the
            instructions given earlier in the prompt.
        """
        format_instructions = {
            "json": "Return the table data as a JSON structure with arrays for headers and rows",
            "csv": "Return the table data in CSV format",
            "markdown": "Return the table data in Markdown table format",
            "html": "Return the table data as an HTML table"
        }

        # Resolve the format once so the instruction line and the echoed
        # "format" field cannot disagree when an unknown value is passed.
        format_key = table_format.lower() if isinstance(table_format, str) else "json"
        if format_key not in format_instructions:
            format_key = "json"
        format_instruction = format_instructions[format_key]

        if preserve_formatting:
            formatting_note = "Preserve cell merging, formatting, and styling information"
            preserve_flag = "true"
        else:
            formatting_note = "Extract data in simplified format"
            preserve_flag = "false"

        return f"""Please extract ALL table data from this image with high precision. {formatting_note}

Requirements:
1. Identify all tables in the image
2. Extract headers, rows, and data accurately
3. Maintain data relationships and structure
4. Handle merged cells appropriately
5. Include any table captions or titles
6. {format_instruction}

For each table, provide:
- Table identifier/caption
- Column headers
- All row data
- Metadata about structure (row/column counts, merged cells)

Return as structured JSON:
{{
"tables": [
{{
"table_id": "table_1",
"caption": "table_title_if_any",
"headers": ["Column1", "Column2", "Column3"],
"rows": [
["data1", "data2", "data3"],
["data4", "data5", "data6"]
],
"metadata": {{
"row_count": 2,
"column_count": 3,
"has_headers": true,
"merged_cells": [
{{"row": 0, "col": 0, "rowspan": 1, "colspan": 2}}
],
"data_types": ["text", "number", "text"]
}}
}}
],
"extraction_metadata": {{
"total_tables": 1,
"extraction_confidence": "high",
"format": "{format_key}",
"preserve_formatting": {preserve_flag}
}}
}}

Important:
- Be extremely accurate with data extraction
- Preserve numbers exactly as they appear
- Handle currency, percentages, and special characters correctly
- If cells are empty, represent them as empty strings or null
- For merged cells, include merge information in metadata"""

    @staticmethod
    def get_classify_image_prompt(categories: Optional[List[str]] = None) -> str:
        """Return the image-classification prompt.

        With a non-empty *categories* the model is asked to choose one of
        them; otherwise it is asked for a free-form main category and
        subcategory.
        """
        if categories:
            return f"""Please classify this image into one of these categories: {VisionPromptService._join_labels(categories)}

Provide:
1. The most appropriate category
2. Confidence level (0.0-1.0)
3. Brief reasoning

Format as JSON:
{{
"classification": "selected_category",
"confidence": 0.95,
"reasoning": "explanation"
}}"""
        else:
            return """Please classify this image by identifying its main category and subcategory.

Provide:
1. Main category (e.g., nature, technology, people, etc.)
2. Subcategory (more specific classification)
3. Confidence level
4. Key features that led to this classification

Format as JSON:
{
"main_category": "primary_category",
"subcategory": "specific_type",
"confidence": 0.95,
"key_features": ["feature1", "feature2"]
}"""

    @staticmethod
    def get_object_coordinates_prompt(object_name: str) -> str:
        """Return the prompt asking the model to locate *object_name*.

        *object_name* is interpolated verbatim, both in the instruction and
        in the example JSON.
        """
        return f"""Please locate '{object_name}' in this image and provide detailed location information.

Provide:
1. Whether the object was found
2. Detailed position description
3. Approximate coordinates (if possible, describe as percentages from top-left)
4. Size and boundaries

Format as JSON:
{{
"found": true/false,
"object_name": "{object_name}",
"location": "detailed_position_description",
"coordinates": "approximate_position_as_percentages",
"size": "object_size_description",
"confidence": "detection_confidence"
}}"""

    @staticmethod
    def get_compare_images_prompt() -> str:
        """Return the image-comparison prompt (JSON reply)."""
        # NOTE(review): the wording says "this image" (singular) although the
        # task compares images — confirm how callers supply the images
        # before rewording.
        return """Please compare the objects, styles, and content in this image. Highlight similarities and differences.

Provide:
1. Main similarities
2. Key differences
3. Style comparison
4. Content analysis

Format as JSON:
{
"comparison": {
"similarities": ["similarity1", "similarity2"],
"differences": ["difference1", "difference2"],
"style_analysis": "style_comparison",
"content_analysis": "content_comparison"
}
}"""
265
+
266
+
267
class VisionPromptMixin:
    """Mixin that gives vision services access to the shared task prompts.

    A vision service can inherit from this mixin to obtain the standard
    prompt for a task by name via :meth:`get_task_prompt`.
    """

    def get_task_prompt(self, task: str, **kwargs) -> str:
        """Return the standard prompt for *task*.

        Args:
            task: Task name. Recognised values are ``describe``,
                ``extract_text``, ``detect_objects``, ``detect_ui_elements``,
                ``detect_document_elements``, ``extract_table_data``,
                ``classify``, ``get_coordinates`` and ``compare``.
            **kwargs: Optional task settings: ``detail_level``,
                ``confidence_threshold``, ``element_types``,
                ``table_format``, ``preserve_formatting``, ``categories``
                and ``object_name``. A value of ``None`` is treated the same
                as an omitted keyword.

        Returns:
            The prompt text for the task, or a generic analysis prompt when
            *task* is not recognised.
        """
        # ``dict.get`` only applies its default for *missing* keys, so a
        # caller forwarding an unset optional as ``None`` would otherwise
        # leak the text "None" into the prompt.
        opts = {key: value for key, value in kwargs.items() if value is not None}

        # Lazily evaluated builders: only the selected task's prompt is built.
        builders = {
            "describe": lambda: VisionPromptService.get_describe_prompt(
                opts.get("detail_level", "medium")
            ),
            "extract_text": lambda: VisionPromptService.get_extract_text_prompt(),
            "detect_objects": lambda: VisionPromptService.get_detect_objects_prompt(
                opts.get("confidence_threshold", 0.5)
            ),
            "detect_ui_elements": lambda: VisionPromptService.get_detect_ui_elements_prompt(
                opts.get("element_types")
            ),
            "detect_document_elements": lambda: VisionPromptService.get_detect_document_elements_prompt(),
            "extract_table_data": lambda: VisionPromptService.get_extract_table_data_prompt(
                opts.get("table_format", "json"),
                opts.get("preserve_formatting", True),
            ),
            "classify": lambda: VisionPromptService.get_classify_image_prompt(
                opts.get("categories")
            ),
            "get_coordinates": lambda: VisionPromptService.get_object_coordinates_prompt(
                opts.get("object_name", "")
            ),
            "compare": lambda: VisionPromptService.get_compare_images_prompt(),
        }

        builder = builders.get(task)
        if builder is None:
            return "Please analyze this image and provide detailed information."
        return builder()