isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -95,34 +95,332 @@ class TextProcessor:
|
|
95
95
|
return code_blocks
|
96
96
|
|
97
97
|
@staticmethod
def extract_json_from_text(text: str, schema: Optional[Dict] = None, repair_attempts: int = 3) -> Dict[str, Any]:
    """Extract JSON from free-form text, with optional validation and repair.

    Each extraction strategy is tried in order; the first one that yields a
    value (and, when ``schema`` is given, passes validation) wins. If none
    succeeds and ``repair_attempts`` is positive, the text is handed to the
    repair routine.

    Args:
        text: Raw text that may contain JSON.
        schema: Optional schema passed to the schema validator.
        repair_attempts: Maximum number of repair passes; ``0`` disables repair.

    Returns:
        A dict with the keys ``success``, ``data``, ``errors``, ``method``
        and ``repaired``.
    """
    outcome = {
        "success": False,
        "data": None,
        "errors": [],
        "method": None,
        "repaired": False
    }

    strategies = (
        ("direct_parse", TextProcessor._try_direct_json_parse),
        ("json_code_block", TextProcessor._try_json_code_block),
        ("first_json_object", TextProcessor._try_first_json_object),
        ("between_braces", TextProcessor._try_between_braces),
        ("multiple_objects", TextProcessor._try_multiple_json_objects),
        ("yaml_like", TextProcessor._try_yaml_like_parsing),
    )

    for label, extractor in strategies:
        try:
            candidate = extractor(text)
            if candidate is None:
                continue

            # With a schema, a candidate only counts if it validates;
            # otherwise record why and fall through to the next strategy.
            if schema:
                check = TextProcessor._validate_json_schema(candidate, schema)
                if not check["valid"]:
                    outcome["errors"].append(f"{label}: {check['error']}")
                    continue

            outcome.update({
                "success": True,
                "data": candidate,
                "method": label
            })
            return outcome
        except Exception as e:
            outcome["errors"].append(f"{label}: {str(e)}")
            continue

    # Every extraction strategy failed: fall back to repairing the text.
    if repair_attempts > 0:
        repair = TextProcessor._attempt_json_repair(text, schema, repair_attempts)
        if repair["success"]:
            outcome.update(repair)
            outcome["repaired"] = True
            return outcome
        outcome["errors"].extend(repair["errors"])

    return outcome
|
155
|
+
|
156
|
+
@staticmethod
def _try_direct_json_parse(text: str) -> Optional[Dict[str, Any]]:
    """Parse the whole (whitespace-trimmed) text as a JSON document.

    Returns ``None`` for blank input; otherwise returns whatever
    ``json.loads`` produces and lets its decoding error propagate.
    """
    trimmed = text.strip()
    return json.loads(trimmed) if trimmed else None
|
163
|
+
|
164
|
+
@staticmethod
def _try_json_code_block(text: str) -> Optional[Dict[str, Any]]:
    """Parse the JSON payload of the first matching fenced code block.

    Fence patterns are tried in order; for each, only the first match is
    considered. A match with an empty payload is skipped. Decoding errors
    from ``json.loads`` propagate to the caller.
    """
    fence_patterns = (
        r'```json\s*(.*?)\s*```',
        r'```JSON\s*(.*?)\s*```',
        r'```\s*(\{.*?\})\s*```',
    )
    flags = re.DOTALL | re.IGNORECASE

    for fence in fence_patterns:
        found = re.search(fence, text, flags)
        if not found:
            continue
        payload = found.group(1).strip()
        if payload:
            return json.loads(payload)

    return None
|
125
181
|
|
182
|
+
@staticmethod
def _try_first_json_object(text: str) -> Optional[Dict[str, Any]]:
    """Parse the first brace-balanced ``{...}`` span found in the text.

    Scans from the first ``{`` while tracking string state and backslash
    escapes, so braces inside string literals do not affect the depth.
    Returns ``None`` when there is no ``{`` or the braces never balance;
    decoding errors from ``json.loads`` propagate.
    """
    opening = text.find('{')
    if opening == -1:
        return None

    depth = 0
    inside_string = False
    skip_char = False

    for offset in range(opening, len(text)):
        ch = text[offset]
        if skip_char:
            # Character following a backslash is consumed verbatim.
            skip_char = False
        elif ch == '\\':
            skip_char = True
        elif ch == '"':
            inside_string = not inside_string
        elif inside_string:
            pass
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return json.loads(text[opening:offset + 1])

    return None
|
217
|
+
|
218
|
+
@staticmethod
def _try_between_braces(text: str) -> Optional[Dict[str, Any]]:
    """Parse everything from the first ``{`` to the last ``}`` as JSON.

    Returns ``None`` when no such span exists; decoding errors from
    ``json.loads`` propagate.
    """
    start = text.find('{')
    end = text.rfind('}')

    # A usable span needs an opening brace that precedes the last closing one.
    if start == -1 or end <= start:
        return None

    return json.loads(text[start:end + 1])
|
229
|
+
|
230
|
+
@staticmethod
def _try_multiple_json_objects(text: str) -> Optional[List[Dict[str, Any]]]:
    """Extract every top-level JSON object that appears in the text.

    Objects are decoded in order of appearance. Scanning resumes at the
    exact end of each decoded object, so nested objects are never reported
    a second time as separate results. Extraction stops at the first ``{``
    that does not start a valid JSON object.

    Returns:
        The list of decoded objects, or ``None`` if none were found.
    """
    decoder = json.JSONDecoder()
    objects = []

    position = text.find('{')
    while position != -1:
        try:
            # raw_decode reports where the object ends, which is what lets
            # the scan continue after the whole object instead of after its
            # first closing brace.
            obj, end = decoder.raw_decode(text, position)
        except (ValueError, RecursionError):
            break
        objects.append(obj)
        position = text.find('{', end)

    return objects if objects else None
|
254
|
+
|
255
|
+
@staticmethod
def _try_yaml_like_parsing(text: str) -> Optional[Dict[str, Any]]:
    """Parse flat ``key: value`` lines into a dict.

    Lines without a colon and lines starting with ``#`` are ignored.
    Surrounding quotes are stripped from keys and values. Values are
    converted to ``bool`` (``true``/``false``, case-insensitive), ``int``
    (all digits) or ``float`` (digits with a single dot); anything else,
    including dotted strings such as ``1.2.3``, stays a string instead of
    aborting the whole parse.

    Returns:
        The parsed mapping, or ``None`` if no ``key: value`` line was found
        or ``text`` is not a string.
    """
    if not isinstance(text, str):
        return None

    result = {}

    for line in text.strip().split('\n'):
        line = line.strip()
        if ':' not in line or line.startswith('#'):
            continue

        key, value = line.split(':', 1)
        key = key.strip().strip('"\'')
        value = value.strip().strip('"\'')

        lowered = value.lower()
        if lowered in ('true', 'false'):
            value = lowered == 'true'
        elif value.isdigit():
            try:
                value = int(value)
            except ValueError:
                # Digit-like characters int() cannot read: keep the string.
                pass
        elif '.' in value and value.replace('.', '', 1).isdigit():
            # Removing only ONE dot keeps "1.2.3" out of the float branch.
            try:
                value = float(value)
            except ValueError:
                pass

        result[key] = value

    return result if result else None
|
283
|
+
|
284
|
+
@staticmethod
def _validate_json_schema(data: Any, schema: Dict) -> Dict[str, Any]:
    """Validate ``data`` against ``schema``.

    Uses the ``jsonschema`` package when it can be imported and falls back
    to the basic built-in validator on ``ImportError``.

    Returns:
        ``{"valid": bool, "error": Optional[str]}``. Any unexpected
        exception is reported as an invalid result rather than raised.
    """
    try:
        try:
            import jsonschema
            jsonschema.validate(instance=data, schema=schema)
        except ImportError:
            # jsonschema is unavailable: use the basic validator instead.
            verdict = TextProcessor._basic_schema_validation(data, schema)
        except jsonschema.ValidationError as failure:
            verdict = {"valid": False, "error": str(failure)}
        else:
            verdict = {"valid": True, "error": None}
        return verdict

    except Exception as e:
        return {"valid": False, "error": f"Schema validation error: {str(e)}"}
|
301
|
+
|
302
|
+
@staticmethod
def _basic_schema_validation(data: Any, schema: Dict) -> Dict[str, Any]:
    """Validate ``data`` against the ``type`` and ``required`` schema keys.

    Supported ``type`` names are ``object``, ``array``, ``string``,
    ``number``, ``integer``, ``boolean`` and ``null``; ``type`` may also be
    a list of names, in which case matching any one of them is enough.
    Booleans are not accepted as ``number`` or ``integer``. Type names this
    function does not know are not checked. ``required`` is only enforced
    when ``data`` is a dict.

    Returns:
        ``{"valid": bool, "error": Optional[str]}``. Unexpected exceptions
        are reported as an invalid result rather than raised.
    """
    def _is_number(value):
        # bool is a subclass of int in Python but is not a JSON number.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def _is_integer(value):
        if isinstance(value, bool):
            return False
        return isinstance(value, int) or (isinstance(value, float) and value.is_integer())

    type_checks = {
        "object": lambda value: isinstance(value, dict),
        "array": lambda value: isinstance(value, list),
        "string": lambda value: isinstance(value, str),
        "number": _is_number,
        "integer": _is_integer,
        "boolean": lambda value: isinstance(value, bool),
        "null": lambda value: value is None,
    }

    try:
        if "type" in schema:
            expected = schema["type"]
            candidates = expected if isinstance(expected, list) else [expected]
            known = [name for name in candidates if isinstance(name, str) and name in type_checks]

            # Only reject when every listed type is one we can actually check.
            if known and len(known) == len(candidates):
                if not any(type_checks[name](data) for name in known):
                    label = " or ".join(known)
                    return {"valid": False, "error": f"Expected {label}, got {type(data).__name__}"}

        if "required" in schema and isinstance(data, dict):
            for required_field in schema["required"]:
                if required_field not in data:
                    return {"valid": False, "error": f"Missing required field: {required_field}"}

        return {"valid": True, "error": None}
    except Exception as e:
        return {"valid": False, "error": f"Basic validation error: {str(e)}"}
|
327
|
+
|
328
|
+
@staticmethod
def _attempt_json_repair(text: str, schema: Optional[Dict], max_attempts: int) -> Dict[str, Any]:
    """Try to turn malformed JSON text into parseable JSON.

    Runs up to ``max_attempts`` passes over the repair strategies. Each
    strategy is applied to the text as left by the previous ones, so fixes
    accumulate. After every change the text is re-parsed and, when
    ``schema`` is given, validated. Passes stop early once a full pass
    leaves the text unchanged, since later passes could not differ.

    Args:
        text: The malformed JSON text.
        schema: Optional schema the repaired data must satisfy.
        max_attempts: Maximum number of passes over the strategies.

    Returns:
        A dict with ``success``, ``data`` and ``errors``; on success it
        also carries ``method`` (``"repair_<strategy>"``).
    """
    result = {"success": False, "data": None, "errors": []}

    strategies = (
        ("missing_quotes", TextProcessor._repair_missing_quotes),
        ("trailing_commas", TextProcessor._repair_trailing_commas),
        ("unescaped_quotes", TextProcessor._repair_unescaped_quotes),
        ("incomplete_objects", TextProcessor._repair_incomplete_objects),
    )

    current_text = text

    for _ in range(max_attempts):
        changed = False

        for strategy_name, repair_func in strategies:
            try:
                repaired_text = repair_func(current_text)
                if repaired_text == current_text:
                    continue
                changed = True

                try:
                    repaired_data = json.loads(repaired_text)
                except json.JSONDecodeError as e:
                    result["errors"].append(f"repair_{strategy_name}: Still invalid JSON after repair - {str(e)}")
                else:
                    if schema:
                        validation = TextProcessor._validate_json_schema(repaired_data, schema)
                    else:
                        validation = {"valid": True, "error": None}

                    if validation["valid"]:
                        result.update({
                            "success": True,
                            "data": repaired_data,
                            "method": f"repair_{strategy_name}"
                        })
                        return result
                    result["errors"].append(f"repair_{strategy_name}: {validation['error']}")

                # Keep the partial fix so the next strategy builds on it.
                current_text = repaired_text
            except Exception as e:
                result["errors"].append(f"repair_{strategy_name}: Repair attempt failed - {str(e)}")

        if not changed:
            break

    return result
|
383
|
+
|
384
|
+
@staticmethod
def _repair_missing_quotes(text: str) -> str:
    """Add double quotes around unquoted object keys.

    Only identifiers in key position (directly after ``{`` or ``,``,
    ignoring whitespace) are quoted. Complete string literals are matched
    first and copied through untouched, so ``identifier:`` sequences inside
    values such as ``"http://host"`` are not rewritten.
    """
    pattern = r'"(?:\\.|[^"\\])*"|([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:'

    def _quote_key(match):
        if match.group(2) is None:
            # Matched a whole string literal: leave it exactly as it is.
            return match.group(0)
        return f'{match.group(1)}"{match.group(2)}":'

    return re.sub(pattern, _quote_key, text)
|
390
|
+
|
391
|
+
@staticmethod
def _repair_trailing_commas(text: str) -> str:
    """Drop any comma (plus following whitespace) that precedes ``}`` or ``]``."""
    dangling_comma = re.compile(r',\s*([}\]])')
    # Keep only the closing bracket captured after the comma.
    return dangling_comma.sub(lambda found: found.group(1), text)
|
397
|
+
|
398
|
+
@staticmethod
def _repair_unescaped_quotes(text: str) -> str:
    """Escape double quotes that appear inside string values.

    Heuristic: while inside a string, a quote is treated as the closing
    delimiter only if the next non-whitespace character is ``,``, ``:``,
    ``}``, ``]`` or the end of the text; any other quote is taken to be
    part of the value and is escaped. Quotes that delimit strings and
    sequences that are already backslash-escaped are left untouched, so
    well-formed JSON is returned unchanged.
    """
    pieces = []
    in_string = False
    length = len(text)
    index = 0

    while index < length:
        char = text[index]

        if char == '\\' and in_string and index + 1 < length:
            # Copy an existing escape sequence through as one unit.
            pieces.append(text[index:index + 2])
            index += 2
            continue

        if char == '"':
            if not in_string:
                in_string = True
                pieces.append(char)
            else:
                lookahead = index + 1
                while lookahead < length and text[lookahead] in ' \t\r\n':
                    lookahead += 1
                if lookahead >= length or text[lookahead] in ',:}]':
                    in_string = False
                    pieces.append(char)
                else:
                    pieces.append('\\"')
        else:
            pieces.append(char)

        index += 1

    return ''.join(pieces)
|
404
|
+
|
405
|
+
@staticmethod
def _repair_incomplete_objects(text: str) -> str:
    """Append the closing braces/brackets a truncated JSON text is missing.

    The text is stripped, then scanned while tracking string state so that
    braces and brackets inside string literals are ignored. Closers are
    appended in reverse nesting order (``{"a": [1`` becomes ``{"a": [1]}``).
    Text whose brackets are already balanced is returned stripped and
    otherwise unchanged.
    """
    text = text.strip()

    closers = {'{': '}', '[': ']'}
    pending = []  # closers still owed, innermost last
    in_string = False
    escaped = False

    for char in text:
        if in_string:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char in closers:
            pending.append(closers[char])
        elif char in ('}', ']'):
            # Only a closer that matches the innermost opener consumes it.
            if pending and pending[-1] == char:
                pending.pop()

    return text + ''.join(reversed(pending))
|
423
|
+
|
126
424
|
@staticmethod
|
127
425
|
def split_into_sentences(text: str) -> List[str]:
|
128
426
|
"""将文本分割为句子"""
|