mcli_framework-7.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcli-framework might be problematic.
- mcli/app/chat_cmd.py +42 -0
- mcli/app/commands_cmd.py +226 -0
- mcli/app/completion_cmd.py +216 -0
- mcli/app/completion_helpers.py +288 -0
- mcli/app/cron_test_cmd.py +697 -0
- mcli/app/logs_cmd.py +419 -0
- mcli/app/main.py +492 -0
- mcli/app/model/model.py +1060 -0
- mcli/app/model_cmd.py +227 -0
- mcli/app/redis_cmd.py +269 -0
- mcli/app/video/video.py +1114 -0
- mcli/app/visual_cmd.py +303 -0
- mcli/chat/chat.py +2409 -0
- mcli/chat/command_rag.py +514 -0
- mcli/chat/enhanced_chat.py +652 -0
- mcli/chat/system_controller.py +1010 -0
- mcli/chat/system_integration.py +1016 -0
- mcli/cli.py +25 -0
- mcli/config.toml +20 -0
- mcli/lib/api/api.py +586 -0
- mcli/lib/api/daemon_client.py +203 -0
- mcli/lib/api/daemon_client_local.py +44 -0
- mcli/lib/api/daemon_decorator.py +217 -0
- mcli/lib/api/mcli_decorators.py +1032 -0
- mcli/lib/auth/auth.py +85 -0
- mcli/lib/auth/aws_manager.py +85 -0
- mcli/lib/auth/azure_manager.py +91 -0
- mcli/lib/auth/credential_manager.py +192 -0
- mcli/lib/auth/gcp_manager.py +93 -0
- mcli/lib/auth/key_manager.py +117 -0
- mcli/lib/auth/mcli_manager.py +93 -0
- mcli/lib/auth/token_manager.py +75 -0
- mcli/lib/auth/token_util.py +1011 -0
- mcli/lib/config/config.py +47 -0
- mcli/lib/discovery/__init__.py +1 -0
- mcli/lib/discovery/command_discovery.py +274 -0
- mcli/lib/erd/erd.py +1345 -0
- mcli/lib/erd/generate_graph.py +453 -0
- mcli/lib/files/files.py +76 -0
- mcli/lib/fs/fs.py +109 -0
- mcli/lib/lib.py +29 -0
- mcli/lib/logger/logger.py +611 -0
- mcli/lib/performance/optimizer.py +409 -0
- mcli/lib/performance/rust_bridge.py +502 -0
- mcli/lib/performance/uvloop_config.py +154 -0
- mcli/lib/pickles/pickles.py +50 -0
- mcli/lib/search/cached_vectorizer.py +479 -0
- mcli/lib/services/data_pipeline.py +460 -0
- mcli/lib/services/lsh_client.py +441 -0
- mcli/lib/services/redis_service.py +387 -0
- mcli/lib/shell/shell.py +137 -0
- mcli/lib/toml/toml.py +33 -0
- mcli/lib/ui/styling.py +47 -0
- mcli/lib/ui/visual_effects.py +634 -0
- mcli/lib/watcher/watcher.py +185 -0
- mcli/ml/api/app.py +215 -0
- mcli/ml/api/middleware.py +224 -0
- mcli/ml/api/routers/admin_router.py +12 -0
- mcli/ml/api/routers/auth_router.py +244 -0
- mcli/ml/api/routers/backtest_router.py +12 -0
- mcli/ml/api/routers/data_router.py +12 -0
- mcli/ml/api/routers/model_router.py +302 -0
- mcli/ml/api/routers/monitoring_router.py +12 -0
- mcli/ml/api/routers/portfolio_router.py +12 -0
- mcli/ml/api/routers/prediction_router.py +267 -0
- mcli/ml/api/routers/trade_router.py +12 -0
- mcli/ml/api/routers/websocket_router.py +76 -0
- mcli/ml/api/schemas.py +64 -0
- mcli/ml/auth/auth_manager.py +425 -0
- mcli/ml/auth/models.py +154 -0
- mcli/ml/auth/permissions.py +302 -0
- mcli/ml/backtesting/backtest_engine.py +502 -0
- mcli/ml/backtesting/performance_metrics.py +393 -0
- mcli/ml/cache.py +400 -0
- mcli/ml/cli/main.py +398 -0
- mcli/ml/config/settings.py +394 -0
- mcli/ml/configs/dvc_config.py +230 -0
- mcli/ml/configs/mlflow_config.py +131 -0
- mcli/ml/configs/mlops_manager.py +293 -0
- mcli/ml/dashboard/app.py +532 -0
- mcli/ml/dashboard/app_integrated.py +738 -0
- mcli/ml/dashboard/app_supabase.py +560 -0
- mcli/ml/dashboard/app_training.py +615 -0
- mcli/ml/dashboard/cli.py +51 -0
- mcli/ml/data_ingestion/api_connectors.py +501 -0
- mcli/ml/data_ingestion/data_pipeline.py +567 -0
- mcli/ml/data_ingestion/stream_processor.py +512 -0
- mcli/ml/database/migrations/env.py +94 -0
- mcli/ml/database/models.py +667 -0
- mcli/ml/database/session.py +200 -0
- mcli/ml/experimentation/ab_testing.py +845 -0
- mcli/ml/features/ensemble_features.py +607 -0
- mcli/ml/features/political_features.py +676 -0
- mcli/ml/features/recommendation_engine.py +809 -0
- mcli/ml/features/stock_features.py +573 -0
- mcli/ml/features/test_feature_engineering.py +346 -0
- mcli/ml/logging.py +85 -0
- mcli/ml/mlops/data_versioning.py +518 -0
- mcli/ml/mlops/experiment_tracker.py +377 -0
- mcli/ml/mlops/model_serving.py +481 -0
- mcli/ml/mlops/pipeline_orchestrator.py +614 -0
- mcli/ml/models/base_models.py +324 -0
- mcli/ml/models/ensemble_models.py +675 -0
- mcli/ml/models/recommendation_models.py +474 -0
- mcli/ml/models/test_models.py +487 -0
- mcli/ml/monitoring/drift_detection.py +676 -0
- mcli/ml/monitoring/metrics.py +45 -0
- mcli/ml/optimization/portfolio_optimizer.py +834 -0
- mcli/ml/preprocessing/data_cleaners.py +451 -0
- mcli/ml/preprocessing/feature_extractors.py +491 -0
- mcli/ml/preprocessing/ml_pipeline.py +382 -0
- mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
- mcli/ml/preprocessing/test_preprocessing.py +294 -0
- mcli/ml/scripts/populate_sample_data.py +200 -0
- mcli/ml/tasks.py +400 -0
- mcli/ml/tests/test_integration.py +429 -0
- mcli/ml/tests/test_training_dashboard.py +387 -0
- mcli/public/oi/oi.py +15 -0
- mcli/public/public.py +4 -0
- mcli/self/self_cmd.py +1246 -0
- mcli/workflow/daemon/api_daemon.py +800 -0
- mcli/workflow/daemon/async_command_database.py +681 -0
- mcli/workflow/daemon/async_process_manager.py +591 -0
- mcli/workflow/daemon/client.py +530 -0
- mcli/workflow/daemon/commands.py +1196 -0
- mcli/workflow/daemon/daemon.py +905 -0
- mcli/workflow/daemon/daemon_api.py +59 -0
- mcli/workflow/daemon/enhanced_daemon.py +571 -0
- mcli/workflow/daemon/process_cli.py +244 -0
- mcli/workflow/daemon/process_manager.py +439 -0
- mcli/workflow/daemon/test_daemon.py +275 -0
- mcli/workflow/dashboard/dashboard_cmd.py +113 -0
- mcli/workflow/docker/docker.py +0 -0
- mcli/workflow/file/file.py +100 -0
- mcli/workflow/gcloud/config.toml +21 -0
- mcli/workflow/gcloud/gcloud.py +58 -0
- mcli/workflow/git_commit/ai_service.py +328 -0
- mcli/workflow/git_commit/commands.py +430 -0
- mcli/workflow/lsh_integration.py +355 -0
- mcli/workflow/model_service/client.py +594 -0
- mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
- mcli/workflow/model_service/lightweight_embedder.py +397 -0
- mcli/workflow/model_service/lightweight_model_server.py +714 -0
- mcli/workflow/model_service/lightweight_test.py +241 -0
- mcli/workflow/model_service/model_service.py +1955 -0
- mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
- mcli/workflow/model_service/pdf_processor.py +386 -0
- mcli/workflow/model_service/test_efficient_runner.py +234 -0
- mcli/workflow/model_service/test_example.py +315 -0
- mcli/workflow/model_service/test_integration.py +131 -0
- mcli/workflow/model_service/test_new_features.py +149 -0
- mcli/workflow/openai/openai.py +99 -0
- mcli/workflow/politician_trading/commands.py +1790 -0
- mcli/workflow/politician_trading/config.py +134 -0
- mcli/workflow/politician_trading/connectivity.py +490 -0
- mcli/workflow/politician_trading/data_sources.py +395 -0
- mcli/workflow/politician_trading/database.py +410 -0
- mcli/workflow/politician_trading/demo.py +248 -0
- mcli/workflow/politician_trading/models.py +165 -0
- mcli/workflow/politician_trading/monitoring.py +413 -0
- mcli/workflow/politician_trading/scrapers.py +966 -0
- mcli/workflow/politician_trading/scrapers_california.py +412 -0
- mcli/workflow/politician_trading/scrapers_eu.py +377 -0
- mcli/workflow/politician_trading/scrapers_uk.py +350 -0
- mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
- mcli/workflow/politician_trading/supabase_functions.py +354 -0
- mcli/workflow/politician_trading/workflow.py +852 -0
- mcli/workflow/registry/registry.py +180 -0
- mcli/workflow/repo/repo.py +223 -0
- mcli/workflow/scheduler/commands.py +493 -0
- mcli/workflow/scheduler/cron_parser.py +238 -0
- mcli/workflow/scheduler/job.py +182 -0
- mcli/workflow/scheduler/monitor.py +139 -0
- mcli/workflow/scheduler/persistence.py +324 -0
- mcli/workflow/scheduler/scheduler.py +679 -0
- mcli/workflow/sync/sync_cmd.py +437 -0
- mcli/workflow/sync/test_cmd.py +314 -0
- mcli/workflow/videos/videos.py +242 -0
- mcli/workflow/wakatime/wakatime.py +11 -0
- mcli/workflow/workflow.py +37 -0
- mcli_framework-7.0.0.dist-info/METADATA +479 -0
- mcli_framework-7.0.0.dist-info/RECORD +186 -0
- mcli_framework-7.0.0.dist-info/WHEEL +5 -0
- mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
- mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
- mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
mcli/workflow/model_service/pdf_processor.py
@@ -0,0 +1,386 @@
#!/usr/bin/env python3
"""
PDF Processor for Lightweight Model Service

This module provides PDF text extraction and processing capabilities
that integrate with the lightweight model service for AI-powered
document analysis.
"""

import base64
import json
import logging
import os
import shutil
import sys
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

import requests

# PDF processing libraries
try:
    import fitz  # PyMuPDF
    import PyPDF2
except ImportError:
    print("Warning: PDF libraries not available. Install with: pip install PyPDF2 PyMuPDF")

# Import lightweight model server
from .lightweight_model_server import LIGHTWEIGHT_MODELS, LightweightModelServer

logger = logging.getLogger(__name__)


class PDFProcessor:
    """PDF processing with lightweight model integration"""

    def __init__(self, models_dir: str = "./models/lightweight", port: int = 8080):
        self.models_dir = Path(models_dir)
        self.port = port
        self.lightweight_server = LightweightModelServer(models_dir, port)
        self.temp_dir = Path(tempfile.mkdtemp(prefix="pdf_processor_"))

    def extract_text_from_pdf(self, pdf_path: str) -> Dict[str, Any]:
        """Extract text from PDF with enhanced processing"""
        try:
            pdf_path = Path(pdf_path)
            if not pdf_path.exists():
                return {"error": f"PDF file not found: {pdf_path}"}

            # Extract text using multiple methods for better results
            text_content = self._extract_pdf_text_enhanced(pdf_path)

            if not text_content.strip():
                return {"error": "No text content extracted from PDF"}

            return {
                "success": True,
                "text": text_content,
                "text_length": len(text_content),
                "file_path": str(pdf_path),
                "extraction_method": "enhanced",
                "timestamp": datetime.now().isoformat(),
            }

        except Exception as e:
            logger.error(f"Error extracting PDF text: {e}")
            return {"error": str(e)}

    def _extract_pdf_text_enhanced(self, pdf_path: Path) -> str:
        """Enhanced PDF text extraction using multiple methods"""
        text_content = ""

        try:
            # Method 1: PyMuPDF (fitz) - better text extraction
            if "fitz" in globals():
                doc = fitz.open(pdf_path)
                for page_num in range(len(doc)):
                    page = doc.load_page(page_num)
                    text_content += page.get_text() + "\n"
                doc.close()
                logger.info(f"Extracted {len(text_content)} characters using PyMuPDF")
                return text_content
        except Exception as e:
            logger.warning(f"PyMuPDF extraction failed: {e}")

        try:
            # Method 2: PyPDF2 - fallback
            if "PyPDF2" in globals():
                with open(pdf_path, "rb") as file:
                    pdf_reader = PyPDF2.PdfReader(file)
                    for page in pdf_reader.pages:
                        text_content += page.extract_text() + "\n"
                logger.info(f"Extracted {len(text_content)} characters using PyPDF2")
                return text_content
        except Exception as e:
            logger.warning(f"PyPDF2 extraction failed: {e}")

        return text_content

    def process_pdf_with_ai(self, pdf_path: str, model_key: Optional[str] = None) -> Dict[str, Any]:
        """Process PDF with AI model for enhanced analysis"""
        try:
            # Extract text first
            extraction_result = self.extract_text_from_pdf(pdf_path)
            if not extraction_result.get("success"):
                return extraction_result

            text_content = extraction_result["text"]

            # Auto-select model if not specified
            if not model_key:
                model_key = self.lightweight_server.recommend_model()

            # Ensure model is downloaded and loaded
            if model_key not in self.lightweight_server.loaded_models:
                success = self.lightweight_server.download_and_load_model(model_key)
                if not success:
                    return {"error": f"Failed to load model: {model_key}"}

            # Process text with AI model
            ai_analysis = self._analyze_text_with_ai(text_content, model_key)

            return {
                "success": True,
                "pdf_analysis": {
                    "text_extraction": extraction_result,
                    "ai_analysis": ai_analysis,
                    "model_used": model_key,
                    "processing_timestamp": datetime.now().isoformat(),
                },
            }

        except Exception as e:
            logger.error(f"Error processing PDF with AI: {e}")
            return {"error": str(e)}

    def _analyze_text_with_ai(self, text_content: str, model_key: str) -> Dict[str, Any]:
        """Analyze text content with AI model"""
        try:
            # For now, provide basic analysis
            # In a full implementation, this would use the actual model for inference

            analysis = {
                "summary": self._generate_summary(text_content),
                "key_topics": self._extract_key_topics(text_content),
                "document_type": self._classify_document_type(text_content),
                "word_count": len(text_content.split()),
                "character_count": len(text_content),
                "estimated_reading_time": len(text_content.split()) // 200,  # ~200 words per minute
                "complexity_score": self._calculate_complexity_score(text_content),
            }

            return analysis

        except Exception as e:
            logger.error(f"Error analyzing text with AI: {e}")
            return {"error": str(e)}

    def _generate_summary(self, text: str) -> str:
        """Generate a basic summary of the text"""
        sentences = text.split(".")
        if len(sentences) <= 3:
            return text[:500] + "..." if len(text) > 500 else text

        # Simple summary: first few sentences + last sentence
        summary = ". ".join(sentences[:2]) + ". " + sentences[-1]
        return summary[:500] + "..." if len(summary) > 500 else summary

    def _extract_key_topics(self, text: str) -> List[str]:
        """Extract key topics from text"""
        # Simple keyword extraction
        words = text.lower().split()
        word_freq = {}

        # Common stop words to ignore
        stop_words = {
            "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
            "of", "with", "by", "is", "are", "was", "were", "be", "been", "have",
            "has", "had", "do", "does", "did", "will", "would", "could", "should",
            "may", "might", "can", "this", "that", "these", "those", "i", "you",
            "he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
        }

        for word in words:
            if len(word) > 3 and word not in stop_words:
                word_freq[word] = word_freq.get(word, 0) + 1

        # Return top 5 most frequent words
        sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
        return [word for word, freq in sorted_words[:5]]

    def _classify_document_type(self, text: str) -> str:
        """Classify the type of document"""
        text_lower = text.lower()

        if any(word in text_lower for word in ["contract", "agreement", "terms", "conditions"]):
            return "legal_document"
        elif any(word in text_lower for word in ["report", "analysis", "study", "research"]):
            return "report"
        elif any(word in text_lower for word in ["manual", "guide", "instructions", "how-to"]):
            return "manual"
        elif any(word in text_lower for word in ["invoice", "bill", "payment", "receipt"]):
            return "financial"
        elif any(word in text_lower for word in ["resume", "cv", "curriculum vitae"]):
            return "resume"
        else:
            return "general"

    def _calculate_complexity_score(self, text: str) -> float:
        """Calculate text complexity score (0-1)"""
        sentences = text.split(".")
        words = text.split()

        if not sentences or not words:
            return 0.0

        avg_sentence_length = len(words) / len(sentences)
        avg_word_length = sum(len(word) for word in words) / len(words)

        # Normalize scores
        complexity = (avg_sentence_length / 20.0 + avg_word_length / 8.0) / 2.0
        return min(1.0, max(0.0, complexity))

    def start_pdf_processing_service(self, port: int = 8080) -> bool:
        """Start the PDF processing service"""
        try:
            self.port = port
            self.lightweight_server.port = port
            self.lightweight_server.start_server()
            logger.info(f"PDF processing service started on port {port}")
            return True
        except Exception as e:
            logger.error(f"Error starting PDF processing service: {e}")
            return False

    def get_service_status(self) -> Dict[str, Any]:
        """Get the status of the PDF processing service"""
        return {
            "service_running": self.lightweight_server.running,
            "port": self.port,
            "models_loaded": list(self.lightweight_server.loaded_models.keys()),
            "temp_directory": str(self.temp_dir),
            "available_models": list(LIGHTWEIGHT_MODELS.keys()),
        }


def create_pdf_processor_api():
    """Create a simple API for PDF processing"""
    import urllib.parse
    from http.server import BaseHTTPRequestHandler, HTTPServer

    class PDFProcessorHandler(BaseHTTPRequestHandler):
        def __init__(self, *args, processor=None, **kwargs):
            self.processor = processor
            super().__init__(*args, **kwargs)

        def do_POST(self):
            """Handle PDF processing requests"""
            parsed_path = urllib.parse.urlparse(self.path)
            path = parsed_path.path

            if path == "/process-pdf":
                self._handle_process_pdf()
            elif path == "/extract-text":
                self._handle_extract_text()
            else:
                self._send_response(404, {"error": "Endpoint not found"})

        def do_GET(self):
            """Handle status requests"""
            parsed_path = urllib.parse.urlparse(self.path)
            path = parsed_path.path

            if path == "/status":
                status = self.processor.get_service_status()
                self._send_response(200, status)
            else:
                self._send_response(404, {"error": "Endpoint not found"})

        def _handle_process_pdf(self):
            """Handle PDF processing with AI"""
            try:
                content_length = int(self.headers.get("Content-Length", 0))
                post_data = self.rfile.read(content_length)
                request_data = json.loads(post_data.decode("utf-8"))

                pdf_path = request_data.get("pdf_path")
                model_key = request_data.get("model_key")

                if not pdf_path:
                    self._send_response(400, {"error": "PDF path is required"})
                    return

                result = self.processor.process_pdf_with_ai(pdf_path, model_key)
                self._send_response(200, result)

            except Exception as e:
                self._send_response(500, {"error": str(e)})

        def _handle_extract_text(self):
            """Handle text extraction from PDF"""
            try:
                content_length = int(self.headers.get("Content-Length", 0))
                post_data = self.rfile.read(content_length)
                request_data = json.loads(post_data.decode("utf-8"))

                pdf_path = request_data.get("pdf_path")

                if not pdf_path:
                    self._send_response(400, {"error": "PDF path is required"})
                    return

                result = self.processor.extract_text_from_pdf(pdf_path)
                self._send_response(200, result)

            except Exception as e:
                self._send_response(500, {"error": str(e)})

        def _send_response(self, status_code, data):
            """Send JSON response"""
            self.send_response(status_code)
            self.send_header("Content-Type", "application/json")
            self.send_header("Access-Control-Allow-Origin", "*")
            self.end_headers()
            self.wfile.write(json.dumps(data).encode("utf-8"))

    return PDFProcessorHandler


if __name__ == "__main__":
    # Test the PDF processor
    processor = PDFProcessor()

    # Test with a sample PDF if available
    test_pdf = "test.pdf"
    if os.path.exists(test_pdf):
        result = processor.process_pdf_with_ai(test_pdf)
        print(json.dumps(result, indent=2))
    else:
        print("No test PDF found. Create a test.pdf file to test the processor.")
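For orientation, here is a minimal usage sketch of the file above. It is not part of the package diff: the sample PDF path and the functools.partial binding of the processor onto the handler class are assumptions (create_pdf_processor_api() only returns the handler class and does not start a server itself).

# Hypothetical usage sketch, not included in the wheel.
# Assumes pdf_processor.py is importable and that "sample.pdf" exists.
import json
from functools import partial
from http.server import HTTPServer

from mcli.workflow.model_service.pdf_processor import PDFProcessor, create_pdf_processor_api

processor = PDFProcessor(models_dir="./models/lightweight", port=8080)

# Plain text extraction (no model needed)
result = processor.extract_text_from_pdf("sample.pdf")
print(json.dumps(result, indent=2))

# Serve the handler class returned above; the handler reads self.processor,
# so bind it before HTTPServer instantiates the handler per request.
handler_cls = partial(create_pdf_processor_api(), processor=processor)
HTTPServer(("127.0.0.1", 8080), handler_cls).serve_forever()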
mcli/workflow/model_service/test_efficient_runner.py
@@ -0,0 +1,234 @@
#!/usr/bin/env python3
"""
Test script for the efficient model runner functionality.
"""

import os
import sys
from pathlib import Path

# Add the parent directory to the path so we can import the modules
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))


def test_imports():
    """Test that all required modules can be imported"""
    print("🧪 Testing imports...")

    try:
        import requests

        print("✅ requests imported")
    except ImportError as e:
        print(f"❌ requests import failed: {e}")
        return False

    try:
        import click

        print("✅ click imported")
    except ImportError as e:
        print(f"❌ click import failed: {e}")
        return False

    try:
        import psutil

        print("✅ psutil imported")
    except ImportError as e:
        print(f"❌ psutil import failed: {e}")
        return False

    try:
        from mcli.workflow.model_service.model_service import ModelService

        print("✅ ModelService imported")
    except ImportError as e:
        print(f"❌ ModelService import failed: {e}")
        return False

    return True


def test_system_analysis():
    """Test system analysis functionality"""
    print("\n🧪 Testing system analysis...")

    try:
        import psutil

        # Get basic system info
        cpu_count = psutil.cpu_count()
        memory_gb = psutil.virtual_memory().total / (1024**3)

        print(f"✅ CPU cores: {cpu_count}")
        print(f"✅ Memory: {memory_gb:.1f} GB")

        # Test GPU detection
        try:
            import torch

            gpu_available = torch.cuda.is_available()
            print(f"✅ GPU available: {gpu_available}")
            if gpu_available:
                gpu_name = torch.cuda.get_device_name(0)
                print(f"✅ GPU name: {gpu_name}")
        except ImportError:
            print("⚠️ PyTorch not available for GPU detection")

        return True

    except Exception as e:
        print(f"❌ System analysis failed: {e}")
        return False


def test_model_selection():
    """Test model selection logic"""
    print("\n🧪 Testing model selection...")

    try:
        # Import the efficient runner
        from mcli.workflow.model_service.ollama_efficient_runner import (
            EFFICIENT_MODELS,
            get_system_info,
            recommend_model,
        )

        # Test model dictionary
        print(f"✅ Found {len(EFFICIENT_MODELS)} efficient models:")
        for key, info in EFFICIENT_MODELS.items():
            print(f"  - {key}: {info['name']} ({info['parameters']})")

        # Test system info
        system_info = get_system_info()
        print("✅ System info collected")

        # Test model recommendation
        recommended = recommend_model(system_info)
        print(f"✅ Recommended model: {recommended}")

        return True

    except Exception as e:
        print(f"❌ Model selection test failed: {e}")
        return False


def test_ollama_check():
    """Test Ollama installation check"""
    print("\n🧪 Testing Ollama check...")

    try:
        from mcli.workflow.model_service.ollama_efficient_runner import check_ollama_installed

        # This will check if ollama is installed
        installed = check_ollama_installed()

        if installed:
            print("✅ Ollama is installed")
        else:
            print("⚠️ Ollama not installed (this is expected if not installed)")

        return True

    except Exception as e:
        print(f"❌ Ollama check failed: {e}")
        return False


def test_mcli_service():
    """Test MCLI model service functionality"""
    print("\n🧪 Testing MCLI model service...")

    try:
        from mcli.workflow.model_service.model_service import ModelService

        # Create service instance
        service = ModelService()
        print("✅ ModelService created")

        # Check status
        status = service.status()
        print(f"✅ Service status: {status['running']}")

        # Test database
        models = service.model_manager.db.get_all_models()
        print(f"✅ Database accessible, {len(models)} models found")

        return True

    except Exception as e:
        print(f"❌ MCLI service test failed: {e}")
        return False


def test_api_endpoints():
    """Test API endpoint definitions"""
    print("\n🧪 Testing API endpoints...")

    try:
        from mcli.workflow.model_service.model_service import ModelService

        service = ModelService()

        # Check for required endpoints
        routes = [route.path for route in service.app.routes]
        required_routes = ["/models", "/models/summary", "/models/from-url"]

        for route in required_routes:
            if route in routes:
                print(f"✅ Route {route} found")
            else:
                print(f"❌ Route {route} not found")
                return False

        return True

    except Exception as e:
        print(f"❌ API endpoints test failed: {e}")
        return False


def main():
    """Run all tests"""
    print("🚀 Testing Efficient Model Runner")
    print("=" * 50)

    tests = [
        ("Imports", test_imports),
        ("System Analysis", test_system_analysis),
        ("Model Selection", test_model_selection),
        ("Ollama Check", test_ollama_check),
        ("MCLI Service", test_mcli_service),
        ("API Endpoints", test_api_endpoints),
    ]

    passed = 0
    total = len(tests)

    for test_name, test_func in tests:
        print(f"\n🔍 Running {test_name} test...")
        if test_func():
            passed += 1
            print(f"✅ {test_name} test passed")
        else:
            print(f"❌ {test_name} test failed")

    print("\n" + "=" * 50)
    print(f"📊 Test Results: {passed}/{total} tests passed")

    if passed == total:
        print("🎉 All tests passed! The efficient model runner is ready to use.")
        print("\n📋 Next steps:")
        print("1. Install Ollama: https://ollama.com/download")
        print("2. Run: python ollama_efficient_runner.py")
        print("3. Follow the prompts to download and test models")
        return 0
    else:
        print("❌ Some tests failed. Please check the errors above.")
        return 1


if __name__ == "__main__":
    sys.exit(main())
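A minimal sketch of how one might invoke the test script above as a module and read its exit status (the module path is taken from the file list; running it this way, rather than as a file, is an assumption about how the installed wheel would be used):

# Hypothetical invocation sketch, not included in the wheel.
import subprocess
import sys

proc = subprocess.run(
    [sys.executable, "-m", "mcli.workflow.model_service.test_efficient_runner"],
    capture_output=True,
    text=True,
)
print(proc.stdout)
print("exit code:", proc.returncode)  # main() returns 0 when all tests pass, 1 otherwise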