mfcli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mfcli/.env.example +72 -0
- mfcli/__init__.py +0 -0
- mfcli/agents/__init__.py +0 -0
- mfcli/agents/controller/__init__.py +0 -0
- mfcli/agents/controller/agent.py +19 -0
- mfcli/agents/controller/config.yaml +27 -0
- mfcli/agents/controller/tools.py +42 -0
- mfcli/agents/tools/general.py +118 -0
- mfcli/alembic/env.py +61 -0
- mfcli/alembic/script.py.mako +28 -0
- mfcli/alembic/versions/6ccc0c7c397c_added_fields_to_pdf_parts_model.py +39 -0
- mfcli/alembic/versions/769019ef4870_added_gemini_file_path_to_pdf_part_model.py +33 -0
- mfcli/alembic/versions/7a2e3a779fdc_added_functional_block_and_component_.py +54 -0
- mfcli/alembic/versions/7d5adb2a47a7_added_pdf_parts_model.py +41 -0
- mfcli/alembic/versions/7fcb7d6a5836_init.py +167 -0
- mfcli/alembic/versions/e0f2b5765c72_added_cascade_delete_for_models_that_.py +32 -0
- mfcli/alembic.ini +147 -0
- mfcli/cli/__init__.py +0 -0
- mfcli/cli/dependencies.py +59 -0
- mfcli/cli/main.py +192 -0
- mfcli/client/__init__.py +0 -0
- mfcli/client/chroma_db.py +184 -0
- mfcli/client/docling.py +44 -0
- mfcli/client/gemini.py +252 -0
- mfcli/client/llama_parse.py +38 -0
- mfcli/client/vector_db.py +93 -0
- mfcli/constants/__init__.py +0 -0
- mfcli/constants/base_enum.py +18 -0
- mfcli/constants/directory_names.py +1 -0
- mfcli/constants/file_types.py +189 -0
- mfcli/constants/gemini.py +1 -0
- mfcli/constants/openai.py +6 -0
- mfcli/constants/pipeline_run_status.py +3 -0
- mfcli/crud/__init__.py +0 -0
- mfcli/crud/file.py +42 -0
- mfcli/crud/functional_blocks.py +26 -0
- mfcli/crud/netlist.py +18 -0
- mfcli/crud/pipeline_run.py +17 -0
- mfcli/crud/project.py +99 -0
- mfcli/digikey/__init__.py +0 -0
- mfcli/digikey/digikey.py +105 -0
- mfcli/main.py +5 -0
- mfcli/mcp/__init__.py +0 -0
- mfcli/mcp/configs/cline_mcp_settings.json +11 -0
- mfcli/mcp/configs/mfcli.mcp.json +7 -0
- mfcli/mcp/mcp_instance.py +6 -0
- mfcli/mcp/server.py +37 -0
- mfcli/mcp/state_manager.py +51 -0
- mfcli/mcp/tools/__init__.py +0 -0
- mfcli/mcp/tools/query_knowledgebase.py +108 -0
- mfcli/models/__init__.py +10 -0
- mfcli/models/base.py +10 -0
- mfcli/models/bom.py +71 -0
- mfcli/models/datasheet.py +10 -0
- mfcli/models/debug_setup.py +64 -0
- mfcli/models/file.py +43 -0
- mfcli/models/file_docket.py +94 -0
- mfcli/models/file_metadata.py +19 -0
- mfcli/models/functional_blocks.py +94 -0
- mfcli/models/llm_response.py +5 -0
- mfcli/models/mcu.py +97 -0
- mfcli/models/mcu_errata.py +26 -0
- mfcli/models/netlist.py +59 -0
- mfcli/models/pdf_parts.py +25 -0
- mfcli/models/pipeline_run.py +34 -0
- mfcli/models/project.py +27 -0
- mfcli/models/project_metadata.py +15 -0
- mfcli/pipeline/__init__.py +0 -0
- mfcli/pipeline/analysis/__init__.py +0 -0
- mfcli/pipeline/analysis/bom_netlist_mapper.py +28 -0
- mfcli/pipeline/analysis/generators/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/bom/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/bom/bom.py +74 -0
- mfcli/pipeline/analysis/generators/debug_setup/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/debug_setup/debug_setup.py +71 -0
- mfcli/pipeline/analysis/generators/debug_setup/instructions.py +150 -0
- mfcli/pipeline/analysis/generators/functional_blocks/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/functional_blocks/functional_blocks.py +93 -0
- mfcli/pipeline/analysis/generators/functional_blocks/instructions.py +34 -0
- mfcli/pipeline/analysis/generators/functional_blocks/validator.py +94 -0
- mfcli/pipeline/analysis/generators/generator.py +258 -0
- mfcli/pipeline/analysis/generators/generator_base.py +18 -0
- mfcli/pipeline/analysis/generators/mcu/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/mcu/instructions.py +156 -0
- mfcli/pipeline/analysis/generators/mcu/mcu.py +84 -0
- mfcli/pipeline/analysis/generators/mcu_errata/__init__.py +1 -0
- mfcli/pipeline/analysis/generators/mcu_errata/instructions.py +77 -0
- mfcli/pipeline/analysis/generators/mcu_errata/mcu_errata.py +95 -0
- mfcli/pipeline/analysis/generators/summary/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/summary/summary.py +47 -0
- mfcli/pipeline/classifier.py +93 -0
- mfcli/pipeline/data_enricher.py +15 -0
- mfcli/pipeline/extractor.py +34 -0
- mfcli/pipeline/extractors/__init__.py +0 -0
- mfcli/pipeline/extractors/pdf.py +12 -0
- mfcli/pipeline/parser.py +120 -0
- mfcli/pipeline/parsers/__init__.py +0 -0
- mfcli/pipeline/parsers/netlist/__init__.py +0 -0
- mfcli/pipeline/parsers/netlist/edif.py +93 -0
- mfcli/pipeline/parsers/netlist/kicad_legacy_net.py +326 -0
- mfcli/pipeline/parsers/netlist/kicad_spice.py +135 -0
- mfcli/pipeline/parsers/netlist/pads.py +185 -0
- mfcli/pipeline/parsers/netlist/protel.py +166 -0
- mfcli/pipeline/parsers/netlist/protel_detector.py +29 -0
- mfcli/pipeline/pipeline.py +419 -0
- mfcli/pipeline/preprocessors/__init__.py +0 -0
- mfcli/pipeline/preprocessors/user_guide.py +127 -0
- mfcli/pipeline/run_context.py +32 -0
- mfcli/pipeline/schema_mapper.py +89 -0
- mfcli/pipeline/sub_classifier.py +115 -0
- mfcli/utils/__init__.py +0 -0
- mfcli/utils/config.py +33 -0
- mfcli/utils/configurator.py +324 -0
- mfcli/utils/data_cleaner.py +82 -0
- mfcli/utils/datasheet_vectorizer.py +281 -0
- mfcli/utils/directory_manager.py +96 -0
- mfcli/utils/file_upload.py +298 -0
- mfcli/utils/files.py +16 -0
- mfcli/utils/http_requests.py +54 -0
- mfcli/utils/kb_lister.py +89 -0
- mfcli/utils/kb_remover.py +173 -0
- mfcli/utils/logger.py +28 -0
- mfcli/utils/mcp_configurator.py +311 -0
- mfcli/utils/migrations.py +18 -0
- mfcli/utils/orm.py +43 -0
- mfcli/utils/pdf_splitter.py +63 -0
- mfcli/utils/query_service.py +22 -0
- mfcli/utils/system_check.py +306 -0
- mfcli/utils/tools.py +31 -0
- mfcli/utils/vectorizer.py +28 -0
- mfcli-0.2.0.dist-info/METADATA +841 -0
- mfcli-0.2.0.dist-info/RECORD +136 -0
- mfcli-0.2.0.dist-info/WHEEL +5 -0
- mfcli-0.2.0.dist-info/entry_points.txt +3 -0
- mfcli-0.2.0.dist-info/licenses/LICENSE +21 -0
- mfcli-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""System health check and diagnostics for mfcli."""
|
|
2
|
+
import os
|
|
3
|
+
import platform
|
|
4
|
+
import shutil
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Tuple, Optional
|
|
8
|
+
|
|
9
|
+
from mfcli.utils.directory_manager import app_dirs
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def check_python_version() -> Tuple[bool, str]:
    """Report whether the running interpreter is a Python 3.12 release.

    Returns:
        A ``(passed, message)`` pair. ``message`` always carries the full
        ``major.minor.micro`` version of the interpreter; when the check
        fails it additionally names the required ``3.12.x`` series.
    """
    major, minor, micro = sys.version_info[:3]
    label = f"Python {major}.{minor}.{micro}"

    # Guard clause: anything other than 3.12 is rejected.
    if (major, minor) != (3, 12):
        return False, f"{label} (requires 3.12.x)"
    return True, label
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def check_pipx_installation() -> Tuple[bool, str]:
    """Look for a ``pipx`` executable on the current PATH.

    Returns:
        ``(True, "Found at <path>")`` when ``shutil.which`` resolves the
        executable, otherwise ``(False, "Not installed")``.
    """
    location = shutil.which("pipx")
    if not location:
        return False, "Not installed"
    return True, f"Found at {location}"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def check_mfcli_installation() -> Tuple[bool, str]:
    """Verify that both mfcli entry points can be resolved on PATH.

    Returns:
        ``(True, ...)`` only when both ``mfcli`` and ``mfcli-mcp`` are
        found. If only ``mfcli`` is found the message says that
        ``mfcli-mcp`` is missing; in every other case the message is
        ``"Not found in PATH"``.
    """
    cli_found = bool(shutil.which("mfcli"))
    mcp_found = bool(shutil.which("mfcli-mcp"))

    # Without the main executable nothing else matters.
    if not cli_found:
        return False, "Not found in PATH"
    if not mcp_found:
        return False, "mfcli found but mfcli-mcp missing"
    return True, "Both mfcli and mfcli-mcp found"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def check_env_file() -> Tuple[bool, str]:
    """Check that the .env file exists and every required key has a real value.

    The file is parsed line by line as ``name=value`` assignments instead of
    being searched as one string, so that the following no longer count as
    "configured":

    * a key that only appears in a comment line,
    * a key with an empty value (``key=``),
    * a placeholder written with spaces or quotes
      (``key = your_...`` / ``key="your_..."``).

    Returns:
        ``(False, "Not found at <path>")`` when the file does not exist,
        ``(False, "Missing or unconfigured: <keys>")`` when at least one
        required key is absent, empty, or still a ``your_`` placeholder,
        otherwise ``(True, "Found at <path>")``.
    """
    env_path = app_dirs.env_file_path

    if not env_path.exists():
        return False, f"Not found at {env_path}"

    required_keys = ["google_api_key", "openai_api_key", "llama_cloud_api_key",
                     "digikey_client_id", "digikey_client_secret"]

    # utf-8-sig drops a leading BOM; errors="replace" keeps a stray byte from
    # crashing a diagnostic whose job is to report problems, not raise them.
    with open(env_path, "r", encoding="utf-8-sig", errors="replace") as f:
        lines = f.read().splitlines()

    # Later assignments override earlier ones, as in .env loaders.
    values = {}
    for raw_line in lines:
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        name, _, value = line.partition("=")
        name = name.strip()
        if name.startswith("export "):
            name = name[len("export "):].strip()
        values[name] = value.strip().strip("'\"")

    missing_keys = [
        key for key in required_keys
        if not values.get(key) or values[key].startswith("your_")
    ]

    if missing_keys:
        return False, f"Missing or unconfigured: {', '.join(missing_keys)}"

    return True, f"Found at {env_path}"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_google_api() -> Tuple[bool, str]:
    """Probe the Google Gemini API by listing the models it exposes.

    Returns:
        ``(True, "Connected (found N models)")`` when at least one model is
        listed. ``(False, ...)`` when the key is unset or still starts with
        ``your_``, when no models come back, or when anything raises; in the
        last case the message is ``"Error: "`` plus the first 60 characters
        of the exception text.
    """
    try:
        from mfcli.utils.config import get_config

        api_key = get_config().google_api_key
        if not api_key or api_key.startswith("your_"):
            return False, "API key not configured"

        # Imported only once a usable key is known to exist.
        import google.generativeai as genai

        genai.configure(api_key=api_key)

        available = list(genai.list_models())
        if not available:
            return False, "No models found"
        return True, f"Connected (found {len(available)} models)"

    except Exception as exc:
        # Diagnostic boundary: every failure becomes a short report line.
        return False, f"Error: {str(exc)[:60]}"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_openai_api() -> Tuple[bool, str]:
    """Probe the OpenAI API by requesting the model list.

    Returns:
        ``(True, "Connected")`` when the request completes without raising.
        ``(False, ...)`` when the key is unset or still starts with
        ``your_``, or when anything raises; in the latter case the message
        is ``"Error: "`` plus the first 60 characters of the exception text.
    """
    try:
        from mfcli.utils.config import get_config

        api_key = get_config().openai_api_key
        if not api_key or api_key.startswith("your_"):
            return False, "API key not configured"

        from openai import OpenAI

        # The response itself is not inspected; completing the call is the check.
        OpenAI(api_key=api_key).models.list()
        return True, "Connected"

    except Exception as exc:
        # Diagnostic boundary: every failure becomes a short report line.
        return False, f"Error: {str(exc)[:60]}"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def check_chromadb() -> Tuple[bool, str]:
    """Open the ChromaDB store and count the ``engineering_docs`` documents.

    Returns:
        ``(True, "Connected (N documents)")`` on success,
        ``(False, "Directory not found: <dir>")`` when the storage directory
        does not exist, or ``(False, "Error: ...")`` with the first 60
        characters of the exception text when anything raises.
    """
    try:
        store_dir = app_dirs.chroma_db_dir
        if not store_dir.exists():
            return False, f"Directory not found: {store_dir}"

        import chromadb
        from chromadb.utils import embedding_functions
        from mfcli.utils.config import get_config

        settings = get_config()
        db_client = chromadb.PersistentClient(path=str(store_dir))

        embedder = embedding_functions.OpenAIEmbeddingFunction(
            api_key=settings.openai_api_key,
            model_name=settings.embedding_model,
        )
        # NOTE(review): get_or_create_collection presumably creates the
        # collection when it is absent, so this check may not be read-only;
        # confirm that is intended.
        docs = db_client.get_or_create_collection(
            "engineering_docs", embedding_function=embedder
        )
        return True, f"Connected ({docs.count()} documents)"

    except Exception as exc:
        # Diagnostic boundary: every failure becomes a short report line.
        return False, f"Error: {str(exc)[:60]}"
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def check_mcp_config() -> Tuple[bool, str]:
    """Check whether any known MCP config file registers ``mfcli-mcp``.

    Each ``(name, path)`` pair returned by ``get_mcp_config_paths`` is read
    as JSON. A file counts as configured when its top-level object has an
    ``"mcpServers"`` object containing the key ``"mfcli-mcp"``.

    Returns:
        ``(True, "Configured in: <names>")`` when at least one file
        qualifies, ``(False, ...)`` when no config files are known or none
        of them registers the server, and ``(False, "Error: ...")`` with the
        first 60 characters of the exception text for unexpected failures.
    """
    try:
        from mfcli.utils.mcp_configurator import get_mcp_config_paths

        config_paths = get_mcp_config_paths()

        if not config_paths:
            return False, "No MCP config files found"

        import json
        configured_editors = []

        for name, path in config_paths:
            try:
                with open(path, "r", encoding="utf-8") as f:
                    config = json.load(f)
            except (OSError, ValueError):
                # Unreadable, wrongly encoded, or malformed JSON: this editor
                # is simply not configured. Unlike a bare ``except``, this
                # lets KeyboardInterrupt/SystemExit propagate.
                continue

            # Shape checks replace the old reliance on swallowing TypeError
            # when the JSON root or "mcpServers" is not an object.
            servers = config.get("mcpServers") if isinstance(config, dict) else None
            if isinstance(servers, dict) and "mfcli-mcp" in servers:
                configured_editors.append(name)

        if configured_editors:
            return True, f"Configured in: {', '.join(configured_editors)}"
        return False, f"Found {len(config_paths)} config(s) but mfcli-mcp not configured"

    except Exception as e:
        # Diagnostic boundary: every other failure becomes a short report line.
        return False, f"Error: {str(e)[:60]}"
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def check_sqlite_db() -> Tuple[bool, str]:
    """Check that the configured SQLite database exists and contains tables.

    Returns:
        ``(True, "Connected (N tables)")`` when the file opens and
        ``sqlite_master`` lists at least one table,
        ``(False, "Database not found: <path>")`` when the file is missing,
        ``(False, "Database has no tables")`` when it is empty, and
        ``(False, "Error: ...")`` with the first 60 characters of the
        exception text when anything raises.
    """
    try:
        from mfcli.utils.config import get_config
        config = get_config()

        db_path = Path(config.sqlite_db_path)

        if not db_path.exists():
            return False, f"Database not found: {db_path}"

        import sqlite3
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()
        finally:
            # Close even when the query raises (e.g. the file is not a valid
            # database), so the handle is never leaked.
            conn.close()

        if tables:
            return True, f"Connected ({len(tables)} tables)"
        return False, "Database has no tables"

    except Exception as e:
        # Diagnostic boundary: every failure becomes a short report line.
        return False, f"Error: {str(e)[:60]}"
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def print_check_result(component: str, passed: bool, message: str, indent: int = 2) -> None:
    """Print one check outcome as ``<indent><icon> <component>: <message>``.

    Args:
        component: Label of the thing that was checked.
        passed: Selects the icon: a check mark when truthy, a cross otherwise.
        message: Detail text printed after the label.
        indent: Number of leading spaces.
    """
    icon = "❌"
    if passed:
        icon = "✅"
    padding = indent * " "
    print(padding + f"{icon} {component}: {message}")
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def run_system_check() -> None:
    """Run every health check and print a formatted report to stdout.

    Checks are grouped into titled sections and executed in order. A check
    that carries a recommendation string is "critical": its result counts
    toward the summary and, on failure, its recommendation is printed.
    Checks without a recommendation (pipx, MCP configuration) are reported
    but do not affect the summary.
    """
    rule = "=" * 70

    print("\n" + rule)
    print(" MFCLI SYSTEM HEALTH CHECK")
    print(rule)

    print("\n System Information:")
    print(f" OS: {platform.system()} {platform.release()}")
    print(f" Architecture: {platform.machine()}")

    # (section header, [(label, check function, recommendation or None)])
    sections = [
        ("\n Python Environment:", [
            ("Python Version", check_python_version, " - Install Python 3.12.x"),
            ("pipx", check_pipx_installation, None),
            ("mfcli Installation", check_mfcli_installation,
             " - Install mfcli: pipx install mfcli"),
        ]),
        ("\n Configuration:", [
            ("Environment File", check_env_file, " - Configure mfcli: mfcli configure"),
        ]),
        ("\n API Connections:", [
            ("Google Gemini API", test_google_api,
             " - Check Google API key in configuration"),
            ("OpenAI API", test_openai_api,
             " - Check OpenAI API key in configuration"),
        ]),
        ("\n Data Storage:", [
            ("SQLite Database", check_sqlite_db,
             " - Run: mfcli init (in a project directory)"),
            ("ChromaDB", check_chromadb,
             " - Run: mfcli run (to process documents and create database)"),
        ]),
        ("\n MCP Server:", [
            ("MCP Configuration", check_mcp_config, None),
        ]),
    ]

    # Outcomes of critical checks, paired with their advice, in run order.
    critical = []
    for header, checks in sections:
        print(header)
        for label, check, advice in checks:
            ok, detail = check()
            print_check_result(label, ok, detail)
            if advice is not None:
                critical.append((ok, advice))

    # Summary
    print("\n" + rule)
    passed_count = sum(ok for ok, _ in critical)
    total = len(critical)

    if passed_count == total:
        print(" ✅ ALL CRITICAL CHECKS PASSED")
        print(" Your mfcli installation is healthy and ready to use!")
    else:
        print(f" ⚠️ {passed_count}/{total} CRITICAL CHECKS PASSED")
        print("\n Recommendations:")
        for ok, advice in critical:
            if not ok:
                print(advice)

    print(rule)
    print("\n For more help:")
    print(" - Configuration: mfcli configure")
    print(" - MCP Setup: mfcli setup-mcp")
    print(" - Check config: mfcli configure --check")
    print("\n")
|
mfcli/utils/tools.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import mimetypes
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_mime_type_from_bytes(file_bytes: bytes, filename: str = None):
    """
    Determine a MIME type from leading file bytes, falling back to the name.

    A short table of magic-number prefixes (PDF, ZIP, PNG, JPEG) is tried
    first and wins over the filename. If none match and a filename is
    given, its extension is looked up with ``mimetypes.guess_type``.

    :param file_bytes: Bytes of the file
    :param filename: Optional filename used when no signature matches
    :return: MIME type string; ``application/octet-stream`` when unknown
    """
    signatures = (
        (b'%PDF-', 'application/pdf'),
        # ZIP container; may actually be any ZIP-based format.
        (b'PK\x03\x04', 'application/zip'),
        (b'\x89PNG\r\n\x1a\n', 'image/png'),
        (b'\xff\xd8\xff', 'image/jpeg'),
    )
    for magic, mime in signatures:
        if len(file_bytes) >= len(magic) and file_bytes.startswith(magic):
            return mime

    guessed = mimetypes.guess_type(filename)[0] if filename else None
    return guessed or 'application/octet-stream'
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from mfcli.client.chroma_db import get_chromadb_client_for_project_name
|
|
4
|
+
from mfcli.utils.logger import get_logger
|
|
5
|
+
from mfcli.utils.orm import Session
|
|
6
|
+
|
|
7
|
+
logger = get_logger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def add_file_to_db(project_name: str, file_path: Path, purpose: str):
    """Vectorize a local file into the ChromaDB store of a project.

    Nothing is raised for a bad path or a failed vectorization: the problem
    is logged and the function returns ``None``.

    :param project_name: Project name used to look up the ChromaDB client
    :param file_path: Path of the file to add; must exist and be a regular file
    :param purpose: Passed through unchanged to ``vectorize_local_file``
    """
    from mfcli.utils.datasheet_vectorizer import DatasheetVectorizer

    # Validate the path first and report the first problem found.
    problem = None
    if not file_path.exists():
        problem = f"File does not exist: {file_path}"
    elif not file_path.is_file():
        problem = f"Path is not a file: {file_path}"
    if problem is not None:
        logger.error(problem)
        return

    try:
        with Session() as session:
            store = get_chromadb_client_for_project_name(session, project_name)
            DatasheetVectorizer(store).vectorize_local_file(str(file_path), purpose)
            logger.info(f"Successfully added file to ChromaDB: {file_path}")
    except Exception as exc:
        logger.error(f"Failed to add file to ChromaDB: {file_path}")
        logger.exception(exc)
|