mfcli 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mfcli/.env.example +72 -0
- mfcli/__init__.py +0 -0
- mfcli/agents/__init__.py +0 -0
- mfcli/agents/controller/__init__.py +0 -0
- mfcli/agents/controller/agent.py +19 -0
- mfcli/agents/controller/config.yaml +27 -0
- mfcli/agents/controller/tools.py +42 -0
- mfcli/agents/tools/general.py +118 -0
- mfcli/alembic/env.py +61 -0
- mfcli/alembic/script.py.mako +28 -0
- mfcli/alembic/versions/6ccc0c7c397c_added_fields_to_pdf_parts_model.py +39 -0
- mfcli/alembic/versions/769019ef4870_added_gemini_file_path_to_pdf_part_model.py +33 -0
- mfcli/alembic/versions/7a2e3a779fdc_added_functional_block_and_component_.py +54 -0
- mfcli/alembic/versions/7d5adb2a47a7_added_pdf_parts_model.py +41 -0
- mfcli/alembic/versions/7fcb7d6a5836_init.py +167 -0
- mfcli/alembic/versions/e0f2b5765c72_added_cascade_delete_for_models_that_.py +32 -0
- mfcli/alembic.ini +147 -0
- mfcli/cli/__init__.py +0 -0
- mfcli/cli/dependencies.py +59 -0
- mfcli/cli/main.py +200 -0
- mfcli/client/__init__.py +0 -0
- mfcli/client/chroma_db.py +184 -0
- mfcli/client/docling.py +44 -0
- mfcli/client/gemini.py +252 -0
- mfcli/client/llama_parse.py +38 -0
- mfcli/client/vector_db.py +93 -0
- mfcli/constants/__init__.py +0 -0
- mfcli/constants/base_enum.py +18 -0
- mfcli/constants/directory_names.py +1 -0
- mfcli/constants/file_types.py +189 -0
- mfcli/constants/gemini.py +1 -0
- mfcli/constants/openai.py +6 -0
- mfcli/constants/pipeline_run_status.py +3 -0
- mfcli/crud/__init__.py +0 -0
- mfcli/crud/file.py +42 -0
- mfcli/crud/functional_blocks.py +26 -0
- mfcli/crud/netlist.py +18 -0
- mfcli/crud/pipeline_run.py +17 -0
- mfcli/crud/project.py +144 -0
- mfcli/digikey/__init__.py +0 -0
- mfcli/digikey/digikey.py +105 -0
- mfcli/main.py +5 -0
- mfcli/mcp/__init__.py +0 -0
- mfcli/mcp/configs/cline_mcp_settings.json +11 -0
- mfcli/mcp/configs/mfcli.mcp.json +7 -0
- mfcli/mcp/mcp_instance.py +6 -0
- mfcli/mcp/server.py +37 -0
- mfcli/mcp/state_manager.py +51 -0
- mfcli/mcp/tools/__init__.py +0 -0
- mfcli/mcp/tools/query_knowledgebase.py +108 -0
- mfcli/models/__init__.py +10 -0
- mfcli/models/base.py +10 -0
- mfcli/models/bom.py +71 -0
- mfcli/models/datasheet.py +10 -0
- mfcli/models/debug_setup.py +64 -0
- mfcli/models/file.py +43 -0
- mfcli/models/file_docket.py +94 -0
- mfcli/models/file_metadata.py +19 -0
- mfcli/models/functional_blocks.py +94 -0
- mfcli/models/llm_response.py +5 -0
- mfcli/models/mcu.py +97 -0
- mfcli/models/mcu_errata.py +26 -0
- mfcli/models/netlist.py +59 -0
- mfcli/models/pdf_parts.py +25 -0
- mfcli/models/pipeline_run.py +34 -0
- mfcli/models/project.py +27 -0
- mfcli/models/project_metadata.py +15 -0
- mfcli/pipeline/__init__.py +0 -0
- mfcli/pipeline/analysis/__init__.py +0 -0
- mfcli/pipeline/analysis/bom_netlist_mapper.py +28 -0
- mfcli/pipeline/analysis/generators/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/bom/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/bom/bom.py +74 -0
- mfcli/pipeline/analysis/generators/debug_setup/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/debug_setup/debug_setup.py +71 -0
- mfcli/pipeline/analysis/generators/debug_setup/instructions.py +150 -0
- mfcli/pipeline/analysis/generators/functional_blocks/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/functional_blocks/functional_blocks.py +93 -0
- mfcli/pipeline/analysis/generators/functional_blocks/instructions.py +34 -0
- mfcli/pipeline/analysis/generators/functional_blocks/validator.py +94 -0
- mfcli/pipeline/analysis/generators/generator.py +258 -0
- mfcli/pipeline/analysis/generators/generator_base.py +18 -0
- mfcli/pipeline/analysis/generators/mcu/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/mcu/instructions.py +156 -0
- mfcli/pipeline/analysis/generators/mcu/mcu.py +84 -0
- mfcli/pipeline/analysis/generators/mcu_errata/__init__.py +1 -0
- mfcli/pipeline/analysis/generators/mcu_errata/instructions.py +77 -0
- mfcli/pipeline/analysis/generators/mcu_errata/mcu_errata.py +95 -0
- mfcli/pipeline/analysis/generators/summary/__init__.py +0 -0
- mfcli/pipeline/analysis/generators/summary/summary.py +47 -0
- mfcli/pipeline/classifier.py +93 -0
- mfcli/pipeline/data_enricher.py +15 -0
- mfcli/pipeline/extractor.py +34 -0
- mfcli/pipeline/extractors/__init__.py +0 -0
- mfcli/pipeline/extractors/pdf.py +12 -0
- mfcli/pipeline/parser.py +120 -0
- mfcli/pipeline/parsers/__init__.py +0 -0
- mfcli/pipeline/parsers/netlist/__init__.py +0 -0
- mfcli/pipeline/parsers/netlist/edif.py +93 -0
- mfcli/pipeline/parsers/netlist/kicad_legacy_net.py +326 -0
- mfcli/pipeline/parsers/netlist/kicad_spice.py +135 -0
- mfcli/pipeline/parsers/netlist/pads.py +185 -0
- mfcli/pipeline/parsers/netlist/protel.py +166 -0
- mfcli/pipeline/parsers/netlist/protel_detector.py +29 -0
- mfcli/pipeline/pipeline.py +470 -0
- mfcli/pipeline/preprocessors/__init__.py +0 -0
- mfcli/pipeline/preprocessors/user_guide.py +127 -0
- mfcli/pipeline/run_context.py +32 -0
- mfcli/pipeline/schema_mapper.py +89 -0
- mfcli/pipeline/sub_classifier.py +115 -0
- mfcli/utils/__init__.py +0 -0
- mfcli/utils/cline_rules.py +256 -0
- mfcli/utils/config.py +33 -0
- mfcli/utils/configurator.py +324 -0
- mfcli/utils/data_cleaner.py +114 -0
- mfcli/utils/datasheet_vectorizer.py +283 -0
- mfcli/utils/directory_manager.py +116 -0
- mfcli/utils/file_upload.py +298 -0
- mfcli/utils/files.py +16 -0
- mfcli/utils/http_requests.py +54 -0
- mfcli/utils/kb_lister.py +89 -0
- mfcli/utils/kb_remover.py +173 -0
- mfcli/utils/logger.py +28 -0
- mfcli/utils/mcp_configurator.py +394 -0
- mfcli/utils/migrations.py +18 -0
- mfcli/utils/orm.py +43 -0
- mfcli/utils/pdf_splitter.py +63 -0
- mfcli/utils/pre_uninstall.py +167 -0
- mfcli/utils/query_service.py +22 -0
- mfcli/utils/system_check.py +306 -0
- mfcli/utils/tools.py +98 -0
- mfcli/utils/vectorizer.py +28 -0
- mfcli-0.2.1.dist-info/METADATA +956 -0
- mfcli-0.2.1.dist-info/RECORD +138 -0
- mfcli-0.2.1.dist-info/WHEEL +5 -0
- mfcli-0.2.1.dist-info/entry_points.txt +4 -0
- mfcli-0.2.1.dist-info/licenses/LICENSE +21 -0
- mfcli-0.2.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
from urllib.parse import urlparse, unquote
|
|
4
|
+
|
|
5
|
+
from playwright.async_api import async_playwright, Browser
|
|
6
|
+
from requests import RequestException
|
|
7
|
+
from sqlmodel import select
|
|
8
|
+
|
|
9
|
+
from mfcli.client.chroma_db import ChromaClient
|
|
10
|
+
from mfcli.client.docling import DoclingChunker
|
|
11
|
+
from mfcli.client.vector_db import DocumentVectorizer
|
|
12
|
+
from mfcli.constants.file_types import PDFMimeTypes
|
|
13
|
+
from mfcli.digikey.digikey import DigiKey
|
|
14
|
+
from mfcli.models.bom import BOM
|
|
15
|
+
from mfcli.models.datasheet import Datasheet
|
|
16
|
+
from mfcli.pipeline.extractor import TextExtractor
|
|
17
|
+
from mfcli.utils.directory_manager import app_dirs
|
|
18
|
+
from mfcli.utils.http_requests import http_request
|
|
19
|
+
from mfcli.utils.logger import get_logger
|
|
20
|
+
from mfcli.utils.orm import Session
|
|
21
|
+
from mfcli.utils.tools import get_mime_type_from_bytes
|
|
22
|
+
|
|
23
|
+
logger = get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DatasheetVectorizer:
    """
    Download datasheet PDFs and vectorize document content.

    Combines a ``TextExtractor`` (text extraction), a ``DocumentVectorizer``
    built on the supplied Chroma client (storage) and a ``DoclingChunker``
    (chunking of raw file buffers).
    """

    # Texas Instruments links. The dots are escaped so that only the literal
    # host ``www.ti.com`` is treated as a TI URL.
    _TI_URL_PATTERN = re.compile(r"^https?://www\.ti\.com/.+$", re.I)

    def __init__(self, chroma_db: ChromaClient):
        self._extractor = TextExtractor()
        self._vectorizer = DocumentVectorizer(chroma_db)
        self._docling = DoclingChunker()

    @staticmethod
    def _build_metadata(file_name: str, purpose: str, additional_metadata: dict = None) -> dict:
        """
        Build the metadata dict stored alongside vectorized content
        :param file_name: Name of the file
        :param purpose: Purpose of the vectorization
        :param additional_metadata: Optional extra entries; they override the base keys on clash
        :return: New metadata dict (the caller's dict is not modified)
        """
        metadata = {"file_name": file_name, "purpose": purpose}
        if additional_metadata:
            metadata.update(additional_metadata)
        return metadata

    def _vectorize_text(self, text: str, file_name: str, purpose: str, additional_metadata: dict = None):
        """
        Shared method to vectorize text with metadata
        :param text: Extracted text content
        :param file_name: Name of the file
        :param purpose: Purpose of the vectorization (e.g., 'datasheet', 'bom', 'errata')
        :param additional_metadata: Optional additional metadata to include
        """
        metadata = self._build_metadata(file_name, purpose, additional_metadata)
        self._vectorizer.vectorize(text, metadata)
        logger.debug(f"File vectorized: {file_name} (purpose: {purpose})")

    @staticmethod
    async def _fetch_with_playwright(browser: Browser, url: str):
        """
        Fetch a URL through a fresh Playwright browser context
        :param browser: Running Playwright browser
        :param url: URL to fetch
        :return: Raw response body
        """
        context = await browser.new_context()
        try:
            response = await context.request.get(url)
            return await response.body()
        finally:
            # One context is opened per URL; close it so contexts do not pile
            # up for the lifetime of the browser.
            await context.close()

    @staticmethod
    def _parse_ti_url(url: str) -> str:
        """
        Texas Instruments URLs may have a gotoUrl param which is the real URL of the PDF
        :param url: TI URL
        :return: Decoded URL from the gotoUrl param, or the input URL when the param is absent
        """
        query = urlparse(url).query
        if not query:
            return url
        for param in query.split('&'):
            # partition() tolerates parameters without '=' and keeps any '='
            # that is part of the value.
            name, _, value = param.partition('=')
            if name == 'gotoUrl':
                return unquote(value)
        return url

    @staticmethod
    def _pdf_file_name(url_path: str) -> str:
        """
        Derive the local file name for a datasheet from the URL path
        :param url_path: Path component of the datasheet URL
        :return: Base name of the path, with '.pdf' appended unless it already ends in it (any case)
        """
        file_name = os.path.basename(url_path)
        if not file_name.lower().endswith(".pdf"):
            file_name = f"{file_name}.pdf"
        return file_name

    @staticmethod
    def _save_datasheet(name: str, content: bytes):
        """
        Write datasheet bytes into the data sheets directory
        :param name: File name to write
        :param content: File content
        """
        file_path = app_dirs.data_sheets_dir / name
        with open(file_path, "wb") as f:
            f.write(content)

    async def _download(self, browser: Browser, url: str, purpose: str = "datasheet"):
        """
        Download one datasheet and save it to the data sheets directory.
        A plain HTTP request is tried first; Playwright is the fallback when the
        request fails or does not return a PDF.
        :param browser: Running Playwright browser used for the fallback
        :param url: Datasheet URL
        :param purpose: Accepted for interface symmetry; not used by this method
        """
        logger.debug(f"Fetching datasheet: {url}")
        try:
            if self._TI_URL_PATTERN.match(url):
                logger.debug(f"URL is a TI URL: {url}")
                url = self._parse_ti_url(url)
                logger.debug(f"Parsed TI URL: {url}")
            url_path = urlparse(url).path
        except ValueError as e:
            logger.debug(f"Unable to parse datasheet URL: {url}")
            logger.debug(e)
            return
        file_name = self._pdf_file_name(url_path)
        try:
            content = http_request(method='GET', url=url).content
            mime_type = get_mime_type_from_bytes(content, file_name)
            if mime_type not in PDFMimeTypes:
                logger.debug(f"Retrieved PDF is not PDF MIME type: {url}")
                logger.debug(f"Retrying with playwright: {url}")
                content = await self._fetch_with_playwright(browser, url)
        except RequestException as e:
            logger.debug(e)
            logger.debug(f"HTTP error fetching PDF: {url}")
            logger.debug(f"Retrying with playwright: {url}")
            # A failure here propagates to the caller, which logs and moves on.
            content = await self._fetch_with_playwright(browser, url)
        except Exception as e:
            logger.debug(f"Unhandled error fetching datasheet URL: {url}")
            logger.debug(e)
            return
        # Re-check: the content may now come from the Playwright fallback.
        mime_type = get_mime_type_from_bytes(content, file_name)
        if mime_type not in PDFMimeTypes:
            logger.debug(f"Could not fetch PDF even with playwright: {url}")
            return
        try:
            self._save_datasheet(file_name, content)
        except Exception as e:
            logger.debug(e)
            logger.debug(f"Error saving datasheet: {file_name}")

    async def download(self, urls: list[str], purpose: str = "datasheet"):
        """
        Download every URL in turn using one shared headless Chromium instance.
        A failure on one URL is logged and does not stop the remaining downloads.
        :param urls: Datasheet URLs
        :param purpose: Forwarded to the per-URL download and used in log output
        """
        if not urls:
            logger.debug("No datasheets to vectorize, exiting")
            return
        logger.debug(f"Vectorizing {len(urls)} documents (purpose: {purpose})")
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                for url in urls:
                    try:
                        await self._download(browser, url, purpose)
                    except Exception as e:
                        logger.debug(e)
                        logger.debug(f"Error processing document: {url}")
            finally:
                await browser.close()

    def vectorize_local_file(self, file_path: str, purpose: str, additional_metadata: dict = None):
        """
        Vectorize a local file (e.g., generated by agents)
        :param file_path: Path to the local file
        :param purpose: Purpose of the vectorization (e.g., 'bom', 'errata', 'functional_blocks')
        :param additional_metadata: Optional additional metadata to include
        """
        try:
            logger.debug(f"Vectorizing local file: {file_path} (purpose: {purpose})")
            file_name = os.path.basename(file_path)

            # A missing file is logged and skipped rather than raised.
            if not os.path.exists(file_path):
                logger.error(f"File does not exist: {file_path}")
                return

            with open(file_path, 'rb') as f:
                content = f.read()

            # Extract text based on the detected file type
            mime_type = get_mime_type_from_bytes(content, file_name)
            if mime_type in PDFMimeTypes:
                text = self._extractor.extract_pdf_bytes(content)
            else:
                # For non-PDF files, use the general extractor
                text = self._extractor.extract_text_from_file_bytes(file_name, content)

            logger.debug(f"Text extracted from local file: {file_path}")
            self._vectorize_text(text, file_name, purpose, additional_metadata)

        except Exception as e:
            logger.error(f"Error vectorizing local file: {file_path}")
            logger.exception(e)
            raise

    def vectorize_local_files(self, file_paths: list[str], purpose: str, additional_metadata: dict = None):
        """
        Vectorize multiple local files; a failure on one file is logged and the rest continue
        :param file_paths: List of paths to local files
        :param purpose: Purpose of the vectorization (e.g., 'bom', 'errata', 'functional_blocks')
        :param additional_metadata: Optional additional metadata to include
        """
        if not file_paths:
            logger.debug("No files to vectorize, exiting")
            return

        logger.debug(f"Vectorizing {len(file_paths)} local files (purpose: {purpose})")
        for file_path in file_paths:
            try:
                self.vectorize_local_file(file_path, purpose, additional_metadata)
            except Exception as e:
                logger.exception(e)
                logger.error(f"Error processing local file: {file_path}")
        logger.debug(f"Finished vectorizing {len(file_paths)} local files")

    def vectorize_text_content(self, text: str, file_name: str, purpose: str, additional_metadata: dict = None):
        """
        Vectorize text content directly (e.g., from agent output)
        :param text: Text content to vectorize
        :param file_name: Name to associate with this content
        :param purpose: Purpose of the vectorization (e.g., 'bom', 'errata', 'functional_blocks')
        :param additional_metadata: Optional additional metadata to include
        """
        try:
            logger.debug(f"Vectorizing text content: {file_name} (purpose: {purpose})")
            self._vectorize_text(text, file_name, purpose, additional_metadata)
        except Exception as e:
            logger.error(f"Error vectorizing text content: {file_name}")
            logger.exception(e)
            raise

    def vectorize_file_buf(
            self,
            file_bytes: bytes,
            file_name: str,
            purpose: str,
            additional_metadata: dict = None
    ) -> None:
        """
        Vectorize a file from a buffer. This vectorizer uses DoclingChunker.
        :param file_bytes: file bytes
        :param file_name: file name
        :param purpose: file purpose
        :param additional_metadata: dict of metadata
        :return: None
        """
        chunks = self._docling.chunk(file_name, file_bytes)
        metadata = self._build_metadata(file_name, purpose, additional_metadata)
        self._vectorizer.vectorize_chunks(chunks, metadata)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
async def get_datasheets_for_bom_entries(db: Session, chroma_db: ChromaClient, entries: list[BOM]):
    """
    Attach a datasheet URL to each BOM entry and download newly found datasheets.

    Known URLs are read from the ``Datasheet`` table; unknown part numbers are
    looked up through DigiKey. Each newly found URL is added to the session as
    a ``Datasheet`` row and queued for download. This function does not commit
    the session.

    :param db: ORM session used to read existing datasheets and stage new ones
    :param chroma_db: Chroma client handed to the ``DatasheetVectorizer``
    :param entries: BOM entries; ``entry.datasheet`` is set on entries that are not skipped
    :raises Exception: Re-raises any error from the download step
    """
    logger.info(f"Fetching datasheets for {len(entries)} BOM entries")
    part_numbers = {entry.value for entry in entries}
    logger.debug("Fetching existing datasheets for part numbers")

    # Fetch existing datasheets
    stmt = select(Datasheet).where(Datasheet.part_number.in_(part_numbers))
    datasheets: list[Datasheet] = db.execute(stmt).scalars().all()
    datasheet_map = {d.part_number: d.datasheet for d in datasheets}

    logger.debug(f"Datasheet map: {datasheet_map}")
    client = DigiKey()
    new_datasheets: list[Datasheet] = []
    datasheet_urls: list[str] = []
    # Reference-designator prefixes for which no datasheet is fetched.
    # NOTE(review): R/C/L are resistors, capacitors and inductors per the
    # original comment; J, T and D are presumably connectors,
    # transformers/test points and diodes - confirm.
    skipped_prefixes = ('R', 'C', 'L', 'J', 'T', 'D')
    for entry in entries:
        try:
            logger.debug(f"Processing BOM entry: {entry.value}")

            ref = entry.reference
            if ref.startswith(skipped_prefixes):
                logger.debug(f"Skipping BOM entry {entry.value} with reference: {ref}")
                continue

            existing_datasheet = datasheet_map.get(entry.value)
            if not existing_datasheet:
                logger.debug(f"Datasheet does not exist for {entry.value}")
            entry.datasheet = existing_datasheet or client.datasheet(entry.value)

            # If datasheet is new create it in DB
            if not existing_datasheet and entry.datasheet:
                logger.debug(f"Adding new datasheet for {entry.value}: {entry.datasheet}")
                new_datasheets.append(Datasheet(part_number=entry.value, datasheet=entry.datasheet))
                datasheet_urls.append(entry.datasheet)
                # Remember the result so that another BOM line with the same
                # part number reuses it instead of triggering a second lookup,
                # a duplicate row and a duplicate download.
                datasheet_map[entry.value] = entry.datasheet
        except Exception as e:
            logger.error(f"Error adding datasheet for BOM entry: {entry.value}")
            logger.exception(e)
    if new_datasheets:
        db.add_all(new_datasheets)
        logger.debug(f"Adding new data sheets: {new_datasheets}")
    if datasheet_urls:
        logger.debug(f"About to download datasheets: {datasheet_urls}")
        try:
            await DatasheetVectorizer(chroma_db).download(datasheet_urls)
        except Exception:
            logger.error("Error vectorizing datasheets for BOM")
            raise
    logger.debug("Finished adding datasheets")
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from mfcli.utils.tools import get_git_root
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DirectoryManager:
    """
    Singleton holding the directories and file paths used by the application.

    Constructing it resolves and creates the per-user directories. The
    per-project ("repo") paths stay ``None`` until ``initialize`` is called.
    """

    _instance = None

    def __new__(cls):
        # Always hand out the same object; __init__ uses the _initialized
        # flag to run its setup only once.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    @staticmethod
    def _resolve_app_data_base(os_name: str, platform: str, env) -> Path:
        """
        Pick the OS-specific base directory for application data.

        :param os_name: Value of ``os.name``
        :param platform: Value of ``sys.platform``
        :param env: Mapping of environment variables (e.g. ``os.environ``)
        :return: Base directory under which the app data folder is created
        """
        if os_name == "nt":
            configured = env.get("LOCALAPPDATA") or env.get("APPDATA")
            if configured:
                return Path(configured)
            # Neither variable is set: fall back to a path under the user's
            # home instead of failing on Path(None).
            return Path.home() / "AppData" / "Local"
        if platform == "darwin":
            return Path.home() / "Library" / "Application Support"
        return Path.home() / ".local" / "share"

    def __init__(self):
        if self._initialized:
            return
        # OS-specific base appdata location
        app_data_base = self._resolve_app_data_base(os.name, sys.platform, os.environ)

        self.home_dir: Path = Path(os.path.expanduser("~")) / "Multifactor"
        self.env_file_path: Path = self.home_dir / ".env"

        # User app directories
        self.app_data_dir: Path = app_data_base / "Multifactor"
        self.chroma_db_dir: Path = self.app_data_dir / "chromadb"

        self.app_data_dir.mkdir(exist_ok=True, parents=True)
        self.chroma_db_dir.mkdir(exist_ok=True, parents=True)
        self.home_dir.mkdir(exist_ok=True, parents=True)

        # Repo dirs - populated by initialize()
        self.root_dir: Path | None = None
        self.context_dir: Path | None = None
        self.agent_instructions_dir: Path | None = None
        self.data_sheets_dir: Path | None = None
        self.fw_tasks_dir: Path | None = None
        self.generated_files_dir: Path | None = None
        self.cheat_sheets_dir: Path | None = None
        self.reqs_dir: Path | None = None
        self.pdf_parts_dir: Path | None = None
        self.metadata_dir: Path | None = None
        self.config_file_path: Path | None = None
        self.file_docket_path: Path | None = None

        self._initialized = True

    def initialize(self, root: str):
        """
        Resolve the project paths from ``root`` and create the directories.

        :param root: A directory, or a file whose parent directory is used
        """
        # Accept file or directory
        root_path = Path(root)
        self.root_dir = root_path.parent if root_path.is_file() else root_path

        # Determine the base directory for project folders:
        # if in a git repo, use the git root; otherwise use the given directory.
        git_root = get_git_root(self.root_dir)
        # Path() is a no-op for a Path and converts a str, so the '/' joins
        # below work whichever type get_git_root returns.
        base_dir = Path(git_root) if git_root else self.root_dir

        # "multifactor" parent folder at the base directory
        multifactor_parent = base_dir / "multifactor"

        # Context folder - where users place files to be ingested
        self.context_dir = multifactor_parent / "context"

        # Repo directories - all created within the "multifactor" folder
        self.agent_instructions_dir = multifactor_parent / "agent_instructions"
        self.data_sheets_dir = multifactor_parent / "data_sheets"
        self.fw_tasks_dir = multifactor_parent / "fw_tasks"
        self.generated_files_dir = multifactor_parent / "generated_files"
        self.cheat_sheets_dir = multifactor_parent / "hw_cheat_sheets"
        self.reqs_dir = multifactor_parent / "requirements"
        self.pdf_parts_dir = multifactor_parent / "pdf_parts"

        # Metadata lives directly in the "multifactor" folder
        self.metadata_dir = multifactor_parent
        self.config_file_path = self.metadata_dir / "config.json"
        self.file_docket_path = self.metadata_dir / "file_docket.json"

        # Create all dirs
        self._create_directory_structure()

    def _create_directory_structure(self):
        """Create every managed directory; existing directories are left as they are."""
        for directory in [
            self.context_dir,
            self.agent_instructions_dir,
            self.data_sheets_dir,
            self.fw_tasks_dir,
            self.generated_files_dir,
            self.cheat_sheets_dir,
            self.reqs_dir,
            self.pdf_parts_dir,
            self.app_data_dir,
            self.chroma_db_dir,
            self.metadata_dir,
        ]:
            directory.mkdir(parents=True, exist_ok=True)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Shared module-level instance; created when this module is imported.
app_dirs = DirectoryManager()


def init_directory_structure(root_dir: str):
    """Initialize the shared ``app_dirs`` instance for the project at ``root_dir``."""
    app_dirs.initialize(root=root_dir)
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""Unified file upload abstraction for different LLM providers."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from google import genai
|
|
10
|
+
from google.genai.types import File
|
|
11
|
+
|
|
12
|
+
from mfcli.utils.config import get_config
|
|
13
|
+
from mfcli.utils.logger import get_logger
|
|
14
|
+
|
|
15
|
+
logger = get_logger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FileUploadProvider(str, Enum):
    """Providers a file can be uploaded to; members are also plain strings."""

    GEMINI = "gemini"
    OPENAI = "openai"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class BaseFileUploader(ABC):
    """Interface every provider-specific file uploader has to implement."""

    @abstractmethod
    def upload_file(self, file_path: str, display_name: Optional[str] = None) -> dict:
        """
        Send a local file to the provider's file storage.

        Args:
            file_path: Location of the local file
            display_name: Human-readable name for the file, if any

        Returns:
            File metadata as a dictionary, including the URI/ID used to access the file
        """

    @abstractmethod
    def delete_file(self, file_id: str) -> bool:
        """
        Remove a file from the provider's storage.

        Args:
            file_id: ID/URI of the file to remove

        Returns:
            True when the file was deleted, False otherwise
        """

    @abstractmethod
    def get_file_info(self, file_id: str) -> dict:
        """
        Look up the metadata of a previously uploaded file.

        Args:
            file_id: ID/URI of the file

        Returns:
            File metadata as a dictionary
        """
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class GeminiFileUploader(BaseFileUploader):
    """File uploader implementation for Google Gemini."""

    def __init__(self):
        """Initialize the Gemini file uploader with the configured Google API key."""
        config = get_config()
        self.client = genai.Client(api_key=config.google_api_key)
        logger.info("Initialized Gemini file uploader")

    @staticmethod
    def _file_access_check(file_path: str):
        """
        Validate that ``file_path`` points to a readable regular file.

        Args:
            file_path: Path to check

        Raises:
            ValueError: If the path does not exist, is not a file, or is not readable
        """
        file_path_obj = Path(file_path)

        if not file_path_obj.exists():
            raise ValueError(f"File does not exist: {file_path}")
        # A directory passes the exists/readable checks but cannot be uploaded.
        if not file_path_obj.is_file():
            raise ValueError(f"Path is not a file: {file_path}")
        if not os.access(file_path_obj, os.R_OK):
            raise ValueError(f"File is not readable: {file_path}")

    @staticmethod
    def _file_to_dict(file) -> dict:
        """
        Convert a Gemini file object into the metadata dictionary returned to callers.

        Args:
            file: File object returned by the Gemini Files API

        Returns:
            Dictionary with 'uri', 'name', 'display_name', 'mime_type',
            'size_bytes', 'state' and 'provider'
        """
        state = file.state
        return {
            'uri': file.uri,
            'name': file.name,
            'display_name': file.display_name,
            'mime_type': file.mime_type,
            'size_bytes': file.size_bytes,
            # Report None when no state is set instead of failing on .name.
            'state': state.name if state is not None else None,
            'provider': FileUploadProvider.GEMINI.value
        }

    def upload(self, file_path: str) -> File:
        """
        Upload a file to Gemini Files API and return File object.

        Args:
            file_path: Path to the local file to upload

        Returns:
            Gemini types File object.

        Raises:
            ValueError: If the path is not a readable file
            Exception: If upload fails
        """
        self._file_access_check(file_path)
        return self.client.files.upload(file=file_path)

    def upload_file(self, file_path: str, display_name: Optional[str] = None) -> dict:
        """
        Upload a file to Gemini Files API.

        Args:
            file_path: Path to the local file to upload
            display_name: Optional display name for the file; defaults to the file name

        Returns:
            Dictionary with file metadata including 'uri', 'name', 'mime_type', 'size_bytes'

        Raises:
            ValueError: If the path is not a readable file
            Exception: If upload fails
        """
        self._file_access_check(file_path)

        file_path_obj = Path(file_path)

        # Use filename as display name if not provided
        if display_name is None:
            display_name = file_path_obj.name

        try:
            logger.info(f"Uploading file to Gemini: {file_path}")

            uploaded_file = self.client.files.upload(
                file=str(file_path_obj),
                config={'display_name': display_name}
            )
            result = self._file_to_dict(uploaded_file)

            logger.info(f"Successfully uploaded file: {result['name']}")
            return result

        except Exception as e:
            logger.error(f"Failed to upload file to Gemini: {e}")
            # Chain the cause so the original traceback stays attached.
            raise Exception(f"Failed to upload file to Gemini: {str(e)}") from e

    def delete_file(self, file_id: str) -> bool:
        """
        Delete a file from Gemini Files API.

        Args:
            file_id: The name/ID of the file (e.g., 'files/abc123')

        Returns:
            True if deletion was successful, False otherwise
        """
        try:
            logger.info(f"Deleting file from Gemini: {file_id}")
            self.client.files.delete(name=file_id)
            logger.info(f"Successfully deleted file: {file_id}")
            return True
        except Exception as e:
            # Best effort: failures are reported through the return value.
            logger.error(f"Failed to delete file from Gemini: {e}")
            return False

    def get_file_info(self, file_id: str) -> dict:
        """
        Get information about an uploaded file.

        Args:
            file_id: The name/ID of the file (e.g., 'files/abc123')

        Returns:
            Dictionary containing file metadata

        Raises:
            Exception: If the lookup fails
        """
        try:
            logger.info(f"Getting file info from Gemini: {file_id}")
            file_info = self.client.files.get(name=file_id)
            return self._file_to_dict(file_info)

        except Exception as e:
            logger.error(f"Failed to get file info from Gemini: {e}")
            raise Exception(f"Failed to get file info: {str(e)}") from e
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class OpenAIFileUploader(BaseFileUploader):
    """File uploader implementation for OpenAI (placeholder for future implementation)."""

    def __init__(self):
        """
        Reject construction until OpenAI support is added.

        Raises:
            NotImplementedError: Always
        """
        # The configuration is deliberately not loaded here: nothing would use
        # it, and a configuration error must not mask the NotImplementedError.
        logger.info("OpenAI file uploader - not yet implemented")
        raise NotImplementedError("OpenAI file upload support coming soon")

    def upload_file(self, file_path: str, display_name: Optional[str] = None) -> dict:
        """Upload a file to OpenAI (not implemented; always raises NotImplementedError)."""
        raise NotImplementedError("OpenAI file upload not yet implemented")

    def delete_file(self, file_id: str) -> bool:
        """Delete a file from OpenAI (not implemented; always raises NotImplementedError)."""
        raise NotImplementedError("OpenAI file deletion not yet implemented")

    def get_file_info(self, file_id: str) -> dict:
        """Get file info from OpenAI (not implemented; always raises NotImplementedError)."""
        raise NotImplementedError("OpenAI file info not yet implemented")
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class FileUploadManager:
    """Front end that forwards file operations to the uploader of one provider."""

    def __init__(self, provider: FileUploadProvider = FileUploadProvider.GEMINI):
        """
        Create the manager and the uploader for ``provider``.

        Args:
            provider: Provider whose uploader is used (default: GEMINI)
        """
        self.provider = provider
        self.uploader = self._get_uploader(provider)

    def _get_uploader(self, provider: FileUploadProvider) -> BaseFileUploader:
        """Build the uploader matching ``provider``; raise ValueError for an unknown one."""
        # Equality checks (not a dict lookup) so that plain strings equal to a
        # member value keep selecting the same uploader.
        if provider == FileUploadProvider.GEMINI:
            return GeminiFileUploader()
        if provider == FileUploadProvider.OPENAI:
            return OpenAIFileUploader()
        raise ValueError(f"Unsupported file upload provider: {provider}")

    def upload_file(self, file_path: str, display_name: Optional[str] = None) -> dict:
        """
        Upload a file through the configured uploader.

        Args:
            file_path: Location of the local file
            display_name: Human-readable name for the file, if any

        Returns:
            File metadata as a dictionary
        """
        uploader = self.uploader
        return uploader.upload_file(file_path, display_name)

    def delete_file(self, file_id: str) -> bool:
        """
        Delete a file through the configured uploader.

        Args:
            file_id: ID/URI of the file to remove

        Returns:
            True when the file was deleted, False otherwise
        """
        uploader = self.uploader
        return uploader.delete_file(file_id)

    def get_file_info(self, file_id: str) -> dict:
        """
        Fetch file metadata through the configured uploader.

        Args:
            file_id: ID/URI of the file

        Returns:
            File metadata as a dictionary
        """
        uploader = self.uploader
        return uploader.get_file_info(file_id)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def get_file_upload_manager(provider: FileUploadProvider = FileUploadProvider.GEMINI) -> FileUploadManager:
    """
    Build a new file upload manager for the given provider.

    Args:
        provider: Provider whose uploader the manager uses (default: GEMINI)

    Returns:
        A freshly constructed FileUploadManager
    """
    manager = FileUploadManager(provider)
    return manager
|