mfcli 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. mfcli/.env.example +72 -0
  2. mfcli/__init__.py +0 -0
  3. mfcli/agents/__init__.py +0 -0
  4. mfcli/agents/controller/__init__.py +0 -0
  5. mfcli/agents/controller/agent.py +19 -0
  6. mfcli/agents/controller/config.yaml +27 -0
  7. mfcli/agents/controller/tools.py +42 -0
  8. mfcli/agents/tools/general.py +118 -0
  9. mfcli/alembic/env.py +61 -0
  10. mfcli/alembic/script.py.mako +28 -0
  11. mfcli/alembic/versions/6ccc0c7c397c_added_fields_to_pdf_parts_model.py +39 -0
  12. mfcli/alembic/versions/769019ef4870_added_gemini_file_path_to_pdf_part_model.py +33 -0
  13. mfcli/alembic/versions/7a2e3a779fdc_added_functional_block_and_component_.py +54 -0
  14. mfcli/alembic/versions/7d5adb2a47a7_added_pdf_parts_model.py +41 -0
  15. mfcli/alembic/versions/7fcb7d6a5836_init.py +167 -0
  16. mfcli/alembic/versions/e0f2b5765c72_added_cascade_delete_for_models_that_.py +32 -0
  17. mfcli/alembic.ini +147 -0
  18. mfcli/cli/__init__.py +0 -0
  19. mfcli/cli/dependencies.py +59 -0
  20. mfcli/cli/main.py +200 -0
  21. mfcli/client/__init__.py +0 -0
  22. mfcli/client/chroma_db.py +184 -0
  23. mfcli/client/docling.py +44 -0
  24. mfcli/client/gemini.py +252 -0
  25. mfcli/client/llama_parse.py +38 -0
  26. mfcli/client/vector_db.py +93 -0
  27. mfcli/constants/__init__.py +0 -0
  28. mfcli/constants/base_enum.py +18 -0
  29. mfcli/constants/directory_names.py +1 -0
  30. mfcli/constants/file_types.py +189 -0
  31. mfcli/constants/gemini.py +1 -0
  32. mfcli/constants/openai.py +6 -0
  33. mfcli/constants/pipeline_run_status.py +3 -0
  34. mfcli/crud/__init__.py +0 -0
  35. mfcli/crud/file.py +42 -0
  36. mfcli/crud/functional_blocks.py +26 -0
  37. mfcli/crud/netlist.py +18 -0
  38. mfcli/crud/pipeline_run.py +17 -0
  39. mfcli/crud/project.py +144 -0
  40. mfcli/digikey/__init__.py +0 -0
  41. mfcli/digikey/digikey.py +105 -0
  42. mfcli/main.py +5 -0
  43. mfcli/mcp/__init__.py +0 -0
  44. mfcli/mcp/configs/cline_mcp_settings.json +11 -0
  45. mfcli/mcp/configs/mfcli.mcp.json +7 -0
  46. mfcli/mcp/mcp_instance.py +6 -0
  47. mfcli/mcp/server.py +37 -0
  48. mfcli/mcp/state_manager.py +51 -0
  49. mfcli/mcp/tools/__init__.py +0 -0
  50. mfcli/mcp/tools/query_knowledgebase.py +108 -0
  51. mfcli/models/__init__.py +10 -0
  52. mfcli/models/base.py +10 -0
  53. mfcli/models/bom.py +71 -0
  54. mfcli/models/datasheet.py +10 -0
  55. mfcli/models/debug_setup.py +64 -0
  56. mfcli/models/file.py +43 -0
  57. mfcli/models/file_docket.py +94 -0
  58. mfcli/models/file_metadata.py +19 -0
  59. mfcli/models/functional_blocks.py +94 -0
  60. mfcli/models/llm_response.py +5 -0
  61. mfcli/models/mcu.py +97 -0
  62. mfcli/models/mcu_errata.py +26 -0
  63. mfcli/models/netlist.py +59 -0
  64. mfcli/models/pdf_parts.py +25 -0
  65. mfcli/models/pipeline_run.py +34 -0
  66. mfcli/models/project.py +27 -0
  67. mfcli/models/project_metadata.py +15 -0
  68. mfcli/pipeline/__init__.py +0 -0
  69. mfcli/pipeline/analysis/__init__.py +0 -0
  70. mfcli/pipeline/analysis/bom_netlist_mapper.py +28 -0
  71. mfcli/pipeline/analysis/generators/__init__.py +0 -0
  72. mfcli/pipeline/analysis/generators/bom/__init__.py +0 -0
  73. mfcli/pipeline/analysis/generators/bom/bom.py +74 -0
  74. mfcli/pipeline/analysis/generators/debug_setup/__init__.py +0 -0
  75. mfcli/pipeline/analysis/generators/debug_setup/debug_setup.py +71 -0
  76. mfcli/pipeline/analysis/generators/debug_setup/instructions.py +150 -0
  77. mfcli/pipeline/analysis/generators/functional_blocks/__init__.py +0 -0
  78. mfcli/pipeline/analysis/generators/functional_blocks/functional_blocks.py +93 -0
  79. mfcli/pipeline/analysis/generators/functional_blocks/instructions.py +34 -0
  80. mfcli/pipeline/analysis/generators/functional_blocks/validator.py +94 -0
  81. mfcli/pipeline/analysis/generators/generator.py +258 -0
  82. mfcli/pipeline/analysis/generators/generator_base.py +18 -0
  83. mfcli/pipeline/analysis/generators/mcu/__init__.py +0 -0
  84. mfcli/pipeline/analysis/generators/mcu/instructions.py +156 -0
  85. mfcli/pipeline/analysis/generators/mcu/mcu.py +84 -0
  86. mfcli/pipeline/analysis/generators/mcu_errata/__init__.py +1 -0
  87. mfcli/pipeline/analysis/generators/mcu_errata/instructions.py +77 -0
  88. mfcli/pipeline/analysis/generators/mcu_errata/mcu_errata.py +95 -0
  89. mfcli/pipeline/analysis/generators/summary/__init__.py +0 -0
  90. mfcli/pipeline/analysis/generators/summary/summary.py +47 -0
  91. mfcli/pipeline/classifier.py +93 -0
  92. mfcli/pipeline/data_enricher.py +15 -0
  93. mfcli/pipeline/extractor.py +34 -0
  94. mfcli/pipeline/extractors/__init__.py +0 -0
  95. mfcli/pipeline/extractors/pdf.py +12 -0
  96. mfcli/pipeline/parser.py +120 -0
  97. mfcli/pipeline/parsers/__init__.py +0 -0
  98. mfcli/pipeline/parsers/netlist/__init__.py +0 -0
  99. mfcli/pipeline/parsers/netlist/edif.py +93 -0
  100. mfcli/pipeline/parsers/netlist/kicad_legacy_net.py +326 -0
  101. mfcli/pipeline/parsers/netlist/kicad_spice.py +135 -0
  102. mfcli/pipeline/parsers/netlist/pads.py +185 -0
  103. mfcli/pipeline/parsers/netlist/protel.py +166 -0
  104. mfcli/pipeline/parsers/netlist/protel_detector.py +29 -0
  105. mfcli/pipeline/pipeline.py +470 -0
  106. mfcli/pipeline/preprocessors/__init__.py +0 -0
  107. mfcli/pipeline/preprocessors/user_guide.py +127 -0
  108. mfcli/pipeline/run_context.py +32 -0
  109. mfcli/pipeline/schema_mapper.py +89 -0
  110. mfcli/pipeline/sub_classifier.py +115 -0
  111. mfcli/utils/__init__.py +0 -0
  112. mfcli/utils/cline_rules.py +256 -0
  113. mfcli/utils/config.py +33 -0
  114. mfcli/utils/configurator.py +324 -0
  115. mfcli/utils/data_cleaner.py +114 -0
  116. mfcli/utils/datasheet_vectorizer.py +283 -0
  117. mfcli/utils/directory_manager.py +116 -0
  118. mfcli/utils/file_upload.py +298 -0
  119. mfcli/utils/files.py +16 -0
  120. mfcli/utils/http_requests.py +54 -0
  121. mfcli/utils/kb_lister.py +89 -0
  122. mfcli/utils/kb_remover.py +173 -0
  123. mfcli/utils/logger.py +28 -0
  124. mfcli/utils/mcp_configurator.py +394 -0
  125. mfcli/utils/migrations.py +18 -0
  126. mfcli/utils/orm.py +43 -0
  127. mfcli/utils/pdf_splitter.py +63 -0
  128. mfcli/utils/pre_uninstall.py +167 -0
  129. mfcli/utils/query_service.py +22 -0
  130. mfcli/utils/system_check.py +306 -0
  131. mfcli/utils/tools.py +98 -0
  132. mfcli/utils/vectorizer.py +28 -0
  133. mfcli-0.2.1.dist-info/METADATA +956 -0
  134. mfcli-0.2.1.dist-info/RECORD +138 -0
  135. mfcli-0.2.1.dist-info/WHEEL +5 -0
  136. mfcli-0.2.1.dist-info/entry_points.txt +4 -0
  137. mfcli-0.2.1.dist-info/licenses/LICENSE +21 -0
  138. mfcli-0.2.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,283 @@
1
+ import os
2
+ import re
3
+ from urllib.parse import urlparse, unquote
4
+
5
+ from playwright.async_api import async_playwright, Browser
6
+ from requests import RequestException
7
+ from sqlmodel import select
8
+
9
+ from mfcli.client.chroma_db import ChromaClient
10
+ from mfcli.client.docling import DoclingChunker
11
+ from mfcli.client.vector_db import DocumentVectorizer
12
+ from mfcli.constants.file_types import PDFMimeTypes
13
+ from mfcli.digikey.digikey import DigiKey
14
+ from mfcli.models.bom import BOM
15
+ from mfcli.models.datasheet import Datasheet
16
+ from mfcli.pipeline.extractor import TextExtractor
17
+ from mfcli.utils.directory_manager import app_dirs
18
+ from mfcli.utils.http_requests import http_request
19
+ from mfcli.utils.logger import get_logger
20
+ from mfcli.utils.orm import Session
21
+ from mfcli.utils.tools import get_mime_type_from_bytes
22
+
23
+ logger = get_logger(__name__)
24
+
25
+
26
class DatasheetVectorizer:
    """
    Downloads datasheet PDFs and feeds documents into the vector store.

    Text-based entry points go through ``DocumentVectorizer.vectorize``;
    ``vectorize_file_buf`` chunks the raw bytes with ``DoclingChunker`` first.
    """

    # Texas Instruments links may wrap the real PDF location in a ``gotoUrl``
    # query parameter. Dots are escaped so that only www.ti.com itself matches.
    _TI_URL_RE = re.compile(r"^https?://www\.ti\.com/.+$", re.I)

    def __init__(self, chroma_db: ChromaClient):
        """
        :param chroma_db: Chroma client handed to the underlying DocumentVectorizer
        """
        self._extractor = TextExtractor()
        self._vectorizer = DocumentVectorizer(chroma_db)
        self._docling = DoclingChunker()

    @staticmethod
    def _build_metadata(file_name: str, purpose: str, additional_metadata: dict = None) -> dict:
        """
        Build the metadata dict stored alongside vectorized content
        :param file_name: Name of the file
        :param purpose: Purpose of the vectorization
        :param additional_metadata: Optional extra keys; they override the defaults on clash
        :return: New metadata dict (the caller's dict is never mutated)
        """
        metadata = {"file_name": file_name, "purpose": purpose}
        if additional_metadata:
            metadata.update(additional_metadata)
        return metadata

    def _vectorize_text(self, text: str, file_name: str, purpose: str, additional_metadata: dict = None):
        """
        Shared method to vectorize text with metadata
        :param text: Extracted text content
        :param file_name: Name of the file
        :param purpose: Purpose of the vectorization (e.g., 'datasheet', 'bom', 'errata')
        :param additional_metadata: Optional additional metadata to include
        """
        metadata = self._build_metadata(file_name, purpose, additional_metadata)
        self._vectorizer.vectorize(text, metadata)
        logger.debug(f"File vectorized: {file_name} (purpose: {purpose})")

    @staticmethod
    async def _fetch_with_playwright(browser: Browser, url: str):
        """
        Fetch a URL through a fresh Playwright browser context
        :param browser: Running Playwright browser
        :param url: URL to fetch
        :return: Raw response body
        """
        context = await browser.new_context()
        try:
            response = await context.request.get(url)
            return await response.body()
        finally:
            # One context is created per URL; close it so contexts do not
            # pile up for the lifetime of the browser.
            await context.close()

    @staticmethod
    def _parse_ti_url(url: str) -> str:
        """
        Texas Instruments URLs may have a gotoUrl param which is the real URL of the PDF
        :param url: TI URL
        :return: Decoded URL from the gotoUrl param, or the original URL if absent/empty
        """
        url_query_params = urlparse(url).query
        if not url_query_params:
            return url
        for param in url_query_params.split('&'):
            # partition() tolerates params without '=' and keeps any '=' that
            # appears inside the value, unlike split('=')[1].
            name, _, value = param.partition('=')
            if name == 'gotoUrl' and value:
                return unquote(value)
        return url

    @staticmethod
    def _save_datasheet(name: str, content: bytes):
        """
        Write datasheet bytes into the project's data sheets directory
        :param name: Target file name
        :param content: File content
        """
        file_path = app_dirs.data_sheets_dir / name
        with open(file_path, "wb") as f:
            f.write(content)

    async def _download(self, browser: Browser, url: str, purpose: str = "datasheet"):
        """
        Download one datasheet and save it if the payload is a PDF.
        A plain HTTP request is tried first; Playwright is the fallback when the
        request fails or returns something that is not a PDF. Failures are logged
        and the URL is skipped.
        :param browser: Running Playwright browser used for the fallback fetch
        :param url: Datasheet URL
        :param purpose: Currently unused; kept for interface compatibility
        """
        logger.debug(f"Fetching datasheet: {url}")
        try:
            if self._TI_URL_RE.match(url):
                logger.debug(f"URL is a TI URL: {url}")
                url = self._parse_ti_url(url)
                logger.debug(f"Parsed TI URL: {url}")
            url_path = urlparse(url).path
        except ValueError as e:
            logger.debug(f"Unable to parse datasheet URL: {url}")
            logger.debug(e)
            return
        file_name = os.path.basename(url_path)
        if not file_name.endswith(".pdf"):
            file_name = f"{file_name}.pdf"
        try:
            content = http_request(method='GET', url=url).content
            mime_type = get_mime_type_from_bytes(content, file_name)
            if mime_type not in PDFMimeTypes:
                logger.debug(f"Retrieved PDF is not PDF MIME type: {url}")
                logger.debug(f"Retrying with playwright: {url}")
                content = await self._fetch_with_playwright(browser, url)
        except RequestException as e:
            logger.debug(e)
            logger.debug(f"HTTP error fetching PDF: {url}")
            logger.debug(f"Retrying with playwright: {url}")
            content = await self._fetch_with_playwright(browser, url)
        except Exception as e:
            logger.debug(f"Unhandled error fetching datasheet URL: {url}")
            logger.debug(e)
            return
        # Re-check the MIME type: the content may now come from the fallback.
        mime_type = get_mime_type_from_bytes(content, file_name)
        if mime_type not in PDFMimeTypes:
            logger.debug(f"Could not fetch PDF even with playwright: {url}")
            return
        try:
            self._save_datasheet(file_name, content)
        except Exception as e:
            logger.debug(e)
            logger.debug(f"Error saving datasheet: {file_name}")

    async def download(self, urls: list[str], purpose: str = "datasheet"):
        """
        Download every URL in turn using one shared headless Chromium instance.
        An error on one URL is logged and does not stop the remaining downloads.
        :param urls: Datasheet URLs
        :param purpose: Purpose label, used in log output and passed to _download
        """
        if not urls:
            logger.debug("No datasheets to vectorize, exiting")
            return
        logger.debug(f"Vectorizing {len(urls)} documents (purpose: {purpose})")
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                for url in urls:
                    try:
                        await self._download(browser, url, purpose)
                    except Exception as e:
                        logger.debug(e)
                        logger.debug(f"Error processing document: {url}")
            finally:
                await browser.close()

    def vectorize_local_file(self, file_path: str, purpose: str, additional_metadata: dict = None):
        """
        Vectorize a local file (e.g., generated by agents)
        :param file_path: Path to the local file
        :param purpose: Purpose of the vectorization (e.g., 'bom', 'errata', 'functional_blocks')
        :param additional_metadata: Optional additional metadata to include
        :raises Exception: Any extraction/vectorization error is logged and re-raised
        """
        try:
            logger.debug(f"Vectorizing local file: {file_path} (purpose: {purpose})")
            file_name = os.path.basename(file_path)

            # A missing file is reported and skipped rather than raised
            if not os.path.exists(file_path):
                logger.error(f"File does not exist: {file_path}")
                return

            with open(file_path, 'rb') as f:
                content = f.read()

            # Extract text based on file type
            mime_type = get_mime_type_from_bytes(content, file_name)
            if mime_type in PDFMimeTypes:
                text = self._extractor.extract_pdf_bytes(content)
            else:
                # For non-PDF files, use the general extractor
                text = self._extractor.extract_text_from_file_bytes(file_name, content)

            logger.debug(f"Text extracted from local file: {file_path}")
            self._vectorize_text(text, file_name, purpose, additional_metadata)

        except Exception as e:
            logger.error(f"Error vectorizing local file: {file_path}")
            logger.exception(e)
            raise

    def vectorize_local_files(self, file_paths: list[str], purpose: str, additional_metadata: dict = None):
        """
        Vectorize multiple local files; a failing file is logged and skipped
        :param file_paths: List of paths to local files
        :param purpose: Purpose of the vectorization (e.g., 'bom', 'errata', 'functional_blocks')
        :param additional_metadata: Optional additional metadata to include
        """
        if not file_paths:
            logger.debug("No files to vectorize, exiting")
            return

        logger.debug(f"Vectorizing {len(file_paths)} local files (purpose: {purpose})")
        for file_path in file_paths:
            try:
                self.vectorize_local_file(file_path, purpose, additional_metadata)
            except Exception as e:
                logger.exception(e)
                logger.error(f"Error processing local file: {file_path}")
        logger.debug(f"Finished vectorizing {len(file_paths)} local files")

    def vectorize_text_content(self, text: str, file_name: str, purpose: str, additional_metadata: dict = None):
        """
        Vectorize text content directly (e.g., from agent output)
        :param text: Text content to vectorize
        :param file_name: Name to associate with this content
        :param purpose: Purpose of the vectorization (e.g., 'bom', 'errata', 'functional_blocks')
        :param additional_metadata: Optional additional metadata to include
        :raises Exception: Any vectorization error is logged and re-raised
        """
        try:
            logger.debug(f"Vectorizing text content: {file_name} (purpose: {purpose})")
            self._vectorize_text(text, file_name, purpose, additional_metadata)
        except Exception as e:
            logger.error(f"Error vectorizing text content: {file_name}")
            logger.exception(e)
            raise

    def vectorize_file_buf(
            self,
            file_bytes: bytes,
            file_name: str,
            purpose: str,
            additional_metadata: dict = None
    ) -> None:
        """
        Vectorize a file from a buffer. This vectorizer uses DoclingChunker.
        :param file_bytes: file bytes
        :param file_name: file name
        :param purpose: file purpose
        :param additional_metadata: dict of metadata
        :return: None
        """
        chunks = self._docling.chunk(file_name, file_bytes)
        metadata = self._build_metadata(file_name, purpose, additional_metadata)
        self._vectorizer.vectorize_chunks(chunks, metadata)
229
+
230
+
231
async def get_datasheets_for_bom_entries(db: Session, chroma_db: ChromaClient, entries: list[BOM]):
    """
    Resolve a datasheet URL for each BOM entry and download the newly found ones.

    Known URLs are read from the Datasheet table; unknown part numbers are
    looked up through DigiKey. Newly found URLs are added to the session
    (no commit is issued here) and then downloaded.
    :param db: Database session
    :param chroma_db: Chroma client passed on to DatasheetVectorizer
    :param entries: BOM entries; ``entry.datasheet`` is set on processed entries
    :raises Exception: Re-raises any error from the download step
    """
    if not entries:
        # Nothing to resolve: avoid an empty IN query and creating a DigiKey client.
        logger.debug("No BOM entries to fetch datasheets for, exiting")
        return

    logger.info(f"Fetching datasheets for {len(entries)} BOM entries")
    part_numbers = {entry.value for entry in entries}
    logger.debug("Fetching existing datasheets for part numbers")

    # Fetch existing datasheets
    stmt = select(Datasheet).where(Datasheet.part_number.in_(part_numbers))
    datasheets: list[Datasheet] = db.execute(stmt).scalars().all()
    datasheet_map = {d.part_number: d.datasheet for d in datasheets}

    logger.debug(f"Datasheet map: {datasheet_map}")
    client = DigiKey()
    new_datasheets: list[Datasheet] = []
    datasheet_urls: list[str] = []
    # URLs found during this call, so a part number that appears on several BOM
    # lines is looked up, stored and downloaded only once.
    found_this_run: dict = {}
    # Entries whose reference designator starts with one of these are skipped.
    skipped_prefixes = ('R', 'C', 'L', 'J', 'T', 'D')
    for entry in entries:
        try:
            logger.debug(f"Processing BOM entry: {entry.value}")

            ref = entry.reference
            if ref.startswith(skipped_prefixes):
                logger.debug(f"Skipping BOM entry {entry.value} with reference: {ref}")
                continue

            existing_datasheet = datasheet_map.get(entry.value) or found_this_run.get(entry.value)
            if not existing_datasheet:
                logger.debug(f"Datasheet does not exist for {entry.value}")
            entry.datasheet = existing_datasheet or client.datasheet(entry.value)

            # If datasheet is new create it in DB
            if not existing_datasheet and entry.datasheet:
                logger.debug(f"Adding new datasheet for {entry.value}: {entry.datasheet}")
                new_datasheets.append(Datasheet(part_number=entry.value, datasheet=entry.datasheet))
                datasheet_urls.append(entry.datasheet)
                found_this_run[entry.value] = entry.datasheet
        except Exception as e:
            # Best effort: one bad entry must not abort the rest of the BOM.
            logger.error(f"Error adding datasheet for BOM entry: {entry.value}")
            logger.exception(e)
    if new_datasheets:
        db.add_all(new_datasheets)
        logger.debug(f"Adding new data sheets: {new_datasheets}")
    if datasheet_urls:
        logger.debug(f"About to download datasheets: {datasheet_urls}")
        try:
            await DatasheetVectorizer(chroma_db).download(datasheet_urls)
        except Exception:
            logger.error("Error vectorizing datasheets for BOM")
            # Bare raise keeps the original traceback intact.
            raise
    logger.debug("Finished adding datasheets")
@@ -0,0 +1,116 @@
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+ from mfcli.utils.tools import get_git_root
5
+
6
+
7
class DirectoryManager:
    """
    Singleton that holds every directory and file path used by the tool.

    Constructing it resolves and creates the per-user locations (home
    "Multifactor" folder, app-data folder, chromadb folder). The per-project
    paths stay ``None`` until ``initialize`` is called with a project root.
    """

    _instance = None

    def __new__(cls):
        # Always hand back the same object; __init__ checks _initialized so
        # repeated construction does not reset already-resolved paths.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return
        # OS-specific base appdata location
        app_data_base = self._app_data_base()

        self.home_dir: Path = Path.home() / "Multifactor"
        self.env_file_path: Path = self.home_dir / ".env"

        # User app directories
        self.app_data_dir: Path = app_data_base / "Multifactor"
        self.chroma_db_dir: Path = self.app_data_dir / "chromadb"

        self.app_data_dir.mkdir(exist_ok=True, parents=True)
        self.chroma_db_dir.mkdir(exist_ok=True, parents=True)
        self.home_dir.mkdir(exist_ok=True, parents=True)

        # Repo dirs - populated by initialize()
        self.root_dir: Path | None = None
        self.context_dir: Path | None = None
        self.agent_instructions_dir: Path | None = None
        self.data_sheets_dir: Path | None = None
        self.fw_tasks_dir: Path | None = None
        self.generated_files_dir: Path | None = None
        self.cheat_sheets_dir: Path | None = None
        self.reqs_dir: Path | None = None
        self.pdf_parts_dir: Path | None = None
        self.metadata_dir: Path | None = None
        self.config_file_path: Path | None = None
        self.file_docket_path: Path | None = None

        self._initialized = True

    @staticmethod
    def _app_data_base(os_name=None, platform=None, environ=None) -> Path:
        """
        Return the OS-specific base directory for application data.

        The arguments default to the running interpreter's ``os.name``,
        ``sys.platform`` and ``os.environ``; they exist so the choice can be
        made for an explicit environment.

        On Windows ``LOCALAPPDATA`` is preferred, then ``APPDATA``. If neither
        is set (or both are empty) the conventional ``~/AppData/Local`` is used
        instead of failing on ``Path(None)``.
        """
        os_name = os.name if os_name is None else os_name
        platform = sys.platform if platform is None else platform
        environ = os.environ if environ is None else environ

        if os_name == "nt":
            configured = environ.get("LOCALAPPDATA") or environ.get("APPDATA")
            if configured:
                return Path(configured)
            return Path.home() / "AppData" / "Local"
        if platform == "darwin":
            return Path.home() / "Library" / "Application Support"
        return Path.home() / ".local" / "share"

    def initialize(self, root: str):
        """
        Resolve and create the per-project directory layout.

        All project folders live under a "multifactor" folder placed at the
        git root containing ``root`` or, outside a git repository, at ``root``
        itself.

        Args:
            root: A directory, or a file whose parent directory is used.
        """
        # Accept file or directory
        root_path = Path(root)
        self.root_dir = root_path.parent if root_path.is_file() else root_path

        # If in a git repo, use the git root; otherwise use the given directory.
        # NOTE(review): get_git_root's return type is not visible here; Path()
        # accepts both str and Path, so the result is wrapped defensively.
        git_root = get_git_root(self.root_dir)
        base_dir = Path(git_root) if git_root else self.root_dir

        # Create "multifactor" parent folder at the base directory
        multifactor_parent = base_dir / "multifactor"

        # Context folder - where users place files to be ingested
        self.context_dir = multifactor_parent / "context"

        # Repo directories - all created within the "multifactor" folder
        self.agent_instructions_dir = multifactor_parent / "agent_instructions"
        self.data_sheets_dir = multifactor_parent / "data_sheets"
        self.fw_tasks_dir = multifactor_parent / "fw_tasks"
        self.generated_files_dir = multifactor_parent / "generated_files"
        self.cheat_sheets_dir = multifactor_parent / "hw_cheat_sheets"
        self.reqs_dir = multifactor_parent / "requirements"
        self.pdf_parts_dir = multifactor_parent / "pdf_parts"

        # Metadata directory - also within the "multifactor" folder
        self.metadata_dir = multifactor_parent
        self.config_file_path = self.metadata_dir / "config.json"
        self.file_docket_path = self.metadata_dir / "file_docket.json"

        # Create all dirs
        self._create_directory_structure()

    def _create_directory_structure(self):
        """Create every managed directory; existing ones are left untouched."""
        for directory in [
            self.context_dir,
            self.agent_instructions_dir,
            self.data_sheets_dir,
            self.fw_tasks_dir,
            self.generated_files_dir,
            self.cheat_sheets_dir,
            self.reqs_dir,
            self.pdf_parts_dir,
            self.app_data_dir,
            self.chroma_db_dir,
            self.metadata_dir
        ]:
            directory.mkdir(parents=True, exist_ok=True)
110
+
111
+
112
# Module-level shared instance; DirectoryManager() returns the same object
# every time it is constructed.
app_dirs = DirectoryManager()


def init_directory_structure(root_dir: str) -> None:
    """Set up the project directory layout on the shared ``app_dirs`` instance.

    Args:
        root_dir: Project root passed straight to ``app_dirs.initialize``.
    """
    app_dirs.initialize(root_dir)
@@ -0,0 +1,298 @@
1
+ """Unified file upload abstraction for different LLM providers."""
2
+
3
+ import os
4
+ from abc import ABC, abstractmethod
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from google import genai
10
+ from google.genai.types import File
11
+
12
+ from mfcli.utils.config import get_config
13
+ from mfcli.utils.logger import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
class FileUploadProvider(str, Enum):
    """Identifiers of the back ends a file can be uploaded to.

    Members are also ``str`` instances, so they compare equal to their
    plain-string values.
    """

    GEMINI = "gemini"
    OPENAI = "openai"
22
+
23
+
24
class BaseFileUploader(ABC):
    """Abstract interface every provider-specific file uploader implements."""

    @abstractmethod
    def upload_file(self, file_path: str, display_name: Optional[str] = None) -> dict:
        """
        Send a local file to the provider's file storage.

        Args:
            file_path: Location of the local file to upload
            display_name: Human-readable name for the file, if any

        Returns:
            Dictionary of file metadata, including the URI/ID used to access the file
        """

    @abstractmethod
    def delete_file(self, file_id: str) -> bool:
        """
        Remove a previously uploaded file from the provider's storage.

        Args:
            file_id: The ID/URI of the file to delete

        Returns:
            True when the file was deleted, False otherwise
        """

    @abstractmethod
    def get_file_info(self, file_id: str) -> dict:
        """
        Look up the metadata of an uploaded file.

        Args:
            file_id: The ID/URI of the file

        Returns:
            Dictionary of file metadata
        """
66
+
67
+
68
class GeminiFileUploader(BaseFileUploader):
    """File uploader implementation for Google Gemini."""

    def __init__(self):
        """Initialize the Gemini file uploader with the configured Google API key."""
        config = get_config()
        self.client = genai.Client(api_key=config.google_api_key)
        logger.info("Initialized Gemini file uploader")

    @staticmethod
    def _file_access_check(file_path: str):
        """
        Validate that a path points at a readable regular file.

        Args:
            file_path: Path to check

        Raises:
            ValueError: If the path does not exist, is not a file, or is not readable
        """
        file_path_obj = Path(file_path)

        if not file_path_obj.exists():
            raise ValueError(f"File does not exist: {file_path}")
        # Reject directories up front rather than letting the upload call fail.
        if not file_path_obj.is_file():
            raise ValueError(f"Path is not a file: {file_path}")
        if not os.access(file_path_obj, os.R_OK):
            raise ValueError(f"File is not readable: {file_path}")

    @staticmethod
    def _file_metadata(file: File) -> dict:
        """
        Convert a Gemini File object into the provider-neutral metadata dict.

        Args:
            file: File object returned by the Gemini Files API

        Returns:
            Dictionary with 'uri', 'name', 'display_name', 'mime_type',
            'size_bytes', 'state' and 'provider'
        """
        state = file.state
        return {
            'uri': file.uri,
            'name': file.name,
            'display_name': file.display_name,
            'mime_type': file.mime_type,
            'size_bytes': file.size_bytes,
            # Report None when the API returned no state instead of failing
            # on ``None.name``.
            'state': state.name if state is not None else None,
            'provider': FileUploadProvider.GEMINI.value
        }

    def upload(self, file_path: str) -> File:
        """
        Upload a file to Gemini Files API and return File object.

        Args:
            file_path: Path to the local file to upload

        Returns:
            Gemini types File object.

        Raises:
            ValueError: If file doesn't exist, is not a file, or is not readable
            Exception: If upload fails
        """
        self._file_access_check(file_path)
        return self.client.files.upload(file=file_path)

    def upload_file(self, file_path: str, display_name: Optional[str] = None) -> dict:
        """
        Upload a file to Gemini Files API.

        Args:
            file_path: Path to the local file to upload
            display_name: Optional display name for the file; defaults to the file name

        Returns:
            Dictionary with file metadata including 'uri', 'name', 'mime_type', 'size_bytes'

        Raises:
            ValueError: If file doesn't exist, is not a file, or is not readable
            Exception: If upload fails (the original error is chained as the cause)
        """
        self._file_access_check(file_path)

        file_path_obj = Path(file_path)

        # Use filename as display name if not provided
        if display_name is None:
            display_name = file_path_obj.name

        try:
            logger.info(f"Uploading file to Gemini: {file_path}")

            uploaded_file = self.client.files.upload(
                file=str(file_path_obj),
                config={'display_name': display_name}
            )
            result = self._file_metadata(uploaded_file)

            logger.info(f"Successfully uploaded file: {result['name']}")
            return result

        except Exception as e:
            logger.error(f"Failed to upload file to Gemini: {e}")
            raise Exception(f"Failed to upload file to Gemini: {str(e)}") from e

    def delete_file(self, file_id: str) -> bool:
        """
        Delete a file from Gemini Files API.

        Args:
            file_id: The name/ID of the file (e.g., 'files/abc123')

        Returns:
            True if deletion was successful, False otherwise
        """
        try:
            logger.info(f"Deleting file from Gemini: {file_id}")
            self.client.files.delete(name=file_id)
            logger.info(f"Successfully deleted file: {file_id}")
            return True
        except Exception as e:
            # Deliberately best effort: failure is reported through the return value.
            logger.error(f"Failed to delete file from Gemini: {e}")
            return False

    def get_file_info(self, file_id: str) -> dict:
        """
        Get information about an uploaded file.

        Args:
            file_id: The name/ID of the file (e.g., 'files/abc123')

        Returns:
            Dictionary containing file metadata

        Raises:
            Exception: If the lookup fails (the original error is chained as the cause)
        """
        try:
            logger.info(f"Getting file info from Gemini: {file_id}")
            file_info = self.client.files.get(name=file_id)
            return self._file_metadata(file_info)

        except Exception as e:
            logger.error(f"Failed to get file info from Gemini: {e}")
            raise Exception(f"Failed to get file info: {str(e)}") from e
203
+
204
+
205
class OpenAIFileUploader(BaseFileUploader):
    """Placeholder uploader for OpenAI; every operation raises NotImplementedError."""

    def __init__(self):
        """Load the configuration, log that the uploader is a stub, then refuse to construct."""
        # The configuration is loaded but not used yet; real setup will go
        # here once OpenAI support is added.
        get_config()
        logger.info("OpenAI file uploader - not yet implemented")
        raise NotImplementedError("OpenAI file upload support coming soon")

    def upload_file(self, file_path: str, display_name: Optional[str] = None) -> dict:
        """Not available yet: uploading to OpenAI always raises."""
        raise NotImplementedError("OpenAI file upload not yet implemented")

    def delete_file(self, file_id: str) -> bool:
        """Not available yet: deleting from OpenAI always raises."""
        raise NotImplementedError("OpenAI file deletion not yet implemented")

    def get_file_info(self, file_id: str) -> dict:
        """Not available yet: querying OpenAI file info always raises."""
        raise NotImplementedError("OpenAI file info not yet implemented")
226
+
227
+
228
class FileUploadManager:
    """Facade that forwards file operations to the uploader of one provider."""

    def __init__(self, provider: FileUploadProvider = FileUploadProvider.GEMINI):
        """
        Create a manager bound to a single provider.

        Args:
            provider: The file upload provider to use (default: GEMINI)
        """
        self.provider = provider
        self.uploader = self._get_uploader(provider)

    def _get_uploader(self, provider: FileUploadProvider) -> BaseFileUploader:
        """Build the uploader matching ``provider``; unknown providers raise ValueError."""
        # Equality checks (not a dict lookup) are kept on purpose so that the
        # comparison semantics for the provider value stay exactly the same.
        if provider == FileUploadProvider.GEMINI:
            return GeminiFileUploader()
        if provider == FileUploadProvider.OPENAI:
            return OpenAIFileUploader()
        raise ValueError(f"Unsupported file upload provider: {provider}")

    def upload_file(self, file_path: str, display_name: Optional[str] = None) -> dict:
        """
        Upload a file through the configured provider.

        Args:
            file_path: Path to the local file to upload
            display_name: Optional display name for the file

        Returns:
            Dictionary with file metadata
        """
        uploader = self.uploader
        return uploader.upload_file(file_path, display_name)

    def delete_file(self, file_id: str) -> bool:
        """
        Delete a file through the configured provider.

        Args:
            file_id: The ID/URI of the file to delete

        Returns:
            True if deletion was successful, False otherwise
        """
        uploader = self.uploader
        return uploader.delete_file(file_id)

    def get_file_info(self, file_id: str) -> dict:
        """
        Fetch file information through the configured provider.

        Args:
            file_id: The ID/URI of the file

        Returns:
            Dictionary containing file metadata
        """
        uploader = self.uploader
        return uploader.get_file_info(file_id)
286
+
287
+
288
def get_file_upload_manager(provider: FileUploadProvider = FileUploadProvider.GEMINI) -> FileUploadManager:
    """
    Build a file upload manager for the requested provider.

    Args:
        provider: The file upload provider to use (default: GEMINI)

    Returns:
        A new FileUploadManager bound to ``provider``
    """
    manager = FileUploadManager(provider)
    return manager