mfcli 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. mfcli/.env.example +72 -0
  2. mfcli/__init__.py +0 -0
  3. mfcli/agents/__init__.py +0 -0
  4. mfcli/agents/controller/__init__.py +0 -0
  5. mfcli/agents/controller/agent.py +19 -0
  6. mfcli/agents/controller/config.yaml +27 -0
  7. mfcli/agents/controller/tools.py +42 -0
  8. mfcli/agents/tools/general.py +118 -0
  9. mfcli/alembic/env.py +61 -0
  10. mfcli/alembic/script.py.mako +28 -0
  11. mfcli/alembic/versions/6ccc0c7c397c_added_fields_to_pdf_parts_model.py +39 -0
  12. mfcli/alembic/versions/769019ef4870_added_gemini_file_path_to_pdf_part_model.py +33 -0
  13. mfcli/alembic/versions/7a2e3a779fdc_added_functional_block_and_component_.py +54 -0
  14. mfcli/alembic/versions/7d5adb2a47a7_added_pdf_parts_model.py +41 -0
  15. mfcli/alembic/versions/7fcb7d6a5836_init.py +167 -0
  16. mfcli/alembic/versions/e0f2b5765c72_added_cascade_delete_for_models_that_.py +32 -0
  17. mfcli/alembic.ini +147 -0
  18. mfcli/cli/__init__.py +0 -0
  19. mfcli/cli/dependencies.py +59 -0
  20. mfcli/cli/main.py +200 -0
  21. mfcli/client/__init__.py +0 -0
  22. mfcli/client/chroma_db.py +184 -0
  23. mfcli/client/docling.py +44 -0
  24. mfcli/client/gemini.py +252 -0
  25. mfcli/client/llama_parse.py +38 -0
  26. mfcli/client/vector_db.py +93 -0
  27. mfcli/constants/__init__.py +0 -0
  28. mfcli/constants/base_enum.py +18 -0
  29. mfcli/constants/directory_names.py +1 -0
  30. mfcli/constants/file_types.py +189 -0
  31. mfcli/constants/gemini.py +1 -0
  32. mfcli/constants/openai.py +6 -0
  33. mfcli/constants/pipeline_run_status.py +3 -0
  34. mfcli/crud/__init__.py +0 -0
  35. mfcli/crud/file.py +42 -0
  36. mfcli/crud/functional_blocks.py +26 -0
  37. mfcli/crud/netlist.py +18 -0
  38. mfcli/crud/pipeline_run.py +17 -0
  39. mfcli/crud/project.py +144 -0
  40. mfcli/digikey/__init__.py +0 -0
  41. mfcli/digikey/digikey.py +105 -0
  42. mfcli/main.py +5 -0
  43. mfcli/mcp/__init__.py +0 -0
  44. mfcli/mcp/configs/cline_mcp_settings.json +11 -0
  45. mfcli/mcp/configs/mfcli.mcp.json +7 -0
  46. mfcli/mcp/mcp_instance.py +6 -0
  47. mfcli/mcp/server.py +37 -0
  48. mfcli/mcp/state_manager.py +51 -0
  49. mfcli/mcp/tools/__init__.py +0 -0
  50. mfcli/mcp/tools/query_knowledgebase.py +108 -0
  51. mfcli/models/__init__.py +10 -0
  52. mfcli/models/base.py +10 -0
  53. mfcli/models/bom.py +71 -0
  54. mfcli/models/datasheet.py +10 -0
  55. mfcli/models/debug_setup.py +64 -0
  56. mfcli/models/file.py +43 -0
  57. mfcli/models/file_docket.py +94 -0
  58. mfcli/models/file_metadata.py +19 -0
  59. mfcli/models/functional_blocks.py +94 -0
  60. mfcli/models/llm_response.py +5 -0
  61. mfcli/models/mcu.py +97 -0
  62. mfcli/models/mcu_errata.py +26 -0
  63. mfcli/models/netlist.py +59 -0
  64. mfcli/models/pdf_parts.py +25 -0
  65. mfcli/models/pipeline_run.py +34 -0
  66. mfcli/models/project.py +27 -0
  67. mfcli/models/project_metadata.py +15 -0
  68. mfcli/pipeline/__init__.py +0 -0
  69. mfcli/pipeline/analysis/__init__.py +0 -0
  70. mfcli/pipeline/analysis/bom_netlist_mapper.py +28 -0
  71. mfcli/pipeline/analysis/generators/__init__.py +0 -0
  72. mfcli/pipeline/analysis/generators/bom/__init__.py +0 -0
  73. mfcli/pipeline/analysis/generators/bom/bom.py +74 -0
  74. mfcli/pipeline/analysis/generators/debug_setup/__init__.py +0 -0
  75. mfcli/pipeline/analysis/generators/debug_setup/debug_setup.py +71 -0
  76. mfcli/pipeline/analysis/generators/debug_setup/instructions.py +150 -0
  77. mfcli/pipeline/analysis/generators/functional_blocks/__init__.py +0 -0
  78. mfcli/pipeline/analysis/generators/functional_blocks/functional_blocks.py +93 -0
  79. mfcli/pipeline/analysis/generators/functional_blocks/instructions.py +34 -0
  80. mfcli/pipeline/analysis/generators/functional_blocks/validator.py +94 -0
  81. mfcli/pipeline/analysis/generators/generator.py +258 -0
  82. mfcli/pipeline/analysis/generators/generator_base.py +18 -0
  83. mfcli/pipeline/analysis/generators/mcu/__init__.py +0 -0
  84. mfcli/pipeline/analysis/generators/mcu/instructions.py +156 -0
  85. mfcli/pipeline/analysis/generators/mcu/mcu.py +84 -0
  86. mfcli/pipeline/analysis/generators/mcu_errata/__init__.py +1 -0
  87. mfcli/pipeline/analysis/generators/mcu_errata/instructions.py +77 -0
  88. mfcli/pipeline/analysis/generators/mcu_errata/mcu_errata.py +95 -0
  89. mfcli/pipeline/analysis/generators/summary/__init__.py +0 -0
  90. mfcli/pipeline/analysis/generators/summary/summary.py +47 -0
  91. mfcli/pipeline/classifier.py +93 -0
  92. mfcli/pipeline/data_enricher.py +15 -0
  93. mfcli/pipeline/extractor.py +34 -0
  94. mfcli/pipeline/extractors/__init__.py +0 -0
  95. mfcli/pipeline/extractors/pdf.py +12 -0
  96. mfcli/pipeline/parser.py +120 -0
  97. mfcli/pipeline/parsers/__init__.py +0 -0
  98. mfcli/pipeline/parsers/netlist/__init__.py +0 -0
  99. mfcli/pipeline/parsers/netlist/edif.py +93 -0
  100. mfcli/pipeline/parsers/netlist/kicad_legacy_net.py +326 -0
  101. mfcli/pipeline/parsers/netlist/kicad_spice.py +135 -0
  102. mfcli/pipeline/parsers/netlist/pads.py +185 -0
  103. mfcli/pipeline/parsers/netlist/protel.py +166 -0
  104. mfcli/pipeline/parsers/netlist/protel_detector.py +29 -0
  105. mfcli/pipeline/pipeline.py +470 -0
  106. mfcli/pipeline/preprocessors/__init__.py +0 -0
  107. mfcli/pipeline/preprocessors/user_guide.py +127 -0
  108. mfcli/pipeline/run_context.py +32 -0
  109. mfcli/pipeline/schema_mapper.py +89 -0
  110. mfcli/pipeline/sub_classifier.py +115 -0
  111. mfcli/utils/__init__.py +0 -0
  112. mfcli/utils/cline_rules.py +256 -0
  113. mfcli/utils/config.py +33 -0
  114. mfcli/utils/configurator.py +324 -0
  115. mfcli/utils/data_cleaner.py +114 -0
  116. mfcli/utils/datasheet_vectorizer.py +283 -0
  117. mfcli/utils/directory_manager.py +116 -0
  118. mfcli/utils/file_upload.py +298 -0
  119. mfcli/utils/files.py +16 -0
  120. mfcli/utils/http_requests.py +54 -0
  121. mfcli/utils/kb_lister.py +89 -0
  122. mfcli/utils/kb_remover.py +173 -0
  123. mfcli/utils/logger.py +28 -0
  124. mfcli/utils/mcp_configurator.py +394 -0
  125. mfcli/utils/migrations.py +18 -0
  126. mfcli/utils/orm.py +43 -0
  127. mfcli/utils/pdf_splitter.py +63 -0
  128. mfcli/utils/pre_uninstall.py +167 -0
  129. mfcli/utils/query_service.py +22 -0
  130. mfcli/utils/system_check.py +306 -0
  131. mfcli/utils/tools.py +98 -0
  132. mfcli/utils/vectorizer.py +28 -0
  133. mfcli-0.2.1.dist-info/METADATA +956 -0
  134. mfcli-0.2.1.dist-info/RECORD +138 -0
  135. mfcli-0.2.1.dist-info/WHEEL +5 -0
  136. mfcli-0.2.1.dist-info/entry_points.txt +4 -0
  137. mfcli-0.2.1.dist-info/licenses/LICENSE +21 -0
  138. mfcli-0.2.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,956 @@
1
+ Metadata-Version: 2.4
2
+ Name: mfcli
3
+ Version: 0.2.1
4
+ Summary: AI-powered CLI for analyzing hardware engineering documents
5
+ Author: Multifactor AI
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/MultifactorAI/multifactor-adk-backend
8
+ Project-URL: Repository, https://github.com/MultifactorAI/multifactor-adk-backend
9
+ Project-URL: Issues, https://github.com/MultifactorAI/multifactor-adk-backend/issues
10
+ Keywords: hardware,engineering,AI,MCP,RAG,electronics,schematic,BOM,datasheet
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Electronic Design Automation (EDA)
17
+ Requires-Python: <3.13,>=3.12
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: google-adk
21
+ Requires-Dist: python-dotenv
22
+ Requires-Dist: fastapi
23
+ Requires-Dist: uvicorn
24
+ Requires-Dist: pandas
25
+ Requires-Dist: pyyaml
26
+ Requires-Dist: sqlalchemy
27
+ Requires-Dist: pydantic-settings==2.10.1
28
+ Requires-Dist: alembic
29
+ Requires-Dist: werkzeug
30
+ Requires-Dist: boto3
31
+ Requires-Dist: botocore
32
+ Requires-Dist: pydantic==2.11.9
33
+ Requires-Dist: protobuf
34
+ Requires-Dist: sqlmodel
35
+ Requires-Dist: llama-parse
36
+ Requires-Dist: llama-index-core
37
+ Requires-Dist: requests
38
+ Requires-Dist: openai
39
+ Requires-Dist: langchain
40
+ Requires-Dist: langchain-text-splitters
41
+ Requires-Dist: tiktoken
42
+ Requires-Dist: chromadb==1.3.4
43
+ Requires-Dist: urllib3
44
+ Requires-Dist: playwright
45
+ Requires-Dist: pymupdf
46
+ Requires-Dist: fastmcp
47
+ Requires-Dist: docling
48
+ Requires-Dist: pytest-asyncio
49
+ Requires-Dist: pytest-mock
50
+ Requires-Dist: trio
51
+ Requires-Dist: pikepdf
52
+ Provides-Extra: dev
53
+ Requires-Dist: pytest; extra == "dev"
54
+ Requires-Dist: pyinstaller; extra == "dev"
55
+ Requires-Dist: build; extra == "dev"
56
+ Requires-Dist: twine; extra == "dev"
57
+ Dynamic: license-file
58
+
59
+ # Multifactor ADK Backend
60
+
61
+ [![Development Status](https://img.shields.io/badge/status-in%20development-yellow)](https://github.com/MultifactorAI/multifactor-adk-backend)
62
+
63
+ An AI-powered engineering document processing pipeline that intelligently analyzes hardware engineering documents including schematics, datasheets, BOMs, and netlists. Built with Google's Gemini models, the system extracts structured data, generates documentation, and enables semantic search across processed documents.
64
+
65
+ ## 🚀 Features
66
+
67
+ - **CLI-Based Pipeline**: Process entire directories of engineering documents with a single command
68
+ - **Intelligent Document Analysis**: Automated classification, text extraction, and schema mapping
69
+ - **BOM Generation**: Extract components from schematics and generate CSV Bill of Materials
70
+ - **Datasheet Enrichment**: Automatically download component datasheets from BOMs
71
+ - **Cheat Sheet Generation**: AI-generated documentation for MCU datasheets, errata, debug setup, and functional blocks
72
+ - **RAG-Powered Queries**: Query processed documents using ChromaDB-backed Retrieval-Augmented Generation
73
+ - **Web UI**: Interactive interface for document processing and agent interaction (optional)
74
+ - **File Type Support**: PDF, EDIF, PADS, KiCad netlists, CSV BOMs, and more
75
+
76
+ ## 📋 Table of Contents
77
+
78
+ - [Architecture](#architecture)
79
+ - [Prerequisites](#prerequisites)
80
+ - [Installation](#installation)
81
+ - [Uninstalling](#uninstalling)
82
+ - [Configuration](#configuration)
83
+ - [Usage](#usage)
84
+ - [MCP Server](#mcp-server)
85
+ - [Pipeline](#pipeline)
86
+ - [Output Directories](#output-directories)
87
+ - [Data Storage](#data-storage)
88
+ - [Project Structure](#project-structure)
89
+ - [Development](#development)
90
+ - [Troubleshooting](#troubleshooting)
91
+
92
+ ## 🏗️ Architecture
93
+
94
+ The system uses a streamlined architecture with a single controller agent that orchestrates a sequential processing pipeline:
95
+
96
+ ```
97
+ Controller Agent
98
+ ├── Tools:
99
+ │ ├── run_pipeline (Sequential pipeline execution)
100
+ │ └── query_knowledgebase (RAG-based document queries)
101
+
102
+ └── Pipeline Stages:
103
+ ├── Pre-processing
104
+ │ ├── File classification & validation
105
+ │ ├── Text extraction
106
+ │ ├── Document sub-type detection (uses LLM when needed)
107
+ │ ├── Schema mapping (for structured documents)
108
+ │ ├── Data parsing
109
+ │ └── Data enrichment
110
+
111
+ └── Analysis & Generation
112
+ ├── Netlist-to-BOM mapping
113
+ └── File generation:
114
+ ├── BOM CSV files
115
+ └── JSON cheat sheets (MCU, errata, debug setup, functional blocks)
116
+ ```
117
+
118
+ The pipeline processes files sequentially, making LLM calls only when necessary for tasks like document sub-classification and schema mapping, rather than using a hierarchy of sub-agents.
119
+
120
+ ## 📦 Prerequisites
121
+
122
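+ - **Python 3.12** (3.12.x only; the package declares `Requires-Python: >=3.12,<3.13`, so Python 3.13+ is not supported)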
123
+ - **Required API Keys**:
124
+ - Google API Key (for Gemini models)
125
+ - OpenAI API Key (for embeddings)
126
+ - LlamaParse Cloud API Key (for document parsing)
127
+ - DigiKey API credentials (client ID & secret, for datasheet downloads)
128
+ - AWS credentials (for S3 storage, optional)
129
+ - **Database**: SQLite (automatically managed)
130
+
131
+ ## 🔧 Installation
132
+
133
+ ### Quick Install (Recommended)
134
+
135
+ The easiest way to install mfcli is to use our automated installation script, which installs it with **pipx** to provide isolated dependency management while making the CLI globally available.
136
+
137
+ **Windows (PowerShell):**
138
+ ```powershell
139
+ iwr -useb https://raw.githubusercontent.com/MultifactorAI/multifactor-adk-backend/main/install.ps1 | iex
140
+ ```
141
+
142
+ **Linux/macOS:**
143
+ ```bash
144
+ curl -fsSL https://raw.githubusercontent.com/MultifactorAI/multifactor-adk-backend/main/install.sh | bash
145
+ ```
146
+
147
+ The script will:
148
+ - ✅ Check Python 3.12 installation
149
+ - ✅ Install pipx if needed
150
+ - ✅ Install mfcli with isolated dependencies
151
+ - ✅ Set up configuration directory
152
+ - ✅ Make `mfcli` and `mfcli-mcp` commands globally available
153
+
154
+ ### Manual Installation
155
+
156
+ If you prefer manual installation or the script doesn't work:
157
+
158
+ #### Using pipx (Recommended)
159
+
160
+ ```bash
161
+ # Install pipx if not already installed
162
+ python -m pip install --user pipx
163
+ python -m pipx ensurepath
164
+
165
+ # Install mfcli from GitHub
166
+ pipx install git+https://github.com/MultifactorAI/multifactor-adk-backend.git
167
+
168
+ # Or install from PyPI
169
+ pipx install mfcli
170
+ ```
171
+
172
+ **Why pipx?**
173
+ - ✅ Isolated dependencies - no conflicts with other Python packages
174
+ - ✅ Global CLI access - available in any terminal
175
+ - ✅ No virtual environment activation needed
176
+ - ✅ MCP server compatible - works with external tools like Cline
177
+ - ✅ Easy updates: `pipx upgrade mfcli`
178
+
179
+ #### Using pip (For Development)
180
+
181
+ ```bash
182
+ # Clone the repository
183
+ git clone https://github.com/MultifactorAI/multifactor-adk-backend.git
184
+ cd multifactor-adk-backend
185
+
186
+ # Create virtual environment
187
+ python -m venv venv
188
+
189
+ # Activate virtual environment
190
+ # Windows:
191
+ venv\Scripts\activate
192
+ # macOS/Linux:
193
+ source venv/bin/activate
194
+
195
+ # Install in development mode
196
+ pip install -e .
197
+ ```
198
+
199
+ **Note**: If you plan to use the MCP server with Cline/Claude Code, install with pipx instead to ensure global availability.
200
+
201
+ ### Verify Installation
202
+
203
+ After installation, verify everything is working:
204
+
205
+ ```bash
206
+ # Check mfcli is installed
207
+ mfcli --help
208
+
209
+ # Run system health check
210
+ mfcli doctor
211
+ ```
212
+
213
+ ## 🗑️ Uninstalling
214
+
215
+ If you need to uninstall mfcli, we provide automated scripts and tools to ensure a clean removal.
216
+
217
+ ### Important: Stop MCP Server First
218
+
219
+ **Before uninstalling**, you must stop the MCP server if it's running. The MCP server (`mfcli-mcp`) runs as a background process when used with Cline or Claude Code. If it's still running, pipx cannot delete the virtual environment due to file locks.
220
+
221
+ ### Quick Pre-Uninstall Check
222
+
223
+ Run this command to check for running processes and get uninstall guidance:
224
+
225
+ ```bash
226
+ mfcli pre-uninstall
227
+ ```
228
+
229
+ This will:
230
+ - ✅ Check for running mfcli-mcp processes
231
+ - ⚠️ Warn you if processes are still running
232
+ - 📋 Provide step-by-step uninstall instructions
233
+
234
+ ### Automated Uninstall (Recommended)
235
+
236
+ **Windows:**
237
+ ```powershell
238
+ .\uninstall.ps1
239
+ ```
240
+
241
+ **Linux/macOS:**
242
+ ```bash
243
+ chmod +x uninstall.sh
244
+ ./uninstall.sh
245
+ ```
246
+
247
+ The scripts will:
248
+ - Check for running processes
249
+ - Offer to stop them automatically
250
+ - Uninstall mfcli via pipx
251
+ - Fall back to manual cleanup if needed
252
+ - Preserve your configuration data in `~/Multifactor`
253
+
254
+ ### Manual Uninstall
255
+
256
+ 1. **Close your IDE** (VS Code, Cline, etc.) to stop the MCP server
257
+ 2. **Wait 5-10 seconds** for processes to fully terminate
258
+ 3. **Run pipx uninstall**:
259
+
260
+ ```bash
261
+ pipx uninstall mfcli
262
+ ```
263
+
264
+ ### Troubleshooting Permission Errors
265
+
266
+ If you get "Access is denied" or "Permission denied" errors:
267
+
268
+ 1. **The MCP server is still running** - Close your IDE completely
269
+ 2. **Wait 10 seconds** for background processes to terminate
270
+ 3. **Check Task Manager/Activity Monitor** for remaining python processes
271
+ 4. **Use the uninstall script** which can force-stop processes
272
+ 5. **Restart your computer** if processes won't stop
273
+
274
+ ### Complete Documentation
275
+
276
+ For detailed uninstall instructions, troubleshooting, and data removal:
277
+
278
+ 📖 **See [UNINSTALL.md](UNINSTALL.md)** for the complete uninstallation guide
279
+
280
+ **Note**: Uninstalling mfcli does NOT delete your configuration and data files in `~/Multifactor`. This preserves your API keys and project data for future reinstallation.
281
+
282
+ ## ⚙️ Configuration
283
+
284
+ ### Interactive Configuration Wizard (Recommended)
285
+
286
+ The easiest way to configure mfcli is to use the interactive wizard:
287
+
288
+ ```bash
289
+ mfcli configure
290
+ ```
291
+
292
+ This will guide you through setting up all required API keys with:
293
+ - 🔗 Direct links to get each API key
294
+ - ✅ Automatic validation of API keys
295
+ - 📝 Smart defaults for vectorization settings
296
+ - 💾 Automatic saving to the correct location
297
+
298
+ ### Manual Configuration
299
+
300
+ Alternatively, create a `.env` file at:
301
+
302
+ - **Windows:** `C:\Users\<username>\Multifactor\.env`
303
+ - **macOS/Linux:** `~/Multifactor/.env`
304
+
305
+ ```ini
306
+ # API Keys (Required)
307
+ google_api_key=your_google_api_key
308
+ openai_api_key=your_openai_api_key
309
+ llama_cloud_api_key=your_llamaparse_api_key
310
+ digikey_client_id=your_digikey_client_id
311
+ digikey_client_secret=your_digikey_client_secret
312
+
313
+ # Vector Database Configuration
314
+ chunk_size=1000
315
+ chunk_overlap=200
316
+ embedding_model=text-embedding-3-small
317
+ embedding_dimensions=1536
318
+ ```
319
+
320
+ ### Check Configuration
321
+
322
+ To verify your configuration at any time:
323
+
324
+ ```bash
325
+ # Check configuration status
326
+ mfcli configure --check
327
+
328
+ # Run comprehensive system check
329
+ mfcli doctor
330
+ ```
331
+
332
+ ### Required API Keys & How to Get Them
333
+
334
+ - **Google API Key**: [Google AI Studio](https://aistudio.google.com/app/apikey)
335
+ - **OpenAI API Key**: [OpenAI Platform](https://platform.openai.com/api-keys)
336
+ - **LlamaParse API Key**: [LlamaIndex Cloud](https://cloud.llamaindex.ai/)
337
+ - **DigiKey API**: [DigiKey Developer Portal](https://developer.digikey.com/)
338
+ - **AWS Credentials**: [AWS IAM Console](https://console.aws.amazon.com/iam/) (optional)
339
+
340
+ **Tip:** The `mfcli configure` wizard provides these links interactively and validates your keys!
341
+
342
+ ## 🚀 Usage
343
+
344
+ ### Command-Line Interface
345
+
346
+ #### Getting Started with a Project
347
+
348
+ To analyze hardware design files, follow these steps:
349
+
350
+ **1. Navigate to your hardware design files directory:**
351
+
352
+ ```bash
353
+ cd C:\Projects\hardware\board_v1
354
+ ```
355
+
356
+ **2. Initialize the project:**
357
+
358
+ ```bash
359
+ mfcli init
360
+ ```
361
+
362
+ You'll be prompted to enter a project name (3-45 characters, alphanumeric with underscores/hyphens allowed). If your directory is a git repository, the repository name will be suggested as the default - simply press Enter to accept it or type a different name. This creates a `multifactor/` folder at the git root (or current directory) containing:
363
+ - `context/` - **Place your hardware design files here for processing**
364
+ - `config.json` - Project configuration with your project name
365
+ - `file_docket.json` - File tracking and metadata
366
+ - `generated_files/`, `hw_cheat_sheets/`, `data_sheets/`, etc. - Output folders
367
+
368
+ **3. Place your files in the context folder:**
369
+
370
+ ```bash
371
+ # Copy or move your hardware design files to the context folder
372
+ # Example for git repo at C:\git\my-project\:
373
+ copy *.pdf C:\git\my-project\multifactor\context\
374
+ copy *.csv C:\git\my-project\multifactor\context\
375
+ ```
376
+
377
+ **4. Run the pipeline:**
378
+
379
+ ```bash
380
+ mfcli run
381
+ ```
382
+
383
+ This will:
384
+ - Process all supported files in the `multifactor/context/` directory
385
+ - Skip files that have already been processed (matching MD5 checksum)
386
+ - Prompt for confirmation if a file has been modified (different MD5)
387
+ - Generate BOM CSV files (if schematics are found)
388
+ - Download datasheets for BOM components
389
+ - Generate cheat sheets for MCU datasheets, errata, and schematics
390
+ - Store vector embeddings for RAG queries
391
+
392
+ #### File Change Detection
393
+
394
+ The pipeline tracks files using MD5 checksums stored in `multifactor/file_docket.json`. When you run the pipeline:
395
+
396
+ - **New files**: Automatically processed
397
+ - **Unchanged files**: Skipped (MD5 matches previous run)
398
+ - **Modified files**: You'll be prompted:
399
+ ```
400
+ ======================================================================
401
+ File has been modified: schematic.pdf
402
+ Path: C:\Projects\hardware\board_v1\schematic.pdf
403
+ Old MD5: abc123...
404
+ New MD5: def456...
405
+ ======================================================================
406
+ Do you want to delete the old file data and process the new version? (yes/no):
407
+ ```
408
+
409
+ - Answer **yes** to remove old data from the knowledge base and reprocess
410
+ - Answer **no** to skip the file and keep the old version
411
+
412
+ This ensures efficient processing by only analyzing new or changed files, while maintaining data consistency in the knowledge base.
413
+
414
+ ### CLI Commands Reference
415
+
416
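+ In addition to the `mfcli configure`, `mfcli doctor`, and `mfcli pre-uninstall` commands described in the sections above, the `mfcli` tool provides the following commands: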
417
+
418
+ - **`mfcli init`** - Initialize a new project in the current directory
419
+ - **`mfcli run_pipeline`** - Run the analysis pipeline on the current directory
420
+ - **`mfcli web [--port PORT]`** - Start the web UI (default port: 9999)
421
+ - **`mfcli addfile FILE [--purpose PURPOSE]`** - Add a file to ChromaDB knowledge base
422
+
423
+ #### Start Web UI
424
+
425
+ Launch the interactive web interface:
426
+
427
+ ```bash
428
+ mfcli web
429
+ ```
430
+
431
+ **With custom port:**
432
+ ```bash
433
+ mfcli web --port 8080
434
+ ```
435
+
436
+ The web UI will be available at `http://localhost:9999/dev-ui/` (or your specified port).
437
+
438
+ ### Web UI Usage
439
+
440
+ The web interface allows you to:
441
+ - Upload and process individual files
442
+ - Run the pipeline on directories
443
+ - Query processed documents using natural language
444
+ - View processing status and results
445
+
446
+ **Example queries in web UI:**
447
+ - "What components are in the processed schematic?"
448
+ - "Tell me about the voltage ratings in the last datasheet"
449
+ - "What are the errata for this MCU?"
450
+
451
+ ## 🔌 MCP Server
452
+
453
+ This package includes a **Model Context Protocol (MCP) server** that exposes tools for AI assistants and development environments like Cline/Claude to interact with your engineering documentation knowledge base.
454
+
455
+ ### What is MCP?
456
+
457
+ The Model Context Protocol (MCP) is a standard that allows AI assistants to securely access external tools and data sources. The mfcli MCP server provides AI-powered access to your processed engineering documents through the local ChromaDB vector database.
458
+
459
+ ### Available Tools
460
+
461
+ The MCP server exposes the following tool:
462
+
463
+ #### `query_local_rag`
464
+
465
+ Query the local hardware knowledge base of processed engineering documents using natural language.
466
+
467
+ **Parameters:**
468
+ - `query` (required): Your search query (e.g., "MSPM0L130x", "power management", "IEC 61000-4-2")
469
+ - `project_name` (optional): The name of the project to query. If not provided, uses the last known project name from previous queries.
470
+ - `n_results` (optional): Number of results to return (1-20, default: 8)
471
+
472
+ **Returns:**
473
+ - Document chunks matching your query
474
+ - Metadata (file names, document types)
475
+ - Similarity scores (lower distance = more relevant)
476
+ - ChromaDB database path
477
+ - Project name that was used for the query
478
+
479
+ **Note:** The tool automatically remembers the last project name used, so you only need to specify `project_name` for the first query or when switching between projects.
480
+
481
+ **Example queries:**
482
+ - "MSPM0L130x specifications"
483
+ - "What are the voltage requirements?"
484
+ - "MCU pin configurations"
485
+ - "Component datasheets for capacitors"
486
+
487
+ ### Configuration for Cline/Claude
488
+
489
+ To use the MCP server with Cline (or other MCP-compatible clients), add the following configuration to your MCP settings file:
490
+
491
+ **Configuration Location:**
492
+ - **VS Code (Cline) on Windows**: `%APPDATA%\Code\User\globalStorage\saoudrizwan.claude-dev\settings\cline_mcp_settings.json`
493
+ - **Cline standalone**: `~/.cline/mcp_settings.json`
494
+
495
+ **Configuration:**
496
+
497
+ ```json
498
+ {
499
+ "mcpServers": {
500
+ "mfcli-mcp": {
501
+ "disabled": false,
502
+ "timeout": 60,
503
+ "type": "stdio",
504
+ "command": "python",
505
+ "args": ["-m", "mfcli.mcp.server"]
506
+ }
507
+ }
508
+ }
509
+ ```
510
+
511
+ ### Setup Instructions
512
+
513
+ 1. **Install mfcli system-wide** (see Installation section above):
514
+ ```bash
515
+ pip install .
516
+ ```
517
+
518
+ 2. **Process your engineering documents** to populate the knowledge base:
519
+ ```bash
520
+ cd /path/to/hardware/files
521
+ mfcli init
522
+ mfcli run_pipeline
523
+ ```
524
+
525
+ 3. **Add the MCP configuration** to your Cline/Claude settings file (see Configuration above)
526
+
527
+ 4. **Restart Cline/Claude** to load the MCP server
528
+
529
+ 5. **Use the tool** in your AI assistant:
530
+ - Ask questions like: "Query the local RAG for MSPM0L130x in project test"
531
+ - The assistant will use the `query_local_rag` tool to search your documents
532
+
533
+ ### Troubleshooting MCP Server
534
+
535
+ **Error**: `Module 'mfcli' not found`
536
+ - **Solution**: Ensure mfcli is installed system-wide (not just in a virtual environment)
537
+ ```bash
538
+ deactivate # Exit any virtual environment
539
+ pip install .
540
+ ```
541
+
542
+ **Error**: `ChromaDB directory not found`
543
+ - **Solution**: Run the pipeline at least once to create the vector database:
544
+ ```bash
545
+ mfcli init
546
+ mfcli run_pipeline
547
+ ```
548
+
549
+ **Error**: `MCP server timeout`
550
+ - **Solution**: Increase the timeout value in your MCP settings (default: 60 seconds)
551
+
552
+ **Server not connecting:**
553
+ - Verify the MCP server configuration in your settings file
554
+ - Check that Python is in your system PATH
555
+ - Restart your IDE/editor after updating MCP settings
556
+
557
+ ### MCP Server Architecture
558
+
559
+ The MCP server is implemented in `mfcli/mcp/` with the following structure:
560
+
561
+ ```
562
+ mfcli/mcp/
563
+ ├── server.py # MCP server entry point
564
+ ├── mcp_instance.py # MCP server instance and tool definitions
565
+ └── tools/
566
+ └── query_knowledgebase.py # RAG query implementation
567
+ ```
568
+
569
+ The server connects to your local ChromaDB instance (located in your system's application data directory) and provides semantic search capabilities over all processed engineering documents.
570
+
571
+ ## 🔄 Pipeline
572
+
573
+ The pipeline processes engineering documents in two main phases:
574
+
575
+ ### Phase 1: Pre-processing
576
+
577
+ For each file in the input directory:
578
+
579
+ 1. **Classification & Validation** (`classifier.py`)
580
+ - Determines file type (PDF, EDIF, CSV, etc.)
581
+ - Validates file integrity and MIME type
582
+ - Checks file size limits
583
+
584
+ 2. **Gemini File Upload** (PDFs only)
585
+ - Uploads PDF files to Gemini's Files API for vision-based processing
586
+
587
+ 3. **Text Extraction** (`extractor.py`)
588
+ - Extracts text content from documents
589
+ - Handles various formats (PDF, netlist formats, CSV)
590
+
591
+ 4. **Sub-type Classification** (`sub_classifier.py`)
592
+ - Determines document sub-type (e.g., schematic, BOM, datasheet, MCU datasheet, errata)
593
+ - Uses LLM analysis when necessary
594
+
595
+ 5. **Schema Mapping** (`schema_mapper.py`)
596
+ - Maps document structure to database schemas
597
+ - Skipped for schemaless files like schematics
598
+
599
+ 6. **Data Parsing** (`parser.py`)
600
+ - Parses structured data from documents
601
+ - Stores in SQLite database
602
+
603
+ 7. **Data Enrichment** (`data_enricher.py`)
604
+ - Enriches parsed data with additional information
605
+ - Downloads component datasheets for BOM entries
606
+
607
+ ### Phase 2: Analysis & Generation
608
+
609
+ After all files are pre-processed:
610
+
611
+ 1. **Netlist-to-BOM Mapping** (`bom_netlist_mapper.py`)
612
+ - Maps netlist components to BOM entries
613
+ - Correlates design files with component lists
614
+
615
+ 2. **File Generation** (`generator.py`)
616
+ - **BOM CSV**: Extracts components from schematics, generates CSV with reference, value, quantity, manufacturer, MPN, description
617
+ - **Cheat Sheets**: Generates JSON cheat sheets for:
618
+ - MCU datasheets (register maps, peripherals, specs)
619
+ - MCU errata (known issues, workarounds)
620
+ - Debug setup (pin configurations, debugging instructions)
621
+ - Functional blocks (system architecture, block diagrams)
622
+
623
+ ### Supported File Types
624
+
625
+ - **PDF**: Schematics, datasheets, MCU documentation, errata sheets
626
+ - **EDIF**: Electronic Design Interchange Format netlists
627
+ - **PADS**: PADS ASCII netlist format
628
+ - **KiCad**: Legacy netlist (.net) and SPICE circuit (.cir) formats
629
+ - **CSV**: Bill of Materials files
630
+
631
+ ## 📂 Output Directories
632
+
633
+ The pipeline creates directories in two locations:
634
+
635
+ ### 1. User Application Data
636
+
637
+ Platform-specific storage for global application data:
638
+
639
+ **Windows:**
640
+ ```
641
+ C:\Users\<username>\AppData\Local\Multifactor\
642
+ └── chromadb/ # Vector embeddings database
643
+ ```
644
+
645
+ **macOS:**
646
+ ```
647
+ /Users/<username>/Library/Application Support/Multifactor/
648
+ └── chromadb/ # Vector embeddings database
649
+ ```
650
+
651
+ **Linux:**
652
+ ```
653
+ ~/.local/share/Multifactor/
654
+ └── chromadb/ # Vector embeddings database
655
+ ```
656
+
657
+ **Contents:**
658
+ - `chromadb/` - Vector embeddings of processed documents for RAG queries
659
+
660
+ ### 2. Project Output Directories
661
+
662
+ Created within a **"multifactor"** folder at the **git repository root** if you're in a git repo, or in the **current directory** if not:
663
+
664
+ ```
665
+ <git_root_or_current_directory>/
666
+ └── multifactor/ # Parent folder for all project files and outputs
667
+ ├── config.json # Project configuration (project name, etc.)
668
+ ├── file_docket.json # File tracking and processing metadata
669
+ ├── context/ # Place your hardware design files here for processing
670
+ ├── generated_files/ # BOM CSV files generated from schematics
671
+ ├── hw_cheat_sheets/ # JSON cheat sheets (MCU, errata, debug, functional blocks)
672
+ ├── data_sheets/ # Downloaded component datasheets (from BOM processing)
673
+ ├── pdf_parts/ # Extracted PDF parts for analysis
674
+ ├── agent_instructions/ # (Reserved for future use)
675
+ ├── fw_tasks/ # (Reserved for future use)
676
+ └── requirements/ # (Reserved for future use)
677
+ ```
678
+
679
+ **Git Repository Example:**
680
+
681
+ If you're in a git repository at `C:\git\my-hardware-project\` and run `mfcli init` from any subdirectory, all project folders will be created within a "multifactor" folder at the git root:
682
+
683
+ - `C:\git\my-hardware-project\multifactor\config.json` - Project configuration
684
+ - `C:\git\my-hardware-project\multifactor\file_docket.json` - File tracking
685
+ - `C:\git\my-hardware-project\multifactor\context\` - Place source files here
686
+ - `C:\git\my-hardware-project\multifactor\generated_files\` - Generated BOM files
687
+ - `C:\git\my-hardware-project\multifactor\hw_cheat_sheets\` - Cheat sheets
688
+ - `C:\git\my-hardware-project\multifactor\data_sheets\` - Downloaded datasheets
689
+ - `C:\git\my-hardware-project\multifactor\pdf_parts\` - PDF parts
690
+
691
+ **Non-Git Example:**
692
+
693
+ If you're NOT in a git repository and run `mfcli init` from `C:\Projects\hardware\board_v1\`, all folders will be created within a "multifactor" folder in that same directory:
694
+
695
+ - `C:\Projects\hardware\board_v1\multifactor\config.json` - Project configuration
696
+ - `C:\Projects\hardware\board_v1\multifactor\file_docket.json` - File tracking
697
+ - `C:\Projects\hardware\board_v1\multifactor\context\` - Place source files here
698
+ - `C:\Projects\hardware\board_v1\multifactor\generated_files\` - Generated BOM files
699
+ - `C:\Projects\hardware\board_v1\multifactor\hw_cheat_sheets\` - Cheat sheets
700
+ - `C:\Projects\hardware\board_v1\multifactor\data_sheets\` - Downloaded datasheets
701
+ - `C:\Projects\hardware\board_v1\multifactor\pdf_parts\` - PDF parts
702
+
703
+ **Why This Structure?**
704
+
705
+ This design ensures that all project-related files are organized within a single "multifactor" folder, making it easier to:
706
+ - ✅ Keep all generated files contained in one location
707
+ - ✅ Add the folder to `.gitignore` if you don't want to version-control outputs
708
+ - ✅ Cleanly separate source files from generated outputs
709
+ - ✅ Avoid cluttering the repository root with multiple folders
710
+ - ✅ Back up or delete all mfcli outputs at once
711
+
712
+ ## 💾 Data Storage
713
+
714
+ ### SQLite Database
715
+
716
+ Located at `sessions.db` in the project root (or at the path specified in `.env`).
717
+
718
+ **Stores:**
719
+ - Pipeline run metadata and status
720
+ - File metadata and processing results
721
+ - Parsed component data (BOMs, netlists, datasheets)
722
+ - MCU information and errata
723
+ - ADK session data and conversation history
724
+
725
+ ### ChromaDB Vector Store
726
+
727
+ Located in the user application data directory.
728
+
729
+ **Stores:**
730
+ - Vector embeddings of processed documents
731
+ - Enables semantic search and RAG queries
732
+ - Uses OpenAI embeddings (text-embedding-3-small)
733
+
734
+ ### S3 Storage (Optional)
735
+
736
+ If AWS credentials are configured:
737
+ - Uploaded files
738
+ - Generated outputs
739
+ - Long-term document storage
740
+
741
+ ## 📁 Project Structure
742
+
743
+ ```
744
+ multifactor-adk-backend/
745
+ ├── app/
746
+ │ ├── agents/
747
+ │ │ ├── controller/ # Main controller agent
748
+ │ │ │ ├── agent.py # Agent definition
749
+ │ │ │ ├── config.yaml # Agent configuration
750
+ │ │ │ └── tools.py # Agent tools
751
+ │ │ └── tools/
752
+ │ │ └── general.py # Shared tools
753
+ │ ├── alembic/ # Database migrations
754
+ │ ├── cli/
755
+ │ │ └── main.py # CLI entry point (mfcli)
756
+ │ ├── client/ # External service clients
757
+ │ │ ├── chroma_db.py # ChromaDB vector store
758
+ │ │ ├── gemini.py # Gemini API client
759
+ │ │ ├── llama_parse.py # LlamaParse client
760
+ │ │ └── vector_db.py # Vector DB interface
761
+ │ ├── constants/ # Enums and constants
762
+ │ ├── crud/ # Database operations
763
+ │ ├── digikey/ # DigiKey API integration
764
+ │ ├── models/ # SQLAlchemy models
765
+ │ ├── pipeline/
766
+ │ │ ├── pipeline.py # Main pipeline orchestration
767
+ │ │ ├── classifier.py # File classification
768
+ │ │ ├── extractor.py # Text extraction
769
+ │ │ ├── sub_classifier.py # Document sub-typing
770
+ │ │ ├── schema_mapper.py # Schema mapping
771
+ │ │ ├── parser.py # Data parsing
772
+ │ │ ├── data_enricher.py # Data enrichment
773
+ │ │ ├── analysis/
774
+ │ │ │ ├── bom_netlist_mapper.py
775
+ │ │ │ └── generators/ # Output generators
776
+ │ │ │ ├── generator.py # Main generator
777
+ │ │ │ ├── bom/ # BOM generation
778
+ │ │ │ ├── debug_setup/ # Debug setup cheat sheets
779
+ │ │ │ ├── functional_blocks/ # Functional block diagrams
780
+ │ │ │ ├── mcu/ # MCU documentation
781
+ │ │ │ └── mcu_errata/ # Errata cheat sheets
782
+ │ │ ├── extractors/ # Format-specific extractors
783
+ │ │ └── parsers/
784
+ │ │ └── netlist/ # Netlist parsers (EDIF, KiCad, PADS)
785
+ │ ├── tests/ # Unit tests
786
+ │ └── utils/ # Utility functions
787
+ │ ├── config.py
788
+ │ ├── directory_manager.py # Output directory management
789
+ │ ├── logger.py
790
+ │ └── ...
791
+ ├── .env # Environment configuration (create this)
792
+ ├── .gitignore
793
+ ├── alembic.ini # Alembic configuration
794
+ ├── pyproject.toml # Package configuration
795
+ ├── requirements.txt # Python dependencies
796
+ └── README.md # This file
797
+ ```
798
+
799
+ ## 🛠️ Development
800
+
801
+ ### Running the Pipeline
802
+
803
+ First, navigate to your hardware files directory and initialize:
804
+
805
+ ```bash
806
+ cd /path/to/hardware/files
807
+ mfcli init
808
+ mfcli run_pipeline
809
+ ```
810
+
811
+ ### Starting the Web UI
812
+
813
+ ```bash
814
+ mfcli web --port 9999
815
+ ```
816
+
817
+ ### Development Status
818
+
819
+ ⚠️ **This project is currently in development.** Features and APIs may change.
820
+
821
+ ### Publishing to PyPI
822
+
823
+ This project is automatically published to PyPI when changes are merged into the `dev` branch. See [PYPI_PUBLISHING.md](PYPI_PUBLISHING.md) for details on:
824
+ - How automated publishing works
825
+ - PyPI configuration setup (trusted publishing or API tokens)
826
+ - Version management and semantic versioning
827
+ - Testing and troubleshooting
828
+ - Manual publishing (if needed)
829
+
830
+ **Quick version update workflow:**
831
+ 1. Update version in `pyproject.toml`
832
+ 2. Commit and push to a feature branch
833
+ 3. Create and merge PR to `dev`
834
+ 4. Automated workflow builds and publishes to PyPI
835
+
836
+ ### Database Migrations
837
+
838
+ This project uses Alembic for database schema management.
839
+
840
+ #### Create a New Migration
841
+ ```bash
842
+ alembic revision -m "description of changes"
843
+ ```
844
+
845
+ #### Apply Migrations
846
+ ```bash
847
+ alembic upgrade head
848
+ ```
849
+
850
+ #### Rollback Migration
851
+ ```bash
852
+ alembic downgrade -1
853
+ ```
854
+
855
+ ### Logging
856
+
857
+ Logging is configured in `utils/logger.py`. Use the shared logger to emit messages for debugging:
858
+
859
+ ```python
860
+ from mfcli.utils.logger import get_logger
861
+ logger = get_logger(__name__)
862
+ logger.info("Message")
863
+ logger.error("Error message")
864
+ ```
865
+
866
+ ### Running Tests
867
+
868
+ ```bash
869
+ pytest app/tests/
870
+ ```
871
+
872
+ ## 🐛 Troubleshooting
873
+
874
+ ### Common Issues
875
+
876
+ #### 1. Installation Issues
877
+
878
+ **Error**: `ModuleNotFoundError: No module named 'google'`
879
+ - **Solution**: Reinstall the package: `pip install .`
880
+
881
+ **Error**: `Command 'mfcli' not found`
882
+ - **Solution**: Ensure the virtual environment is activated and the package is installed: `pip install .`
883
+
884
+ #### 2. API Key Errors
885
+
886
+ **Error**: `google.api_core.exceptions.Unauthenticated: 401 API key not valid`
887
+ - **Solution**: Verify `google_api_key` in `.env` file
888
+ - Get key from: https://aistudio.google.com/app/apikey
889
+
890
+ **Error**: `OpenAI API error`
891
+ - **Solution**: Check `openai_api_key` in `.env`
892
+ - Ensure API key has billing enabled
893
+
894
+ #### 3. ChromaDB Issues
895
+
896
+ **Error**: `ChromaDB directory not found`
897
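+ - **Solution**: ChromaDB is created automatically on first run in the platform-specific user application data directory (e.g. `AppData\Local\Multifactor\` on Windows, `~/.local/share/Multifactor/` on Linux)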
898
+
899
+ **Error**: `Embedding dimension mismatch`
900
+ - **Solution**: Delete ChromaDB directory and restart to rebuild with correct dimensions
901
+
902
+ #### 4. Pipeline Processing Failures
903
+
904
+ **Error**: `Could not find metadata file. Please initialize this repo with "mfcli init"`
905
+ - **Solution**: You need to run `mfcli init` in your hardware files directory before running `mfcli run_pipeline`
906
+
907
+ **Error**: `File not found`
908
+ - **Solution**: Make sure you're running `mfcli run_pipeline` from within your hardware files directory (where you ran `mfcli init`)
909
+
910
+ **Error**: `File extension is not supported`
911
+ - **Solution**: Check that your files are in supported formats (PDF, EDIF, CSV, .net, .cir, .asc)
912
+
913
+ **Error**: `No components extracted from schematic`
914
+ - **Solution**:
915
+ - Ensure schematic PDF is clear and readable
916
+ - Check that component designators are visible
917
+ - Verify file is an actual schematic (not layout or other document type)
918
+
919
+ #### 5. Database Issues
920
+
921
+ **Error**: `Database connection failed`
922
+ - **Solution**: Check that the SQLite database path in `.env` is valid
923
+ - Run migrations: `alembic upgrade head`
924
+
925
+ ### Verify Configuration
926
+
927
+ Run this Python snippet to verify your configuration:
928
+
929
+ ```python
930
+ from mfcli.utils.config import get_config
931
+ config = get_config()
932
+ print(f"Google API Key set: {'Yes' if config.google_api_key else 'No'}")
933
+ print(f"OpenAI API Key set: {'Yes' if config.openai_api_key else 'No'}")
934
+ print(f"Database path: {config.sqlite_db_path}")
935
+ ```
936
+
937
+ ### Debug Mode
938
+
939
+ For detailed debugging, check the console output when running `mfcli run_pipeline` or `mfcli web`. The application uses structured logging to help diagnose issues.
940
+
941
+ ### Getting Help
942
+
943
+ - **Issues**: [GitHub Issues](https://github.com/MultifactorAI/multifactor-adk-backend/issues)
944
+ - **Documentation**: Check this README and inline code documentation
945
+
946
+ ## 📄 License
947
+
948
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
949
+
950
+ ## 🤝 Contributing
951
+
952
+ This project is in active development. Contributions, issues, and feature requests are welcome!
953
+
954
+ ---
955
+
956
+ **Built with [Google Gemini](https://ai.google.dev/) and [Google Agent Development Kit (ADK)](https://github.com/google/adk)**