mfcli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. mfcli/.env.example +72 -0
  2. mfcli/__init__.py +0 -0
  3. mfcli/agents/__init__.py +0 -0
  4. mfcli/agents/controller/__init__.py +0 -0
  5. mfcli/agents/controller/agent.py +19 -0
  6. mfcli/agents/controller/config.yaml +27 -0
  7. mfcli/agents/controller/tools.py +42 -0
  8. mfcli/agents/tools/general.py +118 -0
  9. mfcli/alembic/env.py +61 -0
  10. mfcli/alembic/script.py.mako +28 -0
  11. mfcli/alembic/versions/6ccc0c7c397c_added_fields_to_pdf_parts_model.py +39 -0
  12. mfcli/alembic/versions/769019ef4870_added_gemini_file_path_to_pdf_part_model.py +33 -0
  13. mfcli/alembic/versions/7a2e3a779fdc_added_functional_block_and_component_.py +54 -0
  14. mfcli/alembic/versions/7d5adb2a47a7_added_pdf_parts_model.py +41 -0
  15. mfcli/alembic/versions/7fcb7d6a5836_init.py +167 -0
  16. mfcli/alembic/versions/e0f2b5765c72_added_cascade_delete_for_models_that_.py +32 -0
  17. mfcli/alembic.ini +147 -0
  18. mfcli/cli/__init__.py +0 -0
  19. mfcli/cli/dependencies.py +59 -0
  20. mfcli/cli/main.py +192 -0
  21. mfcli/client/__init__.py +0 -0
  22. mfcli/client/chroma_db.py +184 -0
  23. mfcli/client/docling.py +44 -0
  24. mfcli/client/gemini.py +252 -0
  25. mfcli/client/llama_parse.py +38 -0
  26. mfcli/client/vector_db.py +93 -0
  27. mfcli/constants/__init__.py +0 -0
  28. mfcli/constants/base_enum.py +18 -0
  29. mfcli/constants/directory_names.py +1 -0
  30. mfcli/constants/file_types.py +189 -0
  31. mfcli/constants/gemini.py +1 -0
  32. mfcli/constants/openai.py +6 -0
  33. mfcli/constants/pipeline_run_status.py +3 -0
  34. mfcli/crud/__init__.py +0 -0
  35. mfcli/crud/file.py +42 -0
  36. mfcli/crud/functional_blocks.py +26 -0
  37. mfcli/crud/netlist.py +18 -0
  38. mfcli/crud/pipeline_run.py +17 -0
  39. mfcli/crud/project.py +99 -0
  40. mfcli/digikey/__init__.py +0 -0
  41. mfcli/digikey/digikey.py +105 -0
  42. mfcli/main.py +5 -0
  43. mfcli/mcp/__init__.py +0 -0
  44. mfcli/mcp/configs/cline_mcp_settings.json +11 -0
  45. mfcli/mcp/configs/mfcli.mcp.json +7 -0
  46. mfcli/mcp/mcp_instance.py +6 -0
  47. mfcli/mcp/server.py +37 -0
  48. mfcli/mcp/state_manager.py +51 -0
  49. mfcli/mcp/tools/__init__.py +0 -0
  50. mfcli/mcp/tools/query_knowledgebase.py +108 -0
  51. mfcli/models/__init__.py +10 -0
  52. mfcli/models/base.py +10 -0
  53. mfcli/models/bom.py +71 -0
  54. mfcli/models/datasheet.py +10 -0
  55. mfcli/models/debug_setup.py +64 -0
  56. mfcli/models/file.py +43 -0
  57. mfcli/models/file_docket.py +94 -0
  58. mfcli/models/file_metadata.py +19 -0
  59. mfcli/models/functional_blocks.py +94 -0
  60. mfcli/models/llm_response.py +5 -0
  61. mfcli/models/mcu.py +97 -0
  62. mfcli/models/mcu_errata.py +26 -0
  63. mfcli/models/netlist.py +59 -0
  64. mfcli/models/pdf_parts.py +25 -0
  65. mfcli/models/pipeline_run.py +34 -0
  66. mfcli/models/project.py +27 -0
  67. mfcli/models/project_metadata.py +15 -0
  68. mfcli/pipeline/__init__.py +0 -0
  69. mfcli/pipeline/analysis/__init__.py +0 -0
  70. mfcli/pipeline/analysis/bom_netlist_mapper.py +28 -0
  71. mfcli/pipeline/analysis/generators/__init__.py +0 -0
  72. mfcli/pipeline/analysis/generators/bom/__init__.py +0 -0
  73. mfcli/pipeline/analysis/generators/bom/bom.py +74 -0
  74. mfcli/pipeline/analysis/generators/debug_setup/__init__.py +0 -0
  75. mfcli/pipeline/analysis/generators/debug_setup/debug_setup.py +71 -0
  76. mfcli/pipeline/analysis/generators/debug_setup/instructions.py +150 -0
  77. mfcli/pipeline/analysis/generators/functional_blocks/__init__.py +0 -0
  78. mfcli/pipeline/analysis/generators/functional_blocks/functional_blocks.py +93 -0
  79. mfcli/pipeline/analysis/generators/functional_blocks/instructions.py +34 -0
  80. mfcli/pipeline/analysis/generators/functional_blocks/validator.py +94 -0
  81. mfcli/pipeline/analysis/generators/generator.py +258 -0
  82. mfcli/pipeline/analysis/generators/generator_base.py +18 -0
  83. mfcli/pipeline/analysis/generators/mcu/__init__.py +0 -0
  84. mfcli/pipeline/analysis/generators/mcu/instructions.py +156 -0
  85. mfcli/pipeline/analysis/generators/mcu/mcu.py +84 -0
  86. mfcli/pipeline/analysis/generators/mcu_errata/__init__.py +1 -0
  87. mfcli/pipeline/analysis/generators/mcu_errata/instructions.py +77 -0
  88. mfcli/pipeline/analysis/generators/mcu_errata/mcu_errata.py +95 -0
  89. mfcli/pipeline/analysis/generators/summary/__init__.py +0 -0
  90. mfcli/pipeline/analysis/generators/summary/summary.py +47 -0
  91. mfcli/pipeline/classifier.py +93 -0
  92. mfcli/pipeline/data_enricher.py +15 -0
  93. mfcli/pipeline/extractor.py +34 -0
  94. mfcli/pipeline/extractors/__init__.py +0 -0
  95. mfcli/pipeline/extractors/pdf.py +12 -0
  96. mfcli/pipeline/parser.py +120 -0
  97. mfcli/pipeline/parsers/__init__.py +0 -0
  98. mfcli/pipeline/parsers/netlist/__init__.py +0 -0
  99. mfcli/pipeline/parsers/netlist/edif.py +93 -0
  100. mfcli/pipeline/parsers/netlist/kicad_legacy_net.py +326 -0
  101. mfcli/pipeline/parsers/netlist/kicad_spice.py +135 -0
  102. mfcli/pipeline/parsers/netlist/pads.py +185 -0
  103. mfcli/pipeline/parsers/netlist/protel.py +166 -0
  104. mfcli/pipeline/parsers/netlist/protel_detector.py +29 -0
  105. mfcli/pipeline/pipeline.py +419 -0
  106. mfcli/pipeline/preprocessors/__init__.py +0 -0
  107. mfcli/pipeline/preprocessors/user_guide.py +127 -0
  108. mfcli/pipeline/run_context.py +32 -0
  109. mfcli/pipeline/schema_mapper.py +89 -0
  110. mfcli/pipeline/sub_classifier.py +115 -0
  111. mfcli/utils/__init__.py +0 -0
  112. mfcli/utils/config.py +33 -0
  113. mfcli/utils/configurator.py +324 -0
  114. mfcli/utils/data_cleaner.py +82 -0
  115. mfcli/utils/datasheet_vectorizer.py +281 -0
  116. mfcli/utils/directory_manager.py +96 -0
  117. mfcli/utils/file_upload.py +298 -0
  118. mfcli/utils/files.py +16 -0
  119. mfcli/utils/http_requests.py +54 -0
  120. mfcli/utils/kb_lister.py +89 -0
  121. mfcli/utils/kb_remover.py +173 -0
  122. mfcli/utils/logger.py +28 -0
  123. mfcli/utils/mcp_configurator.py +311 -0
  124. mfcli/utils/migrations.py +18 -0
  125. mfcli/utils/orm.py +43 -0
  126. mfcli/utils/pdf_splitter.py +63 -0
  127. mfcli/utils/query_service.py +22 -0
  128. mfcli/utils/system_check.py +306 -0
  129. mfcli/utils/tools.py +31 -0
  130. mfcli/utils/vectorizer.py +28 -0
  131. mfcli-0.2.0.dist-info/METADATA +841 -0
  132. mfcli-0.2.0.dist-info/RECORD +136 -0
  133. mfcli-0.2.0.dist-info/WHEEL +5 -0
  134. mfcli-0.2.0.dist-info/entry_points.txt +3 -0
  135. mfcli-0.2.0.dist-info/licenses/LICENSE +21 -0
  136. mfcli-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,841 @@
1
+ Metadata-Version: 2.4
2
+ Name: mfcli
3
+ Version: 0.2.0
4
+ Summary: AI-powered CLI for analyzing hardware engineering documents
5
+ Author: Multifactor AI
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/MultifactorAI/multifactor-adk-backend
8
+ Project-URL: Repository, https://github.com/MultifactorAI/multifactor-adk-backend
9
+ Project-URL: Issues, https://github.com/MultifactorAI/multifactor-adk-backend/issues
10
+ Keywords: hardware,engineering,AI,MCP,RAG,electronics,schematic,BOM,datasheet
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Electronic Design Automation (EDA)
17
+ Requires-Python: <3.13,>=3.12
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: google-adk
21
+ Requires-Dist: python-dotenv
22
+ Requires-Dist: fastapi
23
+ Requires-Dist: uvicorn
24
+ Requires-Dist: pandas
25
+ Requires-Dist: pyyaml
26
+ Requires-Dist: sqlalchemy
27
+ Requires-Dist: pydantic-settings==2.10.1
28
+ Requires-Dist: alembic
29
+ Requires-Dist: werkzeug
30
+ Requires-Dist: boto3
31
+ Requires-Dist: botocore
32
+ Requires-Dist: pydantic==2.11.9
33
+ Requires-Dist: protobuf
34
+ Requires-Dist: sqlmodel
35
+ Requires-Dist: llama-parse
36
+ Requires-Dist: llama-index-core
37
+ Requires-Dist: requests
38
+ Requires-Dist: openai
39
+ Requires-Dist: langchain
40
+ Requires-Dist: langchain-text-splitters
41
+ Requires-Dist: tiktoken
42
+ Requires-Dist: chromadb==1.3.4
43
+ Requires-Dist: urllib3
44
+ Requires-Dist: playwright
45
+ Requires-Dist: pymupdf
46
+ Requires-Dist: fastmcp
47
+ Requires-Dist: docling
48
+ Requires-Dist: pytest-asyncio
49
+ Requires-Dist: pytest-mock
50
+ Requires-Dist: trio
51
+ Requires-Dist: pikepdf
52
+ Provides-Extra: dev
53
+ Requires-Dist: pytest; extra == "dev"
54
+ Requires-Dist: pyinstaller; extra == "dev"
55
+ Requires-Dist: build; extra == "dev"
56
+ Requires-Dist: twine; extra == "dev"
57
+ Dynamic: license-file
58
+
59
+ # Multifactor ADK Backend
60
+
61
+ [![Development Status](https://img.shields.io/badge/status-in%20development-yellow)](https://github.com/MultifactorAI/multifactor-adk-backend)
62
+
63
+ An AI-powered engineering document processing pipeline that intelligently analyzes hardware engineering documents including schematics, datasheets, BOMs, and netlists. Built with Google's Gemini models, the system extracts structured data, generates documentation, and enables semantic search across processed documents.
64
+
65
+ ## 🚀 Features
66
+
67
+ - **CLI-Based Pipeline**: Process entire directories of engineering documents with a single command
68
+ - **Intelligent Document Analysis**: Automated classification, text extraction, and schema mapping
69
+ - **BOM Generation**: Extract components from schematics and generate CSV Bill of Materials
70
+ - **Datasheet Enrichment**: Automatically download component datasheets from BOMs
71
+ - **Cheat Sheet Generation**: AI-generated documentation for MCU datasheets, errata, debug setup, and functional blocks
72
+ - **RAG-Powered Queries**: Query processed documents using ChromaDB-backed Retrieval-Augmented Generation
73
+ - **Web UI**: Interactive interface for document processing and agent interaction (optional)
74
+ - **File Type Support**: PDF, EDIF, PADS, KiCad netlists, CSV BOMs, and more
75
+
76
+ ## 📋 Table of Contents
77
+
78
+ - [Architecture](#architecture)
79
+ - [Prerequisites](#prerequisites)
80
+ - [Installation](#installation)
81
+ - [Configuration](#configuration)
82
+ - [Usage](#usage)
83
+ - [MCP Server](#mcp-server)
84
+ - [Pipeline](#pipeline)
85
+ - [Output Directories](#output-directories)
86
+ - [Data Storage](#data-storage)
87
+ - [Project Structure](#project-structure)
88
+ - [Development](#development)
89
+ - [Troubleshooting](#troubleshooting)
90
+
91
+ ## 🏗️ Architecture
92
+
93
+ The system uses a streamlined architecture with a single controller agent that orchestrates a sequential processing pipeline:
94
+
95
+ ```
96
+ Controller Agent
97
+ ├── Tools:
98
+ │ ├── run_pipeline (Sequential pipeline execution)
99
+ │ └── query_knowledgebase (RAG-based document queries)
100
+
101
+ └── Pipeline Stages:
102
+ ├── Pre-processing
103
+ │ ├── File classification & validation
104
+ │ ├── Text extraction
105
+ │ ├── Document sub-type detection (uses LLM when needed)
106
+ │ ├── Schema mapping (for structured documents)
107
+ │ ├── Data parsing
108
+ │ └── Data enrichment
109
+
110
+ └── Analysis & Generation
111
+ ├── Netlist-to-BOM mapping
112
+ └── File generation:
113
+ ├── BOM CSV files
114
+ └── JSON cheat sheets (MCU, errata, debug setup, functional blocks)
115
+ ```
116
+
117
+ The pipeline processes files sequentially, making LLM calls only when necessary for tasks like document sub-classification and schema mapping, rather than using a hierarchy of sub-agents.
118
+
119
+ ## 📦 Prerequisites
120
+
121
+ - **Python 3.12** (3.12.x only — the package declares `Requires-Python: >=3.12,<3.13`, so 3.13+ is not supported)
122
+ - **Required API Keys**:
123
+ - Google API Key (for Gemini models)
124
+ - OpenAI API Key (for embeddings)
125
+ - LlamaParse Cloud API Key (for document parsing)
126
+ - DigiKey API credentials (client ID & secret, for datasheet downloads)
127
+ - AWS credentials (for S3 storage, optional)
128
+ - **Database**: SQLite (automatically managed)
129
+
130
+ ## 🔧 Installation
131
+
132
+ ### Quick Install (Recommended)
133
+
134
+ The easiest way to install mfcli is using our automated installation script with **pipx**, which provides isolated dependency management while making the CLI globally available.
135
+
136
+ **Windows (PowerShell):**
137
+ ```powershell
138
+ iwr -useb https://raw.githubusercontent.com/MultifactorAI/multifactor-adk-backend/main/install.ps1 | iex
139
+ ```
140
+
141
+ **Linux/macOS:**
142
+ ```bash
143
+ curl -fsSL https://raw.githubusercontent.com/MultifactorAI/multifactor-adk-backend/main/install.sh | bash
144
+ ```
145
+
146
+ The script will:
147
+ - ✅ Check Python 3.12 installation
148
+ - ✅ Install pipx if needed
149
+ - ✅ Install mfcli with isolated dependencies
150
+ - ✅ Set up configuration directory
151
+ - ✅ Make `mfcli` and `mfcli-mcp` commands globally available
152
+
153
+ ### Manual Installation
154
+
155
+ If you prefer manual installation or the script doesn't work:
156
+
157
+ #### Using pipx (Recommended)
158
+
159
+ ```bash
160
+ # Install pipx if not already installed
161
+ python -m pip install --user pipx
162
+ python -m pipx ensurepath
163
+
164
+ # Install mfcli from GitHub
165
+ pipx install git+https://github.com/MultifactorAI/multifactor-adk-backend.git
166
+
167
+ # Or install from PyPI
168
+ pipx install mfcli
169
+ ```
170
+
171
+ **Why pipx?**
172
+ - ✅ Isolated dependencies - no conflicts with other Python packages
173
+ - ✅ Global CLI access - available in any terminal
174
+ - ✅ No virtual environment activation needed
175
+ - ✅ MCP server compatible - works with external tools like Cline
176
+ - ✅ Easy updates: `pipx upgrade mfcli`
177
+
178
+ #### Using pip (For Development)
179
+
180
+ ```bash
181
+ # Clone the repository
182
+ git clone https://github.com/MultifactorAI/multifactor-adk-backend.git
183
+ cd multifactor-adk-backend
184
+
185
+ # Create virtual environment
186
+ python -m venv venv
187
+
188
+ # Activate virtual environment
189
+ # Windows:
190
+ venv\Scripts\activate
191
+ # macOS/Linux:
192
+ source venv/bin/activate
193
+
194
+ # Install in development mode
195
+ pip install -e .
196
+ ```
197
+
198
+ **Note**: If you plan to use the MCP server with Cline/Claude Code, install with pipx instead to ensure global availability.
199
+
200
+ ### Verify Installation
201
+
202
+ After installation, verify everything is working:
203
+
204
+ ```bash
205
+ # Check mfcli is installed
206
+ mfcli --help
207
+
208
+ # Run system health check
209
+ mfcli doctor
210
+ ```
211
+
212
+ ## ⚙️ Configuration
213
+
214
+ ### Interactive Configuration Wizard (Recommended)
215
+
216
+ The easiest way to configure mfcli is using the interactive wizard:
217
+
218
+ ```bash
219
+ mfcli configure
220
+ ```
221
+
222
+ This will guide you through setting up all required API keys with:
223
+ - 🔗 Direct links to get each API key
224
+ - ✅ Automatic validation of API keys
225
+ - 📝 Smart defaults for vectorization settings
226
+ - 💾 Automatic saving to the correct location
227
+
228
+ ### Manual Configuration
229
+
230
+ Alternatively, create a `.env` file at:
231
+
232
+ - **Windows:** `C:\Users\<username>\Multifactor\.env`
233
+ - **macOS/Linux:** `~/Multifactor/.env`
234
+
235
+ ```ini
236
+ # API Keys (Required)
237
+ google_api_key=your_google_api_key
238
+ openai_api_key=your_openai_api_key
239
+ llama_cloud_api_key=your_llamaparse_api_key
240
+ digikey_client_id=your_digikey_client_id
241
+ digikey_client_secret=your_digikey_client_secret
242
+
243
+ # Vector Database Configuration
244
+ chunk_size=1000
245
+ chunk_overlap=200
246
+ embedding_model=text-embedding-3-small
247
+ embedding_dimensions=1536
248
+ ```
249
+
250
+ ### Check Configuration
251
+
252
+ To verify your configuration at any time:
253
+
254
+ ```bash
255
+ # Check configuration status
256
+ mfcli configure --check
257
+
258
+ # Run comprehensive system check
259
+ mfcli doctor
260
+ ```
261
+
262
+ ### Required API Keys & How to Get Them
263
+
264
+ - **Google API Key**: [Google AI Studio](https://aistudio.google.com/app/apikey)
265
+ - **OpenAI API Key**: [OpenAI Platform](https://platform.openai.com/api-keys)
266
+ - **LlamaParse API Key**: [LlamaIndex Cloud](https://cloud.llamaindex.ai/)
267
+ - **DigiKey API**: [DigiKey Developer Portal](https://developer.digikey.com/)
268
+ - **AWS Credentials**: [AWS IAM Console](https://console.aws.amazon.com/iam/) (optional)
269
+
270
+ **Tip:** The `mfcli configure` wizard provides these links interactively and validates your keys!
271
+
272
+ ## 🚀 Usage
273
+
274
+ ### Command-Line Interface
275
+
276
+ #### Getting Started with a Project
277
+
278
+ To analyze hardware design files, follow these steps:
279
+
280
+ **1. Navigate to your hardware design files directory:**
281
+
282
+ ```bash
283
+ cd C:\Projects\hardware\board_v1
284
+ ```
285
+
286
+ **2. Initialize the project:**
287
+
288
+ ```bash
289
+ mfcli init
290
+ ```
291
+
292
+ You'll be prompted to enter a project name (3-45 characters, alphanumeric with underscores/hyphens allowed). This creates a `.multifactor` folder in your current directory containing:
293
+ - `config.json` - Project configuration with your project name
294
+ - `file_docket.json` - File tracking and metadata
295
+
296
+ **3. Run the pipeline:**
297
+
298
+ ```bash
299
+ mfcli run_pipeline
300
+ ```
301
+
302
+ This will:
303
+ - Process all supported files in the directory
304
+ - Skip files that have already been processed (matching MD5 checksum)
305
+ - Prompt for confirmation if a file has been modified (different MD5)
306
+ - Generate BOM CSV files (if schematics are found)
307
+ - Download datasheets for BOM components
308
+ - Generate cheat sheets for MCU datasheets, errata, and schematics
309
+ - Store vector embeddings for RAG queries
310
+
311
+ #### File Change Detection
312
+
313
+ The pipeline tracks files using MD5 checksums stored in `.multifactor/file_docket.json`. When you run the pipeline:
314
+
315
+ - **New files**: Automatically processed
316
+ - **Unchanged files**: Skipped (MD5 matches previous run)
317
+ - **Modified files**: You'll be prompted:
318
+ ```
319
+ ======================================================================
320
+ File has been modified: schematic.pdf
321
+ Path: C:\Projects\hardware\board_v1\schematic.pdf
322
+ Old MD5: abc123...
323
+ New MD5: def456...
324
+ ======================================================================
325
+ Do you want to delete the old file data and process the new version? (yes/no):
326
+ ```
327
+
328
+ - Answer **yes** to remove old data from the knowledge base and reprocess
329
+ - Answer **no** to skip the file and keep the old version
330
+
331
+ This ensures efficient processing by only analyzing new or changed files, while maintaining data consistency in the knowledge base.
332
+
333
+ ### CLI Commands Reference
334
+
335
+ The `mfcli` tool provides the following commands:
336
+
337
+ - **`mfcli init`** - Initialize a new project in the current directory
338
+ - **`mfcli run_pipeline`** - Run the analysis pipeline on the current directory
339
+ - **`mfcli web [--port PORT]`** - Start the web UI (default port: 9999)
340
+ - **`mfcli addfile FILE [--purpose PURPOSE]`** - Add a file to ChromaDB knowledge base
341
+
342
+ #### Start Web UI
343
+
344
+ Launch the interactive web interface:
345
+
346
+ ```bash
347
+ mfcli web
348
+ ```
349
+
350
+ **With custom port:**
351
+ ```bash
352
+ mfcli web --port 8080
353
+ ```
354
+
355
+ The web UI will be available at `http://localhost:9999/dev-ui/` (or your specified port).
356
+
357
+ ### Web UI Usage
358
+
359
+ The web interface allows you to:
360
+ - Upload and process individual files
361
+ - Run the pipeline on directories
362
+ - Query processed documents using natural language
363
+ - View processing status and results
364
+
365
+ **Example queries in web UI:**
366
+ - "What components are in the processed schematic?"
367
+ - "Tell me about the voltage ratings in the last datasheet"
368
+ - "What are the errata for this MCU?"
369
+
370
+ ## 🔌 MCP Server
371
+
372
+ This package includes a **Model Context Protocol (MCP) server** that exposes tools for AI assistants and development environments like Cline/Claude to interact with your engineering documentation knowledge base.
373
+
374
+ ### What is MCP?
375
+
376
+ The Model Context Protocol (MCP) is a standard that allows AI assistants to securely access external tools and data sources. The mfcli MCP server provides AI-powered access to your processed engineering documents through the local ChromaDB vector database.
377
+
378
+ ### Available Tools
379
+
380
+ The MCP server exposes the following tool:
381
+
382
+ #### `query_local_rag`
383
+
384
+ Query the local hardware knowledge base of processed engineering documents using natural language.
385
+
386
+ **Parameters:**
387
+ - `query` (required): Your search query (e.g., "MSPM0L130x", "power management", "IEC 61000-4-2")
388
+ - `project_name` (optional): The name of the project to query. If not provided, uses the last known project name from previous queries.
389
+ - `n_results` (optional): Number of results to return (1-20, default: 8)
390
+
391
+ **Returns:**
392
+ - Document chunks matching your query
393
+ - Metadata (file names, document types)
394
+ - Similarity scores (lower distance = more relevant)
395
+ - ChromaDB database path
396
+ - Project name that was used for the query
397
+
398
+ **Note:** The function automatically remembers the last project name used, so you only need to specify `project_name` for the first query or when switching between projects.
399
+
400
+ **Example queries:**
401
+ - "MSPM0L130x specifications"
402
+ - "What are the voltage requirements?"
403
+ - "MCU pin configurations"
404
+ - "Component datasheets for capacitors"
405
+
406
+ ### Configuration for Cline/Claude
407
+
408
+ To use the MCP server with Cline (or other MCP-compatible clients), add the following configuration to your MCP settings file:
409
+
410
+ **Configuration Location:**
411
+ - **VS Code (Cline)**: `%APPDATA%\Code\User\globalStorage\saoudrizwan.claude-dev\settings\cline_mcp_settings.json`
412
+ - **Cline standalone**: `~/.cline/mcp_settings.json`
413
+
414
+ **Configuration:**
415
+
416
+ ```json
417
+ {
418
+ "mcpServers": {
419
+ "mfcli-mcp": {
420
+ "disabled": false,
421
+ "timeout": 60,
422
+ "type": "stdio",
423
+ "command": "python",
424
+ "args": ["-m", "mfcli.mcp.server"]
425
+ }
426
+ }
427
+ }
428
+ ```
429
+
430
+ ### Setup Instructions
431
+
432
+ 1. **Install mfcli system-wide** (see the Installation section above). From the root of the cloned repository, run:
433
+ ```bash
434
+ pip install .
435
+ ```
436
+
437
+ 2. **Process your engineering documents** to populate the knowledge base:
438
+ ```bash
439
+ cd /path/to/hardware/files
440
+ mfcli init
441
+ mfcli run_pipeline
442
+ ```
443
+
444
+ 3. **Add the MCP configuration** to your Cline/Claude settings file (see Configuration above)
445
+
446
+ 4. **Restart Cline/Claude** to load the MCP server
447
+
448
+ 5. **Use the tool** in your AI assistant:
449
+ - Ask questions like: "Query the local RAG for MSPM0L130x in project test"
450
+ - The assistant will use the `query_local_rag` tool to search your documents
451
+
452
+ ### Troubleshooting MCP Server
453
+
454
+ **Error**: `Module 'mfcli' not found`
455
+ - **Solution**: Ensure mfcli is installed system-wide (not just in a virtual environment)
456
+ ```bash
457
+ deactivate # Exit any virtual environment
458
+ pip install .
459
+ ```
460
+
461
+ **Error**: `ChromaDB directory not found`
462
+ - **Solution**: Run the pipeline at least once to create the vector database:
463
+ ```bash
464
+ mfcli init
465
+ mfcli run_pipeline
466
+ ```
467
+
468
+ **Error**: `MCP server timeout`
469
+ - **Solution**: Increase the timeout value in your MCP settings (default: 60 seconds)
470
+
471
+ **Server not connecting:**
472
+ - Verify the MCP server configuration in your settings file
473
+ - Check that Python is in your system PATH
474
+ - Restart your IDE/editor after updating MCP settings
475
+
476
+ ### MCP Server Architecture
477
+
478
+ The MCP server is implemented in `mfcli/mcp/` with the following structure:
479
+
480
+ ```
481
+ mfcli/mcp/
482
+ ├── server.py # MCP server entry point
483
+ ├── mcp_instance.py # MCP server instance and tool definitions
484
+ └── tools/
485
+ └── query_knowledgebase.py # RAG query implementation
486
+ ```
487
+
488
+ The server connects to your local ChromaDB instance (located in your system's application data directory) and provides semantic search capabilities over all processed engineering documents.
489
+
490
+ ## 🔄 Pipeline
491
+
492
+ The pipeline processes engineering documents in two main phases:
493
+
494
+ ### Phase 1: Pre-processing
495
+
496
+ For each file in the input directory:
497
+
498
+ 1. **Classification & Validation** (`classifier.py`)
499
+ - Determines file type (PDF, EDIF, CSV, etc.)
500
+ - Validates file integrity and MIME type
501
+ - Checks file size limits
502
+
503
+ 2. **Gemini File Upload** (PDFs only)
504
+ - Uploads PDF files to Gemini's Files API for vision-based processing
505
+
506
+ 3. **Text Extraction** (`extractor.py`)
507
+ - Extracts text content from documents
508
+ - Handles various formats (PDF, netlist formats, CSV)
509
+
510
+ 4. **Sub-type Classification** (`sub_classifier.py`)
511
+ - Determines document sub-type (e.g., schematic, BOM, datasheet, MCU datasheet, errata)
512
+ - Uses LLM analysis when necessary
513
+
514
+ 5. **Schema Mapping** (`schema_mapper.py`)
515
+ - Maps document structure to database schemas
516
+ - Skipped for schemaless files like schematics
517
+
518
+ 6. **Data Parsing** (`parser.py`)
519
+ - Parses structured data from documents
520
+ - Stores in SQLite database
521
+
522
+ 7. **Data Enrichment** (`data_enricher.py`)
523
+ - Enriches parsed data with additional information
524
+ - Downloads component datasheets for BOM entries
525
+
526
+ ### Phase 2: Analysis & Generation
527
+
528
+ After all files are pre-processed:
529
+
530
+ 1. **Netlist-to-BOM Mapping** (`bom_netlist_mapper.py`)
531
+ - Maps netlist components to BOM entries
532
+ - Correlates design files with component lists
533
+
534
+ 2. **File Generation** (`generator.py`)
535
+ - **BOM CSV**: Extracts components from schematics, generates CSV with reference, value, quantity, manufacturer, MPN, description
536
+ - **Cheat Sheets**: Generates JSON cheat sheets for:
537
+ - MCU datasheets (register maps, peripherals, specs)
538
+ - MCU errata (known issues, workarounds)
539
+ - Debug setup (pin configurations, debugging instructions)
540
+ - Functional blocks (system architecture, block diagrams)
541
+
542
+ ### Supported File Types
543
+
544
+ - **PDF**: Schematics, datasheets, MCU documentation, errata sheets
545
+ - **EDIF**: Electronic Design Interchange Format netlists
546
+ - **PADS**: PADS ASCII netlist format
547
+ - **KiCad**: Legacy netlist (.net) and SPICE circuit (.cir) formats
548
+ - **CSV**: Bill of Materials files
549
+
550
+ ## 📂 Output Directories
551
+
552
+ mfcli creates files and directories in three locations:
553
+
554
+ ### 1. Project Metadata Directory (`.multifactor`)
555
+
556
+ Created in your hardware design files directory when you run `mfcli init`:
557
+
558
+ ```
559
+ your_hardware_files_directory/
560
+ └── .multifactor/
561
+ ├── config.json # Project configuration (project name, etc.)
562
+ └── file_docket.json # File tracking and processing metadata
563
+ ```
564
+
565
+ This folder stores project-specific configuration and tracks which files have been processed.
566
+
567
+ ### 2. User Application Data
568
+
569
+ Platform-specific storage for global application data:
570
+
571
+ **Windows:**
572
+ ```
573
+ C:\Users\<username>\AppData\Local\Multifactor\
574
+ └── chromadb/ # Vector embeddings database
575
+ ```
576
+
577
+ **macOS:**
578
+ ```
579
+ /Users/<username>/Library/Application Support/Multifactor/
580
+ └── chromadb/ # Vector embeddings database
581
+ ```
582
+
583
+ **Linux:**
584
+ ```
585
+ ~/.local/share/Multifactor/
586
+ └── chromadb/ # Vector embeddings database
587
+ ```
588
+
589
+ **Contents:**
590
+ - `chromadb/` - Vector embeddings of processed documents for RAG queries
591
+
592
+ ### 3. Project Output Directories
593
+
594
+ Created in the parent directory of your hardware files directory:
595
+
596
+ ```
597
+ <parent_directory>/
598
+ ├── generated_files/ # BOM CSV files generated from schematics
599
+ ├── hw_cheat_sheets/ # JSON cheat sheets (MCU, errata, debug, functional blocks)
600
+ ├── data_sheets/ # Downloaded component datasheets (from BOM processing)
601
+ ├── agent_instructions/ # (Reserved for future use)
602
+ └── requirements/ # (Reserved for future use)
603
+ ```
604
+
605
+ **Example:**
606
+ If your hardware files are in `C:\Projects\hardware\board_v1\`, project metadata stays in that directory and generated outputs are created in its parent, `C:\Projects\hardware\`:
607
+ - `C:\Projects\hardware\board_v1\.multifactor\` - Project metadata
608
+ - `C:\Projects\hardware\generated_files\bom.csv` - Generated BOM
609
+ - `C:\Projects\hardware\hw_cheat_sheets\schematic_cheat_sheet.json` - Cheat sheets
610
+ - `C:\Projects\hardware\data_sheets\STM32F4_datasheet.pdf` - Downloaded datasheets
611
+
612
+ ## 💾 Data Storage
613
+
614
+ ### SQLite Database
615
+
616
+ Located at `sessions.db` in the project root (or path specified in `.env`).
617
+
618
+ **Stores:**
619
+ - Pipeline run metadata and status
620
+ - File metadata and processing results
621
+ - Parsed component data (BOMs, netlists, datasheets)
622
+ - MCU information and errata
623
+ - ADK session data and conversation history
624
+
625
+ ### ChromaDB Vector Store
626
+
627
+ Located in the user application data directory.
628
+
629
+ **Stores:**
630
+ - Vector embeddings of processed documents
631
+ - Enables semantic search and RAG queries
632
+ - Uses OpenAI embeddings (text-embedding-3-small)
633
+
634
+ ### S3 Storage (Optional)
635
+
636
+ If AWS credentials are configured:
637
+ - Uploaded files
638
+ - Generated outputs
639
+ - Long-term document storage
640
+
641
+ ## 📁 Project Structure
642
+
643
+ ```
644
+ multifactor-adk-backend/
645
+ ├── mfcli/
646
+ │ ├── agents/
647
+ │ │ ├── controller/ # Main controller agent
648
+ │ │ │ ├── agent.py # Agent definition
649
+ │ │ │ ├── config.yaml # Agent configuration
650
+ │ │ │ └── tools.py # Agent tools
651
+ │ │ └── tools/
652
+ │ │ └── general.py # Shared tools
653
+ │ ├── alembic/ # Database migrations
654
+ │ ├── cli/
655
+ │ │ └── main.py # CLI entry point (mfcli)
656
+ │ ├── client/ # External service clients
657
+ │ │ ├── chroma_db.py # ChromaDB vector store
658
+ │ │ ├── gemini.py # Gemini API client
659
+ │ │ ├── llama_parse.py # LlamaParse client
660
+ │ │ └── vector_db.py # Vector DB interface
661
+ │ ├── constants/ # Enums and constants
662
+ │ ├── crud/ # Database operations
663
+ │ ├── digikey/ # DigiKey API integration
664
+ │ ├── models/ # SQLAlchemy models
665
+ │ ├── pipeline/
666
+ │ │ ├── pipeline.py # Main pipeline orchestration
667
+ │ │ ├── classifier.py # File classification
668
+ │ │ ├── extractor.py # Text extraction
669
+ │ │ ├── sub_classifier.py # Document sub-typing
670
+ │ │ ├── schema_mapper.py # Schema mapping
671
+ │ │ ├── parser.py # Data parsing
672
+ │ │ ├── data_enricher.py # Data enrichment
673
+ │ │ ├── analysis/
674
+ │ │ │ ├── bom_netlist_mapper.py
675
+ │ │ │ └── generators/ # Output generators
676
+ │ │ │ ├── generator.py # Main generator
677
+ │ │ │ ├── bom/ # BOM generation
678
+ │ │ │ ├── debug_setup/ # Debug setup cheat sheets
679
+ │ │ │ ├── functional_blocks/ # Functional block diagrams
680
+ │ │ │ ├── mcu/ # MCU documentation
681
+ │ │ │ └── mcu_errata/ # Errata cheat sheets
682
+ │ │ ├── extractors/ # Format-specific extractors
683
+ │ │ └── parsers/
684
+ │ │ └── netlist/ # Netlist parsers (EDIF, KiCad, PADS)
685
+ │ ├── tests/ # Unit tests
686
+ │ └── utils/ # Utility functions
687
+ │ ├── config.py
688
+ │ ├── directory_manager.py # Output directory management
689
+ │ ├── logger.py
690
+ │ └── ...
691
+ ├── .env # Environment configuration (create this)
692
+ ├── .gitignore
693
+ ├── alembic.ini # Alembic configuration
694
+ ├── pyproject.toml # Package configuration
695
+ ├── requirements.txt # Python dependencies
696
+ └── README.md # This file
697
+ ```
698
+
699
+ ## 🛠️ Development
700
+
701
+ ### Running the Pipeline
702
+
703
+ First, navigate to your hardware files directory and initialize:
704
+
705
+ ```bash
706
+ cd /path/to/hardware/files
707
+ mfcli init
708
+ mfcli run_pipeline
709
+ ```
710
+
711
+ ### Starting the Web UI
712
+
713
+ ```bash
714
+ mfcli web --port 9999
715
+ ```
716
+
717
+ ### Development Status
718
+
719
+ ⚠️ **This project is currently in development.** Features and APIs may change.
720
+
721
+ ### Database Migrations
722
+
723
+ This project uses Alembic for database schema management.
724
+
725
+ #### Create a New Migration
726
+ ```bash
727
+ alembic revision -m "description of changes"
728
+ ```
729
+
730
+ #### Apply Migrations
731
+ ```bash
732
+ alembic upgrade head
733
+ ```
734
+
735
+ #### Rollback Migration
736
+ ```bash
737
+ alembic downgrade -1
738
+ ```
739
+
740
+ ### Logging
741
+
742
+ Logging is configured in `utils/logger.py`. To emit log messages from your own code for debugging:
743
+
744
+ ```python
745
+ from mfcli.utils.logger import get_logger
746
+ logger = get_logger(__name__)
747
+ logger.info("Message")
748
+ logger.error("Error message")
749
+ ```
750
+
751
+ ### Running Tests
752
+
753
+ ```bash
754
+ pytest app/tests/
755
+ ```
756
+
757
+ ## 🐛 Troubleshooting
758
+
759
+ ### Common Issues
760
+
761
+ #### 1. Installation Issues
762
+
763
+ **Error**: `ModuleNotFoundError: No module named 'google'`
764
+ - **Solution**: Reinstall the package: `pip install .`
765
+
766
+ **Error**: `Command 'mfcli' not found`
767
+ - **Solution**: Ensure the virtual environment is activated and the package is installed: `pip install .`
768
+
769
+ #### 2. API Key Errors
770
+
771
+ **Error**: `google.api_core.exceptions.Unauthenticated: 401 API key not valid`
772
+ - **Solution**: Verify `google_api_key` in `.env` file
773
+ - Get key from: https://aistudio.google.com/app/apikey
774
+
775
+ **Error**: `OpenAI API error`
776
+ - **Solution**: Check `openai_api_key` in `.env`
777
+ - Ensure API key has billing enabled
778
+
779
+ #### 3. ChromaDB Issues
780
+
781
+ **Error**: `ChromaDB directory not found`
782
+ - **Solution**: The ChromaDB directory is created automatically on first run in the user application data directory
783
+
784
+ **Error**: `Embedding dimension mismatch`
785
+ - **Solution**: Delete ChromaDB directory and restart to rebuild with correct dimensions
786
+
787
+ #### 4. Pipeline Processing Failures
788
+
789
+ **Error**: `Could not find metadata file. Please initialize this repo with "mfcli init"`
790
+ - **Solution**: You need to run `mfcli init` in your hardware files directory before running `mfcli run_pipeline`
791
+
792
+ **Error**: `File not found`
793
+ - **Solution**: Make sure you're running `mfcli run_pipeline` from within your hardware files directory (where you ran `mfcli init`)
794
+
795
+ **Error**: `File extension is not supported`
796
+ - **Solution**: Check that your files are in supported formats (PDF, EDIF, CSV, .net, .cir, .asc)
797
+
798
+ **Error**: `No components extracted from schematic`
799
+ - **Solution**:
800
+ - Ensure schematic PDF is clear and readable
801
+ - Check that component designators are visible
802
+ - Verify file is an actual schematic (not layout or other document type)
803
+
804
+ #### 5. Database Issues
805
+
806
+ **Error**: `Database connection failed`
807
+ - **Solution**: Check that the SQLite database path in `.env` is valid
808
+ - Run migrations: `alembic upgrade head`
809
+
810
+ ### Verify Configuration
811
+
812
+ Run this Python snippet to verify your configuration:
813
+
814
+ ```python
815
+ from mfcli.utils.config import get_config
816
+ config = get_config()
817
+ print(f"Google API Key set: {'Yes' if config.google_api_key else 'No'}")
818
+ print(f"OpenAI API Key set: {'Yes' if config.openai_api_key else 'No'}")
819
+ print(f"Database path: {config.sqlite_db_path}")
820
+ ```
821
+
822
+ ### Debug Mode
823
+
824
+ For detailed debugging, check the console output when running `mfcli run_pipeline` or `mfcli web`. The application uses structured logging to help diagnose issues.
825
+
826
+ ### Getting Help
827
+
828
+ - **Issues**: [GitHub Issues](https://github.com/MultifactorAI/multifactor-adk-backend/issues)
829
+ - **Documentation**: Check this README and inline code documentation
830
+
831
+ ## 📄 License
832
+
833
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
834
+
835
+ ## 🤝 Contributing
836
+
837
+ This project is in active development. Contributions, issues, and feature requests are welcome!
838
+
839
+ ---
840
+
841
+ **Built with [Google Gemini](https://ai.google.dev/) and [Google Agent Development Kit (ADK)](https://github.com/google/adk)**