PyPI - mfcli - Versions diffs - 0.2.0__py3-none-any.whl - Mend

mfcli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (136) hide show

mfcli/.env.example +72 -0
mfcli/__init__.py +0 -0
mfcli/agents/__init__.py +0 -0
mfcli/agents/controller/__init__.py +0 -0
mfcli/agents/controller/agent.py +19 -0
mfcli/agents/controller/config.yaml +27 -0
mfcli/agents/controller/tools.py +42 -0
mfcli/agents/tools/general.py +118 -0
mfcli/alembic/env.py +61 -0
mfcli/alembic/script.py.mako +28 -0
mfcli/alembic/versions/6ccc0c7c397c_added_fields_to_pdf_parts_model.py +39 -0
mfcli/alembic/versions/769019ef4870_added_gemini_file_path_to_pdf_part_model.py +33 -0
mfcli/alembic/versions/7a2e3a779fdc_added_functional_block_and_component_.py +54 -0
mfcli/alembic/versions/7d5adb2a47a7_added_pdf_parts_model.py +41 -0
mfcli/alembic/versions/7fcb7d6a5836_init.py +167 -0
mfcli/alembic/versions/e0f2b5765c72_added_cascade_delete_for_models_that_.py +32 -0
mfcli/alembic.ini +147 -0
mfcli/cli/__init__.py +0 -0
mfcli/cli/dependencies.py +59 -0
mfcli/cli/main.py +192 -0
mfcli/client/__init__.py +0 -0
mfcli/client/chroma_db.py +184 -0
mfcli/client/docling.py +44 -0
mfcli/client/gemini.py +252 -0
mfcli/client/llama_parse.py +38 -0
mfcli/client/vector_db.py +93 -0
mfcli/constants/__init__.py +0 -0
mfcli/constants/base_enum.py +18 -0
mfcli/constants/directory_names.py +1 -0
mfcli/constants/file_types.py +189 -0
mfcli/constants/gemini.py +1 -0
mfcli/constants/openai.py +6 -0
mfcli/constants/pipeline_run_status.py +3 -0
mfcli/crud/__init__.py +0 -0
mfcli/crud/file.py +42 -0
mfcli/crud/functional_blocks.py +26 -0
mfcli/crud/netlist.py +18 -0
mfcli/crud/pipeline_run.py +17 -0
mfcli/crud/project.py +99 -0
mfcli/digikey/__init__.py +0 -0
mfcli/digikey/digikey.py +105 -0
mfcli/main.py +5 -0
mfcli/mcp/__init__.py +0 -0
mfcli/mcp/configs/cline_mcp_settings.json +11 -0
mfcli/mcp/configs/mfcli.mcp.json +7 -0
mfcli/mcp/mcp_instance.py +6 -0
mfcli/mcp/server.py +37 -0
mfcli/mcp/state_manager.py +51 -0
mfcli/mcp/tools/__init__.py +0 -0
mfcli/mcp/tools/query_knowledgebase.py +108 -0
mfcli/models/__init__.py +10 -0
mfcli/models/base.py +10 -0
mfcli/models/bom.py +71 -0
mfcli/models/datasheet.py +10 -0
mfcli/models/debug_setup.py +64 -0
mfcli/models/file.py +43 -0
mfcli/models/file_docket.py +94 -0
mfcli/models/file_metadata.py +19 -0
mfcli/models/functional_blocks.py +94 -0
mfcli/models/llm_response.py +5 -0
mfcli/models/mcu.py +97 -0
mfcli/models/mcu_errata.py +26 -0
mfcli/models/netlist.py +59 -0
mfcli/models/pdf_parts.py +25 -0
mfcli/models/pipeline_run.py +34 -0
mfcli/models/project.py +27 -0
mfcli/models/project_metadata.py +15 -0
mfcli/pipeline/__init__.py +0 -0
mfcli/pipeline/analysis/__init__.py +0 -0
mfcli/pipeline/analysis/bom_netlist_mapper.py +28 -0
mfcli/pipeline/analysis/generators/__init__.py +0 -0
mfcli/pipeline/analysis/generators/bom/__init__.py +0 -0
mfcli/pipeline/analysis/generators/bom/bom.py +74 -0
mfcli/pipeline/analysis/generators/debug_setup/__init__.py +0 -0
mfcli/pipeline/analysis/generators/debug_setup/debug_setup.py +71 -0
mfcli/pipeline/analysis/generators/debug_setup/instructions.py +150 -0
mfcli/pipeline/analysis/generators/functional_blocks/__init__.py +0 -0
mfcli/pipeline/analysis/generators/functional_blocks/functional_blocks.py +93 -0
mfcli/pipeline/analysis/generators/functional_blocks/instructions.py +34 -0
mfcli/pipeline/analysis/generators/functional_blocks/validator.py +94 -0
mfcli/pipeline/analysis/generators/generator.py +258 -0
mfcli/pipeline/analysis/generators/generator_base.py +18 -0
mfcli/pipeline/analysis/generators/mcu/__init__.py +0 -0
mfcli/pipeline/analysis/generators/mcu/instructions.py +156 -0
mfcli/pipeline/analysis/generators/mcu/mcu.py +84 -0
mfcli/pipeline/analysis/generators/mcu_errata/__init__.py +1 -0
mfcli/pipeline/analysis/generators/mcu_errata/instructions.py +77 -0
mfcli/pipeline/analysis/generators/mcu_errata/mcu_errata.py +95 -0
mfcli/pipeline/analysis/generators/summary/__init__.py +0 -0
mfcli/pipeline/analysis/generators/summary/summary.py +47 -0
mfcli/pipeline/classifier.py +93 -0
mfcli/pipeline/data_enricher.py +15 -0
mfcli/pipeline/extractor.py +34 -0
mfcli/pipeline/extractors/__init__.py +0 -0
mfcli/pipeline/extractors/pdf.py +12 -0
mfcli/pipeline/parser.py +120 -0
mfcli/pipeline/parsers/__init__.py +0 -0
mfcli/pipeline/parsers/netlist/__init__.py +0 -0
mfcli/pipeline/parsers/netlist/edif.py +93 -0
mfcli/pipeline/parsers/netlist/kicad_legacy_net.py +326 -0
mfcli/pipeline/parsers/netlist/kicad_spice.py +135 -0
mfcli/pipeline/parsers/netlist/pads.py +185 -0
mfcli/pipeline/parsers/netlist/protel.py +166 -0
mfcli/pipeline/parsers/netlist/protel_detector.py +29 -0
mfcli/pipeline/pipeline.py +419 -0
mfcli/pipeline/preprocessors/__init__.py +0 -0
mfcli/pipeline/preprocessors/user_guide.py +127 -0
mfcli/pipeline/run_context.py +32 -0
mfcli/pipeline/schema_mapper.py +89 -0
mfcli/pipeline/sub_classifier.py +115 -0
mfcli/utils/__init__.py +0 -0
mfcli/utils/config.py +33 -0
mfcli/utils/configurator.py +324 -0
mfcli/utils/data_cleaner.py +82 -0
mfcli/utils/datasheet_vectorizer.py +281 -0
mfcli/utils/directory_manager.py +96 -0
mfcli/utils/file_upload.py +298 -0
mfcli/utils/files.py +16 -0
mfcli/utils/http_requests.py +54 -0
mfcli/utils/kb_lister.py +89 -0
mfcli/utils/kb_remover.py +173 -0
mfcli/utils/logger.py +28 -0
mfcli/utils/mcp_configurator.py +311 -0
mfcli/utils/migrations.py +18 -0
mfcli/utils/orm.py +43 -0
mfcli/utils/pdf_splitter.py +63 -0
mfcli/utils/query_service.py +22 -0
mfcli/utils/system_check.py +306 -0
mfcli/utils/tools.py +31 -0
mfcli/utils/vectorizer.py +28 -0
mfcli-0.2.0.dist-info/METADATA +841 -0
mfcli-0.2.0.dist-info/RECORD +136 -0
mfcli-0.2.0.dist-info/WHEEL +5 -0
mfcli-0.2.0.dist-info/entry_points.txt +3 -0
mfcli-0.2.0.dist-info/licenses/LICENSE +21 -0
mfcli-0.2.0.dist-info/top_level.txt +1 -0

mfcli-0.2.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,841 @@
+Metadata-Version: 2.4
+Name: mfcli
+Version: 0.2.0
+Summary: AI-powered CLI for analyzing hardware engineering documents
+Author: Multifactor AI
+License: MIT
+Project-URL: Homepage, https://github.com/MultifactorAI/multifactor-adk-backend
+Project-URL: Repository, https://github.com/MultifactorAI/multifactor-adk-backend
+Project-URL: Issues, https://github.com/MultifactorAI/multifactor-adk-backend/issues
+Keywords: hardware,engineering,AI,MCP,RAG,electronics,schematic,BOM,datasheet
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Electronic Design Automation (EDA)
+Requires-Python: <3.13,>=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: google-adk
+Requires-Dist: python-dotenv
+Requires-Dist: fastapi
+Requires-Dist: uvicorn
+Requires-Dist: pandas
+Requires-Dist: pyyaml
+Requires-Dist: sqlalchemy
+Requires-Dist: pydantic-settings==2.10.1
+Requires-Dist: alembic
+Requires-Dist: werkzeug
+Requires-Dist: boto3
+Requires-Dist: botocore
+Requires-Dist: pydantic==2.11.9
+Requires-Dist: protobuf
+Requires-Dist: sqlmodel
+Requires-Dist: llama-parse
+Requires-Dist: llama-index-core
+Requires-Dist: requests
+Requires-Dist: openai
+Requires-Dist: langchain
+Requires-Dist: langchain-text-splitters
+Requires-Dist: tiktoken
+Requires-Dist: chromadb==1.3.4
+Requires-Dist: urllib3
+Requires-Dist: playwright
+Requires-Dist: pymupdf
+Requires-Dist: fastmcp
+Requires-Dist: docling
+Requires-Dist: pytest-asyncio
+Requires-Dist: pytest-mock
+Requires-Dist: trio
+Requires-Dist: pikepdf
+Provides-Extra: dev
+Requires-Dist: pytest; extra == "dev"
+Requires-Dist: pyinstaller; extra == "dev"
+Requires-Dist: build; extra == "dev"
+Requires-Dist: twine; extra == "dev"
+Dynamic: license-file
+# Multifactor ADK Backend
+[![Development Status](https://img.shields.io/badge/status-in%20development-yellow)](https://github.com/MultifactorAI/multifactor-adk-backend)
+An AI-powered engineering document processing pipeline that intelligently analyzes hardware engineering documents including schematics, datasheets, BOMs, and netlists. Built with Google's Gemini models, the system extracts structured data, generates documentation, and enables semantic search across processed documents.
+## 🚀 Features
+- **CLI-Based Pipeline**: Process entire directories of engineering documents with a single command
+- **Intelligent Document Analysis**: Automated classification, text extraction, and schema mapping
+- **BOM Generation**: Extract components from schematics and generate CSV Bill of Materials
+- **Datasheet Enrichment**: Automatically download component datasheets from BOMs
+- **Cheat Sheet Generation**: AI-generated documentation for MCU datasheets, errata, debug setup, and functional blocks
+- **RAG-Powered Queries**: Query processed documents using ChromaDB-backed Retrieval-Augmented Generation
+- **Web UI**: Interactive interface for document processing and agent interaction (optional)
+- **File Type Support**: PDF, EDIF, PADS, KiCad netlists, CSV BOMs, and more
+## 📋 Table of Contents
+- [Architecture](#architecture)
+- [Prerequisites](#prerequisites)
+- [Installation](#installation)
+- [Configuration](#configuration)
+- [Usage](#usage)
+- [MCP Server](#mcp-server)
+- [Pipeline](#pipeline)
+- [Output Directories](#output-directories)
+- [Data Storage](#data-storage)
+- [Project Structure](#project-structure)
+- [Development](#development)
+- [Troubleshooting](#troubleshooting)
+## 🏗️ Architecture
+The system uses a streamlined architecture with a single controller agent that orchestrates a sequential processing pipeline:
+```
+Controller Agent
+├── Tools:
+│   ├── run_pipeline (Sequential pipeline execution)
+│   └── query_knowledgebase (RAG-based document queries)
+│
+└── Pipeline Stages:
+    ├── Pre-processing
+    │   ├── File classification & validation
+    │   ├── Text extraction
+    │   ├── Document sub-type detection (uses LLM when needed)
+    │   ├── Schema mapping (for structured documents)
+    │   ├── Data parsing
+    │   └── Data enrichment
+    │
+    └── Analysis & Generation
+        ├── Netlist-to-BOM mapping
+        └── File generation:
+            ├── BOM CSV files
+            └── JSON cheat sheets (MCU, errata, debug setup, functional blocks)
+```
+The pipeline processes files sequentially, making LLM calls only when necessary for tasks like document sub-classification and schema mapping, rather than using a hierarchy of sub-agents.
+## 📦 Prerequisites
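+- **Python 3.12** (3.12.x only; the package declares `Requires-Python: >=3.12,<3.13`, so Python 3.13+ is not yet supported)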
+- **Required API Keys**:
+  - Google API Key (for Gemini models)
+  - OpenAI API Key (for embeddings)
+  - LlamaParse Cloud API Key (for document parsing)
+  - DigiKey API credentials (client ID & secret, for datasheet downloads)
+  - AWS credentials (for S3 storage, optional)
+- **Database**: SQLite (automatically managed)
+## 🔧 Installation
+### Quick Install (Recommended)
+The easiest way to install mfcli is using our automated installation script with **pipx**, which provides isolated dependency management while making the CLI globally available.
+**Windows (PowerShell):**
+```powershell
+iwr -useb https://raw.githubusercontent.com/MultifactorAI/multifactor-adk-backend/main/install.ps1 | iex
+```
+**Linux/macOS:**
+```bash
+curl -fsSL https://raw.githubusercontent.com/MultifactorAI/multifactor-adk-backend/main/install.sh | bash
+```
+The script will:
+- ✅ Check Python 3.12 installation
+- ✅ Install pipx if needed
+- ✅ Install mfcli with isolated dependencies
+- ✅ Set up configuration directory
+- ✅ Make `mfcli` and `mfcli-mcp` commands globally available
+### Manual Installation
+If you prefer manual installation or the script doesn't work:
+#### Using pipx (Recommended)
+```bash
+# Install pipx if not already installed
+python -m pip install --user pipx
+python -m pipx ensurepath
+# Install mfcli from GitHub
+pipx install git+https://github.com/MultifactorAI/multifactor-adk-backend.git
+# Or install the released package from PyPI
+pipx install mfcli
+```
+**Why pipx?**
+- ✅ Isolated dependencies - no conflicts with other Python packages
+- ✅ Global CLI access - available in any terminal
+- ✅ No virtual environment activation needed
+- ✅ MCP server compatible - works with external tools like Cline
+- ✅ Easy updates: `pipx upgrade mfcli`
+#### Using pip (For Development)
+```bash
+# Clone the repository
+git clone https://github.com/MultifactorAI/multifactor-adk-backend.git
+cd multifactor-adk-backend
+# Create virtual environment
+python -m venv venv
+# Activate virtual environment
+# Windows:
+venv\Scripts\activate
+# macOS/Linux:
+source venv/bin/activate
+# Install in development mode
+pip install -e .
+```
+**Note**: If you plan to use the MCP server with Cline/Claude Code, install with pipx instead to ensure global availability.
+### Verify Installation
+After installation, verify everything is working:
+```bash
+# Check mfcli is installed
+mfcli --help
+# Run system health check
+mfcli doctor
+```
+## ⚙️ Configuration
+### Interactive Configuration Wizard (Recommended)
+The easiest way to configure mfcli is using the interactive wizard:
+```bash
+mfcli configure
+```
+This will guide you through setting up all required API keys with:
+- 🔗 Direct links to get each API key
+- ✅ Automatic validation of API keys
+- 📝 Smart defaults for vectorization settings
+- 💾 Automatic saving to the correct location
+### Manual Configuration
+Alternatively, create a `.env` file at:
+**Windows:** `C:\Users\<username>\Multifactor\.env`
+**macOS/Linux:** `~/Multifactor/.env`
+```ini
+# API Keys (Required)
+google_api_key=your_google_api_key
+openai_api_key=your_openai_api_key
+llama_cloud_api_key=your_llamaparse_api_key
+digikey_client_id=your_digikey_client_id
+digikey_client_secret=your_digikey_client_secret
+# Vector Database Configuration
+chunk_size=1000
+chunk_overlap=200
+embedding_model=text-embedding-3-small
+embedding_dimensions=1536
+```
+### Check Configuration
+To verify your configuration at any time:
+```bash
+# Check configuration status
+mfcli configure --check
+# Run comprehensive system check
+mfcli doctor
+```
+### Required API Keys & How to Get Them
+- **Google API Key**: [Google AI Studio](https://aistudio.google.com/app/apikey)
+- **OpenAI API Key**: [OpenAI Platform](https://platform.openai.com/api-keys)
+- **LlamaParse API Key**: [LlamaIndex Cloud](https://cloud.llamaindex.ai/)
+- **DigiKey API**: [DigiKey Developer Portal](https://developer.digikey.com/)
+- **AWS Credentials**: [AWS IAM Console](https://console.aws.amazon.com/iam/) (optional)
+**Tip:** The `mfcli configure` wizard provides these links interactively and validates your keys!
+## 🚀 Usage
+### Command-Line Interface
+#### Getting Started with a Project
+To analyze hardware design files, follow these steps:
+**1. Navigate to your hardware design files directory:**
+```bash
+cd C:\Projects\hardware\board_v1
+```
+**2. Initialize the project:**
+```bash
+mfcli init
+```
+You'll be prompted to enter a project name (3-45 characters, alphanumeric with underscores/hyphens allowed). This creates a `.multifactor` folder in your current directory containing:
+- `config.json` - Project configuration with your project name
+- `file_docket.json` - File tracking and metadata
+**3. Run the pipeline:**
+```bash
+mfcli run_pipeline
+```
+This will:
+- Process all supported files in the directory
+- Skip files that have already been processed (matching MD5 checksum)
+- Prompt for confirmation if a file has been modified (different MD5)
+- Generate BOM CSV files (if schematics are found)
+- Download datasheets for BOM components
+- Generate cheat sheets for MCU datasheets, errata, and schematics
+- Store vector embeddings for RAG queries
+#### File Change Detection
+The pipeline tracks files using MD5 checksums stored in `.multifactor/file_docket.json`. When you run the pipeline:
+- **New files**: Automatically processed
+- **Unchanged files**: Skipped (MD5 matches previous run)
+- **Modified files**: You'll be prompted:
+  ```
+  ======================================================================
+  File has been modified: schematic.pdf
+  Path: C:\Projects\hardware\board_v1\schematic.pdf
+  Old MD5: abc123...
+  New MD5: def456...
+  ======================================================================
+  Do you want to delete the old file data and process the new version? (yes/no):
+  ```
+  - Answer **yes** to remove old data from the knowledge base and reprocess
+  - Answer **no** to skip the file and keep the old version
+This ensures efficient processing by only analyzing new or changed files, while maintaining data consistency in the knowledge base.
+### CLI Commands Reference
+The `mfcli` tool provides the following commands:
+- **`mfcli init`** - Initialize a new project in the current directory
+- **`mfcli run_pipeline`** - Run the analysis pipeline on the current directory
+- **`mfcli web [--port PORT]`** - Start the web UI (default port: 9999)
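+- **`mfcli addfile FILE [--purpose PURPOSE]`** - Add a file to the ChromaDB knowledge base
+- **`mfcli configure [--check]`** - Run the interactive configuration wizard, or check the current configuration status with `--check`
+- **`mfcli doctor`** - Run a comprehensive system health check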
+#### Start Web UI
+Launch the interactive web interface:
+```bash
+mfcli web
+```
+**With custom port:**
+```bash
+mfcli web --port 8080
+```
+The web UI will be available at `http://localhost:9999/dev-ui/` (or your specified port).
+### Web UI Usage
+The web interface allows you to:
+- Upload and process individual files
+- Run the pipeline on directories
+- Query processed documents using natural language
+- View processing status and results
+**Example queries in web UI:**
+- "What components are in the processed schematic?"
+- "Tell me about the voltage ratings in the last datasheet"
+- "What are the errata for this MCU?"
+## 🔌 MCP Server
+This package includes a **Model Context Protocol (MCP) server** that exposes tools for AI assistants and development environments like Cline/Claude to interact with your engineering documentation knowledge base.
+### What is MCP?
+The Model Context Protocol (MCP) is a standard that allows AI assistants to securely access external tools and data sources. The mfcli MCP server provides AI-powered access to your processed engineering documents through the local ChromaDB vector database.
+### Available Tools
+The MCP server exposes the following tool:
+#### `query_local_rag`
+Query the local hardware knowledge base of processed engineering documents using natural language.
+**Parameters:**
+- `query` (required): Your search query (e.g., "MSPM0L130x", "power management", "IEC 61000-4-2")
+- `project_name` (optional): The name of the project to query. If not provided, uses the last known project name from previous queries.
+- `n_results` (optional): Number of results to return (1-20, default: 8)
+**Returns:**
+- Document chunks matching your query
+- Metadata (file names, document types)
+- Similarity scores (lower distance = more relevant)
+- ChromaDB database path
+- Project name that was used for the query
+**Note:** The function automatically remembers the last project name used, so you only need to specify `project_name` for the first query or when switching between projects.
+**Example queries:**
+- "MSPM0L130x specifications"
+- "What are the voltage requirements?"
+- "MCU pin configurations"
+- "Component datasheets for capacitors"
+### Configuration for Cline/Claude
+To use the MCP server with Cline (or other MCP-compatible clients), add the following configuration to your MCP settings file:
+**Configuration Location:**
+- **VS Code (Cline)**: `%APPDATA%\Code\User\globalStorage\saoudrizwan.claude-dev\settings\cline_mcp_settings.json`
+- **Cline standalone**: `~/.cline/mcp_settings.json`
+**Configuration:**
+```json
+{
+  "mcpServers": {
+    "mfcli-mcp": {
+      "disabled": false,
+      "timeout": 60,
+      "type": "stdio",
+      "command": "python",
+      "args": ["-m", "mfcli.mcp.server"]
+    }
+  }
+}
+```
+### Setup Instructions
+1. **Install mfcli system-wide** (see Installation section above):
+   ```bash
+   pip install .
+   ```
+2. **Process your engineering documents** to populate the knowledge base:
+   ```bash
+   cd /path/to/hardware/files
+   mfcli init
+   mfcli run_pipeline
+   ```
+3. **Add the MCP configuration** to your Cline/Claude settings file (see Configuration above)
+4. **Restart Cline/Claude** to load the MCP server
+5. **Use the tool** in your AI assistant:
+   - Ask questions like: "Query the local RAG for MSPM0L130x in project test"
+   - The assistant will use the `query_local_rag` tool to search your documents
+### Troubleshooting MCP Server
+**Error**: `Module 'mfcli' not found`
+- **Solution**: Ensure mfcli is installed system-wide (not just in a virtual environment)
+  ```bash
+  deactivate  # Exit any virtual environment
+  pip install .
+  ```
+**Error**: `ChromaDB directory not found`
+- **Solution**: Run the pipeline at least once to create the vector database:
+  ```bash
+  mfcli init
+  mfcli run_pipeline
+  ```
+**Error**: `MCP server timeout`
+- **Solution**: Increase the timeout value in your MCP settings (default: 60 seconds)
+**Server not connecting:**
+- Verify the MCP server configuration in your settings file
+- Check that Python is in your system PATH
+- Restart your IDE/editor after updating MCP settings
+### MCP Server Architecture
+The MCP server is implemented in `mfcli/mcp/` with the following structure:
+```
+mfcli/mcp/
+├── server.py              # MCP server entry point
+├── mcp_instance.py        # MCP server instance and tool definitions
+└── tools/
+    └── query_knowledgebase.py  # RAG query implementation
+```
+The server connects to your local ChromaDB instance (located in your system's application data directory) and provides semantic search capabilities over all processed engineering documents.
+## 🔄 Pipeline
+The pipeline processes engineering documents in two main phases:
+### Phase 1: Pre-processing
+For each file in the input directory:
+1. **Classification & Validation** (`classifier.py`)
+   - Determines file type (PDF, EDIF, CSV, etc.)
+   - Validates file integrity and MIME type
+   - Checks file size limits
+2. **Gemini File Upload** (PDFs only)
+   - Uploads PDF files to Gemini's Files API for vision-based processing
+3. **Text Extraction** (`extractor.py`)
+   - Extracts text content from documents
+   - Handles various formats (PDF, netlist formats, CSV)
+4. **Sub-type Classification** (`sub_classifier.py`)
+   - Determines document sub-type (e.g., schematic, BOM, datasheet, MCU datasheet, errata)
+   - Uses LLM analysis when necessary
+5. **Schema Mapping** (`schema_mapper.py`)
+   - Maps document structure to database schemas
+   - Skipped for schemaless files like schematics
+6. **Data Parsing** (`parser.py`)
+   - Parses structured data from documents
+   - Stores in SQLite database
+7. **Data Enrichment** (`data_enricher.py`)
+   - Enriches parsed data with additional information
+   - Downloads component datasheets for BOM entries
+### Phase 2: Analysis & Generation
+After all files are pre-processed:
+1. **Netlist-to-BOM Mapping** (`bom_netlist_mapper.py`)
+   - Maps netlist components to BOM entries
+   - Correlates design files with component lists
+2. **File Generation** (`generator.py`)
+   - **BOM CSV**: Extracts components from schematics, generates CSV with reference, value, quantity, manufacturer, MPN, description
+   - **Cheat Sheets**: Generates JSON cheat sheets for:
+     - MCU datasheets (register maps, peripherals, specs)
+     - MCU errata (known issues, workarounds)
+     - Debug setup (pin configurations, debugging instructions)
+     - Functional blocks (system architecture, block diagrams)
+### Supported File Types
+- **PDF**: Schematics, datasheets, MCU documentation, errata sheets
+- **EDIF**: Electronic Design Interchange Format netlists
+- **PADS**: PADS ASCII netlist format
+- **KiCad**: Legacy netlist (.net) and SPICE circuit (.cir) formats
+- **CSV**: Bill of Materials files
+## 📂 Output Directories
+The pipeline creates directories in three locations:
+### 1. Project Metadata Directory (`.multifactor`)
+Created in your hardware design files directory when you run `mfcli init`:
+```
+your_hardware_files_directory/
+└── .multifactor/
+    ├── config.json         # Project configuration (project name, etc.)
+    └── file_docket.json    # File tracking and processing metadata
+```
+This folder stores project-specific configuration and tracks which files have been processed.
+### 2. User Application Data
+Platform-specific storage for global application data:
+**Windows:**
+```
+C:\Users\<username>\AppData\Local\Multifactor\
+└── chromadb/              # Vector embeddings database
+```
+**macOS:**
+```
+/Users/<username>/Library/Application Support/Multifactor/
+└── chromadb/              # Vector embeddings database
+```
+**Linux:**
+```
+~/.local/share/Multifactor/
+└── chromadb/              # Vector embeddings database
+```
+**Contents:**
+- `chromadb/` - Vector embeddings of processed documents for RAG queries
+### 3. Project Output Directories
+Created in the parent directory of your hardware files directory:
+```
+<parent_directory>/
+├── generated_files/         # BOM CSV files generated from schematics
+├── hw_cheat_sheets/         # JSON cheat sheets (MCU, errata, debug, functional blocks)
+├── data_sheets/             # Downloaded component datasheets (from BOM processing)
+├── agent_instructions/      # (Reserved for future use)
+└── requirements/            # (Reserved for future use)
+```
+**Example:**
+If your hardware files are in `C:\Projects\hardware\board_v1\`, outputs will be created in `C:\Projects\hardware\`:
+- `C:\Projects\hardware\board_v1\.multifactor\` - Project metadata
+- `C:\Projects\hardware\generated_files\bom.csv` - Generated BOM
+- `C:\Projects\hardware\hw_cheat_sheets\schematic_cheat_sheet.json` - Cheat sheets
+- `C:\Projects\hardware\data_sheets\STM32F4_datasheet.pdf` - Downloaded datasheets
+## 💾 Data Storage
+### SQLite Database
+Located at `sessions.db` in the project root (or path specified in `.env`).
+**Stores:**
+- Pipeline run metadata and status
+- File metadata and processing results
+- Parsed component data (BOMs, netlists, datasheets)
+- MCU information and errata
+- ADK session data and conversation history
+### ChromaDB Vector Store
+Located in the user application data directory.
+**Stores:**
+- Vector embeddings of processed documents
+- Enables semantic search and RAG queries
+- Uses OpenAI embeddings (text-embedding-3-small)
+### S3 Storage (Optional)
+If AWS credentials are configured:
+- Uploaded files
+- Generated outputs
+- Long-term document storage
+## 📁 Project Structure
+```
+multifactor-adk-backend/
+├── app/
+│   ├── agents/
+│   │   ├── controller/              # Main controller agent
+│   │   │   ├── agent.py            # Agent definition
+│   │   │   ├── config.yaml         # Agent configuration
+│   │   │   └── tools.py            # Agent tools
+│   │   └── tools/
+│   │       └── general.py          # Shared tools
+│   ├── alembic/                    # Database migrations
+│   ├── cli/
+│   │   └── main.py                 # CLI entry point (mfcli)
+│   ├── client/                     # External service clients
+│   │   ├── chroma_db.py           # ChromaDB vector store
+│   │   ├── gemini.py              # Gemini API client
+│   │   ├── llama_parse.py         # LlamaParse client
+│   │   └── vector_db.py           # Vector DB interface
+│   ├── constants/                  # Enums and constants
+│   ├── crud/                       # Database operations
+│   ├── digikey/                    # DigiKey API integration
+│   ├── models/                     # SQLAlchemy models
+│   ├── pipeline/
+│   │   ├── pipeline.py            # Main pipeline orchestration
+│   │   ├── classifier.py          # File classification
+│   │   ├── extractor.py           # Text extraction
+│   │   ├── sub_classifier.py      # Document sub-typing
+│   │   ├── schema_mapper.py       # Schema mapping
+│   │   ├── parser.py              # Data parsing
+│   │   ├── data_enricher.py       # Data enrichment
+│   │   ├── analysis/
+│   │   │   ├── bom_netlist_mapper.py
+│   │   │   └── generators/        # Output generators
+│   │   │       ├── generator.py   # Main generator
+│   │   │       ├── bom/          # BOM generation
+│   │   │       ├── debug_setup/  # Debug setup cheat sheets
+│   │   │       ├── functional_blocks/  # Functional block diagrams
+│   │   │       ├── mcu/          # MCU documentation
+│   │   │       └── mcu_errata/   # Errata cheat sheets
+│   │   ├── extractors/           # Format-specific extractors
+│   │   └── parsers/
+│   │       └── netlist/          # Netlist parsers (EDIF, KiCad, PADS)
+│   ├── tests/                     # Unit tests
+│   └── utils/                     # Utility functions
+│       ├── config.py
+│       ├── directory_manager.py   # Output directory management
+│       ├── logger.py
+│       └── ...
+├── .env                           # Environment configuration (create this)
+├── .gitignore
+├── alembic.ini                    # Alembic configuration
+├── pyproject.toml                 # Package configuration
+├── requirements.txt               # Python dependencies
+└── README.md                      # This file
+```
+## 🛠️ Development
+### Running the Pipeline
+First, navigate to your hardware files directory and initialize:
+```bash
+cd /path/to/hardware/files
+mfcli init
+mfcli run_pipeline
+```
+### Starting the Web UI
+```bash
+mfcli web --port 9999
+```
+### Development Status
+⚠️ **This project is currently in development.** Features and APIs may change.
+### Database Migrations
+This project uses Alembic for database schema management.
+#### Create a New Migration
+```bash
+alembic revision -m "description of changes"
+```
+#### Apply Migrations
+```bash
+alembic upgrade head
+```
+#### Rollback Migration
+```bash
+alembic downgrade -1
+```
+### Logging
+Logging is configured in `mfcli/utils/logger.py`. To emit log messages from your own code while debugging:
+```python
+from mfcli.utils.logger import get_logger
+logger = get_logger(__name__)
+logger.info("Message")
+logger.error("Error message")
+```
+### Running Tests
+```bash
+pytest app/tests/
+```
+## 🐛 Troubleshooting
+### Common Issues
+#### 1. Installation Issues
+**Error**: `ModuleNotFoundError: No module named 'google'`
+- **Solution**: Reinstall package: `pip install .`
+**Error**: `Command 'mfcli' not found`
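+- **Solution**: If you installed with pip, ensure the virtual environment is activated and the package is installed: `pip install .`. If you installed with pipx, run `python -m pipx ensurepath` and open a new terminal.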
+#### 2. API Key Errors
+**Error**: `google.api_core.exceptions.Unauthenticated: 401 API key not valid`
+- **Solution**: Verify `google_api_key` in `.env` file
+- Get key from: https://aistudio.google.com/app/apikey
+**Error**: `OpenAI API error`
+- **Solution**: Check `openai_api_key` in `.env`
+- Ensure API key has billing enabled
+#### 3. ChromaDB Issues
+**Error**: `ChromaDB directory not found`
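+- **Solution**: The ChromaDB directory is created automatically on first run in the user application data directory (e.g. `AppData\Local\Multifactor\chromadb` on Windows, `~/.local/share/Multifactor/chromadb` on Linux)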
+**Error**: `Embedding dimension mismatch`
+- **Solution**: Delete ChromaDB directory and restart to rebuild with correct dimensions
+#### 4. Pipeline Processing Failures
+**Error**: `Could not find metadata file. Please initialize this repo with "mfcli init"`
+- **Solution**: You need to run `mfcli init` in your hardware files directory before running `mfcli run_pipeline`
+**Error**: `File not found`
+- **Solution**: Make sure you're running `mfcli run_pipeline` from within your hardware files directory (where you ran `mfcli init`)
+**Error**: `File extension is not supported`
+- **Solution**: Check that your files are in supported formats (PDF, EDIF, CSV, .net, .cir, .asc)
+**Error**: `No components extracted from schematic`
+- **Solution**:
+  - Ensure schematic PDF is clear and readable
+  - Check that component designators are visible
+  - Verify file is an actual schematic (not layout or other document type)
+#### 5. Database Issues
+**Error**: `Database connection failed`
+- **Solution**: Check that SQLite database path in `.env` is valid
+- Run migrations: `alembic upgrade head`
+### Verify Configuration
+Run this Python snippet to verify your configuration:
+```python
+from mfcli.utils.config import get_config
+config = get_config()
+print(f"Google API Key set: {'Yes' if config.google_api_key else 'No'}")
+print(f"OpenAI API Key set: {'Yes' if config.openai_api_key else 'No'}")
+print(f"Database path: {config.sqlite_db_path}")
+```
+### Debug Mode
+For detailed debugging, check the console output when running `mfcli run_pipeline` or `mfcli web`. The application uses structured logging to help diagnose issues.
+### Getting Help
+- **Issues**: [GitHub Issues](https://github.com/MultifactorAI/multifactor-adk-backend/issues)
+- **Documentation**: Check this README and inline code documentation
+## 📄 License
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+## 🤝 Contributing
+This project is in active development. Contributions, issues, and feature requests are welcome!
+---
+**Built with [Google Gemini](https://ai.google.dev/) and [Google Agent Development Kit (ADK)](https://github.com/google/adk)**