mfcli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. mfcli/.env.example +72 -0
  2. mfcli/__init__.py +0 -0
  3. mfcli/agents/__init__.py +0 -0
  4. mfcli/agents/controller/__init__.py +0 -0
  5. mfcli/agents/controller/agent.py +19 -0
  6. mfcli/agents/controller/config.yaml +27 -0
  7. mfcli/agents/controller/tools.py +42 -0
  8. mfcli/agents/tools/general.py +118 -0
  9. mfcli/alembic/env.py +61 -0
  10. mfcli/alembic/script.py.mako +28 -0
  11. mfcli/alembic/versions/6ccc0c7c397c_added_fields_to_pdf_parts_model.py +39 -0
  12. mfcli/alembic/versions/769019ef4870_added_gemini_file_path_to_pdf_part_model.py +33 -0
  13. mfcli/alembic/versions/7a2e3a779fdc_added_functional_block_and_component_.py +54 -0
  14. mfcli/alembic/versions/7d5adb2a47a7_added_pdf_parts_model.py +41 -0
  15. mfcli/alembic/versions/7fcb7d6a5836_init.py +167 -0
  16. mfcli/alembic/versions/e0f2b5765c72_added_cascade_delete_for_models_that_.py +32 -0
  17. mfcli/alembic.ini +147 -0
  18. mfcli/cli/__init__.py +0 -0
  19. mfcli/cli/dependencies.py +59 -0
  20. mfcli/cli/main.py +192 -0
  21. mfcli/client/__init__.py +0 -0
  22. mfcli/client/chroma_db.py +184 -0
  23. mfcli/client/docling.py +44 -0
  24. mfcli/client/gemini.py +252 -0
  25. mfcli/client/llama_parse.py +38 -0
  26. mfcli/client/vector_db.py +93 -0
  27. mfcli/constants/__init__.py +0 -0
  28. mfcli/constants/base_enum.py +18 -0
  29. mfcli/constants/directory_names.py +1 -0
  30. mfcli/constants/file_types.py +189 -0
  31. mfcli/constants/gemini.py +1 -0
  32. mfcli/constants/openai.py +6 -0
  33. mfcli/constants/pipeline_run_status.py +3 -0
  34. mfcli/crud/__init__.py +0 -0
  35. mfcli/crud/file.py +42 -0
  36. mfcli/crud/functional_blocks.py +26 -0
  37. mfcli/crud/netlist.py +18 -0
  38. mfcli/crud/pipeline_run.py +17 -0
  39. mfcli/crud/project.py +99 -0
  40. mfcli/digikey/__init__.py +0 -0
  41. mfcli/digikey/digikey.py +105 -0
  42. mfcli/main.py +5 -0
  43. mfcli/mcp/__init__.py +0 -0
  44. mfcli/mcp/configs/cline_mcp_settings.json +11 -0
  45. mfcli/mcp/configs/mfcli.mcp.json +7 -0
  46. mfcli/mcp/mcp_instance.py +6 -0
  47. mfcli/mcp/server.py +37 -0
  48. mfcli/mcp/state_manager.py +51 -0
  49. mfcli/mcp/tools/__init__.py +0 -0
  50. mfcli/mcp/tools/query_knowledgebase.py +108 -0
  51. mfcli/models/__init__.py +10 -0
  52. mfcli/models/base.py +10 -0
  53. mfcli/models/bom.py +71 -0
  54. mfcli/models/datasheet.py +10 -0
  55. mfcli/models/debug_setup.py +64 -0
  56. mfcli/models/file.py +43 -0
  57. mfcli/models/file_docket.py +94 -0
  58. mfcli/models/file_metadata.py +19 -0
  59. mfcli/models/functional_blocks.py +94 -0
  60. mfcli/models/llm_response.py +5 -0
  61. mfcli/models/mcu.py +97 -0
  62. mfcli/models/mcu_errata.py +26 -0
  63. mfcli/models/netlist.py +59 -0
  64. mfcli/models/pdf_parts.py +25 -0
  65. mfcli/models/pipeline_run.py +34 -0
  66. mfcli/models/project.py +27 -0
  67. mfcli/models/project_metadata.py +15 -0
  68. mfcli/pipeline/__init__.py +0 -0
  69. mfcli/pipeline/analysis/__init__.py +0 -0
  70. mfcli/pipeline/analysis/bom_netlist_mapper.py +28 -0
  71. mfcli/pipeline/analysis/generators/__init__.py +0 -0
  72. mfcli/pipeline/analysis/generators/bom/__init__.py +0 -0
  73. mfcli/pipeline/analysis/generators/bom/bom.py +74 -0
  74. mfcli/pipeline/analysis/generators/debug_setup/__init__.py +0 -0
  75. mfcli/pipeline/analysis/generators/debug_setup/debug_setup.py +71 -0
  76. mfcli/pipeline/analysis/generators/debug_setup/instructions.py +150 -0
  77. mfcli/pipeline/analysis/generators/functional_blocks/__init__.py +0 -0
  78. mfcli/pipeline/analysis/generators/functional_blocks/functional_blocks.py +93 -0
  79. mfcli/pipeline/analysis/generators/functional_blocks/instructions.py +34 -0
  80. mfcli/pipeline/analysis/generators/functional_blocks/validator.py +94 -0
  81. mfcli/pipeline/analysis/generators/generator.py +258 -0
  82. mfcli/pipeline/analysis/generators/generator_base.py +18 -0
  83. mfcli/pipeline/analysis/generators/mcu/__init__.py +0 -0
  84. mfcli/pipeline/analysis/generators/mcu/instructions.py +156 -0
  85. mfcli/pipeline/analysis/generators/mcu/mcu.py +84 -0
  86. mfcli/pipeline/analysis/generators/mcu_errata/__init__.py +1 -0
  87. mfcli/pipeline/analysis/generators/mcu_errata/instructions.py +77 -0
  88. mfcli/pipeline/analysis/generators/mcu_errata/mcu_errata.py +95 -0
  89. mfcli/pipeline/analysis/generators/summary/__init__.py +0 -0
  90. mfcli/pipeline/analysis/generators/summary/summary.py +47 -0
  91. mfcli/pipeline/classifier.py +93 -0
  92. mfcli/pipeline/data_enricher.py +15 -0
  93. mfcli/pipeline/extractor.py +34 -0
  94. mfcli/pipeline/extractors/__init__.py +0 -0
  95. mfcli/pipeline/extractors/pdf.py +12 -0
  96. mfcli/pipeline/parser.py +120 -0
  97. mfcli/pipeline/parsers/__init__.py +0 -0
  98. mfcli/pipeline/parsers/netlist/__init__.py +0 -0
  99. mfcli/pipeline/parsers/netlist/edif.py +93 -0
  100. mfcli/pipeline/parsers/netlist/kicad_legacy_net.py +326 -0
  101. mfcli/pipeline/parsers/netlist/kicad_spice.py +135 -0
  102. mfcli/pipeline/parsers/netlist/pads.py +185 -0
  103. mfcli/pipeline/parsers/netlist/protel.py +166 -0
  104. mfcli/pipeline/parsers/netlist/protel_detector.py +29 -0
  105. mfcli/pipeline/pipeline.py +419 -0
  106. mfcli/pipeline/preprocessors/__init__.py +0 -0
  107. mfcli/pipeline/preprocessors/user_guide.py +127 -0
  108. mfcli/pipeline/run_context.py +32 -0
  109. mfcli/pipeline/schema_mapper.py +89 -0
  110. mfcli/pipeline/sub_classifier.py +115 -0
  111. mfcli/utils/__init__.py +0 -0
  112. mfcli/utils/config.py +33 -0
  113. mfcli/utils/configurator.py +324 -0
  114. mfcli/utils/data_cleaner.py +82 -0
  115. mfcli/utils/datasheet_vectorizer.py +281 -0
  116. mfcli/utils/directory_manager.py +96 -0
  117. mfcli/utils/file_upload.py +298 -0
  118. mfcli/utils/files.py +16 -0
  119. mfcli/utils/http_requests.py +54 -0
  120. mfcli/utils/kb_lister.py +89 -0
  121. mfcli/utils/kb_remover.py +173 -0
  122. mfcli/utils/logger.py +28 -0
  123. mfcli/utils/mcp_configurator.py +311 -0
  124. mfcli/utils/migrations.py +18 -0
  125. mfcli/utils/orm.py +43 -0
  126. mfcli/utils/pdf_splitter.py +63 -0
  127. mfcli/utils/query_service.py +22 -0
  128. mfcli/utils/system_check.py +306 -0
  129. mfcli/utils/tools.py +31 -0
  130. mfcli/utils/vectorizer.py +28 -0
  131. mfcli-0.2.0.dist-info/METADATA +841 -0
  132. mfcli-0.2.0.dist-info/RECORD +136 -0
  133. mfcli-0.2.0.dist-info/WHEEL +5 -0
  134. mfcli-0.2.0.dist-info/entry_points.txt +3 -0
  135. mfcli-0.2.0.dist-info/licenses/LICENSE +21 -0
  136. mfcli-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,77 @@
1
# Prompt fragments for the MCU errata cheat-sheet generator.
#
# ``errata_extraction_base_instructions`` is the shared preamble; the three
# task-specific prompts below embed it via f-string interpolation, so it must
# be defined first.
errata_extraction_base_instructions = (
    """
    You are the MCU Errata Cheat Sheet Generator agent. Your role is to analyze MCU errata
    documents (PDF files) and extract ONLY firmware-relevant issues - silicon bugs that can be
    addressed or worked around in firmware code. Exclude hardware-only issues that cannot be
    fixed in software. You will be given an MCU Errata file to analyze.
    """
)

# Instructions for the first pass: list every errata ID found in the document.
extract_errata_ids_instruction = (
    f"""
    {errata_extraction_base_instructions}

    Your job is to extract all the Errata IDs. Return a list of official IDs from the document (e.g., "I2C_01", "ADV0123")
    """
)

# Instructions for the per-ID pass: extract the full record for one erratum.
extract_errata_instructions = (
    f"""
    {errata_extraction_base_instructions}

    You will be given the errata ID to extract, and you will extract this info:

    a. IDENTIFICATION:
        - errata_id: Official ID from document (e.g., "I2C_01", "ADV0123")
        - title: Brief descriptive title
        - affected_modules: List of affected peripherals/modules
          Examples: ["I2C", "SPI", "UART", "ADC", "Timer", "DMA", "RTC"]
    b. SEVERITY CLASSIFICATION:
        - Critical: Can cause data corruption, system hang, or major malfunction
        - Major: Significant functional impact, workaround is complex
        - Minor: Minor inconvenience, easy workaround
    c. DETAILED INFORMATION:
        - description: Clear explanation of the bug
        - conditions: When/how the bug occurs
            * Specific register values
            * Timing conditions
            * Operating modes
            * Environmental conditions (temperature, voltage)
    d. FIRMWARE WORKAROUND:
        - firmware_workaround: Specific code-level workaround
          Examples:
            * "Add 10us delay after setting register X"
            * "Avoid using bits [7:5] in CONFIG register"
            * "Initialize peripheral in specific order: Step 1, Step 2, Step 3"
            * "Use polling instead of interrupts for this peripheral"
            * "Apply calibration value from factory settings"
        - Be SPECIFIC - provide actual steps/code guidance
    e. IMPACT:
        - impact: How this affects firmware operation
          Examples:
            * "May cause I2C communication failures"
            * "Incorrect ADC readings below 10% of range"
            * "System hang if DMA used with this peripheral"
    f. SILICON REVISIONS:
        - affected_revisions: Which chip revisions have this bug
          Examples: ["Rev A", "Rev B"], ["All revisions"], ["Rev 1.0 - 1.2"]
    """
)

# Instructions for the document-level pass: name, MCU and overall recommendations.
errata_document_summary_instructions = (
    f"""
    {errata_extraction_base_instructions}

    You will extract three items from this document:

    1. Errata document name, for example Silicon Errata Rev 1.2 - March 2024.

    2. MCU name, for example MSPM0L1306

    3. Top-level firmware recommendations.
       Examples:
        * "Always use polling mode for I2C on Rev A silicon"
        * "Add delays in ADC initialization sequence"
        * "Avoid simultaneous use of Timer3 and DMA Channel 2"
    """
)
@@ -0,0 +1,95 @@
1
+ import asyncio
2
+ from asyncio import Semaphore
3
+ from collections import defaultdict
4
+ from typing import Dict, List
5
+
6
+ from google.genai.types import File as GeminiFile
7
+
8
+ from mfcli.models.file import File
9
+ from mfcli.models.mcu_errata import ErrataIDs, ErrataItem, ErrataTopLevelSummary
10
+ from mfcli.pipeline.analysis.generators.generator_base import GeneratorBase
11
+ from mfcli.pipeline.analysis.generators.mcu_errata.instructions import (
12
+ extract_errata_ids_instruction,
13
+ extract_errata_instructions,
14
+ errata_document_summary_instructions
15
+ )
16
+ from mfcli.pipeline.run_context import PipelineRunContext
17
+ from mfcli.utils.logger import get_logger
18
+
19
+ logger = get_logger(__name__)
20
+
21
+
22
class ErrataCheatSheetGenerator(GeneratorBase):
    """Generate an MCU errata cheat sheet from uploaded errata files.

    The work is done in three kinds of Gemini calls, all made against
    ``self._uploads``: one call to list the errata IDs, one call per ID to
    extract its details, and one call for the document-level summary.
    """

    def __init__(self, context: PipelineRunContext, db_file: File, uploads: List[GeminiFile]):
        super().__init__(context, db_file, uploads)

    async def _extract_errata_ids(self) -> list[str]:
        """Ask the model for the list of errata IDs present in the uploads."""
        prompt = "Use the Errata file to extract errata_ids"
        errata_ids: ErrataIDs = await self._context.gemini.generate(
            prompt=prompt,
            instructions=extract_errata_ids_instruction,
            response_model=ErrataIDs,
            files=self._uploads
        )
        return errata_ids.ids

    async def _extract_errata(self, errata_id: str, sem: Semaphore) -> ErrataItem:
        """Extract one erratum; ``sem`` bounds how many calls run at once."""
        prompt = f"Extract errata info from ID: {errata_id}"
        async with sem:
            return await self._context.gemini.generate(
                prompt=prompt,
                instructions=extract_errata_instructions,
                response_model=ErrataItem,
                files=self._uploads
            )

    async def _generate_top_level_summary(self) -> ErrataTopLevelSummary:
        """Extract the document name, MCU name and top-level recommendations."""
        prompt = "Generate top-level document summary"
        return await self._context.gemini.generate(
            prompt=prompt,
            instructions=errata_document_summary_instructions,
            response_model=ErrataTopLevelSummary,
            files=self._uploads
        )

    async def _create_summary(self, errata: list[ErrataItem]) -> Dict:
        """Assemble the cheat-sheet payload from the extracted errata.

        Errata are bucketed by ``severity``; anything that is neither
        'Critical' nor 'Major' lands in the minor bucket. A per-module issue
        count is built from ``affected_modules``.
        """
        critical = []
        major = []
        minor = []
        module_issue_counts = defaultdict(int)
        for erratum in errata:
            if erratum.severity == 'Critical':
                critical.append(erratum.model_dump())
            elif erratum.severity == 'Major':
                major.append(erratum.model_dump())
            else:
                minor.append(erratum.model_dump())
            for module in erratum.affected_modules:
                module_issue_counts[module] += 1
        summary = await self._generate_top_level_summary()
        return {
            "errata_cheat_sheet": {
                "mcu_name": summary.mcu_name,
                "errata_document": summary.errata_document,
                "total_issues": len(errata),
                "critical_issues": critical,
                "major_issues": major,
                "minor_issues": minor,
                # Hand back a plain dict so consumers do not get a defaultdict
                # that silently creates keys on lookup.
                "summary_by_module": dict(module_issue_counts),
                "key_recommendations": summary.recommendations
            }
        }

    async def generate(self) -> Dict:
        """Run the full extraction and return the cheat-sheet dictionary.

        Per-ID extraction failures are logged and skipped, so one bad erratum
        does not abort the whole document.
        """
        errata_ids = await self._extract_errata_ids()
        sem = asyncio.Semaphore(5)
        tasks = [self._extract_errata(errata_id, sem) for errata_id in errata_ids]
        results: list[ErrataItem | BaseException] = await asyncio.gather(*tasks, return_exceptions=True)
        errata: list[ErrataItem] = []
        # gather() preserves input order, so results line up with errata_ids.
        for errata_id, result in zip(errata_ids, results):
            # BaseException (not Exception) so a returned CancelledError is
            # not mistaken for a successfully extracted erratum.
            if isinstance(result, BaseException):
                # No exception is being handled here, so pass the instance
                # explicitly to get its traceback into the log record.
                logger.error(f"Error extracting errata {errata_id}: {result}", exc_info=result)
                continue
            errata.append(result)
        return await self._create_summary(errata)
File without changes
@@ -0,0 +1,47 @@
1
+ from typing import List, Dict
2
+
3
+ from google.genai.types import File as GeminiFile
4
+
5
+ from mfcli.models.file import File
6
+ from mfcli.models.llm_response import LLMResponse
7
+ from mfcli.models.pdf_parts import PDFPart
8
+ from mfcli.pipeline.analysis.generators.generator_base import GeneratorBase
9
+ from mfcli.pipeline.run_context import PipelineRunContext
10
+
11
# Instructions passed to the model for every per-section summarization request
# issued by SummaryCheatSheetGenerator.generate.
user_guide_summary_instructions = (
    """
    You will receive sections of a hardware engineering user guide PDF.
    Your job is to summarize those sections.
    You will also receive the sections that have been summarized so far.
    Use the sections that have been summarized as context to generate new summaries.
    ONLY output the summary text, no other information.
    The summary text will be read directly by users.
    """
)
21
+
22
+
23
class SummaryCheatSheetGenerator(GeneratorBase):
    """Summarize a user guide one PDF section at a time.

    Sections are processed in ``section_no`` order, and every request carries
    the summaries produced so far as context for the next one.
    """

    def __init__(self, context: PipelineRunContext, db_file: File, uploads: List[GeminiFile]):
        super().__init__(context, db_file, uploads)

    async def generate(self) -> Dict:
        """Return ``{"summaries": [...]}`` with one entry per PDF part.

        Each entry holds the section number (``no``), ``title`` and the
        model's ``summary`` text.
        """
        # Sort a copy: sorting self._file.pdf_parts in place would reorder the
        # collection owned by the file object as a side effect.
        pdf_parts: List[PDFPart] = sorted(self._file.pdf_parts, key=lambda part: part.section_no)
        summaries = []
        for pdf_part in pdf_parts:
            prompt = f"Summarize the {pdf_part.title} section\n\nCurrent summaries:\n\n{summaries}"
            # Each section has its own upload, looked up by its Gemini file id.
            upload = self._context.gemini_file_cache[pdf_part.gemini_file_id]
            response: LLMResponse = await self._context.gemini.generate(
                prompt=prompt,
                instructions=user_guide_summary_instructions,
                response_model=LLMResponse,
                files=[upload]
            )
            summaries.append({
                "no": pdf_part.section_no,
                "title": pdf_part.title,
                "summary": response.text
            })
        return {
            "summaries": summaries
        }
@@ -0,0 +1,93 @@
1
+ import hashlib
2
+ import mimetypes
3
+ import os
4
+ from pathlib import Path
5
+
6
+ import pandas as pd
7
+
8
+ from werkzeug.utils import secure_filename
9
+
10
+ from mfcli.constants.file_types import SupportedFileTypes, FileTypes
11
+ from mfcli.models.file_metadata import FileMetadata
12
+ from mfcli.utils.logger import get_logger
13
+
14
# Upper bound on accepted file size: MAX_FILE_GB gibibytes, converted to bytes.
# get_file_metadata rejects any file whose size exceeds MAX_FILE_SIZE.
MAX_FILE_GB = 1
MAX_FILE_SIZE = 1024 * 1024 * 1024 * MAX_FILE_GB
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
def is_csv(file_path) -> bool:
    """Check that the first rows of ``file_path`` can be parsed as CSV.

    Only the first five rows are read, and undecodable bytes are ignored.

    Returns:
        True when pandas parses the sample without error.

    Raises:
        Exception: whatever ``pandas.read_csv`` raises when the content
            cannot be parsed (callers rely on the exception, not the return).
    """
    pd.read_csv(file_path, nrows=5, encoding_errors='ignore')
    return True
22
+
23
+
24
def validate_file(metadata: FileMetadata):
    """Run the content validator for the file's type, if one exists.

    Only CSV files have a validator; other types are logged and accepted.

    Raises:
        ValueError: if the validator fails. The original error is chained as
            the cause so its traceback is preserved.
    """
    logger.debug(f"Validating file: {metadata.name}")
    try:
        if metadata.type_id == FileTypes.CSV:
            is_csv(metadata.path)
        else:
            logger.debug(f"File type has no validator: {metadata.type_name}")
    except Exception as e:
        raise ValueError(f"The file is not a valid {metadata.type_name} file: {e}") from e
33
+
34
+
35
def get_file_metadata(file_path: str, is_datasheet: bool) -> tuple[FileMetadata, bytes]:
    """Validate a file on disk and build its ``FileMetadata``.

    The file type is taken from the extension of the sanitized, lower-cased
    file name. The file must exist, be a regular readable file, be non-empty
    and be no larger than ``MAX_FILE_SIZE``.

    Args:
        file_path: Path of the file to inspect.
        is_datasheet: Stored unchanged on the returned metadata.

    Returns:
        A ``(metadata, content)`` tuple, where ``content`` is the full file
        contents as bytes.

    Raises:
        ValueError: if the extension is missing or unsupported, or the file
            is missing, not a regular file, unreadable, empty or too large.
    """
    logger.debug(f"Collecting file metadata: {file_path}")
    file_name = os.path.basename(file_path)
    file_name = secure_filename(file_name).lower().strip()

    file_ext = os.path.splitext(file_name)[1]
    file_type_name = file_ext.replace('.', '').upper()
    if not file_type_name:
        raise ValueError("File requires an extension")
    if file_type_name not in SupportedFileTypes:
        raise ValueError(f"File extension is not supported: {file_type_name}")
    file_type_id = FileTypes[file_type_name].value
    logger.debug(f"File type id: {file_type_id}")

    path = Path(file_path)
    if not path.exists():
        raise ValueError("File does not exist")
    # A directory passes exists() but would fail later in open() with an
    # OSError instead of the ValueError callers expect.
    if not path.is_file():
        raise ValueError(f"Path is not a regular file: {file_path}")
    if not os.access(path, os.R_OK):
        raise ValueError("File is not readable")

    file_bytes = os.stat(file_path).st_size
    if file_bytes == 0:
        raise ValueError(f"File is empty: {file_name}")
    if file_bytes > MAX_FILE_SIZE:
        raise ValueError(f"File size exceeds limit: {file_bytes}")

    logger.debug(f"File validated: {file_path}")
    with open(file_path, "rb") as fp:
        content = fp.read()
    md5_sum = hashlib.md5(content).hexdigest()

    supported_mime_types = SupportedFileTypes[file_type_name]["mime_types"]
    expected_mime_types = list(supported_mime_types)

    # Use mimetypes module to guess MIME type from file extension
    mime_type, _ = mimetypes.guess_type(file_path)

    if mime_type is None:
        # Fall back to the first supported MIME type, or a generic one when
        # the file type declares none.
        mime_type = expected_mime_types[0] if expected_mime_types else 'application/octet-stream'
    elif expected_mime_types and mime_type not in supported_mime_types:
        # Only validate when there is something to validate against; with an
        # empty list there is no default to substitute.
        logger.warning(
            f"Extension {file_type_name} has unexpected MIME type: {mime_type}. "
            f"Expected one of: {supported_mime_types}"
        )
        # Use the first supported MIME type as default instead of failing
        mime_type = expected_mime_types[0]

    file_metadata = FileMetadata(
        name=file_name,
        size=file_bytes,
        md5=md5_sum,
        path=file_path,
        mime=mime_type,
        ext=file_ext,
        type_id=file_type_id,
        type_name=file_type_name,
        is_datasheet=is_datasheet
    )
    logger.debug(file_metadata)
    return file_metadata, content
@@ -0,0 +1,15 @@
1
+ from typing import TypeVar
2
+
3
+ from sqlmodel import SQLModel
4
+
5
+ from mfcli.client.chroma_db import ChromaClient
6
+ from mfcli.constants.file_types import FileSubtypes
7
+ from mfcli.utils.datasheet_vectorizer import get_datasheets_for_bom_entries
8
+ from mfcli.utils.orm import Session
9
+
10
+ T = TypeVar('T', bound=SQLModel)
11
+
12
+
13
async def enrich_data_for_model(db: Session, chroma_db: ChromaClient, subtype: int, instances: list[T]):
    """Enrich freshly parsed model instances according to their file subtype.

    Only BOM instances are enriched (datasheets are fetched for the BOM
    entries); every other subtype is left untouched.
    """
    needs_datasheets = subtype == FileSubtypes.BOM
    if needs_datasheets:
        await get_datasheets_for_bom_entries(db, chroma_db, instances)
@@ -0,0 +1,34 @@
1
+ from mfcli.constants.file_types import FileTypes
2
+ from mfcli.models.file import File
3
+ from mfcli.pipeline.extractors.pdf import extract_text_from_pdf
4
+ from mfcli.utils.files import is_text_mime_type
5
+
6
+ import os.path
7
+
8
+ from mfcli.client.llama_parse import LlamaParseClient
9
+
10
+
11
class TextExtractor(LlamaParseClient):
    """Text extraction helpers built on top of ``LlamaParseClient``.

    PDF bytes are handled locally by ``extract_text_from_pdf``; the other
    methods delegate to the inherited ``parse`` method.
    """

    def __init__(self):
        super().__init__()

    @staticmethod
    def extract_pdf_bytes(pdf_bytes: bytes):
        """Return the text of an in-memory PDF."""
        pdf_text = extract_text_from_pdf(pdf_bytes)
        return pdf_text

    def extract_text_from_file_bytes(self, file_name: str, file_bytes: bytes) -> str:
        """Parse already-loaded file contents with the inherited parser."""
        parsed = self.parse(file_name, file_bytes)
        return parsed

    def extract_text(self, file_path: str):
        """Read ``file_path`` from disk and parse its contents."""
        with open(file_path, "rb") as handle:
            base_name = os.path.basename(file_path)
            payload = handle.read()
        return self.parse(base_name, payload)
27
+
28
+
29
def extract_document_text(file: File, file_bytes: bytes) -> str:
    """Return the text content of a document given its raw bytes.

    Text MIME types are decoded directly (undecodable bytes are dropped);
    PDFs go through the PDF text extractor.

    Raises:
        ValueError: if the file is neither text nor PDF.
    """
    if is_text_mime_type(file.mime_type):
        return file_bytes.decode(errors='ignore')
    elif file.type == FileTypes.PDF:
        # extract_pdf_bytes is a static method: call it on the class rather
        # than constructing a TextExtractor (and its LlamaParse client) that
        # is never used.
        return TextExtractor.extract_pdf_bytes(file_bytes)
    raise ValueError(f"Unsupported MIME type: {file.mime_type}")
File without changes
@@ -0,0 +1,12 @@
1
+ import fitz
2
+ from io import BytesIO
3
+
4
+
5
def extract_text_from_pdf(pdf_bytes: bytes) -> str:
    """Return the text of every page of an in-memory PDF, newline-joined.

    The document is opened from a memory stream, so no temporary file is
    written.
    """
    stream = BytesIO(pdf_bytes)
    with fitz.open(stream=stream, filetype="pdf") as document:
        page_texts = [page.get_text("text") for page in document]
    return "\n".join(page_texts)
@@ -0,0 +1,120 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Type
4
+
5
+ import pandas as pd
6
+ from sqlmodel import SQLModel
7
+
8
+ from mfcli.constants.file_types import FileTypes, FileSubtypes
9
+ from mfcli.crud.netlist import create_netlist
10
+ from mfcli.models.file import File
11
+ from mfcli.models.netlist import Netlist
12
+ from mfcli.pipeline.parsers.netlist.kicad_legacy_net import parse_kicad_legacy_net_file
13
+ from mfcli.pipeline.parsers.netlist.kicad_spice import parse_kicad_spice_file
14
+ from mfcli.pipeline.parsers.netlist.pads import parse_pads_file
15
+ from mfcli.pipeline.parsers.netlist.protel import parse_protel_file
16
+ from mfcli.pipeline.schema_mapper import SchemaMappings, SubtypeModels
17
+ from mfcli.utils.logger import get_logger
18
+ from mfcli.utils.orm import Session
19
+
20
+ logger = get_logger(__name__)
21
+
22
+
23
def parse_csv(file_path: str) -> list[dict]:
    """Load a CSV file and return its rows as a list of dictionaries.

    The header row is inferred and undecodable bytes are ignored.

    Returns:
        One dict per CSV row, keyed by column name.
    """
    logger.debug(f"Parsing CSV: {file_path}")
    df = pd.read_csv(file_path, header='infer', encoding_errors='ignore')
    # Round-trip through JSON so the rows contain only JSON-compatible values.
    json_str = df.to_json(orient="records")
    logger.debug(f"CSV parsed: {file_path}")
    return json.loads(json_str)
29
+
30
+
31
def _extract_schema_from_csv(
        file: File,
        input_column_field_map: dict[str, str],
        model: Type[SQLModel]
) -> list[SQLModel]:
    """Build model instances from the rows of a CSV file.

    Args:
        file: File record; ``file.path`` is parsed and ``file.id`` is set on
            every created instance.
        input_column_field_map: Maps CSV column names to model field names.
            Columns missing from a row map to ``None``.
        model: Model class instantiated once per row.

    Returns:
        The instances that could be built. Rows that fail are logged and
        skipped.

    Raises:
        ValueError: if no row at all could be converted.
    """
    rows = parse_csv(file.path)
    model_instances: list[SQLModel] = []
    for row in rows:
        try:
            mapped_data = {
                model_field: row.get(input_col)
                for input_col, model_field in input_column_field_map.items()
            }
            instance = model(**mapped_data)
            instance.file_id = file.id
            model_instances.append(instance)
            logger.debug(f"Model parsed from CSV: {instance}")
        except Exception as e:
            # Best effort: one bad row must not discard the rest of the file.
            logger.warning(f"Skipping CSV row that could not be mapped to {model.__name__}: {e}")
    if not model_instances:
        raise ValueError(f"No data could be parsed from this CSV: {file.path}")
    return model_instances
55
+
56
+
57
class SchemaParser:
    """Parse a file into model instances and add them to the DB session.

    CSV files are parsed through column-to-field schema mappings; netlist
    files are parsed by the dedicated parser for their subtype.
    """

    def __init__(self, db: Session, file: File):
        self._db = db
        self.file = file

    def _parse_with_schema_mappings(
            self,
            model: Type[SQLModel],
            mappings: SchemaMappings
    ) -> list[SQLModel]:
        """Parse the file using ``mappings``; only CSV files are supported."""
        column_to_field = {}
        for mapping in mappings.fields:
            column_to_field[mapping.input_field] = mapping.mapped_field
        logger.debug(f"Model: {model}")
        if self.file.type != FileTypes.CSV:
            raise ValueError(f"Unsupported extraction file type: {self.file.type}")
        return _extract_schema_from_csv(self.file, column_to_field, model)

    @staticmethod
    def _is_netlist_file(subtype: FileSubtypes):
        """Return True when ``subtype`` is one of the netlist subtypes."""
        netlist_subtypes = [
            FileSubtypes.KICAD_LEGACY_NET,
            FileSubtypes.KICAD_SPICE,
            FileSubtypes.PADS_PCB_ASCII,
            FileSubtypes.PROTEL_ALTIUM
        ]
        return subtype in netlist_subtypes

    def _parse_netlist_file(self, subtype: FileSubtypes, file_path: Path) -> Netlist:
        """Parse a netlist file with the parser matching ``subtype``.

        Raises:
            ValueError: if the subtype has no netlist parser.
        """
        if subtype == FileSubtypes.KICAD_LEGACY_NET:
            parse_netlist = parse_kicad_legacy_net_file
        elif subtype == FileSubtypes.PADS_PCB_ASCII:
            parse_netlist = parse_pads_file
        elif subtype == FileSubtypes.KICAD_SPICE:
            parse_netlist = parse_kicad_spice_file
        elif subtype == FileSubtypes.PROTEL_ALTIUM:
            parse_netlist = parse_protel_file
        else:
            raise ValueError(f"Netlist file has no parser: {self.file.name}")
        netlist_schema = parse_netlist(file_path)
        return create_netlist(self.file.pipeline_run_id, netlist_schema)

    def _parse_without_schema_mappings(self) -> list[SQLModel]:
        """Parse a file that needs no column mappings (netlists only)."""
        subtype = self.file.sub_type
        file_path = Path(self.file.path)
        if not self._is_netlist_file(subtype):
            raise ValueError(f"No parser for file subtype: {self.file.sub_type}")
        netlist = self._parse_netlist_file(subtype, file_path)
        return [netlist]

    def parse(self, mappings: SchemaMappings | None) -> list[SQLModel]:
        """Parse the file and add the resulting instances to the session.

        Raises:
            ValueError: if the file's subtype has no registered model.
        """
        logger.debug(f"Extracting schema from file: {self.file.name}")
        model: Type[SQLModel] = SubtypeModels.get(self.file.sub_type)
        if not model:
            raise ValueError(f"Cannot find subtype model for subtype: {self.file.sub_type}")
        if not mappings:
            instances = self._parse_without_schema_mappings()
        else:
            instances = self._parse_with_schema_mappings(model, mappings)
        self._db.add_all(instances)
        logger.debug(f"File has been successfully parsed")
        return instances
117
+
118
+
119
def parse_schema(db: Session, file: File, mappings: SchemaMappings | None) -> list[SQLModel]:
    """Convenience wrapper: build a ``SchemaParser`` for ``file`` and run it."""
    parser = SchemaParser(db, file)
    return parser.parse(mappings)
File without changes
File without changes
@@ -0,0 +1,93 @@
1
+ import re
2
+ from collections import defaultdict
3
+ from typing import Dict, Any
4
+
5
+
6
def parse(file_path: str) -> Dict[str, Any]:
    """Parse an EDIF file into a component/net dictionary.

    The SPICE-style netlist is read from the file's ``NETLIST_TEXT`` string
    property. Each netlist line yields one component with its first two nodes
    as pins, and each node becomes a net annotated with a voltage domain and a
    pull-up/pull-down hint derived from its name.

    Args:
        file_path: Path to the EDIF file.

    Returns:
        ``{"components": [...], "nets": [...]}``. Components carry ``name``,
        ``type``, ``pins`` and ``params``; nets carry ``name``,
        ``connected_pins``, ``voltage_domain`` and ``pull``.

    Raises:
        ValueError: if the file has no ``NETLIST_TEXT`` property.
    """
    # Reference-designator prefix -> component type. Built once, not per line.
    type_map = {
        "r": "resistor",
        "c": "capacitor",
        "l": "inductor",
        "v": "voltage_source",
        "i": "current_source",
        "e": "op_amp",
        "d": "diode",
        "j": "jumper",
        "led": "led",
        "s": "switch",
        "y": "crystal",
        "sh": "shunt",
        "rt": "thermistor",
        "tp": "transistor",
    }

    with open(file_path, "r") as f:
        edif_text = f.read()

    # --- Step 1: Extract NETLIST_TEXT ---
    match = re.search(r'\(property NETLIST_TEXT \(string "(.*?)"\)\)', edif_text, re.DOTALL)
    if not match:
        raise ValueError("No NETLIST_TEXT property found in EDIF")

    # The property stores line breaks as a literal backslash-n sequence.
    netlist_text = match.group(1).replace('\\n', '\n').strip()

    # --- Step 2: Parse SPICE-style lines ---
    component_pattern = re.compile(
        r'^(?P<name>\w+)\s+(?P<n1>\S+)\s+(?P<n2>\S+)\s+(?P<rest>.*)$', re.MULTILINE
    )
    designator_pattern = re.compile(r'[A-Za-z]+')

    components = []
    # Keyed by net name, so each net appears exactly once.
    nets = defaultdict(lambda: {"name": None, "connected_pins": []})

    for comp_match in component_pattern.finditer(netlist_text):
        name = comp_match.group("name")
        pins = [comp_match.group("n1"), comp_match.group("n2")]

        # Try the whole leading letter run first ("LED1" -> "led") so the
        # multi-letter entries can match, then fall back to the first
        # character alone ("RLOAD" -> "r").
        designator = designator_pattern.match(name)
        prefix = designator.group(0).lower() if designator else ""
        comp_type = type_map.get(prefix) or type_map.get(name[0].lower(), "unknown")

        components.append({
            "name": name,
            "type": comp_type,
            "pins": pins,
            "params": comp_match.group("rest"),
        })

        # Connect pins to nets
        for pin in pins:
            nets[pin]["name"] = pin
            nets[pin]["connected_pins"].append({"component": name, "pin": pin})

    # --- Step 3: Deduplicate net names ---
    unique_nets = list(nets.values())

    # --- Step 4: Annotate nets ---
    for net in unique_nets:
        net_name = net["name"].lower()
        if net_name in ("vcc", "vdd", "vin"):
            net["voltage_domain"] = "high"
        elif net_name in ("gnd", "vss", "0"):
            net["voltage_domain"] = "ground"
        else:
            net["voltage_domain"] = "signal"

        # Simple pull heuristic
        if "pullup" in net_name:
            net["pull"] = "pull-up"
        elif "pulldown" in net_name:
            net["pull"] = "pull-down"
        else:
            net["pull"] = None

    # --- Step 5: Return structured output ---
    return {
        "components": components,
        "nets": unique_nets
    }