npm - vision-electronic-indexing-pi - Versions diffs - 0.1.4 → 0.1.5 - Mend

vision-electronic-indexing-pi 0.1.4 → 0.1.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5)

package/.pi/extensions/vision-inventory-mcp/README.md +1 -1
package/README.md +2 -2
package/package.json +1 -1
package/scripts/inventory_folder_to_csv.py +19 -14
package/vision_inventory_mcp.py +81 -27

package/.pi/extensions/vision-inventory-mcp/README.md CHANGED Viewed

@@ -81,7 +81,7 @@ This package intentionally does **not** bundle:
 The main output is `inventory.csv`, with columns:
 ```text
-likely_part
+normalized_part
 candidate_parts
 amount
 sighting_count

package/README.md CHANGED Viewed

@@ -135,13 +135,13 @@ verified=false
 ## CSV output columns
-`inventory.csv` is deduplicated by `likely_part`, the main/final part number column. Multiple images, or multiple candidates from one image, can merge into one BOM row when they resolve to the same `likely_part`.
+`inventory.csv` is deduplicated by `normalized_part`, the main/final part number column derived from the vision `likely_part` and datasheet enrichment. Multiple images, or multiple candidates from one image, can merge into one BOM row when they resolve to the same `normalized_part`.
 Columns:
 | Column | Description |
 |---|---|
-| `likely_part` | Main dedupe key/final likely part number, usually from datasheet enrichment. |
+| `normalized_part` | Main dedupe key/final part number, usually from datasheet enrichment and based on the vision `likely_part`. |
 | `candidate_parts` | Candidate part numbers extracted from visual markings. |
 | `amount` | Estimated quantity for the merged BOM row. |
 | `sighting_count` | Number of evidence rows merged into this BOM row. |

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "vision-electronic-indexing-pi",
-  "version": "0.1.4",
+  "version": "0.1.5",
   "description": "Pi package for agent-assisted electronics/PCB image inventory with Cloudflare Workers AI vision and datasheet enrichment.",
   "license": "MIT",
   "repository": {

package/scripts/inventory_folder_to_csv.py CHANGED Viewed

@@ -228,15 +228,17 @@ def lookup_enrichment(part: str, cache: Dict[str, Any]) -> Dict[str, Any]:
 def estimate_amount_for_candidate(result: Dict[str, Any], candidate: str, evidence_count: int = 1) -> int:
     """Estimate physical IC quantity for one candidate in one image.
-    Count separate matching IC items. The schema field count_index is treated as
-    an ordinal/index, not a quantity. Fall back to the number of candidate
-    evidence rows when only observations are available.
+    Some vision results use count_index as a grouped visible count, while others
+    use it as an ordinal. Use the maximum of matching item count, evidence count,
+    and any numeric count_index values so grouped detections like count_index=4
+    produce amount=4 without double-counting duplicate observations.
     """
     items = result.get("items", [])
     if not isinstance(items, list):
         return max(1, evidence_count)
     matched = 0
+    count_values: List[int] = []
     for item in items:
         if not isinstance(item, dict):
             continue
@@ -245,10 +247,12 @@ def estimate_amount_for_candidate(result: Dict[str, Any], candidate: str, eviden
         if candidate_from_item(item).upper() != candidate.upper():
             continue
         matched += 1
+        try:
+            count_values.append(max(1, int(item.get("count_index", 1))))
+        except Exception:
+            pass
-    if matched > 0:
-        return matched
-    return max(1, evidence_count)
+    return max([1, evidence_count, matched, *count_values])
 def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -261,7 +265,7 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
             rows.append({
                 "image": image_name,
                 "candidate_part": "",
-                "likely_part": "",
+                "normalized_part": "",
                 "amount": 0,
                 "description": "",
                 "datasheet_url": "",
@@ -288,7 +292,8 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
             enrichment = lookup_enrichment(candidate, cache)
             likely_part = str(enrichment.get("normalized_part") or candidate).strip().upper()
             amount = estimate_amount_for_candidate(result, candidate, evidence_count=len(candidate_evidence))
-            observed_markings = sorted({row["observed_marking"] for row in candidate_evidence})
+            # Keep observed_markings normalized to the main visible part number, not full date/lot/package text.
+            observed_markings = [likely_part]
             observations = "; ".join(
                 f"{row['position_hint']}: {row['observed_marking']} ({row['marking_confidence']})"
                 for row in candidate_evidence
@@ -299,7 +304,7 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
             rows.append({
                 "image": image_name,
                 "candidate_part": candidate,
-                "likely_part": likely_part,
+                "normalized_part": likely_part,
                 "amount": amount,
                 "description": enrichment.get("description", ""),
                 "datasheet_url": enrichment.get("datasheet_url", ""),
@@ -329,7 +334,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
     evidence_fieldnames = [
         "image",
         "candidate_part",
-        "likely_part",
+        "normalized_part",
         "amount",
         "description",
         "datasheet_url",
@@ -346,7 +351,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
     grouped: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
     no_part_rows: List[Dict[str, Any]] = []
     for row in evidence_rows:
-        part = str(row.get("likely_part") or row.get("candidate_part") or "").strip().upper()
+        part = str(row.get("normalized_part") or row.get("candidate_part") or "").strip().upper()
         if not part:
             no_part_rows.append(row)
         else:
@@ -363,7 +368,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
         amount = sum(int(row.get("amount", 0) or 0) for row in rows_for_part)
         bom_rows.append({
-            "likely_part": part,
+            "normalized_part": part,
             "candidate_parts": " | ".join(sorted({str(row["candidate_part"]) for row in rows_for_part if row.get("candidate_part")})),
             "amount": amount,
             "sighting_count": len(rows_for_part),
@@ -381,7 +386,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
     for row in no_part_rows:
         bom_rows.append({
-            "likely_part": "",
+            "normalized_part": "",
             "candidate_parts": "",
             "amount": 0,
             "sighting_count": 1,
@@ -398,7 +403,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
         })
     bom_fieldnames = [
-        "likely_part",
+        "normalized_part",
         "candidate_parts",
         "amount",
         "sighting_count",

package/vision_inventory_mcp.py CHANGED Viewed

@@ -646,8 +646,15 @@ def count_inventory_rows(inventory: Dict[str, Any]) -> int:
     return 0
-def flatten_inventory_for_csv(inventory: Dict[str, Any]) -> List[Dict[str, Any]]:
-    rows: List[Dict[str, Any]] = []
+def flatten_inventory_for_csv(inventory: Dict[str, Any], enrichment_cache: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
+    """Flatten raw vision output into BOM-style, likely-part-deduped CSV rows.
+    This is intentionally less complete than scripts/inventory_folder_to_csv.py
+    because the save tool only receives in-memory vision output. If a
+    datasheet_cache.json object is provided, matching enrichment fields are used.
+    """
+    grouped: Dict[str, List[Dict[str, Any]]] = {}
+    cache = enrichment_cache or {}
     if isinstance(inventory.get("items"), list):
         image_results = [inventory]
@@ -660,31 +667,64 @@ def flatten_inventory_for_csv(inventory: Dict[str, Any]) -> List[Dict[str, Any]]
             continue
         image_name = str(result.get("image", "unknown"))
-        warnings = result.get("warnings", [])
-        if isinstance(warnings, list):
-            warnings_text = " | ".join(str(w) for w in warnings)
-        else:
-            warnings_text = str(warnings)
         items = result.get("items", [])
         if not isinstance(items, list):
             continue
+        by_image_part: Dict[Tuple[str, str], Dict[str, Any]] = {}
         for item in items:
             if not isinstance(item, dict):
                 continue
-            rows.append({
+            if str(item.get("item_type", "")).strip().lower() != "ic":
+                continue
+            candidate = str(item.get("likely_part") or item.get("package_marking") or "unknown").strip().upper()
+            if not candidate or candidate.lower() in {"unknown", "unreadable", "unclear", "none", "n/a"}:
+                continue
+            enrichment = cache.get(candidate, {}) if isinstance(cache.get(candidate, {}), dict) else {}
+            normalized = str(enrichment.get("normalized_part") or candidate).strip().upper()
+            key = (image_name, normalized)
+            row = by_image_part.setdefault(key, {
                 "image": image_name,
-                "item_type": item.get("item_type", "unknown"),
-                "count_index": item.get("count_index", ""),
-                "package_marking": item.get("package_marking", "unknown"),
-                "marking_confidence": item.get("marking_confidence", "unreadable"),
-                "likely_part": item.get("likely_part", "unknown"),
-                "description": item.get("description", "unknown"),
-                "position_hint": item.get("position_hint", "unknown"),
-                "needs_review": item.get("needs_review", True),
-                "warnings": warnings_text,
+                "normalized_part": normalized,
+                "candidate_parts": set(),
+                "amount": 0,
+                "vision_confidence": set(),
+                "needs_review": False,
+                "observed_markings": set(),
             })
+            row["candidate_parts"].add(candidate)
+            row["vision_confidence"].add(str(item.get("marking_confidence", "unknown")))
+            row["needs_review"] = bool(row["needs_review"] or item.get("needs_review", True))
+            # Keep the main part number as the observation, not the full package/date/lot marking.
+            row["observed_markings"].add(normalized)
+            try:
+                row["amount"] = max(int(row["amount"]), int(item.get("count_index", 1)))
+            except Exception:
+                row["amount"] = max(int(row["amount"]), 1)
+        for row in by_image_part.values():
+            grouped.setdefault(str(row["normalized_part"]), []).append(row)
+    rows: List[Dict[str, Any]] = []
+    for part, part_rows in sorted(grouped.items()):
+        enrichment = cache.get(part, {}) if isinstance(cache.get(part, {}), dict) else {}
+        rows.append({
+            "normalized_part": part,
+            "candidate_parts": " | ".join(sorted({candidate for row in part_rows for candidate in row["candidate_parts"]})),
+            "amount": sum(int(row.get("amount", 0) or 0) for row in part_rows),
+            "sighting_count": len(part_rows),
+            "description": enrichment.get("description", ""),
+            "datasheet_url": enrichment.get("datasheet_url", ""),
+            "manufacturer": enrichment.get("manufacturer", ""),
+            "verified": bool(enrichment.get("verified", False)),
+            "vision_confidence": "/".join(sorted({value for row in part_rows for value in row["vision_confidence"]})),
+            "needs_review": any(bool(row.get("needs_review", True)) for row in part_rows) or not bool(enrichment.get("verified", False)),
+            "images": " | ".join(sorted({str(row["image"]) for row in part_rows})),
+            "observed_markings": " | ".join(sorted({marking for row in part_rows for marking in row["observed_markings"]})),
+            "raw_json": "",
+            "notes": enrichment.get("notes", "Missing datasheet enrichment"),
+        })
     return rows
@@ -720,18 +760,32 @@ def save_inventory(
             row_count = count_inventory_rows(inventory)
         else:
-            rows = flatten_inventory_for_csv(inventory)
+            cache_path = output.parent / "datasheet_cache.json"
+            enrichment_cache: Dict[str, Any] = {}
+            if cache_path.exists():
+                try:
+                    loaded_cache = json.loads(cache_path.read_text(encoding="utf-8"))
+                    if isinstance(loaded_cache, dict):
+                        enrichment_cache = loaded_cache
+                except Exception:
+                    enrichment_cache = {}
+            rows = flatten_inventory_for_csv(inventory, enrichment_cache)
             fieldnames = [
-                "image",
-                "item_type",
-                "count_index",
-                "package_marking",
-                "marking_confidence",
-                "likely_part",
+                "normalized_part",
+                "candidate_parts",
+                "amount",
+                "sighting_count",
                 "description",
-                "position_hint",
+                "datasheet_url",
+                "manufacturer",
+                "verified",
+                "vision_confidence",
                 "needs_review",
-                "warnings",
+                "images",
+                "observed_markings",
+                "raw_json",
+                "notes",
             ]
             with output.open("w", newline="", encoding="utf-8") as csv_file: