npm - vision-electronic-indexing-pi - Versions diffs - 0.1.4 → 0.1.6 - Mend

vision-electronic-indexing-pi 0.1.4 → 0.1.6

This diff shows the changes between publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (7)

package/.pi/extensions/vision-inventory-mcp/README.md +1 -1
package/.pi/extensions/vision-inventory-mcp/index.ts +1 -1
package/.pi/skills/vision-inventory-workflow/SKILL.md +3 -0
package/README.md +5 -2
package/package.json +1 -1
package/scripts/inventory_folder_to_csv.py +23 -15
package/vision_inventory_mcp.py +81 -27

package/.pi/extensions/vision-inventory-mcp/README.md CHANGED Viewed

@@ -81,7 +81,7 @@ This package intentionally does **not** bundle:
 The main output is `inventory.csv`, with columns:
 ```text
-likely_part
+normalized_part
 candidate_parts
 amount
 sighting_count

package/.pi/extensions/vision-inventory-mcp/index.ts CHANGED Viewed

@@ -513,7 +513,7 @@ export default function (pi: ExtensionAPI) {
       await runSetup(ctx, false);
       const normalizedArgs = normalizeWorkflowArgs(ctx.cwd, parsed).map((arg) => JSON.stringify(arg)).join(" ");
       const outputDir = normalizeWorkflowArgs(ctx.cwd, parsed)[1];
-      const prompt = `Run the complete Vision Electronic Indexing workflow as an agent.\n\nPackage root containing the bundled Python workflow: ${packageRoot}\nCommand arguments, already resolved relative to the user's cwd: ${normalizedArgs}\nOutput directory: ${outputDir}\n\nImportant external agent dependency: datasheet enrichment requires a web-search/browser Pi tool or skill. This package intentionally does not bundle a web-search dependency. If no search/browser tool is available, stop after generating parts_to_lookup.json and tell the user which dependency is missing.\n\nDo these steps end-to-end:\n1. Run: ${PYTHON_COMMAND} ${join(packageRoot, "scripts", "inventory_folder_to_csv.py")} ${normalizedArgs}\n2. Read ${outputDir}/parts_to_lookup.json.\n3. For every part, web-search for a datasheet. Prefer official manufacturer pages/PDFs.\n4. Write ${outputDir}/datasheet_cache.json using ${outputDir}/datasheet_cache.template.json as the exact shape.\n5. Rerun: ${PYTHON_COMMAND} ${join(packageRoot, "scripts", "inventory_folder_to_csv.py")} ${normalizedArgs} --skip-vision\n6. Read ${outputDir}/inventory.csv and ${outputDir}/inventory_evidence.csv.\n7. Summarize final BOM rows and call out every uncertainty.\n\nRules:\n- Do not invent datasheets, manufacturers, or descriptions.\n- Set verified=false if the part or datasheet match is uncertain.\n- Keep descriptions short, like: \"74ls (4 bit) adder low power schottky ttl 5v DIP\".\n- Preserve raw JSON and evidence files.\n- Do not expose Cloudflare credentials.\n- If a command fails because credentials or Python dependencies are missing, tell the user to run /vision-inventory-setup or /vision-inventory-credentials.`;
+      const prompt = `Run the complete Vision Electronic Indexing workflow as an agent.\n\nPackage root containing the bundled Python workflow: ${packageRoot}\nCommand arguments, already resolved relative to the user's cwd: ${normalizedArgs}\nOutput directory: ${outputDir}\n\nImportant external agent dependency: datasheet enrichment requires a web-search/browser Pi tool or skill. This package intentionally does not bundle a web-search dependency. If no search/browser tool is available, stop after generating parts_to_lookup.json and tell the user which dependency is missing.\n\nDo these steps end-to-end:\n1. Run: ${PYTHON_COMMAND} ${join(packageRoot, "scripts", "inventory_folder_to_csv.py")} ${normalizedArgs}\n2. Read ${outputDir}/parts_to_lookup.json.\n3. For every part, web-search for a datasheet. Prefer official manufacturer pages/PDFs.\n4. Write ${outputDir}/datasheet_cache.json using ${outputDir}/datasheet_cache.template.json as the exact shape.\n5. Rerun: ${PYTHON_COMMAND} ${join(packageRoot, "scripts", "inventory_folder_to_csv.py")} ${normalizedArgs} --skip-vision\n6. Read ${outputDir}/inventory.csv and ${outputDir}/inventory_evidence.csv.\n7. Summarize final BOM rows and call out every uncertainty.\n\nRules:\n- Do not invent datasheets, manufacturers, or descriptions.\n- If an exact candidate part has no official datasheet but search results strongly indicate a likely OCR correction, keep the original candidate as the datasheet_cache key and set normalized_part to the official datasheet part number. 
Example: key SN74AS283N may normalize to SN74LS283N when official TI results match the family/function/package and the image could plausibly confuse A with 4/LS.\n- Only set verified=true for an OCR correction when official source evidence and visual/package context make the correction highly likely; otherwise set verified=false and explain in notes.\n- Include OCR correction notes such as: \"SN74AS283N appears to be OCR for SN74LS283N; verified against TI datasheet.\"\n- Set verified=false if the part or datasheet match is uncertain.\n- Keep descriptions short, like: \"74ls (4 bit) adder low power schottky ttl 5v DIP\".\n- Preserve raw JSON and evidence files.\n- Do not expose Cloudflare credentials.\n- If a command fails because credentials or Python dependencies are missing, tell the user to run /vision-inventory-setup or /vision-inventory-credentials.`;
       await ctx.sendUserMessage(prompt);
     },

package/.pi/skills/vision-inventory-workflow/SKILL.md CHANGED Viewed

@@ -34,5 +34,8 @@ Options are forwarded to `scripts/inventory_folder_to_csv.py`, for example `--re
 - Rerun the Python workflow with `--skip-vision`.
 - Review `inventory.csv` and `inventory_evidence.csv`.
 - Do not invent datasheets, manufacturers, voltages, package names, or descriptions.
+- If exact candidate search fails but official results strongly indicate a likely OCR correction, keep the original candidate as the `datasheet_cache.json` key and set `normalized_part` to the official datasheet part number.
+- Example: if `SN74AS283N` has no official datasheet but official TI results match `SN74LS283N` and the image could plausibly confuse characters, use key `SN74AS283N`, set `normalized_part` to `SN74LS283N`, and explain the correction in `notes`.
+- Only set `verified=true` for corrections when official source evidence and visual/package context make the correction highly likely; otherwise set `verified=false`.
 - Set `verified=false` if uncertain and explain in `notes`.
 - Preserve raw JSON and evidence files.

package/README.md CHANGED Viewed

@@ -135,13 +135,13 @@ verified=false
 ## CSV output columns
-`inventory.csv` is deduplicated by `likely_part`, the main/final part number column. Multiple images, or multiple candidates from one image, can merge into one BOM row when they resolve to the same `likely_part`.
+`inventory.csv` is deduplicated by `normalized_part`, the main/final part number column derived from the vision `likely_part` and datasheet enrichment. Multiple images, or multiple candidates from one image, can merge into one BOM row when they resolve to the same `normalized_part`.
 Columns:
 | Column | Description |
 |---|---|
-| `likely_part` | Main dedupe key/final likely part number, usually from datasheet enrichment. |
+| `normalized_part` | Main dedupe key/final part number, usually from datasheet enrichment and based on the vision `likely_part`. |
 | `candidate_parts` | Candidate part numbers extracted from visual markings. |
 | `amount` | Estimated quantity for the merged BOM row. |
 | `sighting_count` | Number of evidence rows merged into this BOM row. |
@@ -270,6 +270,9 @@ The agent should:
 - Prefer official manufacturer datasheets or product pages.
 - Keep descriptions short.
+- If exact candidate search fails but official results strongly indicate a likely OCR correction, keep the original candidate as the `datasheet_cache.json` key and set `normalized_part` to the official datasheet part number.
+- Example: if `SN74AS283N` has no official datasheet but official TI results match `SN74LS283N` and the image could plausibly confuse the characters, use key `SN74AS283N`, set `normalized_part` to `SN74LS283N`, and mention the correction in `notes`.
+- Set `verified=true` for OCR corrections only when official source evidence and visual/package context make the correction highly likely.
 - Set `verified=false` if the marking, part number, package, or source is uncertain.
 - Do not invent part numbers, manufacturers, voltages, functions, or datasheet URLs.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "vision-electronic-indexing-pi",
-  "version": "0.1.4",
+  "version": "0.1.6",
   "description": "Pi package for agent-assisted electronics/PCB image inventory with Cloudflare Workers AI vision and datasheet enrichment.",
   "license": "MIT",
   "repository": {

package/scripts/inventory_folder_to_csv.py CHANGED Viewed

@@ -201,6 +201,9 @@ def build_parts_to_lookup(results: List[Dict[str, Any]]) -> Dict[str, Any]:
             "Use web search to find each part datasheet, preferably from the manufacturer.",
             "Fill output/datasheet_cache.json using the template shape shown in datasheet_cache.template.json.",
             "Keep descriptions short, e.g. '74ls (4 bit) adder low power schottky ttl 5v DIP'.",
+            "If exact candidate search fails but official results strongly indicate a likely OCR correction, keep the original candidate as this cache key and set normalized_part to the official datasheet part number.",
+            "Example: if SN74AS283N appears to be an OCR error for official SN74LS283N, use key SN74AS283N with normalized_part SN74LS283N and explain the correction in notes.",
+            "Only mark verified=true for a correction when the official datasheet and visual/package context make the correction highly likely; otherwise set verified=false and explain in notes.",
             "If the visual marking is uncertain, set verified=false and explain in notes."
         ],
         "parts": parts,
@@ -228,15 +231,17 @@ def lookup_enrichment(part: str, cache: Dict[str, Any]) -> Dict[str, Any]:
 def estimate_amount_for_candidate(result: Dict[str, Any], candidate: str, evidence_count: int = 1) -> int:
     """Estimate physical IC quantity for one candidate in one image.
-    Count separate matching IC items. The schema field count_index is treated as
-    an ordinal/index, not a quantity. Fall back to the number of candidate
-    evidence rows when only observations are available.
+    Some vision results use count_index as a grouped visible count, while others
+    use it as an ordinal. Use the maximum of matching item count, evidence count,
+    and any numeric count_index values so grouped detections like count_index=4
+    produce amount=4 without double-counting duplicate observations.
     """
     items = result.get("items", [])
     if not isinstance(items, list):
         return max(1, evidence_count)
     matched = 0
+    count_values: List[int] = []
     for item in items:
         if not isinstance(item, dict):
             continue
@@ -245,10 +250,12 @@ def estimate_amount_for_candidate(result: Dict[str, Any], candidate: str, eviden
         if candidate_from_item(item).upper() != candidate.upper():
             continue
         matched += 1
+        try:
+            count_values.append(max(1, int(item.get("count_index", 1))))
+        except Exception:
+            pass
-    if matched > 0:
-        return matched
-    return max(1, evidence_count)
+    return max([1, evidence_count, matched, *count_values])
 def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -261,7 +268,7 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
             rows.append({
                 "image": image_name,
                 "candidate_part": "",
-                "likely_part": "",
+                "normalized_part": "",
                 "amount": 0,
                 "description": "",
                 "datasheet_url": "",
@@ -288,7 +295,8 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
             enrichment = lookup_enrichment(candidate, cache)
             likely_part = str(enrichment.get("normalized_part") or candidate).strip().upper()
             amount = estimate_amount_for_candidate(result, candidate, evidence_count=len(candidate_evidence))
-            observed_markings = sorted({row["observed_marking"] for row in candidate_evidence})
+            # Keep observed_markings normalized to the main visible part number, not full date/lot/package text.
+            observed_markings = [likely_part]
             observations = "; ".join(
                 f"{row['position_hint']}: {row['observed_marking']} ({row['marking_confidence']})"
                 for row in candidate_evidence
@@ -299,7 +307,7 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
             rows.append({
                 "image": image_name,
                 "candidate_part": candidate,
-                "likely_part": likely_part,
+                "normalized_part": likely_part,
                 "amount": amount,
                 "description": enrichment.get("description", ""),
                 "datasheet_url": enrichment.get("datasheet_url", ""),
@@ -329,7 +337,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
     evidence_fieldnames = [
         "image",
         "candidate_part",
-        "likely_part",
+        "normalized_part",
         "amount",
         "description",
         "datasheet_url",
@@ -346,7 +354,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
     grouped: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
     no_part_rows: List[Dict[str, Any]] = []
     for row in evidence_rows:
-        part = str(row.get("likely_part") or row.get("candidate_part") or "").strip().upper()
+        part = str(row.get("normalized_part") or row.get("candidate_part") or "").strip().upper()
         if not part:
             no_part_rows.append(row)
         else:
@@ -363,8 +371,8 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
         amount = sum(int(row.get("amount", 0) or 0) for row in rows_for_part)
         bom_rows.append({
-            "likely_part": part,
-            "candidate_parts": " | ".join(sorted({str(row["candidate_part"]) for row in rows_for_part if row.get("candidate_part")})),
+            "normalized_part": part,
+            "candidate_parts": ", ".join(sorted({str(row["candidate_part"]) for row in rows_for_part if row.get("candidate_part")})),
             "amount": amount,
             "sighting_count": len(rows_for_part),
             "description": first.get("description", ""),
@@ -381,7 +389,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
     for row in no_part_rows:
         bom_rows.append({
-            "likely_part": "",
+            "normalized_part": "",
             "candidate_parts": "",
             "amount": 0,
             "sighting_count": 1,
@@ -398,7 +406,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
         })
     bom_fieldnames = [
-        "likely_part",
+        "normalized_part",
         "candidate_parts",
         "amount",
         "sighting_count",

package/vision_inventory_mcp.py CHANGED Viewed

@@ -646,8 +646,15 @@ def count_inventory_rows(inventory: Dict[str, Any]) -> int:
     return 0
-def flatten_inventory_for_csv(inventory: Dict[str, Any]) -> List[Dict[str, Any]]:
-    rows: List[Dict[str, Any]] = []
+def flatten_inventory_for_csv(inventory: Dict[str, Any], enrichment_cache: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
+    """Flatten raw vision output into BOM-style, likely-part-deduped CSV rows.
+    This is intentionally less complete than scripts/inventory_folder_to_csv.py
+    because the save tool only receives in-memory vision output. If a
+    datasheet_cache.json object is provided, matching enrichment fields are used.
+    """
+    grouped: Dict[str, List[Dict[str, Any]]] = {}
+    cache = enrichment_cache or {}
     if isinstance(inventory.get("items"), list):
         image_results = [inventory]
@@ -660,31 +667,64 @@ def flatten_inventory_for_csv(inventory: Dict[str, Any]) -> List[Dict[str, Any]]
             continue
         image_name = str(result.get("image", "unknown"))
-        warnings = result.get("warnings", [])
-        if isinstance(warnings, list):
-            warnings_text = " | ".join(str(w) for w in warnings)
-        else:
-            warnings_text = str(warnings)
         items = result.get("items", [])
         if not isinstance(items, list):
             continue
+        by_image_part: Dict[Tuple[str, str], Dict[str, Any]] = {}
         for item in items:
             if not isinstance(item, dict):
                 continue
-            rows.append({
+            if str(item.get("item_type", "")).strip().lower() != "ic":
+                continue
+            candidate = str(item.get("likely_part") or item.get("package_marking") or "unknown").strip().upper()
+            if not candidate or candidate.lower() in {"unknown", "unreadable", "unclear", "none", "n/a"}:
+                continue
+            enrichment = cache.get(candidate, {}) if isinstance(cache.get(candidate, {}), dict) else {}
+            normalized = str(enrichment.get("normalized_part") or candidate).strip().upper()
+            key = (image_name, normalized)
+            row = by_image_part.setdefault(key, {
                 "image": image_name,
-                "item_type": item.get("item_type", "unknown"),
-                "count_index": item.get("count_index", ""),
-                "package_marking": item.get("package_marking", "unknown"),
-                "marking_confidence": item.get("marking_confidence", "unreadable"),
-                "likely_part": item.get("likely_part", "unknown"),
-                "description": item.get("description", "unknown"),
-                "position_hint": item.get("position_hint", "unknown"),
-                "needs_review": item.get("needs_review", True),
-                "warnings": warnings_text,
+                "normalized_part": normalized,
+                "candidate_parts": set(),
+                "amount": 0,
+                "vision_confidence": set(),
+                "needs_review": False,
+                "observed_markings": set(),
             })
+            row["candidate_parts"].add(candidate)
+            row["vision_confidence"].add(str(item.get("marking_confidence", "unknown")))
+            row["needs_review"] = bool(row["needs_review"] or item.get("needs_review", True))
+            # Keep the main part number as the observation, not the full package/date/lot marking.
+            row["observed_markings"].add(normalized)
+            try:
+                row["amount"] = max(int(row["amount"]), int(item.get("count_index", 1)))
+            except Exception:
+                row["amount"] = max(int(row["amount"]), 1)
+        for row in by_image_part.values():
+            grouped.setdefault(str(row["normalized_part"]), []).append(row)
+    rows: List[Dict[str, Any]] = []
+    for part, part_rows in sorted(grouped.items()):
+        enrichment = cache.get(part, {}) if isinstance(cache.get(part, {}), dict) else {}
+        rows.append({
+            "normalized_part": part,
+            "candidate_parts": ", ".join(sorted({candidate for row in part_rows for candidate in row["candidate_parts"]})),
+            "amount": sum(int(row.get("amount", 0) or 0) for row in part_rows),
+            "sighting_count": len(part_rows),
+            "description": enrichment.get("description", ""),
+            "datasheet_url": enrichment.get("datasheet_url", ""),
+            "manufacturer": enrichment.get("manufacturer", ""),
+            "verified": bool(enrichment.get("verified", False)),
+            "vision_confidence": "/".join(sorted({value for row in part_rows for value in row["vision_confidence"]})),
+            "needs_review": any(bool(row.get("needs_review", True)) for row in part_rows) or not bool(enrichment.get("verified", False)),
+            "images": " | ".join(sorted({str(row["image"]) for row in part_rows})),
+            "observed_markings": " | ".join(sorted({marking for row in part_rows for marking in row["observed_markings"]})),
+            "raw_json": "",
+            "notes": enrichment.get("notes", "Missing datasheet enrichment"),
+        })
     return rows
@@ -720,18 +760,32 @@ def save_inventory(
             row_count = count_inventory_rows(inventory)
         else:
-            rows = flatten_inventory_for_csv(inventory)
+            cache_path = output.parent / "datasheet_cache.json"
+            enrichment_cache: Dict[str, Any] = {}
+            if cache_path.exists():
+                try:
+                    loaded_cache = json.loads(cache_path.read_text(encoding="utf-8"))
+                    if isinstance(loaded_cache, dict):
+                        enrichment_cache = loaded_cache
+                except Exception:
+                    enrichment_cache = {}
+            rows = flatten_inventory_for_csv(inventory, enrichment_cache)
             fieldnames = [
-                "image",
-                "item_type",
-                "count_index",
-                "package_marking",
-                "marking_confidence",
-                "likely_part",
+                "normalized_part",
+                "candidate_parts",
+                "amount",
+                "sighting_count",
                 "description",
-                "position_hint",
+                "datasheet_url",
+                "manufacturer",
+                "verified",
+                "vision_confidence",
                 "needs_review",
-                "warnings",
+                "images",
+                "observed_markings",
+                "raw_json",
+                "notes",
             ]
             with output.open("w", newline="", encoding="utf-8") as csv_file: