vision-electronic-indexing-pi 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,7 +81,7 @@ This package intentionally does **not** bundle:
81
81
  The main output is `inventory.csv`, with columns:
82
82
 
83
83
  ```text
84
- likely_part
84
+ normalized_part
85
85
  candidate_parts
86
86
  amount
87
87
  sighting_count
@@ -513,7 +513,7 @@ export default function (pi: ExtensionAPI) {
513
513
  await runSetup(ctx, false);
514
514
  const normalizedArgs = normalizeWorkflowArgs(ctx.cwd, parsed).map((arg) => JSON.stringify(arg)).join(" ");
515
515
  const outputDir = normalizeWorkflowArgs(ctx.cwd, parsed)[1];
516
- const prompt = `Run the complete Vision Electronic Indexing workflow as an agent.\n\nPackage root containing the bundled Python workflow: ${packageRoot}\nCommand arguments, already resolved relative to the user's cwd: ${normalizedArgs}\nOutput directory: ${outputDir}\n\nImportant external agent dependency: datasheet enrichment requires a web-search/browser Pi tool or skill. This package intentionally does not bundle a web-search dependency. If no search/browser tool is available, stop after generating parts_to_lookup.json and tell the user which dependency is missing.\n\nDo these steps end-to-end:\n1. Run: ${PYTHON_COMMAND} ${join(packageRoot, "scripts", "inventory_folder_to_csv.py")} ${normalizedArgs}\n2. Read ${outputDir}/parts_to_lookup.json.\n3. For every part, web-search for a datasheet. Prefer official manufacturer pages/PDFs.\n4. Write ${outputDir}/datasheet_cache.json using ${outputDir}/datasheet_cache.template.json as the exact shape.\n5. Rerun: ${PYTHON_COMMAND} ${join(packageRoot, "scripts", "inventory_folder_to_csv.py")} ${normalizedArgs} --skip-vision\n6. Read ${outputDir}/inventory.csv and ${outputDir}/inventory_evidence.csv.\n7. Summarize final BOM rows and call out every uncertainty.\n\nRules:\n- Do not invent datasheets, manufacturers, or descriptions.\n- Set verified=false if the part or datasheet match is uncertain.\n- Keep descriptions short, like: \"74ls (4 bit) adder low power schottky ttl 5v DIP\".\n- Preserve raw JSON and evidence files.\n- Do not expose Cloudflare credentials.\n- If a command fails because credentials or Python dependencies are missing, tell the user to run /vision-inventory-setup or /vision-inventory-credentials.`;
516
+ const prompt = `Run the complete Vision Electronic Indexing workflow as an agent.\n\nPackage root containing the bundled Python workflow: ${packageRoot}\nCommand arguments, already resolved relative to the user's cwd: ${normalizedArgs}\nOutput directory: ${outputDir}\n\nImportant external agent dependency: datasheet enrichment requires a web-search/browser Pi tool or skill. This package intentionally does not bundle a web-search dependency. If no search/browser tool is available, stop after generating parts_to_lookup.json and tell the user which dependency is missing.\n\nDo these steps end-to-end:\n1. Run: ${PYTHON_COMMAND} ${join(packageRoot, "scripts", "inventory_folder_to_csv.py")} ${normalizedArgs}\n2. Read ${outputDir}/parts_to_lookup.json.\n3. For every part, web-search for a datasheet. Prefer official manufacturer pages/PDFs.\n4. Write ${outputDir}/datasheet_cache.json using ${outputDir}/datasheet_cache.template.json as the exact shape.\n5. Rerun: ${PYTHON_COMMAND} ${join(packageRoot, "scripts", "inventory_folder_to_csv.py")} ${normalizedArgs} --skip-vision\n6. Read ${outputDir}/inventory.csv and ${outputDir}/inventory_evidence.csv.\n7. Summarize final BOM rows and call out every uncertainty.\n\nRules:\n- Do not invent datasheets, manufacturers, or descriptions.\n- If an exact candidate part has no official datasheet but search results strongly indicate a likely OCR correction, keep the original candidate as the datasheet_cache key and set normalized_part to the official datasheet part number. 
Example: key SN74AS283N may normalize to SN74LS283N when official TI results match the family/function/package and the image could plausibly confuse A with 4/LS.\n- Only set verified=true for an OCR correction when official source evidence and visual/package context make the correction highly likely; otherwise set verified=false and explain in notes.\n- Include OCR correction notes such as: \"SN74AS283N appears to be OCR for SN74LS283N; verified against TI datasheet.\"\n- Set verified=false if the part or datasheet match is uncertain.\n- Keep descriptions short, like: \"74ls (4 bit) adder low power schottky ttl 5v DIP\".\n- Preserve raw JSON and evidence files.\n- Do not expose Cloudflare credentials.\n- If a command fails because credentials or Python dependencies are missing, tell the user to run /vision-inventory-setup or /vision-inventory-credentials.`;
517
517
 
518
518
  await ctx.sendUserMessage(prompt);
519
519
  },
@@ -34,5 +34,8 @@ Options are forwarded to `scripts/inventory_folder_to_csv.py`, for example `--re
34
34
  - Rerun the Python workflow with `--skip-vision`.
35
35
  - Review `inventory.csv` and `inventory_evidence.csv`.
36
36
  - Do not invent datasheets, manufacturers, voltages, package names, or descriptions.
37
+ - If a search for the exact candidate part finds no official datasheet but official results strongly indicate a likely OCR correction, keep the original candidate as the `datasheet_cache.json` key and set `normalized_part` to the official datasheet part number.
38
+ - Example: if `SN74AS283N` has no official datasheet but official TI results match `SN74LS283N` and the image could plausibly confuse characters, use key `SN74AS283N`, set `normalized_part` to `SN74LS283N`, and explain the correction in `notes`.
39
+ - Only set `verified=true` for corrections when official source evidence and visual/package context make the correction highly likely; otherwise set `verified=false`.
37
40
  - Set `verified=false` if uncertain and explain in `notes`.
38
41
  - Preserve raw JSON and evidence files.
package/README.md CHANGED
@@ -135,13 +135,13 @@ verified=false
135
135
 
136
136
  ## CSV output columns
137
137
 
138
- `inventory.csv` is deduplicated by `likely_part`, the main/final part number column. Multiple images, or multiple candidates from one image, can merge into one BOM row when they resolve to the same `likely_part`.
138
+ `inventory.csv` is deduplicated by `normalized_part`, the main/final part number column derived from the vision `likely_part` and datasheet enrichment. Multiple images, or multiple candidates from one image, can merge into one BOM row when they resolve to the same `normalized_part`.
139
139
 
140
140
  Columns:
141
141
 
142
142
  | Column | Description |
143
143
  |---|---|
144
- | `likely_part` | Main dedupe key/final likely part number, usually from datasheet enrichment. |
144
+ | `normalized_part` | Main dedupe key/final part number, usually from datasheet enrichment and based on the vision `likely_part`. |
145
145
  | `candidate_parts` | Candidate part numbers extracted from visual markings. |
146
146
  | `amount` | Estimated quantity for the merged BOM row. |
147
147
  | `sighting_count` | Number of evidence rows merged into this BOM row. |
@@ -270,6 +270,9 @@ The agent should:
270
270
 
271
271
  - Prefer official manufacturer datasheets or product pages.
272
272
  - Keep descriptions short.
273
+ - If a search for the exact candidate part finds no official datasheet but official results strongly indicate a likely OCR correction, keep the original candidate as the `datasheet_cache.json` key and set `normalized_part` to the official datasheet part number.
274
+ - Example: if `SN74AS283N` has no official datasheet but official TI results match `SN74LS283N` and the image could plausibly confuse the characters, use key `SN74AS283N`, set `normalized_part` to `SN74LS283N`, and mention the correction in `notes`.
275
+ - Set `verified=true` for OCR corrections only when official source evidence and visual/package context make the correction highly likely.
273
276
  - Set `verified=false` if the marking, part number, package, or source is uncertain.
274
277
  - Do not invent part numbers, manufacturers, voltages, functions, or datasheet URLs.
275
278
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vision-electronic-indexing-pi",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "description": "Pi package for agent-assisted electronics/PCB image inventory with Cloudflare Workers AI vision and datasheet enrichment.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -201,6 +201,9 @@ def build_parts_to_lookup(results: List[Dict[str, Any]]) -> Dict[str, Any]:
201
201
  "Use web search to find each part datasheet, preferably from the manufacturer.",
202
202
  "Fill output/datasheet_cache.json using the template shape shown in datasheet_cache.template.json.",
203
203
  "Keep descriptions short, e.g. '74ls (4 bit) adder low power schottky ttl 5v DIP'.",
204
+ "If exact candidate search fails but official results strongly indicate a likely OCR correction, keep the original candidate as this cache key and set normalized_part to the official datasheet part number.",
205
+ "Example: if SN74AS283N appears to be an OCR error for official SN74LS283N, use key SN74AS283N with normalized_part SN74LS283N and explain the correction in notes.",
206
+ "Only mark verified=true for a correction when the official datasheet and visual/package context make the correction highly likely; otherwise set verified=false and explain in notes.",
204
207
  "If the visual marking is uncertain, set verified=false and explain in notes."
205
208
  ],
206
209
  "parts": parts,
@@ -228,15 +231,17 @@ def lookup_enrichment(part: str, cache: Dict[str, Any]) -> Dict[str, Any]:
228
231
  def estimate_amount_for_candidate(result: Dict[str, Any], candidate: str, evidence_count: int = 1) -> int:
229
232
  """Estimate physical IC quantity for one candidate in one image.
230
233
 
231
- Count separate matching IC items. The schema field count_index is treated as
232
- an ordinal/index, not a quantity. Fall back to the number of candidate
233
- evidence rows when only observations are available.
234
+ Some vision results use count_index as a grouped visible count, while others
235
+ use it as an ordinal. Use the maximum of matching item count, evidence count,
236
+ and any numeric count_index values so grouped detections like count_index=4
237
+ produce amount=4 without double-counting duplicate observations.
234
238
  """
235
239
  items = result.get("items", [])
236
240
  if not isinstance(items, list):
237
241
  return max(1, evidence_count)
238
242
 
239
243
  matched = 0
244
+ count_values: List[int] = []
240
245
  for item in items:
241
246
  if not isinstance(item, dict):
242
247
  continue
@@ -245,10 +250,12 @@ def estimate_amount_for_candidate(result: Dict[str, Any], candidate: str, eviden
245
250
  if candidate_from_item(item).upper() != candidate.upper():
246
251
  continue
247
252
  matched += 1
253
+ try:
254
+ count_values.append(max(1, int(item.get("count_index", 1))))
255
+ except Exception:
256
+ pass
248
257
 
249
- if matched > 0:
250
- return matched
251
- return max(1, evidence_count)
258
+ return max([1, evidence_count, matched, *count_values])
252
259
 
253
260
 
254
261
  def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -261,7 +268,7 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
261
268
  rows.append({
262
269
  "image": image_name,
263
270
  "candidate_part": "",
264
- "likely_part": "",
271
+ "normalized_part": "",
265
272
  "amount": 0,
266
273
  "description": "",
267
274
  "datasheet_url": "",
@@ -288,7 +295,8 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
288
295
  enrichment = lookup_enrichment(candidate, cache)
289
296
  likely_part = str(enrichment.get("normalized_part") or candidate).strip().upper()
290
297
  amount = estimate_amount_for_candidate(result, candidate, evidence_count=len(candidate_evidence))
291
- observed_markings = sorted({row["observed_marking"] for row in candidate_evidence})
298
+ # Keep observed_markings normalized to the main visible part number, not full date/lot/package text.
299
+ observed_markings = [likely_part]
292
300
  observations = "; ".join(
293
301
  f"{row['position_hint']}: {row['observed_marking']} ({row['marking_confidence']})"
294
302
  for row in candidate_evidence
@@ -299,7 +307,7 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
299
307
  rows.append({
300
308
  "image": image_name,
301
309
  "candidate_part": candidate,
302
- "likely_part": likely_part,
310
+ "normalized_part": likely_part,
303
311
  "amount": amount,
304
312
  "description": enrichment.get("description", ""),
305
313
  "datasheet_url": enrichment.get("datasheet_url", ""),
@@ -329,7 +337,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
329
337
  evidence_fieldnames = [
330
338
  "image",
331
339
  "candidate_part",
332
- "likely_part",
340
+ "normalized_part",
333
341
  "amount",
334
342
  "description",
335
343
  "datasheet_url",
@@ -346,7 +354,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
346
354
  grouped: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
347
355
  no_part_rows: List[Dict[str, Any]] = []
348
356
  for row in evidence_rows:
349
- part = str(row.get("likely_part") or row.get("candidate_part") or "").strip().upper()
357
+ part = str(row.get("normalized_part") or row.get("candidate_part") or "").strip().upper()
350
358
  if not part:
351
359
  no_part_rows.append(row)
352
360
  else:
@@ -363,8 +371,8 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
363
371
  amount = sum(int(row.get("amount", 0) or 0) for row in rows_for_part)
364
372
 
365
373
  bom_rows.append({
366
- "likely_part": part,
367
- "candidate_parts": " | ".join(sorted({str(row["candidate_part"]) for row in rows_for_part if row.get("candidate_part")})),
374
+ "normalized_part": part,
375
+ "candidate_parts": ", ".join(sorted({str(row["candidate_part"]) for row in rows_for_part if row.get("candidate_part")})),
368
376
  "amount": amount,
369
377
  "sighting_count": len(rows_for_part),
370
378
  "description": first.get("description", ""),
@@ -381,7 +389,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
381
389
 
382
390
  for row in no_part_rows:
383
391
  bom_rows.append({
384
- "likely_part": "",
392
+ "normalized_part": "",
385
393
  "candidate_parts": "",
386
394
  "amount": 0,
387
395
  "sighting_count": 1,
@@ -398,7 +406,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
398
406
  })
399
407
 
400
408
  bom_fieldnames = [
401
- "likely_part",
409
+ "normalized_part",
402
410
  "candidate_parts",
403
411
  "amount",
404
412
  "sighting_count",
@@ -646,8 +646,15 @@ def count_inventory_rows(inventory: Dict[str, Any]) -> int:
646
646
  return 0
647
647
 
648
648
 
649
- def flatten_inventory_for_csv(inventory: Dict[str, Any]) -> List[Dict[str, Any]]:
650
- rows: List[Dict[str, Any]] = []
649
+ def flatten_inventory_for_csv(inventory: Dict[str, Any], enrichment_cache: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
650
+ """Flatten raw vision output into BOM-style, likely-part-deduped CSV rows.
651
+
652
+ This is intentionally less complete than scripts/inventory_folder_to_csv.py
653
+ because the save tool only receives in-memory vision output. If a
654
+ datasheet_cache.json object is provided, matching enrichment fields are used.
655
+ """
656
+ grouped: Dict[str, List[Dict[str, Any]]] = {}
657
+ cache = enrichment_cache or {}
651
658
 
652
659
  if isinstance(inventory.get("items"), list):
653
660
  image_results = [inventory]
@@ -660,31 +667,64 @@ def flatten_inventory_for_csv(inventory: Dict[str, Any]) -> List[Dict[str, Any]]
660
667
  continue
661
668
 
662
669
  image_name = str(result.get("image", "unknown"))
663
- warnings = result.get("warnings", [])
664
- if isinstance(warnings, list):
665
- warnings_text = " | ".join(str(w) for w in warnings)
666
- else:
667
- warnings_text = str(warnings)
668
-
669
670
  items = result.get("items", [])
670
671
  if not isinstance(items, list):
671
672
  continue
672
673
 
674
+ by_image_part: Dict[Tuple[str, str], Dict[str, Any]] = {}
673
675
  for item in items:
674
676
  if not isinstance(item, dict):
675
677
  continue
676
- rows.append({
678
+ if str(item.get("item_type", "")).strip().lower() != "ic":
679
+ continue
680
+
681
+ candidate = str(item.get("likely_part") or item.get("package_marking") or "unknown").strip().upper()
682
+ if not candidate or candidate.lower() in {"unknown", "unreadable", "unclear", "none", "n/a"}:
683
+ continue
684
+ enrichment = cache.get(candidate, {}) if isinstance(cache.get(candidate, {}), dict) else {}
685
+ normalized = str(enrichment.get("normalized_part") or candidate).strip().upper()
686
+ key = (image_name, normalized)
687
+ row = by_image_part.setdefault(key, {
677
688
  "image": image_name,
678
- "item_type": item.get("item_type", "unknown"),
679
- "count_index": item.get("count_index", ""),
680
- "package_marking": item.get("package_marking", "unknown"),
681
- "marking_confidence": item.get("marking_confidence", "unreadable"),
682
- "likely_part": item.get("likely_part", "unknown"),
683
- "description": item.get("description", "unknown"),
684
- "position_hint": item.get("position_hint", "unknown"),
685
- "needs_review": item.get("needs_review", True),
686
- "warnings": warnings_text,
689
+ "normalized_part": normalized,
690
+ "candidate_parts": set(),
691
+ "amount": 0,
692
+ "vision_confidence": set(),
693
+ "needs_review": False,
694
+ "observed_markings": set(),
687
695
  })
696
+ row["candidate_parts"].add(candidate)
697
+ row["vision_confidence"].add(str(item.get("marking_confidence", "unknown")))
698
+ row["needs_review"] = bool(row["needs_review"] or item.get("needs_review", True))
699
+ # Keep the main part number as the observation, not the full package/date/lot marking.
700
+ row["observed_markings"].add(normalized)
701
+ try:
702
+ row["amount"] = max(int(row["amount"]), int(item.get("count_index", 1)))
703
+ except Exception:
704
+ row["amount"] = max(int(row["amount"]), 1)
705
+
706
+ for row in by_image_part.values():
707
+ grouped.setdefault(str(row["normalized_part"]), []).append(row)
708
+
709
+ rows: List[Dict[str, Any]] = []
710
+ for part, part_rows in sorted(grouped.items()):
711
+ enrichment = cache.get(part, {}) if isinstance(cache.get(part, {}), dict) else {}
712
+ rows.append({
713
+ "normalized_part": part,
714
+ "candidate_parts": ", ".join(sorted({candidate for row in part_rows for candidate in row["candidate_parts"]})),
715
+ "amount": sum(int(row.get("amount", 0) or 0) for row in part_rows),
716
+ "sighting_count": len(part_rows),
717
+ "description": enrichment.get("description", ""),
718
+ "datasheet_url": enrichment.get("datasheet_url", ""),
719
+ "manufacturer": enrichment.get("manufacturer", ""),
720
+ "verified": bool(enrichment.get("verified", False)),
721
+ "vision_confidence": "/".join(sorted({value for row in part_rows for value in row["vision_confidence"]})),
722
+ "needs_review": any(bool(row.get("needs_review", True)) for row in part_rows) or not bool(enrichment.get("verified", False)),
723
+ "images": " | ".join(sorted({str(row["image"]) for row in part_rows})),
724
+ "observed_markings": " | ".join(sorted({marking for row in part_rows for marking in row["observed_markings"]})),
725
+ "raw_json": "",
726
+ "notes": enrichment.get("notes", "Missing datasheet enrichment"),
727
+ })
688
728
 
689
729
  return rows
690
730
 
@@ -720,18 +760,32 @@ def save_inventory(
720
760
  row_count = count_inventory_rows(inventory)
721
761
 
722
762
  else:
723
- rows = flatten_inventory_for_csv(inventory)
763
+ cache_path = output.parent / "datasheet_cache.json"
764
+ enrichment_cache: Dict[str, Any] = {}
765
+ if cache_path.exists():
766
+ try:
767
+ loaded_cache = json.loads(cache_path.read_text(encoding="utf-8"))
768
+ if isinstance(loaded_cache, dict):
769
+ enrichment_cache = loaded_cache
770
+ except Exception:
771
+ enrichment_cache = {}
772
+
773
+ rows = flatten_inventory_for_csv(inventory, enrichment_cache)
724
774
  fieldnames = [
725
- "image",
726
- "item_type",
727
- "count_index",
728
- "package_marking",
729
- "marking_confidence",
730
- "likely_part",
775
+ "normalized_part",
776
+ "candidate_parts",
777
+ "amount",
778
+ "sighting_count",
731
779
  "description",
732
- "position_hint",
780
+ "datasheet_url",
781
+ "manufacturer",
782
+ "verified",
783
+ "vision_confidence",
733
784
  "needs_review",
734
- "warnings",
785
+ "images",
786
+ "observed_markings",
787
+ "raw_json",
788
+ "notes",
735
789
  ]
736
790
 
737
791
  with output.open("w", newline="", encoding="utf-8") as csv_file: