vision-electronic-indexing-pi 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,7 +81,7 @@ This package intentionally does **not** bundle:
81
81
  The main output is `inventory.csv`, with columns:
82
82
 
83
83
  ```text
84
- likely_part
84
+ normalized_part
85
85
  candidate_parts
86
86
  amount
87
87
  sighting_count
package/README.md CHANGED
@@ -135,13 +135,13 @@ verified=false
135
135
 
136
136
  ## CSV output columns
137
137
 
138
- `inventory.csv` is deduplicated by `likely_part`, the main/final part number column. Multiple images, or multiple candidates from one image, can merge into one BOM row when they resolve to the same `likely_part`.
138
+ `inventory.csv` is deduplicated by `normalized_part`, the main/final part number column derived from the vision `likely_part` and datasheet enrichment. Multiple images, or multiple candidates from one image, can merge into one BOM row when they resolve to the same `normalized_part`.
139
139
 
140
140
  Columns:
141
141
 
142
142
  | Column | Description |
143
143
  |---|---|
144
- | `likely_part` | Main dedupe key/final likely part number, usually from datasheet enrichment. |
144
+ | `normalized_part` | Main dedupe key/final part number, usually from datasheet enrichment and based on the vision `likely_part`. |
145
145
  | `candidate_parts` | Candidate part numbers extracted from visual markings. |
146
146
  | `amount` | Estimated quantity for the merged BOM row. |
147
147
  | `sighting_count` | Number of evidence rows merged into this BOM row. |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vision-electronic-indexing-pi",
3
- "version": "0.1.4",
3
+ "version": "0.1.5",
4
4
  "description": "Pi package for agent-assisted electronics/PCB image inventory with Cloudflare Workers AI vision and datasheet enrichment.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -228,15 +228,17 @@ def lookup_enrichment(part: str, cache: Dict[str, Any]) -> Dict[str, Any]:
228
228
  def estimate_amount_for_candidate(result: Dict[str, Any], candidate: str, evidence_count: int = 1) -> int:
229
229
  """Estimate physical IC quantity for one candidate in one image.
230
230
 
231
- Count separate matching IC items. The schema field count_index is treated as
232
- an ordinal/index, not a quantity. Fall back to the number of candidate
233
- evidence rows when only observations are available.
231
+ Some vision results use count_index as a grouped visible count, while others
232
+ use it as an ordinal. Use the maximum of matching item count, evidence count,
233
+ and any numeric count_index values so grouped detections like count_index=4
234
+ produce amount=4 without double-counting duplicate observations.
234
235
  """
235
236
  items = result.get("items", [])
236
237
  if not isinstance(items, list):
237
238
  return max(1, evidence_count)
238
239
 
239
240
  matched = 0
241
+ count_values: List[int] = []
240
242
  for item in items:
241
243
  if not isinstance(item, dict):
242
244
  continue
@@ -245,10 +247,12 @@ def estimate_amount_for_candidate(result: Dict[str, Any], candidate: str, eviden
245
247
  if candidate_from_item(item).upper() != candidate.upper():
246
248
  continue
247
249
  matched += 1
250
+ try:
251
+ count_values.append(max(1, int(item.get("count_index", 1))))
252
+ except Exception:
253
+ pass
248
254
 
249
- if matched > 0:
250
- return matched
251
- return max(1, evidence_count)
255
+ return max([1, evidence_count, matched, *count_values])
252
256
 
253
257
 
254
258
  def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -261,7 +265,7 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
261
265
  rows.append({
262
266
  "image": image_name,
263
267
  "candidate_part": "",
264
- "likely_part": "",
268
+ "normalized_part": "",
265
269
  "amount": 0,
266
270
  "description": "",
267
271
  "datasheet_url": "",
@@ -288,7 +292,8 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
288
292
  enrichment = lookup_enrichment(candidate, cache)
289
293
  likely_part = str(enrichment.get("normalized_part") or candidate).strip().upper()
290
294
  amount = estimate_amount_for_candidate(result, candidate, evidence_count=len(candidate_evidence))
291
- observed_markings = sorted({row["observed_marking"] for row in candidate_evidence})
295
+ # Keep observed_markings normalized to the main visible part number, not full date/lot/package text.
296
+ observed_markings = [likely_part]
292
297
  observations = "; ".join(
293
298
  f"{row['position_hint']}: {row['observed_marking']} ({row['marking_confidence']})"
294
299
  for row in candidate_evidence
@@ -299,7 +304,7 @@ def image_part_rows(results: List[Dict[str, Any]], cache: Dict[str, Any]) -> Lis
299
304
  rows.append({
300
305
  "image": image_name,
301
306
  "candidate_part": candidate,
302
- "likely_part": likely_part,
307
+ "normalized_part": likely_part,
303
308
  "amount": amount,
304
309
  "description": enrichment.get("description", ""),
305
310
  "datasheet_url": enrichment.get("datasheet_url", ""),
@@ -329,7 +334,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
329
334
  evidence_fieldnames = [
330
335
  "image",
331
336
  "candidate_part",
332
- "likely_part",
337
+ "normalized_part",
333
338
  "amount",
334
339
  "description",
335
340
  "datasheet_url",
@@ -346,7 +351,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
346
351
  grouped: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
347
352
  no_part_rows: List[Dict[str, Any]] = []
348
353
  for row in evidence_rows:
349
- part = str(row.get("likely_part") or row.get("candidate_part") or "").strip().upper()
354
+ part = str(row.get("normalized_part") or row.get("candidate_part") or "").strip().upper()
350
355
  if not part:
351
356
  no_part_rows.append(row)
352
357
  else:
@@ -363,7 +368,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
363
368
  amount = sum(int(row.get("amount", 0) or 0) for row in rows_for_part)
364
369
 
365
370
  bom_rows.append({
366
- "likely_part": part,
371
+ "normalized_part": part,
367
372
  "candidate_parts": " | ".join(sorted({str(row["candidate_part"]) for row in rows_for_part if row.get("candidate_part")})),
368
373
  "amount": amount,
369
374
  "sighting_count": len(rows_for_part),
@@ -381,7 +386,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
381
386
 
382
387
  for row in no_part_rows:
383
388
  bom_rows.append({
384
- "likely_part": "",
389
+ "normalized_part": "",
385
390
  "candidate_parts": "",
386
391
  "amount": 0,
387
392
  "sighting_count": 1,
@@ -398,7 +403,7 @@ def write_final_csv(results: List[Dict[str, Any]], cache: Dict[str, Any], output
398
403
  })
399
404
 
400
405
  bom_fieldnames = [
401
- "likely_part",
406
+ "normalized_part",
402
407
  "candidate_parts",
403
408
  "amount",
404
409
  "sighting_count",
@@ -646,8 +646,15 @@ def count_inventory_rows(inventory: Dict[str, Any]) -> int:
646
646
  return 0
647
647
 
648
648
 
649
- def flatten_inventory_for_csv(inventory: Dict[str, Any]) -> List[Dict[str, Any]]:
650
- rows: List[Dict[str, Any]] = []
649
+ def flatten_inventory_for_csv(inventory: Dict[str, Any], enrichment_cache: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
650
+ """Flatten raw vision output into BOM-style, likely-part-deduped CSV rows.
651
+
652
+ This is intentionally less complete than scripts/inventory_folder_to_csv.py
653
+ because the save tool only receives in-memory vision output. If a
654
+ datasheet_cache.json object is provided, matching enrichment fields are used.
655
+ """
656
+ grouped: Dict[str, List[Dict[str, Any]]] = {}
657
+ cache = enrichment_cache or {}
651
658
 
652
659
  if isinstance(inventory.get("items"), list):
653
660
  image_results = [inventory]
@@ -660,31 +667,64 @@ def flatten_inventory_for_csv(inventory: Dict[str, Any]) -> List[Dict[str, Any]]
660
667
  continue
661
668
 
662
669
  image_name = str(result.get("image", "unknown"))
663
- warnings = result.get("warnings", [])
664
- if isinstance(warnings, list):
665
- warnings_text = " | ".join(str(w) for w in warnings)
666
- else:
667
- warnings_text = str(warnings)
668
-
669
670
  items = result.get("items", [])
670
671
  if not isinstance(items, list):
671
672
  continue
672
673
 
674
+ by_image_part: Dict[Tuple[str, str], Dict[str, Any]] = {}
673
675
  for item in items:
674
676
  if not isinstance(item, dict):
675
677
  continue
676
- rows.append({
678
+ if str(item.get("item_type", "")).strip().lower() != "ic":
679
+ continue
680
+
681
+ candidate = str(item.get("likely_part") or item.get("package_marking") or "unknown").strip().upper()
682
+ if not candidate or candidate.lower() in {"unknown", "unreadable", "unclear", "none", "n/a"}:
683
+ continue
684
+ enrichment = cache.get(candidate, {}) if isinstance(cache.get(candidate, {}), dict) else {}
685
+ normalized = str(enrichment.get("normalized_part") or candidate).strip().upper()
686
+ key = (image_name, normalized)
687
+ row = by_image_part.setdefault(key, {
677
688
  "image": image_name,
678
- "item_type": item.get("item_type", "unknown"),
679
- "count_index": item.get("count_index", ""),
680
- "package_marking": item.get("package_marking", "unknown"),
681
- "marking_confidence": item.get("marking_confidence", "unreadable"),
682
- "likely_part": item.get("likely_part", "unknown"),
683
- "description": item.get("description", "unknown"),
684
- "position_hint": item.get("position_hint", "unknown"),
685
- "needs_review": item.get("needs_review", True),
686
- "warnings": warnings_text,
689
+ "normalized_part": normalized,
690
+ "candidate_parts": set(),
691
+ "amount": 0,
692
+ "vision_confidence": set(),
693
+ "needs_review": False,
694
+ "observed_markings": set(),
687
695
  })
696
+ row["candidate_parts"].add(candidate)
697
+ row["vision_confidence"].add(str(item.get("marking_confidence", "unknown")))
698
+ row["needs_review"] = bool(row["needs_review"] or item.get("needs_review", True))
699
+ # Keep the main part number as the observation, not the full package/date/lot marking.
700
+ row["observed_markings"].add(normalized)
701
+ try:
702
+ row["amount"] = max(int(row["amount"]), int(item.get("count_index", 1)))
703
+ except Exception:
704
+ row["amount"] = max(int(row["amount"]), 1)
705
+
706
+ for row in by_image_part.values():
707
+ grouped.setdefault(str(row["normalized_part"]), []).append(row)
708
+
709
+ rows: List[Dict[str, Any]] = []
710
+ for part, part_rows in sorted(grouped.items()):
711
+ enrichment = cache.get(part, {}) if isinstance(cache.get(part, {}), dict) else {}
712
+ rows.append({
713
+ "normalized_part": part,
714
+ "candidate_parts": " | ".join(sorted({candidate for row in part_rows for candidate in row["candidate_parts"]})),
715
+ "amount": sum(int(row.get("amount", 0) or 0) for row in part_rows),
716
+ "sighting_count": len(part_rows),
717
+ "description": enrichment.get("description", ""),
718
+ "datasheet_url": enrichment.get("datasheet_url", ""),
719
+ "manufacturer": enrichment.get("manufacturer", ""),
720
+ "verified": bool(enrichment.get("verified", False)),
721
+ "vision_confidence": "/".join(sorted({value for row in part_rows for value in row["vision_confidence"]})),
722
+ "needs_review": any(bool(row.get("needs_review", True)) for row in part_rows) or not bool(enrichment.get("verified", False)),
723
+ "images": " | ".join(sorted({str(row["image"]) for row in part_rows})),
724
+ "observed_markings": " | ".join(sorted({marking for row in part_rows for marking in row["observed_markings"]})),
725
+ "raw_json": "",
726
+ "notes": enrichment.get("notes", "Missing datasheet enrichment"),
727
+ })
688
728
 
689
729
  return rows
690
730
 
@@ -720,18 +760,32 @@ def save_inventory(
720
760
  row_count = count_inventory_rows(inventory)
721
761
 
722
762
  else:
723
- rows = flatten_inventory_for_csv(inventory)
763
+ cache_path = output.parent / "datasheet_cache.json"
764
+ enrichment_cache: Dict[str, Any] = {}
765
+ if cache_path.exists():
766
+ try:
767
+ loaded_cache = json.loads(cache_path.read_text(encoding="utf-8"))
768
+ if isinstance(loaded_cache, dict):
769
+ enrichment_cache = loaded_cache
770
+ except Exception:
771
+ enrichment_cache = {}
772
+
773
+ rows = flatten_inventory_for_csv(inventory, enrichment_cache)
724
774
  fieldnames = [
725
- "image",
726
- "item_type",
727
- "count_index",
728
- "package_marking",
729
- "marking_confidence",
730
- "likely_part",
775
+ "normalized_part",
776
+ "candidate_parts",
777
+ "amount",
778
+ "sighting_count",
731
779
  "description",
732
- "position_hint",
780
+ "datasheet_url",
781
+ "manufacturer",
782
+ "verified",
783
+ "vision_confidence",
733
784
  "needs_review",
734
- "warnings",
785
+ "images",
786
+ "observed_markings",
787
+ "raw_json",
788
+ "notes",
735
789
  ]
736
790
 
737
791
  with output.open("w", newline="", encoding="utf-8") as csv_file: