semapp-1.0.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,461 @@
+ """
+ Module for reading and parsing KLARF files (.001) to extract defect positions.
+ Supports KRONOS, COMPLUS4T, and normal modes.
+ """
+
+ import os
+ import re
+ import pandas as pd
+
+
+ def extract_positions(filepath, wafer_id=None):
+     """
+     Extract defect positions from a KLARF file.
+     Main function that detects the mode and calls the appropriate parser.
+
+     Args:
+         filepath: Path to the KLARF (.001) file
+         wafer_id: Specific wafer ID to extract (for COMPLUS4T files with multiple wafers).
+             If None, all defects are extracted (normal mode).
+
+     Returns:
+         pd.DataFrame: DataFrame with columns ["defect_id", "X", "Y", "defect_size"]
+     """
+     # Detect the mode by reading the file
+     mode = _detect_mode(filepath)
+
+     if mode == "KRONOS":
+         return _extract_positions_kronos(filepath)
+     elif mode == "COMPLUS4T":
+         return _extract_positions_complus4t(filepath, wafer_id)
+     else:
+         return _extract_positions_normal(filepath)
+
+
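As a quick illustration of the public entry point above, here is a minimal usage sketch. It is not part of the package; the file paths, the wafer number, and the commented import line are hypothetical (the actual import path depends on how the wheel lays out its modules).

    # Hypothetical usage sketch -- not part of the packaged module.
    # from semapp import klarf_reader  # actual module path inside the wheel may differ

    # Normal or KRONOS file: one wafer per file.
    df = extract_positions("scans/wafer_scan.001")

    # COMPLUS4T file holding several wafers: select one explicitly.
    df_w3 = extract_positions("scans/multi_wafer.001", wafer_id=3)

    # Both calls return a DataFrame with columns ["defect_id", "X", "Y", "defect_size"]
    # and also write a mapping.csv next to the input file (or in the wafer subfolder).
    print(df.head())
    print(df_w3[df_w3["defect_size"] > 1.0])
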
+ def _detect_mode(filepath):
+     """
+     Detect the mode of the KLARF file (KRONOS, COMPLUS4T, or normal).
+
+     Args:
+         filepath: Path to the KLARF (.001) file
+
+     Returns:
+         str: "KRONOS", "COMPLUS4T", or "NORMAL"
+     """
+     try:
+         with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
+             for i, line in enumerate(f):
+                 if i >= 20:  # Only check first 20 lines
+                     break
+                 line_stripped = line.strip()
+
+                 # Check for KRONOS format
+                 if line_stripped.startswith("WaferID"):
+                     if re.search(r'WaferID\s+"Read Failed\.(\d+)"', line_stripped):
+                         return "KRONOS"
+                     if re.search(r'WaferID\s+"@(\d+)"', line_stripped):
+                         return "COMPLUS4T"
+
+                 # Check for COMPLUS4T keyword
+                 if 'COMPLUS4T' in line_stripped:
+                     return "COMPLUS4T"
+
+                 # Check for KRONOS keyword
+                 if 'KRONOS' in line_stripped:
+                     return "KRONOS"
+     except Exception:
+         pass
+
+     return "NORMAL"
+
+
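For reference, a small self-contained check of the header patterns the detector keys on. The header strings below are made up for illustration; only the regular expressions are taken from the code above.

    import re

    samples = {
        'WaferID "Read Failed.12"': "KRONOS",     # KRONOS-style header (invented)
        'WaferID "@7"': "COMPLUS4T",              # COMPLUS4T-style header (invented)
        'WaferID "LOT42-05"': "NORMAL",           # anything else falls through
    }

    for header, expected in samples.items():
        if re.search(r'WaferID\s+"Read Failed\.(\d+)"', header):
            mode = "KRONOS"
        elif re.search(r'WaferID\s+"@(\d+)"', header):
            mode = "COMPLUS4T"
        else:
            mode = "NORMAL"
        assert mode == expected, (header, mode, expected)
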
+ def _extract_positions_kronos(filepath):
+     """
+     Extract defect positions from a KRONOS-format KLARF file.
+
+     Args:
+         filepath: Path to the KLARF (.001) file
+
+     Returns:
+         pd.DataFrame: DataFrame with columns ["defect_id", "X", "Y", "defect_size"]
+     """
+     data = {
+         "SampleSize": None,
+         "DiePitch": {"X": None, "Y": None},
+         "DieOrigin": {"X": None, "Y": None},
+         "SampleCenterLocation": {"X": None, "Y": None},
+         "Defects": []
+     }
+
+     dans_defect_list = False
+     detected_numbers = set()
+     kronos_wafer_id = None
+
+     with open(filepath, "r", encoding="utf-8") as f:
+         lines = f.readlines()
+
+     # First pass: detect KRONOS, extract the wafer ID and load the detection CSV
+     for line in lines:
+         line_stripped = line.strip()
+         if line_stripped.startswith("WaferID"):
+             kronos_match = re.search(r'WaferID\s+"Read Failed\.(\d+)"', line_stripped)
+             if kronos_match:
+                 kronos_wafer_id = int(kronos_match.group(1))
+                 print(f"[KRONOS] Detected wafer ID: {kronos_wafer_id}")
+
+                 # Load detection_results.csv from the wafer subdirectory
+                 file_dir = os.path.dirname(filepath)
+                 file_dir_basename = os.path.basename(file_dir)
+                 if file_dir_basename == str(kronos_wafer_id):
+                     detection_csv_path = os.path.join(file_dir, "detection_results.csv")
+                 else:
+                     detection_csv_path = os.path.join(file_dir, str(kronos_wafer_id), "detection_results.csv")
+
+                 if os.path.exists(detection_csv_path):
+                     print(f"[KRONOS] Loading detection results from: {detection_csv_path}")
+                     try:
+                         detection_df = pd.read_csv(detection_csv_path)
+                         detected_numbers = set()
+                         for num_str in detection_df['Detected_Number']:
+                             if num_str and str(num_str) != 'None' and str(num_str) != 'nan':
+                                 try:
+                                     num = int(float(str(num_str).strip()))
+                                     detected_numbers.add(num)
+                                 except (ValueError, TypeError):
+                                     pass
+                         print(f"[KRONOS] Found {len(detected_numbers)} unique detected numbers: {sorted(detected_numbers)}")
+                     except Exception as e:
+                         print(f"[KRONOS] Error loading detection CSV: {e}")
+                 else:
+                     print(f"[KRONOS] Warning: detection_results.csv not found at {detection_csv_path}")
+             break
+
+     # Second pass: parse the file
+     for i, line in enumerate(lines):
+         line = line.strip()
+
+         if line.startswith("WaferID"):
+             continue
+
+         if line.startswith("SampleSize"):
+             match = re.search(r"SampleSize\s+1\s+(\d+)", line)
+             if match:
+                 data["SampleSize"] = int(match.group(1))
+
+         elif line.startswith("DiePitch"):
+             match = re.search(r"DiePitch\s+([0-9.e+-]+)\s+([0-9.e+-]+);", line)
+             if match:
+                 data["DiePitch"]["X"] = float(match.group(1))
+                 data["DiePitch"]["Y"] = float(match.group(2))
+
+         elif line.startswith("DieOrigin"):
+             match = re.search(r"DieOrigin\s+([0-9.e+-]+)\s+([0-9.e+-]+);", line)
+             if match:
+                 data["DieOrigin"]["X"] = float(match.group(1))
+                 data["DieOrigin"]["Y"] = float(match.group(2))
+
+         elif line.startswith("SampleCenterLocation"):
+             match = re.search(r"SampleCenterLocation\s+([0-9.e+-]+)\s+([0-9.e+-]+);", line)
+             if match:
+                 data["SampleCenterLocation"]["X"] = float(match.group(1))
+                 data["SampleCenterLocation"]["Y"] = float(match.group(2))
+
+         elif line.startswith("DefectList"):
+             dans_defect_list = True
+             continue
+
+         elif dans_defect_list:
+             if re.match(r"^\d+\s", line):
+                 value = line.split()
+                 if len(value) >= 12:
+                     real_defect_id = int(value[0])
+
+                     # Only add the defect if its ID is in detected_numbers
+                     if real_defect_id not in detected_numbers:
+                         continue
+
+                     defect = {f"val{i+1}": float(val) for i, val in enumerate(value[:11])}
+                     defect["defect_id"] = real_defect_id
+                     data["Defects"].append(defect)
+             elif line.startswith("EndOfFile") or line.startswith("}"):
+                 dans_defect_list = False
+
+     return _calculate_coordinates(data, is_kronos=True, filepath=filepath, kronos_wafer_id=kronos_wafer_id)
+
+
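The KRONOS path above keeps only the defects whose IDs appear in a detection_results.csv next to the KLARF file (or in the wafer subfolder). A sketch of that filtering step with a made-up CSV; the Detected_Number column name comes from the code, the values are invented.

    from io import StringIO
    import pandas as pd

    # Invented detection_results.csv content
    csv_text = "Detected_Number\n3\n7.0\nNone\n12\n"
    detection_df = pd.read_csv(StringIO(csv_text))

    detected_numbers = set()
    for num_str in detection_df["Detected_Number"]:
        # Same guard as in the module: skip empty/None/NaN entries
        if num_str and str(num_str) != "None" and str(num_str) != "nan":
            try:
                detected_numbers.add(int(float(str(num_str).strip())))
            except (ValueError, TypeError):
                pass

    print(sorted(detected_numbers))  # [3, 7, 12]
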
+ def _extract_positions_complus4t(filepath, wafer_id=None):
+     """
+     Extract defect positions from a COMPLUS4T-format KLARF file.
+
+     Args:
+         filepath: Path to the KLARF (.001) file
+         wafer_id: Specific wafer ID to extract (required for COMPLUS4T)
+
+     Returns:
+         pd.DataFrame: DataFrame with columns ["defect_id", "X", "Y", "defect_size"]
+     """
+     data = {
+         "SampleSize": None,
+         "DiePitch": {"X": None, "Y": None},
+         "DieOrigin": {"X": None, "Y": None},
+         "SampleCenterLocation": {"X": None, "Y": None},
+         "Defects": []
+     }
+
+     dans_defect_list = False
+     current_wafer_id = None
+     target_wafer_found = False
+     reading_target_wafer = False
+
+     with open(filepath, "r", encoding="utf-8") as f:
+         lines = f.readlines()
+
+     for i, line in enumerate(lines):
+         line = line.strip()
+
+         # Detect WaferID
+         if line.startswith("WaferID"):
+             match = re.search(r'WaferID\s+"@(\d+)"', line)
+             if match:
+                 current_wafer_id = int(match.group(1))
+
+                 # If looking for a specific wafer
+                 if wafer_id is not None:
+                     if current_wafer_id == wafer_id:
+                         target_wafer_found = True
+                         reading_target_wafer = True
+                         data["Defects"] = []
+                     elif target_wafer_found:
+                         break
+                     else:
+                         reading_target_wafer = False
+                 else:
+                     reading_target_wafer = True
+             continue
+
+         # If looking for a specific wafer, skip lines until the right wafer is found
+         if wafer_id is not None and not reading_target_wafer and not line.startswith("DefectList"):
+             if current_wafer_id is None:
+                 # Header lines seen before the first WaferID are still parsed below
+                 pass
+             else:
+                 continue
+
+         if line.startswith("SampleSize"):
+             match = re.search(r"SampleSize\s+1\s+(\d+)", line)
+             if match:
+                 data["SampleSize"] = int(match.group(1))
+
+         elif line.startswith("DiePitch"):
+             match = re.search(r"DiePitch\s+([0-9.e+-]+)\s+([0-9.e+-]+);", line)
+             if match:
+                 data["DiePitch"]["X"] = float(match.group(1))
+                 data["DiePitch"]["Y"] = float(match.group(2))
+
+         elif line.startswith("DieOrigin"):
+             match = re.search(r"DieOrigin\s+([0-9.e+-]+)\s+([0-9.e+-]+);", line)
+             if match:
+                 data["DieOrigin"]["X"] = float(match.group(1))
+                 data["DieOrigin"]["Y"] = float(match.group(2))
+
+         elif line.startswith("SampleCenterLocation"):
+             match = re.search(r"SampleCenterLocation\s+([0-9.e+-]+)\s+([0-9.e+-]+);", line)
+             if match:
+                 data["SampleCenterLocation"]["X"] = float(match.group(1))
+                 data["SampleCenterLocation"]["Y"] = float(match.group(2))
+
+         elif line.startswith("DefectList"):
+             dans_defect_list = True
+             continue
+
+         elif dans_defect_list:
+             # Inside the DefectList: filter by wafer if necessary
+             if wafer_id is not None and not reading_target_wafer:
+                 if line.startswith("EndOfFile") or line.startswith("}"):
+                     dans_defect_list = False
+                 continue
+
+             if re.match(r"^\d+\s", line):
+                 value = line.split()
+                 if len(value) >= 12:
+                     # For COMPLUS4T: check whether the next line has exactly 2 columns
+                     if i + 1 < len(lines):
+                         next_line = lines[i + 1].strip()
+                         next_values = next_line.split()
+                         if len(next_values) == 2:
+                             real_defect_id = int(next_values[0])
+                             defect = {f"val{i+1}": float(val) for i, val in enumerate(value[:10])}
+                             defect["defect_id"] = real_defect_id
+                             data["Defects"].append(defect)
+             elif line.startswith("EndOfFile") or line.startswith("}"):
+                 dans_defect_list = False
+
+     return _calculate_coordinates(data, is_kronos=False, filepath=filepath, wafer_id=wafer_id)
+
+
+ def _extract_positions_normal(filepath):
+     """
+     Extract defect positions from a normal-format KLARF file.
+
+     Args:
+         filepath: Path to the KLARF (.001) file
+
+     Returns:
+         pd.DataFrame: DataFrame with columns ["defect_id", "X", "Y", "defect_size"]
+     """
+     data = {
+         "SampleSize": None,
+         "DiePitch": {"X": None, "Y": None},
+         "DieOrigin": {"X": None, "Y": None},
+         "SampleCenterLocation": {"X": None, "Y": None},
+         "Defects": []
+     }
+
+     dans_defect_list = False
+
+     with open(filepath, "r", encoding="utf-8") as f:
+         lines = f.readlines()
+
+     for i, line in enumerate(lines):
+         line = line.strip()
+
+         if line.startswith("SampleSize"):
+             match = re.search(r"SampleSize\s+1\s+(\d+)", line)
+             if match:
+                 data["SampleSize"] = int(match.group(1))
+
+         elif line.startswith("DiePitch"):
+             match = re.search(r"DiePitch\s+([0-9.e+-]+)\s+([0-9.e+-]+);", line)
+             if match:
+                 data["DiePitch"]["X"] = float(match.group(1))
+                 data["DiePitch"]["Y"] = float(match.group(2))
+
+         elif line.startswith("DieOrigin"):
+             match = re.search(r"DieOrigin\s+([0-9.e+-]+)\s+([0-9.e+-]+);", line)
+             if match:
+                 data["DieOrigin"]["X"] = float(match.group(1))
+                 data["DieOrigin"]["Y"] = float(match.group(2))
+
+         elif line.startswith("SampleCenterLocation"):
+             match = re.search(r"SampleCenterLocation\s+([0-9.e+-]+)\s+([0-9.e+-]+);", line)
+             if match:
+                 data["SampleCenterLocation"]["X"] = float(match.group(1))
+                 data["SampleCenterLocation"]["Y"] = float(match.group(2))
+
+         elif line.startswith("DefectList"):
+             dans_defect_list = True
+             continue
+
+         elif dans_defect_list:
+             if re.match(r"^\d+\s", line):
+                 value = line.split()
+                 if len(value) >= 12:
+                     # For normal mode: check whether the next line has exactly 2 columns
+                     if i + 1 < len(lines):
+                         next_line = lines[i + 1].strip()
+                         next_values = next_line.split()
+                         if len(next_values) == 2:
+                             real_defect_id = int(next_values[0])
+                             defect = {f"val{i+1}": float(val) for i, val in enumerate(value[:10])}
+                             defect["defect_id"] = real_defect_id
+                             data["Defects"].append(defect)
+             elif line.startswith("EndOfFile") or line.startswith("}"):
+                 dans_defect_list = False
+
+     return _calculate_coordinates(data, is_kronos=False, filepath=filepath, wafer_id=None)
+
+
+ def _calculate_coordinates(data, is_kronos, filepath, wafer_id=None, kronos_wafer_id=None):
+     """
+     Calculate corrected coordinates from parsed defect data.
+
+     Args:
+         data: Dictionary containing parsed KLARF data
+         is_kronos: Boolean indicating if this is KRONOS mode
+         filepath: Path to the KLARF file (for saving the CSV)
+         wafer_id: Wafer ID for COMPLUS4T mode
+         kronos_wafer_id: Wafer ID for KRONOS mode
+
+     Returns:
+         pd.DataFrame: DataFrame with columns ["defect_id", "X", "Y", "defect_size"]
+     """
+     pitch_x = data["DiePitch"]["X"]
+     pitch_y = data["DiePitch"]["Y"]
+     Xcenter = data["SampleCenterLocation"]["X"]
+     Ycenter = data["SampleCenterLocation"]["Y"]
+
+     # Check that the required values are not None
+     if pitch_x is None or pitch_y is None or Xcenter is None or Ycenter is None:
+         print(f"Warning: Missing required values in KLARF file. pitch_x={pitch_x}, pitch_y={pitch_y}, x_center={Xcenter}, y_center={Ycenter}")
+         return pd.DataFrame(columns=["defect_id", "X", "Y", "defect_size"])
+
+     corrected_positions = []
+     for d in data["Defects"]:
+         real_defect_id = d["defect_id"]
+
+         # For KRONOS: the defect row columns are shifted relative to the normal layout
+         if is_kronos:
+             val2 = d["val4"]
+             val3 = d["val5"]
+             val4_scaled = d["val6"] * pitch_x - Xcenter
+             val5_scaled = d["val7"] * pitch_y - Ycenter
+             defect_size = d["val11"]
+         else:
+             # COMPLUS4T/normal mode: use the original columns
+             val2 = d["val2"]
+             val3 = d["val3"]
+             val4_scaled = d["val4"] * pitch_x - Xcenter
+             val5_scaled = d["val5"] * pitch_y - Ycenter
+             defect_size = d["val9"]
+
+         x_corr = round((val2 + val4_scaled) / 10000, 1)
+         y_corr = round((val3 + val5_scaled) / 10000, 1)
+
+         corrected_positions.append({
+             "defect_id": real_defect_id,
+             "X": x_corr,
+             "Y": y_corr,
+             "defect_size": defect_size
+         })
+
+     coordinates = pd.DataFrame(corrected_positions, columns=["defect_id", "X", "Y", "defect_size"])
+
+     # Print coordinates for debugging
+     print("\n" + "="*80)
+     print("COORDINATES EXTRACTED:")
+     print("="*80)
+     print(f"Total defects: {len(coordinates)}")
+     if len(coordinates) > 0:
+         print("\nCoordinates DataFrame:")
+         print(coordinates.to_string())
+     else:
+         print("No defects found.")
+     print("="*80 + "\n")
+
+     # Save the mapping to CSV
+     file_dir = os.path.dirname(filepath)
+
+     # KRONOS mode: save to the wafer subfolder
+     if is_kronos and kronos_wafer_id is not None:
+         file_dir_basename = os.path.basename(file_dir)
+         if file_dir_basename == str(kronos_wafer_id):
+             csv_folder = file_dir
+         else:
+             csv_folder = os.path.join(file_dir, str(kronos_wafer_id))
+         os.makedirs(csv_folder, exist_ok=True)
+         csv_path = os.path.join(csv_folder, "mapping.csv")
+         print(f"[KRONOS] Saving mapping to: {csv_path}")
+     # COMPLUS4T mode (wafer_id specified): save to the wafer subfolder
+     elif wafer_id is not None:
+         csv_folder = os.path.join(file_dir, str(wafer_id))
+         os.makedirs(csv_folder, exist_ok=True)
+         csv_path = os.path.join(csv_folder, "mapping.csv")
+     else:
+         # Normal mode: save in the same folder as the .001 file
+         csv_path = os.path.join(file_dir, "mapping.csv")
+
+     coordinates.to_csv(csv_path, index=False)
+
+     if is_kronos:
+         print(f"[KRONOS] Extracted {len(coordinates)} defects (filtered by detected numbers)")
+
+     return coordinates
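
To make the coordinate arithmetic above concrete, here is a worked example with invented numbers (die pitch, center location and per-defect values are illustrative, not taken from a real KLARF file; the /10000 scale factor simply mirrors the code, with no claim about physical units).

    # Invented header values
    pitch_x, pitch_y = 5000.0, 5000.0        # DiePitch
    Xcenter, Ycenter = 150000.0, 150000.0    # SampleCenterLocation

    # Invented defect row (normal/COMPLUS4T layout): reading val4/val5 as die indices
    # and val2/val3 as in-die offsets, which is what the scaling in the code suggests.
    val2, val3 = 1200.0, -800.0
    die_x, die_y = 32.0, 30.0

    x_corr = round((val2 + die_x * pitch_x - Xcenter) / 10000, 1)
    y_corr = round((val3 + die_y * pitch_y - Ycenter) / 10000, 1)
    print(x_corr, y_corr)  # 1.1 -0.1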