PoLab-analyzer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: PoLab_analyzer
3
+ Version: 0.1.0
4
+ Summary: CLI tools for PoLab eve and hunchback in-situ image analysis
5
+ Requires-Dist: numpy
6
+ Requires-Dist: pandas
7
+ Requires-Dist: matplotlib
8
+ Requires-Dist: scipy
9
+ Requires-Dist: openpyxl
@@ -0,0 +1,2 @@
1
+ # polab_analyzer
2
+ This repository contains in-situ image analysis programs for eve and hunchback.
@@ -0,0 +1,19 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "PoLab_analyzer"
7
+ version = "0.1.0"
8
+ description = "CLI tools for PoLab eve and hunchback in-situ image analysis"
9
+ dependencies = [
10
+ "numpy",
11
+ "pandas",
12
+ "matplotlib",
13
+ "scipy",
14
+ "openpyxl"
15
+ ]
16
+
17
+ [project.scripts]
18
+ eve = "scripts.eve:main"
19
+ hb = "scripts.hb:main"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: PoLab_analyzer
3
+ Version: 0.1.0
4
+ Summary: CLI tools for PoLab eve and hunchback in-situ image analysis
5
+ Requires-Dist: numpy
6
+ Requires-Dist: pandas
7
+ Requires-Dist: matplotlib
8
+ Requires-Dist: scipy
9
+ Requires-Dist: openpyxl
@@ -0,0 +1,11 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/PoLab_analyzer.egg-info/PKG-INFO
4
+ src/PoLab_analyzer.egg-info/SOURCES.txt
5
+ src/PoLab_analyzer.egg-info/dependency_links.txt
6
+ src/PoLab_analyzer.egg-info/entry_points.txt
7
+ src/PoLab_analyzer.egg-info/requires.txt
8
+ src/PoLab_analyzer.egg-info/top_level.txt
9
+ src/scripts/__init__.py
10
+ src/scripts/eve.py
11
+ src/scripts/hb.py
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ eve = scripts.eve:main
3
+ hb = scripts.hb:main
@@ -0,0 +1,5 @@
1
+ numpy
2
+ pandas
3
+ matplotlib
4
+ scipy
5
+ openpyxl
File without changes
@@ -0,0 +1,787 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import os
4
+ from posixpath import abspath
5
+ import re
6
+ import sys
7
+ import warnings
8
+ import matplotlib.pyplot as plt
9
+ import numpy as np
10
+ import pandas as pd
11
+ from openpyxl import load_workbook
12
+ from openpyxl.drawing.image import Image as XLImage
13
+ from scipy.signal import find_peaks, savgol_filter
14
+
15
+ warnings.simplefilter(action="ignore", category=FutureWarning)
16
+
17
+
18
+ # --- Helper: test run without saving ---
19
def test_parameters(excel_file, lower_lim, upper_lim, distance, prominence, height):
    """Dry-run stripe detection on every ``s<N>`` sheet without saving anything.

    Each sheet whose name matches ``^s\\d+$`` is normalised, smoothed with a
    Savitzky-Golay filter (window 11, order 2) and searched for peaks using
    the supplied ``find_peaks`` parameters.  A sheet is kept only when exactly
    seven peaks lie strictly between ``lower_lim`` and ``upper_lim`` percent
    length and at least one change point lies in that same window, which is
    the acceptance rule ``process_excel_file`` applies.

    Args:
        excel_file: Path of the workbook to inspect.
        lower_lim: Lower bound (exclusive) of the percent-length window.
        upper_lim: Upper bound (exclusive) of the percent-length window.
        distance: ``distance`` argument forwarded to ``find_peaks``.
        prominence: ``prominence`` argument forwarded to ``find_peaks``.
        height: ``height`` argument forwarded to ``find_peaks``.

    Returns:
        ``(discarded_sheets, total_sheets)``.  ``(0, 0)`` is returned when the
        workbook cannot be opened at all.
    """
    discarded_sheets = 0
    total_sheets = 0

    try:
        workbook = pd.ExcelFile(excel_file)
    except Exception:
        # Unreadable workbook: keep the historical "nothing to report" result.
        return 0, 0

    # The context manager releases the file handle; parsing through the open
    # workbook avoids re-opening the file once per sheet.
    with workbook:
        sheet_names = [s for s in workbook.sheet_names if re.match(r"^s\d+$", s)]

        for sheet_name in sheet_names:
            # Count the sheet up front so a failing sheet is part of the total
            # and the discarded count can never exceed it.
            total_sheets += 1
            try:
                data = workbook.parse(sheet_name)
                length = data.iloc[:, 0]
                intensity = data.iloc[:, 1]

                inverted_intensity = 1 - intensity / intensity.max()
                percent_length = length / length.max() * 100

                smoothed = savgol_filter(inverted_intensity, 11, 2)
                diff = np.diff(smoothed, prepend=smoothed[0])
                # A negative jump in the sign of the slope marks a local maximum.
                is_change_point = np.diff(np.sign(diff), prepend=0) < 0

                peaks, _ = find_peaks(
                    smoothed, distance=distance, prominence=prominence, height=height
                )
                peak_percent_lengths = percent_length.iloc[peaks].values
                peaks_in_window = (peak_percent_lengths > lower_lim) & (
                    peak_percent_lengths < upper_lim
                )

                percent_values = percent_length.values
                has_change_point = bool(
                    np.any(
                        is_change_point
                        & (percent_values > lower_lim)
                        & (percent_values < upper_lim)
                    )
                )

                # Every in-window peak is matched to its nearest change point,
                # so the stripe count is the peak count whenever at least one
                # change point exists, and zero otherwise.
                stripe_count = int(peaks_in_window.sum()) if has_change_point else 0
                if stripe_count != 7:
                    discarded_sheets += 1

            except Exception:
                # Any sheet that cannot be analysed counts as discarded.
                discarded_sheets += 1

    return discarded_sheets, total_sheets
78
+
79
+
80
+
81
def test_parameters2(input_folder, lower_lim, upper_lim, distance, prominence, height):
    """Dry-run stripe detection on every ``.xlsx`` file in a folder.

    The first sheet of each workbook is normalised, smoothed with a
    Savitzky-Golay filter (window 11, order 2) and searched for peaks using
    the supplied ``find_peaks`` parameters.  A file is kept only when exactly
    seven peaks lie strictly between ``lower_lim`` and ``upper_lim`` percent
    length and at least one change point lies in that same window, which is
    the acceptance rule ``process_folder`` applies.  Nothing is written.

    Args:
        input_folder: Directory whose ``.xlsx`` files are inspected.
        lower_lim: Lower bound (exclusive) of the percent-length window.
        upper_lim: Upper bound (exclusive) of the percent-length window.
        distance: ``distance`` argument forwarded to ``find_peaks``.
        prominence: ``prominence`` argument forwarded to ``find_peaks``.
        height: ``height`` argument forwarded to ``find_peaks``.

    Returns:
        ``(discarded_files, total_files)``.
    """
    discarded_files = 0
    total_files = 0

    for filename in os.listdir(input_folder):
        if not filename.endswith(".xlsx"):
            continue

        # Count the file up front so a failing file is part of the total and
        # the discarded count can never exceed it.
        total_files += 1
        input_path = os.path.join(input_folder, filename)
        try:
            data = pd.read_excel(input_path)
            length = data.iloc[:, 0]
            intensity = data.iloc[:, 1]

            inverted_intensity = 1 - intensity / intensity.max()
            percent_length = length / length.max() * 100

            smoothed = savgol_filter(inverted_intensity, 11, 2)
            diff = np.diff(smoothed, prepend=smoothed[0])
            # A negative jump in the sign of the slope marks a local maximum.
            is_change_point = np.diff(np.sign(diff), prepend=0) < 0

            peaks, _ = find_peaks(
                smoothed, distance=distance, prominence=prominence, height=height
            )
            peak_percent_lengths = percent_length.iloc[peaks].values
            peaks_in_window = (peak_percent_lengths > lower_lim) & (
                peak_percent_lengths < upper_lim
            )

            percent_values = percent_length.values
            has_change_point = bool(
                np.any(
                    is_change_point
                    & (percent_values > lower_lim)
                    & (percent_values < upper_lim)
                )
            )

            # Every in-window peak is matched to its nearest change point, so
            # the stripe count is the peak count whenever at least one change
            # point exists, and zero otherwise.
            stripe_count = int(peaks_in_window.sum()) if has_change_point else 0
            if stripe_count != 7:
                discarded_files += 1

        except Exception:
            # Any file that cannot be analysed counts as discarded.
            discarded_files += 1

    return discarded_files, total_files
135
+
136
+
137
def generate_stripe_summary(output_folder):
    """Collect the stripe positions of every processed workbook into one sheet.

    Scans ``output_folder`` for ``processed_*.xlsx`` files, reads the first
    column of each file's ``Peaks`` sheet (skipping the header row) and, for
    files holding exactly seven values, adds a row ``File, Stripe-1 ..
    Stripe-7`` to ``stripe_summary.xlsx`` in the same folder.

    Args:
        output_folder: Directory containing the processed workbooks; the
            summary file is written here as well.
    """

    def _trailing_number(name):
        # Order files by the last run of digits in the name (0 if none).
        digits = re.findall(r"(\d+)", name)
        return int(digits[-1]) if digits else 0

    candidates = [
        entry
        for entry in os.listdir(output_folder)
        if entry.startswith("processed_") and entry.endswith(".xlsx")
    ]
    candidates.sort(key=_trailing_number)

    summary_rows = []
    for fname in candidates:
        workbook_path = os.path.join(output_folder, fname)
        try:
            workbook = load_workbook(workbook_path, data_only=True)
            if "Peaks" not in workbook.sheetnames:
                continue

            first_cells = (
                row[0]
                for row in workbook["Peaks"].iter_rows(min_row=2, values_only=True)
            )
            stripes = [cell for cell in first_cells if cell is not None]

            if len(stripes) != 7:
                print(f"\033[91mSkipping {fname}\033[0m: found {len(stripes)} stripes, not 7.")
                continue

            entry = {"File": fname}
            entry.update(
                {f"Stripe-{pos}": value for pos, value in enumerate(stripes, start=1)}
            )
            summary_rows.append(entry)

        except Exception as e:
            print(f"Error reading {fname}: {e}")

    # Save summary
    summary_output_path = os.path.join(output_folder, "stripe_summary.xlsx")
    pd.DataFrame(summary_rows).to_excel(summary_output_path, index=False)
    print(f"\033[92mStripe summary written to: {summary_output_path}\033[0m")
184
+
185
+
186
def run_pipeline(
    excel_file,
    lower_lim,
    upper_lim,
    test,
    distance,
    prominence,
    height,
    distances,
    prom_range,
    height_range,
):
    """Choose peak-detection parameters and process one Excel workbook.

    With ``test`` true, a grid search over ``distances`` x prominences x
    heights is run through ``test_parameters`` and the first combination with
    the fewest discarded sheets is used.  Otherwise the explicit ``distance``,
    ``prominence`` and ``height`` are used.  Either way the workbook is then
    handed to ``process_excel_file``.

    Args:
        excel_file: Path of the workbook to analyse.
        lower_lim: Lower bound of the percent-length window.
        upper_lim: Upper bound of the percent-length window.
        test: Whether to run the grid search.
        distance: Peak distance used when ``test`` is false.
        prominence: Peak prominence used when ``test`` is false.
        height: Peak height used when ``test`` is false.
        distances: Candidate distances for the grid search.
        prom_range: ``(start, end, step)`` for candidate prominences.
        height_range: ``(start, end, step)`` for candidate heights.

    Raises:
        ValueError: If grid search is requested but the grid is empty.
    """
    if test:
        # --- Grid search mode ---
        from itertools import product

        # ``end + step`` makes the nominal end point part of the range.
        prominences = np.arange(
            prom_range[0],
            prom_range[1] + prom_range[2],
            prom_range[2],
        )
        heights = np.arange(
            height_range[0],
            height_range[1] + height_range[2],
            height_range[2],
        )

        grid = list(product(distances, prominences, heights))
        if not grid:
            raise ValueError(
                "Parameter grid is empty: check --distances, --prom_range and --height_range."
            )

        best_params = None
        best_discarded = float("inf")
        total_iterations = len(grid)

        print(f"\n\033[94mRunning parameter grid search over sheets in '{os.path.basename(excel_file)}'...\033[0m")

        for current_iteration, (d, p, h) in enumerate(grid, start=1):
            discarded, _ = test_parameters(excel_file, lower_lim, upper_lim, d, p, h)
            if discarded < best_discarded:
                best_discarded = discarded
                best_params = (d, p, h)

            percent = (current_iteration / total_iterations) * 100
            sys.stdout.write(
                f"\r\033[92mProgress: [{percent:3.0f}%]\033[0m Processing parameter set {current_iteration}/{total_iterations}..."
            )
            sys.stdout.flush()

            # Only a strictly smaller count replaces the best, so nothing can
            # beat zero discarded sheets; stop re-reading the workbook.
            if best_discarded == 0:
                break

        print("\n")

        print(
            f"\033[96mFound best parameters:\033[0m distance={best_params[0]}, "
            f"prominence={best_params[1]:.2f}, height={best_params[2]:.2f} "
            f"(\033[91mdiscarded {best_discarded} sheets\033[0m)"
        )

        final_distance, final_prominence, final_height = best_params

    else:
        # --- Direct mode ---
        final_distance = distance
        final_prominence = prominence
        final_height = height

        print(
            f"\n Using user-specified parameters: "
            f"distance={final_distance}, prominence={final_prominence}, height={final_height}"
        )

    process_excel_file(
        excel_file,
        lower_lim,
        upper_lim,
        distance=final_distance,
        prominence=final_prominence,
        height=final_height,
    )
270
+
271
+
272
def run_pipeline2(input_folder, lower_lim, upper_lim, test, distance, prominence, height, distances, prom_range, height_range):
    """Choose peak-detection parameters and process a folder of workbooks.

    With ``test`` true, a grid search over ``distances`` x prominences x
    heights is run through ``test_parameters2`` and the first combination
    with the fewest discarded files is used.  Otherwise the explicit
    ``distance``, ``prominence`` and ``height`` are used.  Either way the
    folder is then handed to ``process_folder``.

    Args:
        input_folder: Directory containing the ``.xlsx`` files.
        lower_lim: Lower bound of the percent-length window.
        upper_lim: Upper bound of the percent-length window.
        test: Whether to run the grid search.
        distance: Peak distance used when ``test`` is false.
        prominence: Peak prominence used when ``test`` is false.
        height: Peak height used when ``test`` is false.
        distances: Candidate distances for the grid search.
        prom_range: ``(start, end, step)`` for candidate prominences.
        height_range: ``(start, end, step)`` for candidate heights.

    Raises:
        ValueError: If grid search is requested but the grid is empty.
    """
    if test:
        # --- Grid search mode ---
        from itertools import product

        # ``end + step`` makes the nominal end point part of the range.
        prominences = np.arange(
            prom_range[0],
            prom_range[1] + prom_range[2],
            prom_range[2],
        )
        heights = np.arange(
            height_range[0],
            height_range[1] + height_range[2],
            height_range[2],
        )

        grid = list(product(distances, prominences, heights))
        if not grid:
            raise ValueError(
                "Parameter grid is empty: check --distances, --prom_range and --height_range."
            )

        best_params = None
        best_discarded = float("inf")
        total_iterations = len(grid)

        # normpath drops a trailing separator, which would otherwise make
        # basename return an empty string.
        folder_name = os.path.basename(os.path.normpath(input_folder))
        print(f"\n\033[94mRunning parameter grid search over files in '{folder_name}'...\033[0m")

        for current_iteration, (d, p, h) in enumerate(grid, start=1):
            discarded, _ = test_parameters2(input_folder, lower_lim, upper_lim, d, p, h)
            if discarded < best_discarded:
                best_discarded = discarded
                best_params = (d, p, h)

            percent = (current_iteration / total_iterations) * 100
            sys.stdout.write(
                f"\r\033[92mProgress: [{percent:3.0f}%]\033[0m Processing parameter set {current_iteration}/{total_iterations}..."
            )
            sys.stdout.flush()

            # Only a strictly smaller count replaces the best, so nothing can
            # beat zero discarded files; stop re-reading the folder.
            if best_discarded == 0:
                break

        print("\n")

        print(
            f"\033[96mFound best parameters:\033[0m distance={best_params[0]}, "
            f"prominence={best_params[1]:.2f}, height={best_params[2]:.2f} "
            f"(\033[91mdiscarded {best_discarded} files\033[0m)"
        )

        final_distance, final_prominence, final_height = best_params

    else:
        # --- Direct mode ---
        final_distance = distance
        final_prominence = prominence
        final_height = height
        print(
            f"\n Using user-specified parameters: "
            f"distance={final_distance}, prominence={final_prominence}, height={final_height}"
        )

    process_folder(
        input_folder,
        lower_lim,
        upper_lim,
        distance=final_distance,
        prominence=final_prominence,
        height=final_height,
    )
341
+
342
+
343
def process_folder(input_folder, lower_lim, upper_lim, distance=10, prominence=0.03, height=0.1):
    """Analyse the first sheet of every ``.xlsx`` file in ``input_folder``.

    For each workbook the intensity profile is normalised, inverted and
    smoothed, peaks are detected, and ``results/processed_<filename>`` is
    written with the derived columns and an embedded plot.  Files with
    exactly seven stripes inside the ``lower_lim``/``upper_lim`` window also
    get ``Peaks``, ``Macro`` and ``Macro_600points`` sheets.  Afterwards the
    ``Macro`` sheets of all processed files are resampled to 600 points and
    combined into ``combined_macro_600points.xlsx``, and
    ``generate_stripe_summary`` is run on the results folder.

    Args:
        input_folder: Directory containing the input ``.xlsx`` files.
        lower_lim: Lower bound (exclusive) of the percent-length window.
        upper_lim: Upper bound (exclusive) of the percent-length window.
        distance: ``distance`` argument forwarded to ``find_peaks``.
        prominence: ``prominence`` argument forwarded to ``find_peaks``.
        height: ``height`` argument forwarded to ``find_peaks``.
    """
    output_folder = os.path.join(input_folder, "results")
    os.makedirs(output_folder, exist_ok=True)
    temp_plot_path = "temp_plot.png"

    for filename in os.listdir(input_folder):
        if not filename.endswith(".xlsx"):
            continue

        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, f"processed_{filename}")
        try:
            data = pd.read_excel(input_path)
            length = data.iloc[:, 0]
            intensity = data.iloc[:, 1]

            norm_intensity = intensity / intensity.max()
            inverted_intensity = 1 - norm_intensity
            norm_length = length / length.max()
            percent_length = norm_length * 100

            smoothed = savgol_filter(inverted_intensity, 11, 2)
            diff = np.diff(smoothed, prepend=smoothed[0])
            # A negative jump in the sign of the slope marks a local maximum.
            change_points = np.diff(np.sign(diff), prepend=0)
            change_point_flags = (change_points < 0).astype(int)

            # Detect peaks and keep those inside the percent-length window.
            peaks, _ = find_peaks(smoothed, distance=distance, prominence=prominence, height=height)
            peak_percent_lengths = percent_length.iloc[peaks].values
            peak_values = smoothed[peaks]
            valid_peak_mask = (peak_percent_lengths > lower_lim) & (peak_percent_lengths < upper_lim)
            peak_percent_lengths = peak_percent_lengths[valid_peak_mask]
            peak_values = peak_values[valid_peak_mask]

            data["Normalized Intensity"] = norm_intensity
            data["Inverted Intensity"] = inverted_intensity
            data["Normalized Length"] = norm_length
            data["Percent Length"] = percent_length
            data["Smoothed"] = smoothed
            data["Difference"] = diff
            data["Change Point"] = change_point_flags
            data.to_excel(output_path, index=False)

            plt.figure(figsize=(5, 3))
            plt.plot(percent_length, smoothed, label="Smoothed", color="blue")
            plt.scatter(peak_percent_lengths, peak_values, color="red", s=50, label=f"Peaks > {lower_lim}%")
            plt.title("Smoothed Inverted Intensity")
            plt.xlabel("Percent Length (%)")
            plt.ylabel("Inverted Intensity")
            plt.legend()
            plt.tight_layout()
            plt.savefig(temp_plot_path, dpi=200)
            plt.close()

            wb = load_workbook(output_path)
            ws = wb.active
            img = XLImage(temp_plot_path)
            img.width = 360
            img.height = 220
            ws.add_image(img, "K2")
            # Saved here so that skipped files still keep data plus plot.
            wb.save(output_path)

            # Add Peaks sheet
            if "Peaks" in wb.sheetnames:
                del wb["Peaks"]
            peak_ws = wb.create_sheet("Peaks")
            peak_ws.append(["Percent Length", "Inverted Intensity"])
            for x, y in zip(peak_percent_lengths, peak_values):
                peak_ws.append([x, y])

            # Match each peak with the nearest in-window change point.
            change_df = data[
                (data["Change Point"] == 1)
                & (data["Percent Length"] > lower_lim)
                & (data["Percent Length"] < upper_lim)
            ]
            closest_matches = []
            for px in peak_percent_lengths:
                if not change_df.empty:
                    closest = change_df.iloc[(change_df["Percent Length"] - px).abs().argsort()[:1]]
                    closest_val = closest["Percent Length"].values[0]
                    closest_matches.append(closest_val)

            # Discard files which do not have exactly 7 stripes.
            if len(closest_matches) != 7:
                print(
                    f"\033[91mSkipping {filename}:\033[0m found {len(closest_matches)} stripes, not exactly 7."
                )
                continue
            wb.save(output_path)

            # Add macro sheet.
            macro_df = pd.DataFrame({
                "Percent Length": percent_length,
                "Inverted Intensity": inverted_intensity,
            })
            with pd.ExcelWriter(output_path, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
                macro_df.to_excel(writer, sheet_name="Macro", index=False)

            # Add macro 600 points sheet
            indices = np.linspace(0, len(macro_df) - 1, 600, dtype=int)
            squeezed_macro_df = macro_df.iloc[indices].reset_index(drop=True)
            with pd.ExcelWriter(output_path, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
                squeezed_macro_df.to_excel(writer, sheet_name="Macro_600points", index=False)
        except Exception as e:
            print(f"Error processing {filename}: {e}")
        finally:
            # One cleanup point covers the success, skip and error paths.
            if os.path.exists(temp_plot_path):
                os.remove(temp_plot_path)

    def _last_number(name):
        # Sort by the last run of digits; names without digits sort first
        # instead of raising IndexError.
        numbers = re.findall(r"(\d+)", name)
        return int(numbers[-1]) if numbers else 0

    # Combine all Macro sheets (resampled to 600 points) into one Excel file.
    macro_percent_cols = []
    macro_intensity_cols = []
    processed_files = sorted(
        [f for f in os.listdir(output_folder) if f.startswith("processed_") and f.endswith(".xlsx")],
        key=_last_number,
    )
    # normpath drops a trailing separator, which would otherwise make
    # basename return an empty string and produce labels like "_1".
    folder_label = os.path.basename(os.path.normpath(input_folder))
    for idx, fname in enumerate(processed_files, start=1):
        fpath = os.path.join(output_folder, fname)
        try:
            df = pd.read_excel(fpath, sheet_name="Macro")
            percent_col = df.iloc[:, 0] / df.iloc[:, 0].max()
            intensity_col = df.iloc[:, 1]
            indices = np.linspace(0, len(df) - 1, 600, dtype=int)
            percent_squeezed = percent_col.iloc[indices].reset_index(drop=True)
            intensity_squeezed = intensity_col.iloc[indices].reset_index(drop=True)

            label = folder_label + f"_{idx}"
            macro_percent_cols.append(percent_squeezed.rename(label))
            macro_intensity_cols.append(intensity_squeezed.rename(label))
        except Exception:
            # Best effort: skipped files have no "Macro" sheet and are
            # left out of the combined workbook.
            pass

    # pd.concat raises on an empty list, so only combine when something
    # was actually collected.
    if macro_percent_cols and macro_intensity_cols:
        percent_df = pd.concat(macro_percent_cols, axis=1)
        intensity_df = pd.concat(macro_intensity_cols, axis=1)
        # Blank spacer column between the samples and the average.
        percent_df[""] = ""
        intensity_df[""] = ""
        percent_df["Average"] = percent_df.select_dtypes(include=[np.number]).mean(axis=1)
        intensity_df["Average"] = intensity_df.select_dtypes(include=[np.number]).mean(axis=1)
        combined_macro_path = os.path.join(output_folder, "combined_macro_600points.xlsx")
        with pd.ExcelWriter(combined_macro_path) as writer:
            percent_df.to_excel(writer, sheet_name="Percent Length", index=False)
            intensity_df.to_excel(writer, sheet_name="Inverted Intensity", index=False)

    generate_stripe_summary(output_folder)
488
+
489
+
490
+
491
def process_excel_file(
    excel_file, lower_lim, upper_lim, distance=10, prominence=0.03, height=0.1
):
    """Analyse every ``s<N>`` sheet of one workbook and write the results.

    For each sheet whose name matches ``^s\\d+$`` (in numeric order) the
    intensity profile is normalised, inverted and smoothed, peaks are
    detected, and ``results/processed_<file>_<sheet>.xlsx`` is written next to
    the input file with the derived columns and an embedded plot.  Sheets
    with exactly seven stripes inside the ``lower_lim``/``upper_lim`` window
    also get ``Peaks``, ``Macro`` and ``Macro_600points`` sheets.  Afterwards
    the ``Macro`` sheets of all processed files in the results folder are
    resampled to 600 points and combined into
    ``combined_macro_600points.xlsx``, and ``generate_stripe_summary`` is run.

    Args:
        excel_file: Path of the workbook to analyse.
        lower_lim: Lower bound (exclusive) of the percent-length window.
        upper_lim: Upper bound (exclusive) of the percent-length window.
        distance: ``distance`` argument forwarded to ``find_peaks``.
        prominence: ``prominence`` argument forwarded to ``find_peaks``.
        height: ``height`` argument forwarded to ``find_peaks``.
    """
    parent_folder = os.path.dirname(os.path.abspath(excel_file))
    base_name = os.path.splitext(os.path.basename(excel_file))[0]
    output_folder = os.path.join(parent_folder, "results")
    os.makedirs(output_folder, exist_ok=True)

    # The workbook is opened only to list its sheets; the context manager
    # releases the file handle immediately afterwards.
    with pd.ExcelFile(excel_file) as xl:
        sheet_names = sorted(
            [s for s in xl.sheet_names if re.match(r"^s\d+$", s)],
            key=lambda x: int(re.findall(r"(\d+)", x)[0]),
        )

    for sheet_name in sheet_names:
        output_path = os.path.join(output_folder, f"processed_{base_name}_{sheet_name}.xlsx")
        temp_plot_path = f"temp_plot_{sheet_name}.png"
        try:
            data = pd.read_excel(excel_file, sheet_name=sheet_name)
            length = data.iloc[:, 0]
            intensity = data.iloc[:, 1]

            norm_intensity = intensity / intensity.max()
            inverted_intensity = 1 - norm_intensity
            norm_length = length / length.max()
            percent_length = norm_length * 100

            smoothed = savgol_filter(inverted_intensity, 11, 2)
            diff = np.diff(smoothed, prepend=smoothed[0])
            # A negative jump in the sign of the slope marks a local maximum.
            change_points = np.diff(np.sign(diff), prepend=0)
            change_point_flags = (change_points < 0).astype(int)

            # Detect peaks and keep those inside the percent-length window.
            peaks, _ = find_peaks(
                smoothed, distance=distance, prominence=prominence, height=height
            )
            peak_percent_lengths = percent_length.iloc[peaks].values
            peak_values = smoothed[peaks]
            valid_peak_mask = (peak_percent_lengths > lower_lim) & (
                peak_percent_lengths < upper_lim
            )
            peak_percent_lengths = peak_percent_lengths[valid_peak_mask]
            peak_values = peak_values[valid_peak_mask]

            data["Normalized Intensity"] = norm_intensity
            data["Inverted Intensity"] = inverted_intensity
            data["Normalized Length"] = norm_length
            data["Percent Length"] = percent_length
            data["Smoothed"] = smoothed
            data["Difference"] = diff
            data["Change Point"] = change_point_flags
            data.to_excel(output_path, index=False)

            plt.figure(figsize=(5, 3))
            plt.plot(percent_length, smoothed, label="Smoothed", color="blue")
            plt.scatter(
                peak_percent_lengths,
                peak_values,
                color="red",
                s=50,
                label=f"Peaks > {lower_lim}%",
            )
            plt.title("Smoothed Inverted Intensity")
            plt.xlabel("Percent Length (%)")
            plt.ylabel("Inverted Intensity")
            plt.legend()
            plt.tight_layout()
            plt.savefig(temp_plot_path, dpi=200)
            plt.close()

            wb = load_workbook(output_path)
            ws = wb.active
            img = XLImage(temp_plot_path)
            img.width = 360
            img.height = 220
            ws.add_image(img, "K2")
            # Saved here so that skipped sheets still keep data plus plot.
            wb.save(output_path)

            # Add Peaks sheet
            if "Peaks" in wb.sheetnames:
                del wb["Peaks"]
            peak_ws = wb.create_sheet("Peaks")
            peak_ws.append(["Percent Length", "Inverted Intensity"])
            for x, y in zip(peak_percent_lengths, peak_values):
                peak_ws.append([x, y])

            # Match each peak with the nearest in-window change point.
            change_df = data[
                (data["Change Point"] == 1)
                & (data["Percent Length"] > lower_lim)
                & (data["Percent Length"] < upper_lim)
            ]
            closest_matches = []
            for px in peak_percent_lengths:
                if not change_df.empty:
                    closest = change_df.iloc[
                        (change_df["Percent Length"] - px).abs().argsort()[:1]
                    ]
                    closest_val = closest["Percent Length"].values[0]
                    closest_matches.append(closest_val)

            # Discard sheets which do not have exactly 7 stripes.
            if len(closest_matches) != 7:
                print(
                    f"\033[91mSkipping sheet {sheet_name}:\033[0m found {len(closest_matches)} stripes, not exactly 7."
                )
                continue
            wb.save(output_path)

            # Add macro sheet.
            macro_df = pd.DataFrame(
                {
                    "Percent Length": percent_length,
                    "Inverted Intensity": inverted_intensity,
                }
            )
            with pd.ExcelWriter(
                output_path, engine="openpyxl", mode="a", if_sheet_exists="replace"
            ) as writer:
                macro_df.to_excel(writer, sheet_name="Macro", index=False)

            # Add macro 600 points sheet
            indices = np.linspace(0, len(macro_df) - 1, 600, dtype=int)
            squeezed_macro_df = macro_df.iloc[indices].reset_index(drop=True)
            with pd.ExcelWriter(
                output_path, engine="openpyxl", mode="a", if_sheet_exists="replace"
            ) as writer:
                squeezed_macro_df.to_excel(
                    writer, sheet_name="Macro_600points", index=False
                )
        except Exception as e:
            print(f"Error processing sheet {sheet_name}: {e}")
        finally:
            # One cleanup point covers the success, skip and error paths.
            if os.path.exists(temp_plot_path):
                os.remove(temp_plot_path)

    def _last_number(name):
        # Sort by the last run of digits in the name (0 if there is none).
        numbers = re.findall(r"(\d+)", name)
        return int(numbers[-1]) if numbers else 0

    # Combine all Macro sheets (resampled to 600 points) into one Excel file.
    macro_percent_cols = []
    macro_intensity_cols = []
    processed_files = sorted(
        [
            f
            for f in os.listdir(output_folder)
            if f.startswith("processed_")
            and f.endswith(".xlsx")
            and "stripe_summary" not in f
            and "combined_macro" not in f
        ],
        key=_last_number,
    )

    for idx, fname in enumerate(processed_files, start=1):
        fpath = os.path.join(output_folder, fname)
        try:
            df = pd.read_excel(fpath, sheet_name="Macro")
            percent_col = df.iloc[:, 0] / df.iloc[:, 0].max()
            intensity_col = df.iloc[:, 1]
            indices = np.linspace(0, len(df) - 1, 600, dtype=int)
            percent_squeezed = percent_col.iloc[indices].reset_index(drop=True)
            intensity_squeezed = intensity_col.iloc[indices].reset_index(drop=True)

            label = f"{base_name}_{idx}"
            macro_percent_cols.append(percent_squeezed.rename(label))
            macro_intensity_cols.append(intensity_squeezed.rename(label))
        except Exception:
            # Best effort: skipped sheets have no "Macro" sheet and are left
            # out of the combined workbook.
            pass

    if macro_percent_cols and macro_intensity_cols:
        percent_df = pd.concat(macro_percent_cols, axis=1)
        intensity_df = pd.concat(macro_intensity_cols, axis=1)
        # Blank spacer column between the samples and the average.
        percent_df[""] = ""
        intensity_df[""] = ""
        percent_df["Average"] = percent_df.select_dtypes(include=[np.number]).mean(axis=1)
        intensity_df["Average"] = intensity_df.select_dtypes(include=[np.number]).mean(axis=1)

        combined_macro_path = os.path.join(output_folder, "combined_macro_600points.xlsx")
        with pd.ExcelWriter(combined_macro_path) as writer:
            percent_df.to_excel(writer, sheet_name="Percent Length", index=False)
            intensity_df.to_excel(writer, sheet_name="Inverted Intensity", index=False)

    generate_stripe_summary(output_folder)
673
+
674
+
675
def main():
    """Command-line entry point for the stripe analysis.

    Parses the arguments, then runs ``run_pipeline`` on a single workbook
    (``--file``) or ``run_pipeline2`` on a folder of workbooks (``--folder``).
    Exits with status 1 when the given file or folder does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Embryo stripe analysis: auto-tunes parameters to minimize discarded sheets from a single Excel file."
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--file", "-file", help="Path to a single Excel file containing sheets s1, s2, s3..."
    )
    group.add_argument(
        "--folder",
        "-folder",
        # Folder mode reads each workbook without a sheet name, i.e. only the
        # first sheet of every file.
        help="Path to a folder containing multiple .xlsx files; the first sheet of each file is analysed",
    )
    parser.add_argument(
        "--lower",
        "-l",
        type=float,
        required=True,
        help="Lower limit for percent length",
    )
    parser.add_argument(
        "--upper",
        "-u",
        type=float,
        required=True,
        help="Upper limit for percent length",
    )

    parser.add_argument(
        "--test",
        # Only the literal "true" (any case) enables the grid search.
        type=lambda x: str(x).lower() == "true",
        default=True,
        help="Whether to run parameter testing (true/false, default: true)",
    )
    parser.add_argument(
        "--distance",
        type=int,
        default=15,
        help="Distance for peak detection (used if --test false)",
    )
    parser.add_argument(
        "--prominence",
        type=float,
        default=0.03,
        help="Prominence for peak detection (used if --test false)",
    )
    parser.add_argument(
        "--height",
        type=float,
        default=0.1,
        help="Height for peak detection (used if --test false)",
    )

    # For test mode
    parser.add_argument(
        "--distances",
        nargs="+",
        type=int,
        default=[10, 15, 20, 25, 30, 35],
        help="List of distances to test (default: 10 15 20 25 30 35)",
    )
    parser.add_argument(
        "--prom_range",
        nargs=3,
        type=float,
        default=[0.005, 0.08, 0.05],
        # Help text now states the default that is actually applied.
        help="Prominence range: start end step (default: 0.005 0.08 0.05)",
    )
    parser.add_argument(
        "--height_range",
        nargs=3,
        type=float,
        default=[0.01, 0.5, 0.05],
        help="Height range: start end step (default: 0.01 0.5 0.05)",
    )

    args = parser.parse_args()
    lower_lim = args.lower
    upper_lim = args.upper

    pipeline_kwargs = dict(
        lower_lim=lower_lim,
        upper_lim=upper_lim,
        test=args.test,
        distance=args.distance,
        prominence=args.prominence,
        height=args.height,
        distances=args.distances,
        prom_range=args.prom_range,
        height_range=args.height_range,
    )

    if args.file:
        # --- Single-file mode: file contains sheets s1, s2, s3... ---
        if not os.path.isfile(args.file):
            print(f"\033[91mError: '{args.file}' is not a valid file.\033[0m")
            sys.exit(1)

        run_pipeline(args.file, **pipeline_kwargs)

    else:
        # --- Folder mode ---
        input_folder = args.folder

        if not os.path.isdir(input_folder):
            print(f"\033[91mError: '{input_folder}' is not a valid directory.\033[0m")
            sys.exit(1)

        # The folder path is passed once; the pipeline iterates over its files.
        # os.path.abspath follows the host platform's path rules.
        print(f"\033[94m\nStarting analysis on folder: {os.path.abspath(input_folder)}\033[0m")
        run_pipeline2(input_folder, **pipeline_kwargs)

    print("\n Completed !!!")


if __name__ == "__main__":
    main()
@@ -0,0 +1,375 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import os
4
+ import re
5
+ import sys
6
+ import warnings
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import pandas as pd
10
+ from openpyxl import load_workbook
11
+ from openpyxl.drawing.image import Image as XLImage
12
+ from scipy.signal import savgol_filter, find_peaks
13
+
14
+ warnings.simplefilter(action="ignore", category=FutureWarning)
15
+
16
def analyze_intensity_profile(x_orig, y_orig, sample_name):
    """Locate the midpoint of the intensity decrease in one profile.

    The length axis is rescaled to 0-100 and the intensity is min-max
    normalised and inverted (``1 - normalised``).  The inverted signal is
    smoothed with a cubic Savitzky-Golay filter (window of at most 51
    samples) and searched for peaks with ``prominence=0.05`` and
    ``height=0.05``.

    * Two or more peaks: the two tallest peaks bound a section; inside it the
      longest run of consecutive samples whose gradient is below ``-0.002``
      is located and the x position in the middle of that run is reported.
    * Otherwise: starting at the only peak (or at the global maximum when no
      peak was found) the sample with the most negative gradient is reported.

    Args:
        x_orig: 1-D sequence of positions along the sample.
        y_orig: 1-D sequence of intensities, same length as ``x_orig``.
        sample_name: Label used in the "Skipping ..." console messages.

    Returns:
        ``None`` when the profile cannot be analysed (empty input, constant
        length or intensity, or too few points to smooth).  Otherwise a dict
        with keys ``x_norm``, ``y_proc``, ``y_smooth``, ``peak_indices``
        (list of 1 or 2 indices into the arrays) and ``change_point_x``
        (position on the 0-100 scale, or ``None`` if no decrease was found).
    """
    x_orig = np.asarray(x_orig, dtype=float)
    y_orig = np.asarray(y_orig, dtype=float)

    # .max()/.min() raise on empty arrays, so reject them up front.
    if x_orig.size == 0 or y_orig.size == 0:
        print(f" - Skipping {sample_name}: Not enough data points to process.")
        return None

    # Normalize length to a 0-100 scale.
    x_span = x_orig.max() - x_orig.min()
    if x_span == 0:
        print(f" - Skipping {sample_name}: Length data is constant.")
        return None
    x_norm = 100 * (x_orig - x_orig.min()) / x_span

    # Normalize intensity to 0-1 and then invert it (1 - normalized_value).
    y_span = y_orig.max() - y_orig.min()
    if y_span == 0:
        print(f" - Skipping {sample_name}: Intensity data is constant.")
        return None
    y_proc = 1 - (y_orig - y_orig.min()) / y_span

    # Smooth the PROCESSED data.  The window must be odd, no longer than the
    # data (savgol_filter rejects longer windows in its default mode) and
    # strictly larger than the polynomial order.
    polyorder = 3
    n_points = len(y_proc)
    largest_odd = n_points if n_points % 2 else n_points - 1
    window_length = min(51, largest_odd)
    if window_length <= polyorder:
        print(f" - Skipping {sample_name}: Not enough data points to process.")
        return None
    y_smooth = savgol_filter(y_proc, window_length, polyorder)

    # Find peaks on the PROCESSED data.
    peaks, _ = find_peaks(y_smooth, prominence=0.05, height=0.05)

    change_point_x = None
    peak_indices = []

    if len(peaks) >= 2:
        # --- Two-Peak Method ---
        # Rank by smoothed height (not by prominence) and keep the two
        # tallest peaks, ordered left to right.
        peak_heights = y_smooth[peaks]
        top_two = np.argsort(peak_heights)[-2:]
        peak_indices = sorted(int(idx) for idx in peaks[top_two])
        first_peak_idx, second_peak_idx = peak_indices

        section = slice(first_peak_idx, second_peak_idx + 1)
        if len(y_smooth[section]) > 1:
            intensity_gradient = np.gradient(y_smooth[section])
            decreasing_indices = np.where(intensity_gradient < -0.002)[0]
            if len(decreasing_indices) > 0:
                # Split wherever consecutive indices are not adjacent to get
                # the individual runs of decreasing samples.
                breaks = np.where(np.diff(decreasing_indices) > 1)[0] + 1
                groups = np.split(decreasing_indices, breaks)
                longest_group = max(groups, key=len)
                midpoint_local_idx = (longest_group[0] + longest_group[-1]) // 2
                change_point_x = float(x_norm[section][midpoint_local_idx])
    else:
        # --- Single-Peak Method ---
        first_peak_idx = int(np.argmax(y_smooth)) if len(peaks) == 0 else int(peaks[0])
        peak_indices = [first_peak_idx]

        section = slice(first_peak_idx, len(y_smooth))
        if len(y_smooth[section]) > 1:
            intensity_gradient = np.gradient(y_smooth[section])
            steepest_decrease_local_idx = np.argmin(intensity_gradient)
            change_point_x = float(x_norm[section][steepest_decrease_local_idx])

    return {
        "x_norm": x_norm,
        "y_proc": y_proc,
        "y_smooth": y_smooth,
        "peak_indices": peak_indices,
        "change_point_x": change_point_x,
    }
84
+
85
+
86
+ def _natural_sort_key(s):
87
+ return [int(t) if t.isdigit() else t.lower() for t in re.split(r"(\d+)", s)]
88
+
89
+
90
def generate_midpoint_summary(output_folder):
    """Collect the midpoint of every processed workbook into one summary file.

    Every ``processed_*.xlsx`` file in ``output_folder`` is visited in
    natural-sort order.  Row 2 of its "Midpoint" sheet (sample name, midpoint)
    is read and appended to ``midpoint_summary.xlsx`` in the same folder.
    Workbooks without a "Midpoint" sheet, or without a second row, are
    skipped; unreadable workbooks are reported on the console and skipped.

    Args:
        output_folder: Folder holding the processed workbooks; the summary is
            written there as well.
    """
    summary_output_path = os.path.join(output_folder, "midpoint_summary.xlsx")
    columns = ["Sample", "Midpoint (% Egg Length)"]

    processed_files = sorted(
        (
            f
            for f in os.listdir(output_folder)
            if f.startswith("processed_") and f.endswith(".xlsx")
        ),
        key=_natural_sort_key,
    )

    all_rows = []
    for fname in processed_files:
        fpath = os.path.join(output_folder, fname)
        try:
            # Read-only mode streams just the requested sheet instead of
            # loading the full data sheet and the embedded plot image.
            wb = load_workbook(fpath, read_only=True, data_only=True)
            try:
                if "Midpoint" not in wb.sheetnames:
                    continue
                row = next(
                    wb["Midpoint"].iter_rows(min_row=2, max_row=2, values_only=True),
                    None,
                )
            finally:
                # Read-only workbooks keep the file open until closed.
                wb.close()
        except Exception as e:
            print(f"Error reading {fname}: {e}")
            continue

        if not row:
            continue

        # Pad/truncate so a row with an unexpected width cannot break unpacking.
        sample, midpoint = (tuple(row) + (None, None))[:2]
        all_rows.append(
            {
                "Sample": sample,
                "Midpoint (% Egg Length)": midpoint if midpoint is not None else "N/A",
            }
        )

    # Passing the columns explicitly keeps the header row even when no
    # midpoint could be collected.
    summary_df = pd.DataFrame(all_rows, columns=columns)
    summary_df.to_excel(summary_output_path, index=False)
    print(f"\033[92mMidpoint summary written to: {summary_output_path} \033[0m")
129
+
130
+
131
def process_dataset(x_orig, y_orig, sample_name, output_path):
    """Analyse one (x, y) profile and write the result workbook.

    Shared by --folder mode (one xlsx file = one sample) and --file mode
    (one sheet = one sample).  The workbook written to ``output_path``
    contains the original and processed columns, an embedded plot of the
    profile, and a "Midpoint" sheet with the sample name and midpoint.

    Args:
        x_orig: Positions along the sample.
        y_orig: Intensities matching ``x_orig``.
        sample_name: Label used in the plot title, the "Midpoint" sheet and
            console messages.
        output_path: Destination ``.xlsx`` path.  A temporary PNG is created
            next to it and removed again before returning.

    Returns:
        The midpoint on the 0-100 length scale, or ``None`` when the profile
        was skipped, no midpoint was found, or an error occurred (errors are
        printed, never raised, so a batch run continues with the next sample).
    """
    base_name = os.path.splitext(os.path.basename(output_path))[0]
    temp_plot_path = os.path.join(
        os.path.dirname(output_path), f"temp_plot_{base_name}.png"
    )

    fig = None
    try:
        # The analysis runs inside the try block so that one malformed
        # profile cannot abort the whole batch.
        result = analyze_intensity_profile(x_orig, y_orig, sample_name)
        if result is None:
            return None

        x_norm = result["x_norm"]
        y_proc = result["y_proc"]
        y_smooth = result["y_smooth"]
        peak_indices = result["peak_indices"]
        change_point_x = result["change_point_x"]

        # Save processed data.
        out_df = pd.DataFrame(
            {
                "Length (orig)": x_orig,
                "Intensity (orig)": y_orig,
                "Percent Length": x_norm,
                "Processed Intensity": y_proc,
                "Smoothed Intensity": y_smooth,
            }
        )
        out_df.to_excel(output_path, index=False)

        # Plot the results using PROCESSED data.  An explicit figure handle
        # lets the finally block close it even when a later step fails.
        fig, ax = plt.subplots(figsize=(12, 7))
        ax.plot(x_norm, y_proc, color="grey", alpha=0.6, label="Raw datapoints")
        ax.plot(x_norm, y_smooth, color="black", linewidth=2, label="Smoothed Line")

        peak_colors = ("blue", "green")
        for number, (peak_idx, color) in enumerate(zip(peak_indices, peak_colors), start=1):
            ax.axvline(
                x=x_norm[peak_idx],
                color=color,
                linestyle="--",
                label=f"Peak {number} (at {x_norm[peak_idx]:.2f})",
            )

        if change_point_x is not None:
            ax.axvline(
                x=change_point_x,
                color="red",
                linestyle="--",
                linewidth=2,
                label="Midpoint of Decrease",
            )
            y_low, y_high = ax.get_ylim()
            ax.text(
                change_point_x + 2,
                y_low + (y_high - y_low) * 0.5,
                f"Midpoint: {change_point_x:.2f}",
                color="red",
                fontsize=12,
                bbox=dict(facecolor="white", alpha=0.8, edgecolor="red"),
            )

        ax.set_title(f"Intensity Profile for: {sample_name}", fontsize=16)
        ax.set_xlabel("Embryo Length (%)", fontsize=12)
        ax.set_ylabel("Intensity", fontsize=12)
        ax.grid(True, which="both", linestyle="--", linewidth=0.5)
        ax.legend()
        fig.tight_layout()
        fig.savefig(temp_plot_path, dpi=200)

        # Embed the plot image into the processed xlsx.
        wb = load_workbook(output_path)
        ws = wb.active
        img = XLImage(temp_plot_path)
        img.width = 480
        img.height = 280
        ws.add_image(img, "H2")

        if "Midpoint" in wb.sheetnames:
            del wb["Midpoint"]
        mp_ws = wb.create_sheet("Midpoint")
        mp_ws.append(["Sample", "Midpoint (% Egg Length)"])
        mp_ws.append([sample_name, change_point_x if change_point_x is not None else "N/A"])

        # The temporary PNG must still exist while the workbook is saved.
        wb.save(output_path)

    except Exception as e:
        print(f" - Error processing {sample_name}: {e}")
        return None
    finally:
        if fig is not None:
            plt.close(fig)
        if os.path.exists(temp_plot_path):
            os.remove(temp_plot_path)

    return change_point_x
235
+
236
+
237
def process_excel_file(excel_file, output_folder):
    """Handle one workbook in --folder mode, where each xlsx file is one sample.

    The first two columns of the workbook's default sheet are passed on as
    the x and y profile.  The result is written to
    ``processed_<file stem>.xlsx`` inside ``output_folder``.

    Returns:
        Whatever ``process_dataset`` returns, or ``None`` when the workbook
        could not be read (the error is printed).
    """
    file_label = os.path.basename(excel_file)
    stem, _extension = os.path.splitext(file_label)
    target_path = os.path.join(output_folder, f"processed_{stem}.xlsx")

    try:
        frame = pd.read_excel(excel_file)
        lengths = frame.iloc[:, 0].values
        intensities = frame.iloc[:, 1].values
    except Exception as e:
        print(f" - Error reading {file_label}: {e}")
        return None

    return process_dataset(lengths, intensities, stem, target_path)
251
+
252
+
253
def process_file_sheets(excel_file, output_folder):
    """Handle --file mode, where one workbook holds sheets s1, s2, s3, ...

    Sheets whose name is "s" followed by digits (case-insensitive) are
    processed in numeric order.  The first two columns of each sheet are the
    x and y profile, and each sheet is written to
    ``processed_<file stem>_<sheet name>.xlsx`` inside ``output_folder``.
    Sheets that cannot be read are reported and skipped.

    Args:
        excel_file: Path of the multi-sheet workbook.
        output_folder: Existing folder that receives the processed workbooks.
    """
    base_name = os.path.splitext(os.path.basename(excel_file))[0]
    sheet_pattern = re.compile(r"^s(\d+)$", re.IGNORECASE)

    try:
        xl = pd.ExcelFile(excel_file)
    except Exception as e:
        print(f"\033[91mError reading '{excel_file}': {e}\033[0m")
        return

    # The workbook is opened once and closed on every exit path; each sheet
    # is parsed from this handle rather than re-opening the file per sheet.
    with xl:
        numbered = []
        for name in xl.sheet_names:
            match = sheet_pattern.match(name)
            if match:
                numbered.append((int(match.group(1)), name))
        # list.sort is stable, so equal numbers keep workbook order.
        numbered.sort(key=lambda pair: pair[0])
        sheet_names = [name for _, name in numbered]

        if not sheet_names:
            print(
                f"\033[91mNo sheets matching the 's1', 's2', ... pattern were found in "
                f"'{excel_file}'.\033[0m"
            )
            return

        total_sheets = len(sheet_names)
        print(
            f"\033[96m\nFound {total_sheets} sheets in '{os.path.basename(excel_file)}'.\033[0m "
            f"\n\033[91mStarting analysis...\033[0m"
        )

        for i, sheet_name in enumerate(sheet_names, start=1):
            percent = (i / total_sheets) * 100
            sys.stdout.write(
                f"\r\033[92mProgress: [{percent:3.0f}%]\033[0m Processing sheets {i}/{total_sheets}..."
            )
            sys.stdout.flush()

            sample_name = f"{base_name}_{sheet_name}"
            output_path = os.path.join(output_folder, f"processed_{sample_name}.xlsx")

            try:
                data = xl.parse(sheet_name)
                x_orig = data.iloc[:, 0].values
                y_orig = data.iloc[:, 1].values
            except Exception as e:
                print(f"\n - Error reading sheet {sheet_name}: {e}")
                continue

            process_dataset(x_orig, y_orig, sample_name, output_path)

    # Terminate the carriage-return progress line.
    print()
301
+
302
+
303
def main(argv=None):
    """Command-line entry point of the Hb intensity profile analyzer.

    Exactly one of ``--folder`` (one sample per .xlsx file) or ``--file``
    (one sample per sheet s1, s2, ...) must be given.  Results go to a
    ``results`` folder next to the input, followed by a midpoint summary.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv``, which is what the console script relies on.

    Exits with status 1 when the given folder/file does not exist and with
    status 0 when the folder contains no input workbooks.
    """
    parser = argparse.ArgumentParser(
        description="Hb fluorescence intensity profile batch analyzer: finds the midpoint "
        "of the intensity decrease for every sample file in a folder."
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--folder",
        "-folder",
        help="Path to a folder containing input .xlsx sample files (one sample per file)",
    )
    group.add_argument(
        "--file",
        "-file",
        help="Path to a single .xlsx file containing sheets s1, s2, s3, ... "
        "(one sample per sheet)",
    )
    args = parser.parse_args(argv)

    # Compare against None: an empty --folder value must be reported as an
    # invalid directory instead of falling into the --file branch.
    if args.folder is not None:
        input_folder = args.folder

        if not os.path.isdir(input_folder):
            print(f"\033[91mError: '{input_folder}' is not a valid directory.\033[0m")
            sys.exit(1)

        # Skip earlier outputs, summary files and the "~$" lock files Excel
        # creates while a workbook is open; accept any extension casing.
        xlsx_files = sorted(
            f
            for f in os.listdir(input_folder)
            if f.lower().endswith(".xlsx")
            and not f.startswith(("processed_", "~$"))
            and "summary" not in f.lower()
        )

        if not xlsx_files:
            print(f"\nNo .xlsx files found in '{input_folder}'. Exiting.")
            sys.exit(0)

        output_folder = os.path.join(input_folder, "results")
        os.makedirs(output_folder, exist_ok=True)

        print(f"\033[96m\nFound total {len(xlsx_files)} Excel files.\033[0m \n\033[91mStarting analysis...\033[0m")

        total_files = len(xlsx_files)
        for i, fname in enumerate(xlsx_files, start=1):
            fpath = os.path.join(input_folder, fname)
            percent = (i / total_files) * 100

            sys.stdout.write(
                f"\r\033[92mProgress: [{percent:3.0f}%]\033[0m Processing Excel Files {i}/{total_files}..."
            )
            sys.stdout.flush()

            process_excel_file(fpath, output_folder)

    else:
        input_file = args.file

        if not os.path.isfile(input_file):
            print(f"\033[91mError: '{input_file}' is not a valid file.\033[0m")
            sys.exit(1)

        output_folder = os.path.join(os.path.dirname(os.path.abspath(input_file)), "results")
        os.makedirs(output_folder, exist_ok=True)

        process_file_sheets(input_file, output_folder)

    print("\n Completed !!!")

    generate_midpoint_summary(output_folder)
373
+
374
+ if __name__ == "__main__":
375
+ main()