histoseg 0.1.8.1__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- histoseg/_version.py +2 -2
- histoseg/contours/pattern1_isoline.py +286 -26
- {histoseg-0.1.8.1.dist-info → histoseg-0.1.9.dist-info}/METADATA +9 -7
- {histoseg-0.1.8.1.dist-info → histoseg-0.1.9.dist-info}/RECORD +7 -7
- {histoseg-0.1.8.1.dist-info → histoseg-0.1.9.dist-info}/WHEEL +0 -0
- {histoseg-0.1.8.1.dist-info → histoseg-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {histoseg-0.1.8.1.dist-info → histoseg-0.1.9.dist-info}/top_level.txt +0 -0
histoseg/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.8.1'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 8, 1)
|
|
31
|
+
__version__ = version = '0.1.9'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 9)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -40,14 +40,47 @@ from sklearn.neighbors import KNeighborsRegressor
|
|
|
40
40
|
PathLike = Union[str, Path]
|
|
41
41
|
|
|
42
42
|
|
|
43
|
+
def _normalize_cluster_label(x: object) -> str:
|
|
44
|
+
"""Normalize cluster label to a stable string.
|
|
45
|
+
|
|
46
|
+
Goals:
|
|
47
|
+
- Support numeric clusters: 10, 10.0, "10", "10.0" -> "10"
|
|
48
|
+
- Support text clusters: "Luminal A" stays "Luminal A" (trim whitespace)
|
|
49
|
+
- Treat NaN/None as "" (caller should drop or error)
|
|
50
|
+
"""
|
|
51
|
+
if x is None:
|
|
52
|
+
return ""
|
|
53
|
+
# pandas NA
|
|
54
|
+
try:
|
|
55
|
+
if pd.isna(x): # type: ignore[arg-type]
|
|
56
|
+
return ""
|
|
57
|
+
except Exception:
|
|
58
|
+
pass
|
|
59
|
+
|
|
60
|
+
s = str(x).strip()
|
|
61
|
+
if s == "":
|
|
62
|
+
return ""
|
|
63
|
+
# "10.0" -> "10" (only if it is exactly an integer-looking float string)
|
|
64
|
+
# This avoids mangling genuine text like "A.0".
|
|
65
|
+
if s.endswith(".0"):
|
|
66
|
+
head = s[:-2]
|
|
67
|
+
if head.isdigit():
|
|
68
|
+
return head
|
|
69
|
+
return s
|
|
70
|
+
|
|
71
|
+
|
|
43
72
|
@dataclass(frozen=True)
|
|
44
73
|
class Pattern1IsolineConfig:
|
|
45
|
-
# Required inputs
|
|
74
|
+
# Required inputs (non-defaults MUST come first for dataclasses)
|
|
46
75
|
clusters_csv: PathLike
|
|
47
76
|
cells_parquet: PathLike
|
|
48
|
-
tissue_boundary_csv: Optional[PathLike]
|
|
49
77
|
out_dir: PathLike
|
|
50
|
-
pattern1_clusters: Sequence[int]
|
|
78
|
+
pattern1_clusters: Sequence[Union[int, str]]
|
|
79
|
+
|
|
80
|
+
# Optional inputs / schema controls
|
|
81
|
+
tissue_boundary_csv: Optional[PathLike] = None
|
|
82
|
+
barcode_col: str = "Barcode"
|
|
83
|
+
cluster_col: str = "Cluster"
|
|
51
84
|
|
|
52
85
|
# Core params (defaults match the notebook)
|
|
53
86
|
grid_n: int = 1200
|
|
@@ -74,6 +107,16 @@ class Pattern1IsolineConfig:
|
|
|
74
107
|
# Contour
|
|
75
108
|
isoline_level: float = 0.5
|
|
76
109
|
|
|
110
|
+
# Labeling scheme
|
|
111
|
+
# - "p1_is_one": pattern1=1, others=0 (original behavior)
|
|
112
|
+
# - "p1_is_zero": pattern1=0, others=1 (requested alternative)
|
|
113
|
+
label_scheme: str = "p1_is_one"
|
|
114
|
+
|
|
115
|
+
# Segmentation confidence score (cophenetic blue-band mean)
|
|
116
|
+
compute_confidence_score: bool = False
|
|
117
|
+
confidence_linkage_method: str = "average"
|
|
118
|
+
confidence_show_corr: bool = False
|
|
119
|
+
|
|
77
120
|
# Output controls
|
|
78
121
|
save_params_json: bool = True
|
|
79
122
|
save_contours_npy: bool = True
|
|
@@ -92,10 +135,21 @@ class Pattern1IsolineResult:
|
|
|
92
135
|
n_target_cells: int
|
|
93
136
|
n_bg0_points: int
|
|
94
137
|
contours: List[np.ndarray]
|
|
138
|
+
label_scheme: str
|
|
139
|
+
segmentation_confidence_score: Optional[float] = None
|
|
140
|
+
segmentation_confidence_stats: Optional[Mapping[str, Union[int, float]]] = None
|
|
95
141
|
params_json: Optional[Path] = None
|
|
96
142
|
preview_png: Optional[Path] = None
|
|
97
143
|
|
|
98
144
|
|
|
145
|
+
@dataclass
class SegmentationConfidenceResult:
    """Bundle returned by the cophenetic "blue-band" confidence computation."""

    # Mean of the blue-band values; this is the headline confidence score.
    score_mean: float
    # Named summary statistics of the blue band (ints for counts, floats otherwise).
    stats: Mapping[str, Union[int, float]]
    # The blue-band matrix itself; left as None unless the caller asked for it.
    blue_band_matrix: Optional[pd.DataFrame] = None
|
|
151
|
+
|
|
152
|
+
|
|
99
153
|
def _make_jupyterlab_tree_href(path: Path) -> str:
|
|
100
154
|
"""Build an href that opens `path` in JupyterLab's file browser.
|
|
101
155
|
|
|
@@ -305,6 +359,8 @@ def sample_background_from_other_cells_plus_synth(
|
|
|
305
359
|
def align_clusters_with_cells(
|
|
306
360
|
clusters_csv: PathLike,
|
|
307
361
|
cells_parquet: PathLike,
|
|
362
|
+
barcode_col: str = "Barcode",
|
|
363
|
+
cluster_col: str = "Cluster",
|
|
308
364
|
) -> Tuple[pd.DataFrame, str, str, str]:
|
|
309
365
|
"""Align clusters.csv(Barcode/Cluster) with cells.parquet.
|
|
310
366
|
|
|
@@ -314,18 +370,25 @@ def align_clusters_with_cells(
|
|
|
314
370
|
x_col, y_col: chosen coordinate columns
|
|
315
371
|
"""
|
|
316
372
|
cl = pd.read_csv(clusters_csv)
|
|
317
|
-
if
|
|
318
|
-
raise ValueError(
|
|
373
|
+
if barcode_col not in cl.columns or cluster_col not in cl.columns:
|
|
374
|
+
raise ValueError(
|
|
375
|
+
f"clusters.csv 需要包含 {barcode_col}/{cluster_col} 列,"
|
|
376
|
+
f"当前列={list(cl.columns)}"
|
|
377
|
+
)
|
|
319
378
|
|
|
320
379
|
cl = cl.copy()
|
|
321
|
-
cl[
|
|
322
|
-
|
|
380
|
+
cl[barcode_col] = cl[barcode_col].astype(str).str.strip()
|
|
381
|
+
# IMPORTANT: keep cluster labels as string to support non-numeric clusters.
|
|
382
|
+
cl[cluster_col] = cl[cluster_col].map(_normalize_cluster_label)
|
|
383
|
+
|
|
384
|
+
# Drop rows with empty barcode or empty cluster
|
|
385
|
+
cl = cl.loc[(cl[barcode_col] != "") & (cl[cluster_col] != ""), [barcode_col, cluster_col]].copy()
|
|
323
386
|
|
|
324
387
|
cells = pd.read_parquet(cells_parquet)
|
|
325
388
|
|
|
326
389
|
# Try to infer coordinate columns
|
|
327
|
-
cand_x = [c for c in cells.columns if c.lower() in ["x", "x_centroid", "x_center", "xcoord", "x_coord"]]
|
|
328
|
-
cand_y = [c for c in cells.columns if c.lower() in ["y", "y_centroid", "y_center", "ycoord", "y_coord"]]
|
|
390
|
+
cand_x = [c for c in cells.columns if c.lower() in ["x", "x_centroid", "x_center", "xcoord", "x_coord", "x_centroid_um", "x_centroid_px", "x_centroid_microns"]]
|
|
391
|
+
cand_y = [c for c in cells.columns if c.lower() in ["y", "y_centroid", "y_center", "ycoord", "y_coord", "y_centroid_um", "y_centroid_px", "y_centroid_microns"]]
|
|
329
392
|
if not cand_x or not cand_y:
|
|
330
393
|
raise ValueError(f"cells.parquet 找不到 x/y 列。列名示例:{list(cells.columns)[:60]}")
|
|
331
394
|
|
|
@@ -349,13 +412,13 @@ def align_clusters_with_cells(
|
|
|
349
412
|
tmp = cells.copy()
|
|
350
413
|
tmp["_join_id"] = tmp[cells_id_col].astype(str)
|
|
351
414
|
cl2 = cl.copy()
|
|
352
|
-
cl2["_join_id"] = cl2[
|
|
415
|
+
cl2["_join_id"] = cl2[barcode_col].astype(str)
|
|
353
416
|
|
|
354
417
|
if strip_suffix:
|
|
355
418
|
tmp["_join_id"] = tmp["_join_id"].str.replace(r"-1$", "", regex=True)
|
|
356
419
|
cl2["_join_id"] = cl2["_join_id"].str.replace(r"-1$", "", regex=True)
|
|
357
420
|
|
|
358
|
-
m = tmp.merge(cl2[["_join_id",
|
|
421
|
+
m = tmp.merge(cl2[["_join_id", cluster_col]], on="_join_id", how="inner")
|
|
359
422
|
return m
|
|
360
423
|
|
|
361
424
|
best: Optional[pd.DataFrame] = None
|
|
@@ -372,32 +435,190 @@ def align_clusters_with_cells(
|
|
|
372
435
|
# Provide debugging hints
|
|
373
436
|
msg = [
|
|
374
437
|
"[FAIL] 无法将 clusters.csv 的 Barcode 对齐到 cells.parquet",
|
|
375
|
-
f"clusters.csv
|
|
438
|
+
f"clusters.csv {barcode_col} 示例: {cl[barcode_col].head().tolist()}",
|
|
376
439
|
f"cells.parquet 列名: {list(cells.columns)[:80]}",
|
|
377
440
|
]
|
|
378
441
|
for c in id_candidates[:6]:
|
|
379
442
|
msg.append(f"cells[{c}] 示例: {cells[c].astype(str).head().tolist()}")
|
|
380
443
|
raise RuntimeError("\n".join(msg))
|
|
381
444
|
|
|
382
|
-
id_col_used,
|
|
383
|
-
# Rename
|
|
384
|
-
out = best.rename(columns={
|
|
445
|
+
id_col_used, _stripped = best_info
|
|
446
|
+
# Rename cluster_col -> cluster (string label)
|
|
447
|
+
out = best.rename(columns={cluster_col: "cluster"})
|
|
448
|
+
out["cluster"] = out["cluster"].map(_normalize_cluster_label)
|
|
385
449
|
return out, id_col_used, x_col, y_col
|
|
386
450
|
|
|
387
451
|
|
|
452
|
+
def _validate_label_scheme(label_scheme: str) -> str:
|
|
453
|
+
s = str(label_scheme).strip().lower()
|
|
454
|
+
if s in {"p1_is_one", "pattern1_is_one", "p1=1", "one"}:
|
|
455
|
+
return "p1_is_one"
|
|
456
|
+
if s in {"p1_is_zero", "pattern1_is_zero", "p1=0", "zero", "invert", "inverted"}:
|
|
457
|
+
return "p1_is_zero"
|
|
458
|
+
raise ValueError(
|
|
459
|
+
f"Unknown label_scheme={label_scheme!r}. Supported: 'p1_is_one' or 'p1_is_zero'."
|
|
460
|
+
)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def compute_segmentation_confidence_score_from_merged(
    merged_df: pd.DataFrame,
    *,
    pattern1_clusters: Sequence[Union[int, str]],
    x_col: str,
    y_col: str,
    celltype_col: str = "cluster",
    z_col: Optional[str] = None,
    linkage_method: str = "average",
    show_corr: bool = False,
    return_blue_band_matrix: bool = False,
) -> SegmentationConfidenceResult:
    """Compute the segmentation confidence score (cophenetic blue-band mean).

    Definition:
    - Build the searcher->findee distance matrix at the cluster level.
    - Compute the cophenetic (ultrametric) matrix from hierarchical clustering.
    - Take all cophenetic values between PATTERN1 clusters (A) and
      non-PATTERN1 clusters (B).
    - Return the mean of that cross-block ("blue band") as the score.

    This function is deliberately separate from isoline generation so it can
    be called on its own. The distance and cophenetic matrices come from
    histoseg.sfplot.Searcher_Findee_Score.

    Args:
        merged_df: One row per cell with coordinates and a cluster label.
        pattern1_clusters: Labels that make up PATTERN1; normalized with
            _normalize_cluster_label before matching.
        x_col: Name of the x coordinate column.
        y_col: Name of the y coordinate column.
        celltype_col: Name of the cluster label column.
        z_col: Optional z coordinate column, forwarded to the distance helper.
        linkage_method: Forwarded as ``method`` to the cophenetic helper.
        show_corr: Forwarded to the cophenetic helper.
        return_blue_band_matrix: Attach the A-by-B matrix to the result.

    Returns:
        SegmentationConfidenceResult with the blue-band mean, summary
        statistics, and optionally the blue-band matrix.

    Raises:
        ValueError: If a required column (including ``z_col`` when given) is
            missing, fewer than two clusters are present, no PATTERN1 or no
            non-PATTERN1 cluster appears in the cophenetic matrix, or the
            blue band contains no finite value.
    """
    # z_col is part of the selection below, so validate it with the others
    # rather than letting a bare KeyError surface later.
    keep_cols = [x_col, y_col, celltype_col] + ([z_col] if z_col else [])
    missing = set(keep_cols) - set(merged_df.columns)
    if missing:
        raise ValueError(f"merged_df missing required columns: {sorted(missing)}")

    # Lazy import: keep this module importable even if sfplot deps are optional.
    from histoseg.sfplot.Searcher_Findee_Score import (
        compute_searcher_findee_distance_matrix_from_df,
        compute_cophenetic_from_distance_matrix,
    )

    # Copy only the columns that are used; the caller's frame is never mutated.
    df = merged_df[keep_cols].copy()
    df[celltype_col] = df[celltype_col].map(_normalize_cluster_label)
    df = df.loc[df[celltype_col] != ""].copy()

    if df[celltype_col].nunique() < 2:
        raise ValueError("Need at least 2 clusters to compute cophenetic score.")

    distance_matrix = compute_searcher_findee_distance_matrix_from_df(
        df,
        x_col=x_col,
        y_col=y_col,
        z_col=z_col,
        celltype_col=celltype_col,
    )

    if getattr(distance_matrix, "shape", (0, 0))[0] < 2:
        raise ValueError("distance_matrix too small; check cluster sizes.")

    row_coph, _col_coph = compute_cophenetic_from_distance_matrix(
        distance_matrix,
        method=linkage_method,
        show_corr=show_corr,
    )

    # Use the row-wise cophenetic matrix (matches the original notebook code).
    # Its labels are re-normalized so they compare equal to pattern1 labels.
    coph = row_coph.copy()
    labels = pd.Index(coph.index).map(_normalize_cluster_label)
    coph.index = labels
    coph.columns = labels

    # A = PATTERN1 labels present in the matrix, B = every other label.
    p1_labels = [_normalize_cluster_label(c) for c in pattern1_clusters]
    A = pd.Index([lab for lab in p1_labels if lab != ""], dtype=object).unique()
    A = A.intersection(coph.index)
    if len(A) == 0:
        raise ValueError("No PATTERN1 clusters found in cophenetic matrix index. Check labels.")
    B = pd.Index(coph.index).difference(A)
    if len(B) == 0:
        raise ValueError("No non-PATTERN1 clusters found; cannot compute cross-block mean.")

    blue_band_matrix = coph.loc[A, B]
    band = blue_band_matrix.to_numpy(dtype=float).ravel()
    # Keep finite values only, so the statistics agree with the error below.
    band = band[np.isfinite(band)]

    if band.size == 0:
        raise ValueError("blue band has no finite values; cannot compute score.")

    stats: Mapping[str, Union[int, float]] = {
        "n_pairs": int(band.size),
        "min": float(np.min(band)),
        "p05": float(np.quantile(band, 0.05)),
        "median": float(np.median(band)),
        "mean": float(np.mean(band)),
        "p95": float(np.quantile(band, 0.95)),
        "max": float(np.max(band)),
    }

    return SegmentationConfidenceResult(
        score_mean=float(stats["mean"]),
        stats=stats,
        blue_band_matrix=blue_band_matrix if return_blue_band_matrix else None,
    )
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def compute_segmentation_confidence_score(
    *,
    clusters_csv: PathLike,
    cells_parquet: PathLike,
    pattern1_clusters: Sequence[Union[int, str]],
    barcode_col: str = "Barcode",
    cluster_col: str = "Cluster",
    linkage_method: str = "average",
    show_corr: bool = False,
    return_blue_band_matrix: bool = False,
) -> SegmentationConfidenceResult:
    """Load the cluster and cell tables, align them, and score the result.

    A file-based front end to compute_segmentation_confidence_score_from_merged:
    the two inputs are joined by align_clusters_with_cells, and the aligned
    frame is scored using its "cluster" column with no z coordinate.
    """
    # The id column chosen during alignment is not needed for scoring.
    aligned_df, _unused_id_col, x_name, y_name = align_clusters_with_cells(
        clusters_csv=clusters_csv,
        cells_parquet=cells_parquet,
        barcode_col=barcode_col,
        cluster_col=cluster_col,
    )
    scoring_options = {
        "pattern1_clusters": pattern1_clusters,
        "x_col": x_name,
        "y_col": y_name,
        "celltype_col": "cluster",
        "z_col": None,
        "linkage_method": linkage_method,
        "show_corr": show_corr,
        "return_blue_band_matrix": return_blue_band_matrix,
    }
    return compute_segmentation_confidence_score_from_merged(aligned_df, **scoring_options)
|
|
595
|
+
|
|
596
|
+
|
|
388
597
|
def run_pattern1_isoline(cfg: Pattern1IsolineConfig) -> Pattern1IsolineResult:
|
|
389
598
|
"""Run the full pipeline and (optionally) save outputs."""
|
|
390
599
|
out_dir = Path(cfg.out_dir)
|
|
391
600
|
out_dir.mkdir(parents=True, exist_ok=True)
|
|
392
601
|
|
|
393
|
-
|
|
602
|
+
label_scheme = _validate_label_scheme(cfg.label_scheme)
|
|
603
|
+
|
|
604
|
+
merged, id_col_used, x_col, y_col = align_clusters_with_cells(
|
|
605
|
+
cfg.clusters_csv,
|
|
606
|
+
cfg.cells_parquet,
|
|
607
|
+
barcode_col=cfg.barcode_col,
|
|
608
|
+
cluster_col=cfg.cluster_col,
|
|
609
|
+
)
|
|
394
610
|
|
|
395
611
|
merged = merged.copy()
|
|
396
|
-
merged["cluster"] =
|
|
397
|
-
merged = merged.
|
|
398
|
-
|
|
612
|
+
merged["cluster"] = merged["cluster"].map(_normalize_cluster_label)
|
|
613
|
+
merged = merged.loc[merged["cluster"] != ""].copy()
|
|
614
|
+
|
|
615
|
+
# pattern1 cluster labels
|
|
616
|
+
p1 = set(_normalize_cluster_label(x) for x in cfg.pattern1_clusters)
|
|
617
|
+
p1 = {x for x in p1 if x != ""}
|
|
618
|
+
|
|
619
|
+
if len(p1) == 0:
|
|
620
|
+
raise ValueError("pattern1_clusters is empty after normalization.")
|
|
399
621
|
|
|
400
|
-
p1 = set(int(x) for x in cfg.pattern1_clusters)
|
|
401
622
|
merged["_is_p1"] = merged["cluster"].isin(p1)
|
|
402
623
|
|
|
403
624
|
p1_df = merged.loc[merged["_is_p1"], [id_col_used, x_col, y_col]].copy()
|
|
@@ -439,15 +660,25 @@ def run_pattern1_isoline(cfg: Pattern1IsolineConfig) -> Pattern1IsolineResult:
|
|
|
439
660
|
if len(bg0_xy) == 0:
|
|
440
661
|
raise RuntimeError("No bg0 points sampled. Try relaxing bg_d_min/bg_d_max, or disabling synth bg.")
|
|
441
662
|
|
|
442
|
-
# Train KNN regressor
|
|
663
|
+
# Train KNN regressor
|
|
664
|
+
# - label_scheme="p1_is_one": target=1, bg=0 (original)
|
|
665
|
+
# - label_scheme="p1_is_zero": target=0, bg=1 (inverted)
|
|
443
666
|
X_train = np.vstack([bg0_xy, target_xy])
|
|
444
|
-
|
|
667
|
+
if label_scheme == "p1_is_one":
|
|
668
|
+
y_train = np.hstack([np.zeros(len(bg0_xy)), np.ones(len(target_xy))])
|
|
669
|
+
else:
|
|
670
|
+
y_train = np.hstack([np.ones(len(bg0_xy)), np.zeros(len(target_xy))])
|
|
445
671
|
|
|
446
672
|
reg = KNeighborsRegressor(n_neighbors=cfg.knn_k, weights="distance")
|
|
447
673
|
reg.fit(X_train, y_train)
|
|
448
674
|
|
|
449
675
|
# Predict on mesh + smooth
|
|
450
|
-
xx, yy, grid = make_mesh_from_xy(
|
|
676
|
+
xx, yy, grid = make_mesh_from_xy(
|
|
677
|
+
target_xy,
|
|
678
|
+
grid_n=cfg.grid_n,
|
|
679
|
+
pad_fraction=cfg.pad_fraction,
|
|
680
|
+
margin_um=cfg.margin_um,
|
|
681
|
+
)
|
|
451
682
|
prob = reg.predict(grid).reshape(xx.shape)
|
|
452
683
|
prob_smooth = gaussian_filter(prob, sigma=cfg.smooth_sigma)
|
|
453
684
|
|
|
@@ -458,7 +689,7 @@ def run_pattern1_isoline(cfg: Pattern1IsolineConfig) -> Pattern1IsolineResult:
|
|
|
458
689
|
prob_smooth_masked = prob_smooth.copy()
|
|
459
690
|
prob_smooth_masked[~tissue_mask] = np.nan
|
|
460
691
|
|
|
461
|
-
# 0.5 isoline
|
|
692
|
+
# 0.5 isoline (or cfg.isoline_level)
|
|
462
693
|
verts_list = extract_contour_paths(xx, yy, prob_smooth_masked, level=cfg.isoline_level)
|
|
463
694
|
verts_list = filter_loops_by_cell_count(verts_list, target_xy, min_cells_inside=cfg.min_cells_inside)
|
|
464
695
|
|
|
@@ -468,6 +699,24 @@ def run_pattern1_isoline(cfg: Pattern1IsolineConfig) -> Pattern1IsolineResult:
|
|
|
468
699
|
"建议:min_cells_inside 降低(如 10->3),smooth_sigma 增大(如 5->8),knn_k 增大(如 30->50),或降低 grid_n。"
|
|
469
700
|
)
|
|
470
701
|
|
|
702
|
+
# Optional: compute segmentation confidence score (cophenetic blue-band mean)
|
|
703
|
+
conf_score: Optional[float] = None
|
|
704
|
+
conf_stats: Optional[Mapping[str, Union[int, float]]] = None
|
|
705
|
+
if cfg.compute_confidence_score:
|
|
706
|
+
conf_res = compute_segmentation_confidence_score_from_merged(
|
|
707
|
+
merged,
|
|
708
|
+
pattern1_clusters=cfg.pattern1_clusters,
|
|
709
|
+
x_col=x_col,
|
|
710
|
+
y_col=y_col,
|
|
711
|
+
celltype_col="cluster",
|
|
712
|
+
z_col=None,
|
|
713
|
+
linkage_method=cfg.confidence_linkage_method,
|
|
714
|
+
show_corr=cfg.confidence_show_corr,
|
|
715
|
+
return_blue_band_matrix=False,
|
|
716
|
+
)
|
|
717
|
+
conf_score = conf_res.score_mean
|
|
718
|
+
conf_stats = conf_res.stats
|
|
719
|
+
|
|
471
720
|
params_path: Optional[Path] = None
|
|
472
721
|
if cfg.save_params_json:
|
|
473
722
|
params = asdict(cfg)
|
|
@@ -479,6 +728,9 @@ def run_pattern1_isoline(cfg: Pattern1IsolineConfig) -> Pattern1IsolineResult:
|
|
|
479
728
|
n_target_cells=int(len(target_xy)),
|
|
480
729
|
n_bg0=int(len(bg0_xy)),
|
|
481
730
|
n_contours=int(len(verts_list)),
|
|
731
|
+
label_scheme=label_scheme,
|
|
732
|
+
segmentation_confidence_score=conf_score,
|
|
733
|
+
segmentation_confidence_stats=conf_stats,
|
|
482
734
|
)
|
|
483
735
|
)
|
|
484
736
|
params_path = out_dir / "params.json"
|
|
@@ -497,7 +749,12 @@ def run_pattern1_isoline(cfg: Pattern1IsolineConfig) -> Pattern1IsolineResult:
|
|
|
497
749
|
for v in verts_list:
|
|
498
750
|
plt.plot(v[:, 0], v[:, 1], linewidth=2)
|
|
499
751
|
plt.gca().set_aspect("equal")
|
|
500
|
-
|
|
752
|
+
|
|
753
|
+
title = f"Pattern1 segmentation | isoline={cfg.isoline_level:g} | contours={len(verts_list)} | label_scheme={label_scheme}"
|
|
754
|
+
if conf_score is not None:
|
|
755
|
+
title += f" | confidence(mean)={conf_score:.4f}"
|
|
756
|
+
plt.title(title)
|
|
757
|
+
|
|
501
758
|
plt.legend(frameon=False)
|
|
502
759
|
plt.tight_layout()
|
|
503
760
|
|
|
@@ -528,6 +785,9 @@ def run_pattern1_isoline(cfg: Pattern1IsolineConfig) -> Pattern1IsolineResult:
|
|
|
528
785
|
n_target_cells=int(len(target_xy)),
|
|
529
786
|
n_bg0_points=int(len(bg0_xy)),
|
|
530
787
|
contours=list(verts_list),
|
|
788
|
+
label_scheme=label_scheme,
|
|
789
|
+
segmentation_confidence_score=conf_score,
|
|
790
|
+
segmentation_confidence_stats=conf_stats,
|
|
531
791
|
params_json=params_path,
|
|
532
792
|
preview_png=preview_path,
|
|
533
793
|
)
|
|
@@ -539,7 +799,7 @@ def run_pattern1_isoline_from_hf(
|
|
|
539
799
|
*,
|
|
540
800
|
revision: str = "main",
|
|
541
801
|
out_dir: PathLike = "outputs/pattern1_isoline0p5_from_graphclust",
|
|
542
|
-
pattern1_clusters: Sequence[int] = (10, 23, 19, 27, 14, 20, 25, 26),
|
|
802
|
+
pattern1_clusters: Sequence[Union[int, str]] = (10, 23, 19, 27, 14, 20, 25, 26),
|
|
543
803
|
clusters_relpath: str = "analysis/clustering/gene_expression_graphclust/clusters.csv",
|
|
544
804
|
cache_dir: Optional[PathLike] = None,
|
|
545
805
|
**cfg_overrides,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: histoseg
|
|
3
|
-
Version: 0.1.8.1
|
|
3
|
+
Version: 0.1.9
|
|
4
4
|
Summary: ...
|
|
5
5
|
Author-email: Taobo Hu <taobo.hu@scilifelab.se>
|
|
6
6
|
License: Required Notice: Copyright (c) 2025 SPATHO AB.
|
|
@@ -86,6 +86,14 @@ Project-URL: Documentation, https://histoseg.readthedocs.io
|
|
|
86
86
|
Requires-Python: >=3.10
|
|
87
87
|
Description-Content-Type: text/markdown
|
|
88
88
|
License-File: LICENSE
|
|
89
|
+
Requires-Dist: numpy
|
|
90
|
+
Requires-Dist: pandas
|
|
91
|
+
Requires-Dist: pyarrow
|
|
92
|
+
Requires-Dist: scipy
|
|
93
|
+
Requires-Dist: scikit-learn
|
|
94
|
+
Requires-Dist: seaborn
|
|
95
|
+
Requires-Dist: shapely
|
|
96
|
+
Requires-Dist: matplotlib
|
|
89
97
|
Dynamic: license-file
|
|
90
98
|
|
|
91
99
|
<div align="center">
|
|
@@ -155,12 +163,6 @@ The Pattern1 isoline workflow uses:
|
|
|
155
163
|
- matplotlib
|
|
156
164
|
- a Parquet engine (**pyarrow is recommended**)
|
|
157
165
|
|
|
158
|
-
If you run into missing imports, install them explicitly:
|
|
159
|
-
|
|
160
|
-
```bash
|
|
161
|
-
pip install -U numpy pandas pyarrow scipy scikit-learn matplotlib
|
|
162
|
-
```
|
|
163
|
-
|
|
164
166
|
Optional:
|
|
165
167
|
|
|
166
168
|
- Hugging Face downloader: `pip install -U huggingface_hub`
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
histoseg/__init__.py,sha256=ssVwpgspXb7TWi1aAbWYA82FBJhGZH1g-JIvosVocTM,982
|
|
2
|
-
histoseg/_version.py,sha256=
|
|
2
|
+
histoseg/_version.py,sha256=ib8ckvf-NNDfacXd8unW0p5cf-gl57XyQvjoEMc_pvc,704
|
|
3
3
|
histoseg/contours/__init__.py,sha256=8YEy98MnGOhJg1BHkVZ_qPtTYUWPoIDAOT60F5EfHgY,32
|
|
4
|
-
histoseg/contours/pattern1_isoline.py,sha256=
|
|
4
|
+
histoseg/contours/pattern1_isoline.py,sha256=ljEE-_Y0tWCRHTNytRDgWHgft6oqcFuwRQgZPwoMHTg,28836
|
|
5
5
|
histoseg/geometry/__init__.py,sha256=5Aep5GBj2u4k2415QIECd0vK3y2eVBfQZRIskOgzb78,101
|
|
6
6
|
histoseg/geometry/tissue_boundary.py,sha256=-12lyNYI93JkNlBDT_Fv7SLAeTLKMZ9fPhqpSTowFz0,3680
|
|
7
7
|
histoseg/gui/__init__.py,sha256=SJvM-gRCwQ0X7fufPmdNO43X3wQvlnFHFmFNswt1hlw,133
|
|
@@ -9,8 +9,8 @@ histoseg/gui/gui_app.py,sha256=copFjJzMEeg2T4HV4--KHq9GG-v58xeW-eqRQ0-uJww,15259
|
|
|
9
9
|
histoseg/io/__init__.py,sha256=kH_F15ApTutYbEUGAkV9QxBv8Ho863xvWK0mRmKeOCA,27
|
|
10
10
|
histoseg/io/huggingface.py,sha256=YL_aXAXiHMMk6T6bqX-zPXFFChO83CVhTxI6mX52Z7g,2124
|
|
11
11
|
histoseg/sfplot/Searcher_Findee_Score.py,sha256=Y9UzWrqap029BOyOnFPklvK4v-2wm3r_PkmUh65DEqo,14951
|
|
12
|
-
histoseg-0.1.
|
|
13
|
-
histoseg-0.1.
|
|
14
|
-
histoseg-0.1.
|
|
15
|
-
histoseg-0.1.
|
|
16
|
-
histoseg-0.1.
|
|
12
|
+
histoseg-0.1.9.dist-info/licenses/LICENSE,sha256=z7Ztufk460DPfU3rgZEstjCQK3EbwHbD4JSmF_7y0qA,4764
|
|
13
|
+
histoseg-0.1.9.dist-info/METADATA,sha256=TunvQghh-VpDRKWo8ZXgXslZLLHh5rsZRd2mCxQym7o,13239
|
|
14
|
+
histoseg-0.1.9.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
15
|
+
histoseg-0.1.9.dist-info/top_level.txt,sha256=dvphnIeqaZamvJULm-I5qztYoGO8WLLCY85z0xrXsb0,9
|
|
16
|
+
histoseg-0.1.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|