masster 0.5.10__tar.gz → 0.5.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- {masster-0.5.10 → masster-0.5.11}/PKG-INFO +1 -1
- {masster-0.5.10 → masster-0.5.11}/pyproject.toml +1 -1
- {masster-0.5.10 → masster-0.5.11}/src/masster/_version.py +1 -1
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/plot.py +238 -69
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/export.py +4 -6
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/h5.py +34 -1
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/helpers.py +1 -1
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/merge.py +14 -10
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/processing.py +12 -9
- {masster-0.5.10 → masster-0.5.11}/uv.lock +1 -1
- {masster-0.5.10 → masster-0.5.11}/.github/workflows/publish.yml +0 -0
- {masster-0.5.10 → masster-0.5.11}/.github/workflows/security.yml +0 -0
- {masster-0.5.10 → masster-0.5.11}/.github/workflows/test.yml +0 -0
- {masster-0.5.10 → masster-0.5.11}/.gitignore +0 -0
- {masster-0.5.10 → masster-0.5.11}/.pre-commit-config.yaml +0 -0
- {masster-0.5.10 → masster-0.5.11}/LICENSE +0 -0
- {masster-0.5.10 → masster-0.5.11}/Makefile +0 -0
- {masster-0.5.10 → masster-0.5.11}/README.md +0 -0
- {masster-0.5.10 → masster-0.5.11}/TESTING.md +0 -0
- {masster-0.5.10 → masster-0.5.11}/demo/example_batch_process.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/demo/example_sample_process.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/__init__.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/chromatogram.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/libs/aa.csv +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/libs/ccm.csv +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/libs/hilic.csv +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/libs/urine.csv +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/lib/__init__.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/lib/lib.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/logger.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/__init__.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/adducts.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/__init__.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/find_adducts_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/find_features_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/find_ms2_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/sample_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/h5.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/helpers.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/lib.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/load.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/parameters.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/processing.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/quant.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/sample.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/sample5_schema.json +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/save.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/sample/sciex.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/spectrum.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/__init__.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/analysis.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/__init__.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/align_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/export_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/fill_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/find_consensus_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/find_ms2_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/identify_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/integrate_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/merge_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/study_def.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/id.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/load.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/parameters.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/plot.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/save.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/study.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/study/study5_schema.json +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/wizard/README.md +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/wizard/__init__.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/wizard/example.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/src/masster/wizard/wizard.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/conftest.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/test_chromatogram.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/test_defaults.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/test_imports.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/test_integration.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/test_logger.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/test_parameters.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/test_sample.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/test_spectrum.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/test_study.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tests/test_version.py +0 -0
- {masster-0.5.10 → masster-0.5.11}/tox.ini +0 -0
|
@@ -1041,7 +1041,9 @@ def plot_2d(
|
|
|
1041
1041
|
height=600,
|
|
1042
1042
|
width=750,
|
|
1043
1043
|
mz_range=None,
|
|
1044
|
-
rt_range=None
|
|
1044
|
+
rt_range=None,
|
|
1045
|
+
legend=None,
|
|
1046
|
+
colorby=None
|
|
1045
1047
|
):
|
|
1046
1048
|
"""
|
|
1047
1049
|
Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
|
|
@@ -1087,6 +1089,13 @@ def plot_2d(
|
|
|
1087
1089
|
Maximum pixel size for dynamic rasterization when using dynspread.
|
|
1088
1090
|
raster_threshold (float, default 0.8):
|
|
1089
1091
|
Threshold used for the dynspread process in dynamic rasterization.
|
|
1092
|
+
legend (str, optional):
|
|
1093
|
+
Legend position for categorical feature coloring ("top_right", "bottom_left", etc.) or None.
|
|
1094
|
+
Only applies when colorby is not None and contains categorical data.
|
|
1095
|
+
colorby (str, optional):
|
|
1096
|
+
Feature property to use for coloring. If None (default), uses current green/red scheme
|
|
1097
|
+
for features with/without MS2 data. If specified and contains categorical data, applies
|
|
1098
|
+
categorical coloring with legend support (similar to plot_2d_oracle).
|
|
1090
1099
|
Behavior:
|
|
1091
1100
|
- Checks for a loaded mzML file by verifying that self.file_obj is not None.
|
|
1092
1101
|
- Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
|
|
@@ -1203,6 +1212,12 @@ def plot_2d(
|
|
|
1203
1212
|
|
|
1204
1213
|
color_1 = "forestgreen"
|
|
1205
1214
|
color_2 = "darkorange"
|
|
1215
|
+
|
|
1216
|
+
# Handle colorby parameter for feature coloring
|
|
1217
|
+
use_categorical_coloring = False
|
|
1218
|
+
feature_colors = {}
|
|
1219
|
+
categorical_groups = []
|
|
1220
|
+
|
|
1206
1221
|
if filename is not None:
|
|
1207
1222
|
dyn = False
|
|
1208
1223
|
if not filename.endswith(".html"):
|
|
@@ -1270,74 +1285,203 @@ def plot_2d(
|
|
|
1270
1285
|
feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]
|
|
1271
1286
|
# keep only iso==0, i.e. the main
|
|
1272
1287
|
feats = feats[feats["iso"] == 0]
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
(
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
(
|
|
1318
|
-
|
|
1319
|
-
(
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1288
|
+
|
|
1289
|
+
# Handle colorby parameter
|
|
1290
|
+
if colorby is not None and colorby in feats.columns:
|
|
1291
|
+
# Check if colorby data is categorical (string-like)
|
|
1292
|
+
colorby_values = feats[colorby].dropna()
|
|
1293
|
+
is_categorical = (
|
|
1294
|
+
feats[colorby].dtype in ["object", "string", "category"] or
|
|
1295
|
+
(len(colorby_values) > 0 and isinstance(colorby_values.iloc[0], str))
|
|
1296
|
+
)
|
|
1297
|
+
|
|
1298
|
+
if is_categorical:
|
|
1299
|
+
use_categorical_coloring = True
|
|
1300
|
+
# Get unique categories, sorted
|
|
1301
|
+
categorical_groups = sorted(feats[colorby].dropna().unique())
|
|
1302
|
+
|
|
1303
|
+
# Set up colors for categorical data using matplotlib colormap
|
|
1304
|
+
from matplotlib.colors import to_hex
|
|
1305
|
+
try:
|
|
1306
|
+
from matplotlib.cm import get_cmap
|
|
1307
|
+
colormap_func = get_cmap(cmap if cmap != 'iridescent' else 'tab20')
|
|
1308
|
+
feature_colors = {}
|
|
1309
|
+
for i, group in enumerate(categorical_groups):
|
|
1310
|
+
if len(categorical_groups) <= 20:
|
|
1311
|
+
# Use qualitative colors for small number of categories
|
|
1312
|
+
color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
|
|
1313
|
+
else:
|
|
1314
|
+
# Use continuous colormap for many categories
|
|
1315
|
+
color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
|
|
1316
|
+
feature_colors[group] = to_hex(color_val)
|
|
1317
|
+
except Exception as e:
|
|
1318
|
+
self.logger.warning(f"Could not set up categorical coloring: {e}, using default colors")
|
|
1319
|
+
use_categorical_coloring = False
|
|
1320
|
+
|
|
1321
|
+
if use_categorical_coloring and colorby is not None:
|
|
1322
|
+
# Create separate feature points for each category
|
|
1323
|
+
for i, group in enumerate(categorical_groups):
|
|
1324
|
+
group_features = feats[feats[colorby] == group]
|
|
1325
|
+
if len(group_features) == 0:
|
|
1326
|
+
continue
|
|
1327
|
+
|
|
1328
|
+
# Split by MS2 status
|
|
1329
|
+
group_with_ms2 = group_features[group_features["ms2_scans"].notnull()]
|
|
1330
|
+
group_without_ms2 = group_features[group_features["ms2_scans"].isnull()]
|
|
1331
|
+
|
|
1332
|
+
group_color = feature_colors.get(group, color_1)
|
|
1333
|
+
|
|
1334
|
+
if len(group_with_ms2) > 0:
|
|
1335
|
+
feature_hover = HoverTool(
|
|
1336
|
+
tooltips=[
|
|
1337
|
+
("rt", "@rt"),
|
|
1338
|
+
("m/z", "@mz{0.0000}"),
|
|
1339
|
+
("feature_uid", "@feature_uid"),
|
|
1340
|
+
("inty", "@inty"),
|
|
1341
|
+
("iso", "@iso"),
|
|
1342
|
+
("adduct", "@adduct"),
|
|
1343
|
+
("chrom_coherence", "@chrom_coherence"),
|
|
1344
|
+
("chrom_prominence_scaled", "@chrom_prominence_scaled"),
|
|
1345
|
+
(colorby, f"@{colorby}"),
|
|
1346
|
+
],
|
|
1347
|
+
)
|
|
1348
|
+
group_points_ms2 = hv.Points(
|
|
1349
|
+
group_with_ms2,
|
|
1350
|
+
kdims=["rt", "mz"],
|
|
1351
|
+
vdims=[
|
|
1352
|
+
"feature_uid",
|
|
1353
|
+
"inty",
|
|
1354
|
+
"iso",
|
|
1355
|
+
"adduct",
|
|
1356
|
+
"ms2_scans",
|
|
1357
|
+
"chrom_coherence",
|
|
1358
|
+
"chrom_prominence_scaled",
|
|
1359
|
+
colorby,
|
|
1360
|
+
],
|
|
1361
|
+
label=f"{group} (MS2)",
|
|
1362
|
+
).options(
|
|
1363
|
+
color=group_color,
|
|
1364
|
+
marker=marker_type,
|
|
1365
|
+
size=size_1,
|
|
1366
|
+
tools=[feature_hover],
|
|
1367
|
+
hooks=hooks,
|
|
1368
|
+
)
|
|
1369
|
+
if feature_points_1 is None:
|
|
1370
|
+
feature_points_1 = group_points_ms2
|
|
1371
|
+
else:
|
|
1372
|
+
feature_points_1 = feature_points_1 * group_points_ms2
|
|
1373
|
+
|
|
1374
|
+
if len(group_without_ms2) > 0:
|
|
1375
|
+
feature_hover = HoverTool(
|
|
1376
|
+
tooltips=[
|
|
1377
|
+
("rt", "@rt"),
|
|
1378
|
+
("m/z", "@mz{0.0000}"),
|
|
1379
|
+
("feature_uid", "@feature_uid"),
|
|
1380
|
+
("inty", "@inty"),
|
|
1381
|
+
("iso", "@iso"),
|
|
1382
|
+
("adduct", "@adduct"),
|
|
1383
|
+
("chrom_coherence", "@chrom_coherence"),
|
|
1384
|
+
("chrom_prominence_scaled", "@chrom_prominence_scaled"),
|
|
1385
|
+
(colorby, f"@{colorby}"),
|
|
1386
|
+
],
|
|
1387
|
+
)
|
|
1388
|
+
group_points_no_ms2 = hv.Points(
|
|
1389
|
+
group_without_ms2,
|
|
1390
|
+
kdims=["rt", "mz"],
|
|
1391
|
+
vdims=[
|
|
1392
|
+
"feature_uid",
|
|
1393
|
+
"inty",
|
|
1394
|
+
"iso",
|
|
1395
|
+
"adduct",
|
|
1396
|
+
"chrom_coherence",
|
|
1397
|
+
"chrom_prominence_scaled",
|
|
1398
|
+
colorby,
|
|
1399
|
+
],
|
|
1400
|
+
label=f"{group} (no MS2)",
|
|
1401
|
+
).options(
|
|
1402
|
+
color=group_color,
|
|
1403
|
+
marker=marker_type,
|
|
1404
|
+
size=size_2,
|
|
1405
|
+
tools=[feature_hover],
|
|
1406
|
+
hooks=hooks,
|
|
1407
|
+
)
|
|
1408
|
+
if feature_points_2 is None:
|
|
1409
|
+
feature_points_2 = group_points_no_ms2
|
|
1410
|
+
else:
|
|
1411
|
+
feature_points_2 = feature_points_2 * group_points_no_ms2
|
|
1412
|
+
else:
|
|
1413
|
+
# Use original green/red coloring scheme for MS2 presence
|
|
1414
|
+
# find features with ms2_scans not None and iso==0
|
|
1415
|
+
features_df = feats[feats["ms2_scans"].notnull()]
|
|
1416
|
+
# Create feature points with proper sizing method
|
|
1417
|
+
feature_hover_1 = HoverTool(
|
|
1418
|
+
tooltips=[
|
|
1419
|
+
("rt", "@rt"),
|
|
1420
|
+
("m/z", "@mz{0.0000}"),
|
|
1421
|
+
("feature_uid", "@feature_uid"),
|
|
1422
|
+
("inty", "@inty"),
|
|
1423
|
+
("iso", "@iso"),
|
|
1424
|
+
("adduct", "@adduct"),
|
|
1425
|
+
("chrom_coherence", "@chrom_coherence"),
|
|
1426
|
+
("chrom_prominence_scaled", "@chrom_prominence_scaled"),
|
|
1427
|
+
],
|
|
1428
|
+
)
|
|
1429
|
+
if len(features_df) > 0:
|
|
1430
|
+
feature_points_1 = hv.Points(
|
|
1431
|
+
features_df,
|
|
1432
|
+
kdims=["rt", "mz"],
|
|
1433
|
+
vdims=[
|
|
1434
|
+
"feature_uid",
|
|
1435
|
+
"inty",
|
|
1436
|
+
"iso",
|
|
1437
|
+
"adduct",
|
|
1438
|
+
"ms2_scans",
|
|
1439
|
+
"chrom_coherence",
|
|
1440
|
+
"chrom_prominence_scaled",
|
|
1441
|
+
],
|
|
1442
|
+
label="Features with MS2 data",
|
|
1443
|
+
).options(
|
|
1444
|
+
color=color_1,
|
|
1445
|
+
marker=marker_type,
|
|
1446
|
+
size=size_1,
|
|
1447
|
+
tools=[feature_hover_1],
|
|
1448
|
+
hooks=hooks,
|
|
1449
|
+
)
|
|
1450
|
+
|
|
1451
|
+
# find features without MS2 data
|
|
1452
|
+
features_df = feats[feats["ms2_scans"].isnull()]
|
|
1453
|
+
feature_hover_2 = HoverTool(
|
|
1454
|
+
tooltips=[
|
|
1455
|
+
("rt", "@rt"),
|
|
1456
|
+
("m/z", "@mz{0.0000}"),
|
|
1457
|
+
("feature_uid", "@feature_uid"),
|
|
1458
|
+
("inty", "@inty"),
|
|
1459
|
+
("iso", "@iso"),
|
|
1460
|
+
("adduct", "@adduct"),
|
|
1461
|
+
("chrom_coherence", "@chrom_coherence"),
|
|
1462
|
+
("chrom_prominence_scaled", "@chrom_prominence_scaled"),
|
|
1463
|
+
],
|
|
1464
|
+
)
|
|
1465
|
+
if len(features_df) > 0:
|
|
1466
|
+
feature_points_2 = hv.Points(
|
|
1467
|
+
features_df,
|
|
1468
|
+
kdims=["rt", "mz"],
|
|
1469
|
+
vdims=[
|
|
1470
|
+
"feature_uid",
|
|
1471
|
+
"inty",
|
|
1472
|
+
"iso",
|
|
1473
|
+
"adduct",
|
|
1474
|
+
"chrom_coherence",
|
|
1475
|
+
"chrom_prominence_scaled",
|
|
1476
|
+
],
|
|
1477
|
+
label="Features without MS2 data",
|
|
1478
|
+
).options(
|
|
1479
|
+
color="red",
|
|
1480
|
+
marker=marker_type,
|
|
1481
|
+
size=size_2,
|
|
1482
|
+
tools=[feature_hover_2],
|
|
1483
|
+
hooks=hooks,
|
|
1484
|
+
)
|
|
1341
1485
|
|
|
1342
1486
|
if show_isotopes:
|
|
1343
1487
|
# Use proper Polars filter syntax to avoid boolean indexing issues
|
|
@@ -1451,6 +1595,31 @@ def plot_2d(
|
|
|
1451
1595
|
if title is not None:
|
|
1452
1596
|
overlay = overlay.opts(title=title)
|
|
1453
1597
|
|
|
1598
|
+
# Handle legend positioning for categorical coloring
|
|
1599
|
+
if legend is not None and use_categorical_coloring and len(categorical_groups) > 1:
|
|
1600
|
+
# Map legend position parameter to HoloViews legend position
|
|
1601
|
+
legend_position_map = {
|
|
1602
|
+
"top_right": "top_right",
|
|
1603
|
+
"top_left": "top_left",
|
|
1604
|
+
"bottom_right": "bottom_right",
|
|
1605
|
+
"bottom_left": "bottom_left",
|
|
1606
|
+
"right": "right",
|
|
1607
|
+
"left": "left",
|
|
1608
|
+
"top": "top",
|
|
1609
|
+
"bottom": "bottom"
|
|
1610
|
+
}
|
|
1611
|
+
|
|
1612
|
+
hv_legend_pos = legend_position_map.get(legend, "bottom_right")
|
|
1613
|
+
|
|
1614
|
+
# Apply legend configuration to the overlay
|
|
1615
|
+
overlay = overlay.opts(
|
|
1616
|
+
legend_position=hv_legend_pos,
|
|
1617
|
+
legend_opts={'title': '', 'padding': 2, 'spacing': 2}
|
|
1618
|
+
)
|
|
1619
|
+
elif legend is None and use_categorical_coloring:
|
|
1620
|
+
# Explicitly hide legend when legend=None but categorical coloring is used
|
|
1621
|
+
overlay = overlay.opts(show_legend=False)
|
|
1622
|
+
|
|
1454
1623
|
# Handle slider functionality
|
|
1455
1624
|
if use_slider_sizing:
|
|
1456
1625
|
# For slider functionality, we need to work with the feature points directly
|
|
@@ -496,7 +496,7 @@ def export_mgf(self, **kwargs):
|
|
|
496
496
|
# Write END IONS
|
|
497
497
|
f.write("END IONS\n\n")
|
|
498
498
|
|
|
499
|
-
self.logger.
|
|
499
|
+
self.logger.success(f"Exported {len(mgf_data)} spectra to {filename}")
|
|
500
500
|
|
|
501
501
|
|
|
502
502
|
def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs) -> None:
|
|
@@ -1183,7 +1183,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
1183
1183
|
for line in mgf_lines:
|
|
1184
1184
|
f.write(line + "\n")
|
|
1185
1185
|
|
|
1186
|
-
self.logger.
|
|
1186
|
+
self.logger.success(f"Exported mzTab-M to {filename}")
|
|
1187
1187
|
|
|
1188
1188
|
|
|
1189
1189
|
def export_xlsx(self, filename: str | None = None) -> None:
|
|
@@ -1311,7 +1311,7 @@ def export_xlsx(self, filename: str | None = None) -> None:
|
|
|
1311
1311
|
f"Written worksheet '{sheet_name}' with shape {data.shape}",
|
|
1312
1312
|
)
|
|
1313
1313
|
|
|
1314
|
-
self.logger.
|
|
1314
|
+
self.logger.success(f"Study exported to {filename}")
|
|
1315
1315
|
|
|
1316
1316
|
except Exception as e:
|
|
1317
1317
|
self.logger.error(f"Error writing Excel file: {e}")
|
|
@@ -1424,8 +1424,6 @@ def export_parquet(self, filename: str | None = None) -> None:
|
|
|
1424
1424
|
|
|
1425
1425
|
# Report results
|
|
1426
1426
|
if exported_files:
|
|
1427
|
-
self.logger.
|
|
1428
|
-
for file_path in exported_files:
|
|
1429
|
-
self.logger.info(f" - {file_path}")
|
|
1427
|
+
self.logger.success(f"Study exported to {len(exported_files)} Parquet files.")
|
|
1430
1428
|
else:
|
|
1431
1429
|
self.logger.error("No Parquet files were created - no data available to export")
|
|
@@ -834,6 +834,19 @@ def _create_dataframe_with_objects(data: dict, object_columns: list) -> pl.DataF
|
|
|
834
834
|
object_data = {k: v for k, v in data.items() if k in object_columns}
|
|
835
835
|
regular_data = {k: v for k, v in data.items() if k not in object_columns}
|
|
836
836
|
|
|
837
|
+
# Final check: ensure no numpy object arrays in regular_data
|
|
838
|
+
problematic_cols = []
|
|
839
|
+
for k, v in regular_data.items():
|
|
840
|
+
if hasattr(v, 'dtype') and str(v.dtype) == 'object':
|
|
841
|
+
problematic_cols.append(k)
|
|
842
|
+
|
|
843
|
+
if problematic_cols:
|
|
844
|
+
# Move these to object_data
|
|
845
|
+
for col in problematic_cols:
|
|
846
|
+
object_data[col] = _reconstruct_object_column(regular_data[col], col)
|
|
847
|
+
del regular_data[col]
|
|
848
|
+
object_columns.append(col)
|
|
849
|
+
|
|
837
850
|
# Determine expected length from regular data or first object column
|
|
838
851
|
expected_length = None
|
|
839
852
|
if regular_data:
|
|
@@ -1185,9 +1198,29 @@ def _load_dataframe_from_group(
|
|
|
1185
1198
|
logger.debug(
|
|
1186
1199
|
f"Object column '{col}': length={len(data[col]) if data[col] is not None else 'None'}",
|
|
1187
1200
|
)
|
|
1201
|
+
|
|
1202
|
+
# Debug: check for problematic data types in all columns before DataFrame creation
|
|
1203
|
+
for col, values in data.items():
|
|
1204
|
+
if hasattr(values, 'dtype') and str(values.dtype) == 'object':
|
|
1205
|
+
logger.warning(f"Column '{col}' has numpy object dtype but is not in object_columns: {object_columns}")
|
|
1206
|
+
if col not in object_columns:
|
|
1207
|
+
object_columns.append(col)
|
|
1208
|
+
|
|
1188
1209
|
df = _create_dataframe_with_objects(data, object_columns)
|
|
1189
1210
|
else:
|
|
1190
|
-
|
|
1211
|
+
# Debug: check for problematic data types when no object columns are expected
|
|
1212
|
+
for col, values in data.items():
|
|
1213
|
+
if hasattr(values, 'dtype') and str(values.dtype) == 'object':
|
|
1214
|
+
logger.warning(f"Column '{col}' has numpy object dtype but no object_columns specified!")
|
|
1215
|
+
# Treat as object column
|
|
1216
|
+
if object_columns is None:
|
|
1217
|
+
object_columns = []
|
|
1218
|
+
object_columns.append(col)
|
|
1219
|
+
|
|
1220
|
+
if object_columns:
|
|
1221
|
+
df = _create_dataframe_with_objects(data, object_columns)
|
|
1222
|
+
else:
|
|
1223
|
+
df = pl.DataFrame(data)
|
|
1191
1224
|
|
|
1192
1225
|
# Clean null values and apply schema
|
|
1193
1226
|
df = _clean_string_nulls(df)
|
|
@@ -2663,7 +2663,7 @@ def features_filter(
|
|
|
2663
2663
|
removed_count = initial_count - final_count
|
|
2664
2664
|
|
|
2665
2665
|
self.logger.info(
|
|
2666
|
-
f"Filtered features:
|
|
2666
|
+
f"Filtered features. Kept: {final_count:,}. Removed: {removed_count:,}."
|
|
2667
2667
|
)
|
|
2668
2668
|
|
|
2669
2669
|
|
|
@@ -427,9 +427,13 @@ def merge(study, **kwargs) -> None:
|
|
|
427
427
|
# Feature maps will be generated on-demand within each merge method
|
|
428
428
|
|
|
429
429
|
study.logger.info(
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
430
|
+
f"Merging samples using {params.method}, min_samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
|
|
431
|
+
)
|
|
432
|
+
if "chunked" in params.method:
|
|
433
|
+
study.logger.info(
|
|
434
|
+
f"threads={params.threads}, chunk_size={params.chunk_size}, dechunking='{params.dechunking}'"
|
|
435
|
+
)
|
|
436
|
+
|
|
433
437
|
# Initialize
|
|
434
438
|
study.consensus_df = pl.DataFrame()
|
|
435
439
|
study.consensus_ms2 = pl.DataFrame()
|
|
@@ -751,7 +755,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
|
|
|
751
755
|
|
|
752
756
|
else:
|
|
753
757
|
# Parallel processing
|
|
754
|
-
study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
|
|
758
|
+
#study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
|
|
755
759
|
|
|
756
760
|
# Prepare chunk data for parallel processing using features_df slices
|
|
757
761
|
chunk_data_list = []
|
|
@@ -812,7 +816,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
|
|
|
812
816
|
serialized_chunk_results.append((chunk_start_idx, consensus_features))
|
|
813
817
|
completed_chunks += 1
|
|
814
818
|
n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
|
|
815
|
-
study.logger.
|
|
819
|
+
study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
|
|
816
820
|
except Exception as exc:
|
|
817
821
|
# Check if this is a BrokenProcessPool exception from Windows multiprocessing issues
|
|
818
822
|
if isinstance(exc, BrokenProcessPool) or "process pool" in str(exc).lower():
|
|
@@ -846,7 +850,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
|
|
|
846
850
|
serialized_chunk_results.append((chunk_start_idx, consensus_features))
|
|
847
851
|
completed_chunks += 1
|
|
848
852
|
n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
|
|
849
|
-
study.logger.
|
|
853
|
+
study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
|
|
850
854
|
except Exception as exc:
|
|
851
855
|
study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
|
|
852
856
|
raise exc
|
|
@@ -926,7 +930,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
|
|
|
926
930
|
|
|
927
931
|
else:
|
|
928
932
|
# Parallel processing
|
|
929
|
-
study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
|
|
933
|
+
#study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
|
|
930
934
|
|
|
931
935
|
# Prepare chunk data for parallel processing using features_df slices
|
|
932
936
|
chunk_data_list = []
|
|
@@ -987,7 +991,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
|
|
|
987
991
|
serialized_chunk_results.append((chunk_start_idx, consensus_features))
|
|
988
992
|
completed_chunks += 1
|
|
989
993
|
n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
|
|
990
|
-
study.logger.
|
|
994
|
+
study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
|
|
991
995
|
except Exception as exc:
|
|
992
996
|
# Check if this is a BrokenProcessPool exception from Windows multiprocessing issues
|
|
993
997
|
if isinstance(exc, BrokenProcessPool) or "process pool" in str(exc).lower():
|
|
@@ -1021,7 +1025,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
|
|
|
1021
1025
|
serialized_chunk_results.append((chunk_start_idx, consensus_features))
|
|
1022
1026
|
completed_chunks += 1
|
|
1023
1027
|
n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
|
|
1024
|
-
study.logger.
|
|
1028
|
+
study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
|
|
1025
1029
|
except Exception as exc:
|
|
1026
1030
|
study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
|
|
1027
1031
|
raise exc
|
|
@@ -2259,7 +2263,7 @@ def _perform_adduct_grouping(study, rt_tol, mz_tol):
|
|
|
2259
2263
|
)
|
|
2260
2264
|
|
|
2261
2265
|
# Use optimized adduct grouping
|
|
2262
|
-
study.logger.info(f"About to call adduct grouping for {len(consensus_data)} consensus features")
|
|
2266
|
+
#study.logger.info(f"About to call adduct grouping for {len(consensus_data)} consensus features")
|
|
2263
2267
|
adduct_group_list, adduct_of_list = __merge_adduct_grouping(
|
|
2264
2268
|
study, consensus_data, rt_tol/3, mz_tol
|
|
2265
2269
|
)
|
|
@@ -86,8 +86,6 @@ def align(self, **kwargs):
|
|
|
86
86
|
self.logger.error(f"Unknown alignment algorithm '{algorithm}'")
|
|
87
87
|
return
|
|
88
88
|
|
|
89
|
-
self.logger.success("Alignment completed.")
|
|
90
|
-
|
|
91
89
|
# Reset consensus data structures after alignment since RT changes invalidate consensus
|
|
92
90
|
consensus_reset_count = 0
|
|
93
91
|
if not self.consensus_df.is_empty():
|
|
@@ -681,16 +679,15 @@ def _align_pose_clustering(study_obj, params):
|
|
|
681
679
|
params_oms.setValue("pairfinder:distance_RT:exponent", 2.0)
|
|
682
680
|
|
|
683
681
|
aligner = oms.MapAlignmentAlgorithmPoseClustering()
|
|
684
|
-
study_obj.logger.info(
|
|
682
|
+
study_obj.logger.info(
|
|
683
|
+
f"Align RTs with Pose clustering: rt_tol={params.get('rt_tol')}",
|
|
684
|
+
)
|
|
685
685
|
|
|
686
686
|
# Set ref_index to feature map index with largest number of features
|
|
687
687
|
ref_index = [
|
|
688
688
|
i[0] for i in sorted(enumerate([fm.size() for fm in fmaps]), key=lambda x: x[1])
|
|
689
689
|
][-1]
|
|
690
|
-
|
|
691
|
-
f"Reference map is {study_obj.samples_df.row(ref_index, named=True)['sample_name']}",
|
|
692
|
-
)
|
|
693
|
-
|
|
690
|
+
|
|
694
691
|
aligner.setParameters(params_oms)
|
|
695
692
|
aligner.setReference(fmaps[ref_index])
|
|
696
693
|
study_obj.logger.debug(f"Parameters for alignment: {params}")
|
|
@@ -836,6 +833,12 @@ def _align_pose_clustering(study_obj, params):
|
|
|
836
833
|
# Clean up temporary feature maps to release memory
|
|
837
834
|
del fmaps
|
|
838
835
|
study_obj.logger.debug("Temporary feature maps deleted to release memory")
|
|
836
|
+
|
|
837
|
+
# Resolve reference sample UID from the reference index
|
|
838
|
+
ref_sample_uid = sample_uid_lookup.get(ref_index)
|
|
839
|
+
study_obj.logger.success(
|
|
840
|
+
f"Alignment completed. Reference sample UID {ref_sample_uid}.",
|
|
841
|
+
)
|
|
839
842
|
|
|
840
843
|
|
|
841
844
|
def _align_kd_algorithm(study_obj, params):
|
|
@@ -879,7 +882,7 @@ def _align_kd_algorithm(study_obj, params):
|
|
|
879
882
|
_raw_mp = None
|
|
880
883
|
max_points = int(_raw_mp) if _raw_mp is not None else 1000
|
|
881
884
|
study_obj.logger.info(
|
|
882
|
-
f"KD
|
|
885
|
+
f"Align RTs with KD-Tree: rt_tol={params.get('rt_tol')}, max_points={max_points}",
|
|
883
886
|
)
|
|
884
887
|
|
|
885
888
|
# Work directly with features_df instead of feature maps
|
|
@@ -1092,7 +1095,7 @@ def _align_kd_algorithm(study_obj, params):
|
|
|
1092
1095
|
)
|
|
1093
1096
|
|
|
1094
1097
|
study_obj.logger.success(
|
|
1095
|
-
f"Alignment completed. Reference sample UID {ref_sample_uid}
|
|
1098
|
+
f"Alignment completed. Reference sample UID {ref_sample_uid}.",
|
|
1096
1099
|
)
|
|
1097
1100
|
|
|
1098
1101
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|