rhapso-0.1.92-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Rhapso/__init__.py +1 -0
- Rhapso/data_prep/__init__.py +2 -0
- Rhapso/data_prep/n5_reader.py +188 -0
- Rhapso/data_prep/s3_big_stitcher_reader.py +55 -0
- Rhapso/data_prep/xml_to_dataframe.py +215 -0
- Rhapso/detection/__init__.py +5 -0
- Rhapso/detection/advanced_refinement.py +203 -0
- Rhapso/detection/difference_of_gaussian.py +324 -0
- Rhapso/detection/image_reader.py +117 -0
- Rhapso/detection/metadata_builder.py +130 -0
- Rhapso/detection/overlap_detection.py +327 -0
- Rhapso/detection/points_validation.py +49 -0
- Rhapso/detection/save_interest_points.py +265 -0
- Rhapso/detection/view_transform_models.py +67 -0
- Rhapso/fusion/__init__.py +0 -0
- Rhapso/fusion/affine_fusion/__init__.py +2 -0
- Rhapso/fusion/affine_fusion/blend.py +289 -0
- Rhapso/fusion/affine_fusion/fusion.py +601 -0
- Rhapso/fusion/affine_fusion/geometry.py +159 -0
- Rhapso/fusion/affine_fusion/io.py +546 -0
- Rhapso/fusion/affine_fusion/script_utils.py +111 -0
- Rhapso/fusion/affine_fusion/setup.py +4 -0
- Rhapso/fusion/affine_fusion_worker.py +234 -0
- Rhapso/fusion/multiscale/__init__.py +0 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/__init__.py +19 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/__init__.py +3 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/czi_to_zarr.py +698 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/compress/zarr_writer.py +265 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/models.py +81 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/utils/__init__.py +3 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/utils/utils.py +526 -0
- Rhapso/fusion/multiscale/aind_hcr_data_transformation/zeiss_job.py +249 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/__init__.py +21 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/array_to_zarr.py +257 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/radial_correction.py +557 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/run_capsule.py +98 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/utils/__init__.py +3 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/utils/utils.py +266 -0
- Rhapso/fusion/multiscale/aind_z1_radial_correction/worker.py +89 -0
- Rhapso/fusion/multiscale_worker.py +113 -0
- Rhapso/fusion/neuroglancer_link_gen/__init__.py +8 -0
- Rhapso/fusion/neuroglancer_link_gen/dispim_link.py +235 -0
- Rhapso/fusion/neuroglancer_link_gen/exaspim_link.py +127 -0
- Rhapso/fusion/neuroglancer_link_gen/hcr_link.py +368 -0
- Rhapso/fusion/neuroglancer_link_gen/iSPIM_top.py +47 -0
- Rhapso/fusion/neuroglancer_link_gen/link_utils.py +239 -0
- Rhapso/fusion/neuroglancer_link_gen/main.py +299 -0
- Rhapso/fusion/neuroglancer_link_gen/ng_layer.py +1434 -0
- Rhapso/fusion/neuroglancer_link_gen/ng_state.py +1123 -0
- Rhapso/fusion/neuroglancer_link_gen/parsers.py +336 -0
- Rhapso/fusion/neuroglancer_link_gen/raw_link.py +116 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/__init__.py +4 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/shader_utils.py +85 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/transfer.py +43 -0
- Rhapso/fusion/neuroglancer_link_gen/utils/utils.py +303 -0
- Rhapso/fusion/neuroglancer_link_gen_worker.py +30 -0
- Rhapso/matching/__init__.py +0 -0
- Rhapso/matching/load_and_transform_points.py +458 -0
- Rhapso/matching/ransac_matching.py +544 -0
- Rhapso/matching/save_matches.py +120 -0
- Rhapso/matching/xml_parser.py +302 -0
- Rhapso/pipelines/__init__.py +0 -0
- Rhapso/pipelines/ray/__init__.py +0 -0
- Rhapso/pipelines/ray/aws/__init__.py +0 -0
- Rhapso/pipelines/ray/aws/alignment_pipeline.py +227 -0
- Rhapso/pipelines/ray/aws/config/__init__.py +0 -0
- Rhapso/pipelines/ray/evaluation.py +71 -0
- Rhapso/pipelines/ray/interest_point_detection.py +137 -0
- Rhapso/pipelines/ray/interest_point_matching.py +110 -0
- Rhapso/pipelines/ray/local/__init__.py +0 -0
- Rhapso/pipelines/ray/local/alignment_pipeline.py +167 -0
- Rhapso/pipelines/ray/matching_stats.py +104 -0
- Rhapso/pipelines/ray/param/__init__.py +0 -0
- Rhapso/pipelines/ray/solver.py +120 -0
- Rhapso/pipelines/ray/split_dataset.py +78 -0
- Rhapso/solver/__init__.py +0 -0
- Rhapso/solver/compute_tiles.py +562 -0
- Rhapso/solver/concatenate_models.py +116 -0
- Rhapso/solver/connected_graphs.py +111 -0
- Rhapso/solver/data_prep.py +181 -0
- Rhapso/solver/global_optimization.py +410 -0
- Rhapso/solver/model_and_tile_setup.py +109 -0
- Rhapso/solver/pre_align_tiles.py +323 -0
- Rhapso/solver/save_results.py +97 -0
- Rhapso/solver/view_transforms.py +75 -0
- Rhapso/solver/xml_to_dataframe_solver.py +213 -0
- Rhapso/split_dataset/__init__.py +0 -0
- Rhapso/split_dataset/compute_grid_rules.py +78 -0
- Rhapso/split_dataset/save_points.py +101 -0
- Rhapso/split_dataset/save_xml.py +377 -0
- Rhapso/split_dataset/split_images.py +537 -0
- Rhapso/split_dataset/xml_to_dataframe_split.py +219 -0
- rhapso-0.1.92.dist-info/METADATA +39 -0
- rhapso-0.1.92.dist-info/RECORD +101 -0
- rhapso-0.1.92.dist-info/WHEEL +5 -0
- rhapso-0.1.92.dist-info/licenses/LICENSE +21 -0
- rhapso-0.1.92.dist-info/top_level.txt +2 -0
- tests/__init__.py +1 -0
- tests/test_detection.py +17 -0
- tests/test_matching.py +21 -0
- tests/test_solving.py +21 -0
Rhapso/__init__.py
ADDED
@@ -0,0 +1 @@
Rhapso/data_prep/n5_reader.py
ADDED
@@ -0,0 +1,188 @@
# -----------------------------------------------------------------------------
# n5_reader.py – N5 data verification script
#
# Setup & run:
#   1. python -m venv n5Venv              # create a fresh Python virtual environment
#   2. source n5Venv/bin/activate         # activate the virtual environment
#   3. pip install .[n5_reader]           # install n5_reader dependencies from setup.py
#   4. python Rhapso/data_prep/n5_reader.py
#      # run the N5 reader for inspecting datasets
# -----------------------------------------------------------------------------

import zarr
import s3fs
import os
import numpy as np
import matplotlib.pyplot as plt
import json
from zarr.storage import FSStore

def list_files_under_prefix(node, path):
    try:
        for item in node[path]:
            new_path = f"{path}/{item}"
            if isinstance(node[new_path], zarr.hierarchy.Group):
                print(f"Group: {new_path}")
                list_files_under_prefix(node, new_path)
            else:
                print(f"Dataset: {new_path} - {node[new_path].shape}")
    except KeyError:
        print(f"No items found under the path {path}")

# Amount of interest points in view 18,0 is 1061
# Max value for view 18,0 in corr ip index is 1017

def read_n5_data(n5_path):
    import zarr, s3fs, os

    # guard missing local path
    if not n5_path.startswith("s3://") and not os.path.isdir(n5_path):
        print(f"❌ Local N5 path not found: {n5_path}")
        return

    # open the store (S3 or local N5)
    if n5_path.startswith("s3://"):
        s3 = s3fs.S3FileSystem(anon=False)
        store = s3fs.S3Map(root=n5_path, s3=s3)
    else:
        store = zarr.N5Store(n5_path)

    print(f"\n🔍 Reading N5 data at: {n5_path}")
    root = zarr.open(store, mode='r')

    def visit_fn(path, node):
        if isinstance(node, zarr.Array):
            print(f"\n📂 Dataset: {path}")
            print(f" 🔢 dtype: {node.dtype}")
            shape = node.shape
            print(f" 📏 shape: {shape}")
            if len(shape) > 1:
                print(f" 📊 count: {shape[0]} arrays of shape {shape[1:]}")
            else:
                print(f" 📊 count: {shape[0]} elements")
            print(f" 🗂 chunks: {node.chunks}")
            print(f" 🛠 compressor: {node.compressor}")

            print(" 🔎 first 5 entries:")
            sample = node[:5]
            for i, entry in enumerate(sample, start=1):
                # ensure nested array is printed clearly
                val = entry.tolist() if hasattr(entry, "tolist") else entry
                print(f"      {i}. {val}")

    root.visititems(visit_fn)

# # read_n5_data('/home/martin/Documents/Allen/Data/IP_TIFF_XML_2/interestpoints.n5')

def read_correspondences(dataset_path):
    if dataset_path.startswith("s3://"):
        store = zarr.storage.FSStore(dataset_path, mode="r")
        root = zarr.open(store, mode="r")
    else:
        store = zarr.N5Store(dataset_path)
        root = zarr.open(store, mode="r")

    if "data" not in root:
        print("Key 'data' not found in root.")
        return

    group = root["data"]
    data = group[:]
    print(f"Loaded {len(data)} entries.")

    # for i, entry in enumerate(data):
    #     print(f"{i}: {entry}")

    # print("hi")

# Big Stitcher Output
# # base_path = "/Users/seanfite/Desktop/interest_point_detection/interestpoints.n5"
# base_path = "/Users/seanfite/Desktop/ip_rigid_alignment/interestpoints.n5"
# # base_path = "/Users/seanfite/Desktop/ip_affine_alignment/interestpoints.n5"
# # base_path = "s3://rhapso-matching-test/output/interestpoints.n5"
# for tp_id in [0]:
#     for setup_id in range(20):
#         path = f"{base_path}/tpId_{tp_id}_viewSetupId_{setup_id}/beads/correspondences"
#         print(f"Reading: {path}")
#         read_correspondences(path)

def read_interest_points(full_path):
    if full_path.startswith("s3://"):
        # s3 = s3fs.S3FileSystem(anon=False)
        # store = s3fs.S3Map(root=full_path, s3=s3)
        # zarray = zarr.open_array(store, mode='r')
        # data = zarray[:]

        path = full_path.replace("s3://", "", 1)
        bucket = path.split("/")[0]
        prefix = "/".join(path.split("/")[1:])

        s3 = s3fs.S3FileSystem()
        store = FSStore(f"{bucket}/{prefix}", fs=s3, mode='r')
        root = zarr.open(store, mode="r")

        group = root["data"]
        data = group[:]
        count = len(data)
        print(count)
        print("")

    else:
        full_path = full_path.rstrip("/")  # remove trailing slash if any
        components = full_path.split("/")

        # Find index of the N5 root (assumes .n5 marks the root)
        try:
            n5_index = next(i for i, c in enumerate(components) if c.endswith(".n5"))
        except StopIteration:
            raise ValueError("No .n5 directory found in path")

        dataset_path = "/".join(components[:n5_index + 1])      # the store root
        dataset_rel_path = "/".join(components[n5_index + 1:])  # relative dataset path

        # Open N5 store and dataset
        store = zarr.N5Store(dataset_path)
        root = zarr.open(store, mode='r')

        if dataset_rel_path not in root:
            print(f"Skipping: {dataset_rel_path} (not found)")
            return

        zarray = root[dataset_rel_path + "/loc"]
        data = zarray[:]

    print("\n--- Detection Stats (Raw Rhapso Output) ---")
    print(f"Total Points: {len(data)}")

    # for dim, name in zip(range(3), ['X', 'Y', 'Z']):
    #     values = data[:, dim]
    #     print(f"{name} Range: {values.min():.2f} – {values.max():.2f} | Spread (std): {values.std():.2f}")

    # volume = np.ptp(data[:, 0]) * np.ptp(data[:, 1]) * np.ptp(data[:, 2])
    # density = len(data) / (volume / 1e9) if volume > 0 else 0
    # print(f"Estimated Density: {density:.2f} points per 1000³ volume")
    # print("-----------------------")

    # # --- 3D Plot ---
    # max_points = 1000000000000
    # sample = data if len(data) <= max_points else data[np.random.choice(len(data), max_points, replace=False)]

    # fig = plt.figure(figsize=(10, 8))
    # ax = fig.add_subplot(111, projection='3d')
    # ax.scatter(sample[:, 0], sample[:, 1], sample[:, 2], c='blue', alpha=0.5, s=1)
    # ax.set_xlabel('X')
    # ax.set_ylabel('Y')
    # ax.set_zlabel('Z')
    # ax.set_title(f"Interest Points in 3D (showing {len(sample)} points)")
    # plt.tight_layout()
    # plt.show()

# base_path = "s3://rhapso-matching-test/output/interestpoints.n5"
# base_path = "/Users/seanfite/Desktop/IP_TIFF_XML/interestpoints.n5"
base_path = "/Users/seanfite/Desktop/interestpoints.n5"
for tp_id in [0]:
    for setup_id in range(20):
        path = f"{base_path}/tpId_{tp_id}_viewSetupId_{setup_id}/beads/interestpoints"
        print(f"For view: {setup_id}")
        read_interest_points(path)
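As a quick usage sketch (not part of the package): the functions above can be pointed at any BigStitcher-style interest-point store. The paths below are placeholders, and note that importing the module as published also runs the hard-coded loop at the bottom of the file, so in practice you would edit base_path and run the script directly as described in the header comments.

# Hypothetical paths - replace with your own interest-point store.
read_n5_data("/data/interestpoints.n5")  # walk the store and print dtype/shape/chunks per dataset

for setup_id in range(2):  # inspect the first two views
    read_interest_points(
        f"/data/interestpoints.n5/tpId_0_viewSetupId_{setup_id}/beads/interestpoints"
    )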
Rhapso/data_prep/s3_big_stitcher_reader.py
ADDED
@@ -0,0 +1,55 @@
import json
import os
import s3fs

"""
Utility class for downloading BigStitcher outputs from S3 to local storage for N5 reader compatibility
"""

class S3BigStitcherReader:
    def __init__(self, s3_uri, local_directory):
        self.s3_uri = s3_uri
        self.local_directory = local_directory

    def download_n5_from_s3_to_local(self):
        """
        Recursively download an N5 dataset from S3 to a local directory.
        """
        s3 = s3fs.S3FileSystem(anon=False)
        s3_path = self.s3_uri.replace("s3://", "")
        all_keys = s3.find(s3_path, detail=True)

        for key, obj in all_keys.items():
            if obj["type"] == "file":
                rel_path = key.replace(s3_path + "/", "")
                local_file_path = os.path.join(self.local_directory, rel_path)
                os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
                s3.get(key, local_file_path)

                # Check for the specific interestpoints path
                if rel_path.endswith("beads/interestpoints/attributes.json") and "interestpoints.n5" in rel_path:
                    # Construct the path to the attributes file
                    attributes_path = os.path.join(os.path.dirname(local_file_path), "attributes.json")
                    attributes_data = {
                        "pointcloud": "1.0.0",
                        "type": "list",
                        "list version": "1.0.0"
                    }

                    with open(attributes_path, "w") as f:
                        json.dump(attributes_data, f, indent=2)

    def run(self):
        self.download_n5_from_s3_to_local()

        s3_path = self.s3_uri.replace("s3://", "")
        full_local_path = os.path.join(self.local_directory, s3_path)

        # Final paths
        xml_input_path = os.path.join(full_local_path, "bigstitcher_ip.xml")
        n5_output_path = os.path.join(full_local_path, "interestpoints.n5")

        print("XML Input Path:", xml_input_path)
        print("N5 Output Path:", n5_output_path)

        return xml_input_path, n5_output_path
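A minimal usage sketch for the class above, assuming valid AWS credentials (the reader opens S3 with anon=False); the bucket URI and local directory are placeholder values.

# Hypothetical locations - substitute a real BigStitcher output prefix and a writable local directory.
reader = S3BigStitcherReader(
    s3_uri="s3://my-bucket/bigstitcher-output",
    local_directory="/tmp/bigstitcher_local",
)
xml_input_path, n5_output_path = reader.run()  # downloads the tree, then returns the expected local paths
print(xml_input_path)
print(n5_output_path)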
Rhapso/data_prep/xml_to_dataframe.py
ADDED
@@ -0,0 +1,215 @@
import pandas as pd
import xml.etree.ElementTree as ET

# This component receives an XML file containing TIFF or Zarr image metadata and converts
# it into several DataFrames

class XMLToDataFrame:
    def __init__(self, xml_file):
        self.xml_content = xml_file

    def parse_image_loader_zarr(self, root):
        """
        Parses image loader configuration from a Zarr file's XML structure and constructs a DataFrame containing the
        metadata for each image group.
        """
        image_loader_data = []

        for il in root.findall(".//ImageLoader/zgroups/zgroup"):
            view_setup = il.get("setup")
            timepoint = il.get("timepoint")
            file_path = il.find("path").text if il.find("path") is not None else None
            channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0]

            image_loader_data.append(
                {
                    "view_setup": view_setup,
                    "timepoint": timepoint,
                    "series": 1,
                    "channel": channel,
                    "file_path": file_path,
                }
            )

        return pd.DataFrame(image_loader_data)

    def parse_image_loader_tiff(self, root):
        """
        Parses image loader configuration from a TIFF file's XML structure and constructs a DataFrame containing
        metadata for each image group.
        """
        image_loader_data = []

        # Ensure that file mappings are present in the XML
        if not root.findall(".//ImageLoader/files/FileMapping"):
            raise Exception("There are no files in this XML")

        # Check for required labels in the XML
        if not self.check_labels(root):
            raise Exception("Required labels do not exist")

        # Validate that the lengths of view setups, registrations, and tiles match
        if not self.check_length(root):
            raise Exception(
                "The amount of view setups, view registrations, and tiles do not match"
            )

        # Iterate over each file mapping in the XML
        for fm in root.findall(".//ImageLoader/files/FileMapping"):
            view_setup = fm.get("view_setup")
            timepoint = fm.get("timepoint")
            series = fm.get("series")
            channel = fm.get("channel")
            file_path = fm.find("file").text if fm.find("file") is not None else None

            image_loader_data.append(
                {
                    "view_setup": view_setup,
                    "timepoint": timepoint,
                    "series": series,
                    "channel": channel,
                    "file_path": file_path,
                }
            )

        # Convert the list to a DataFrame and return
        return pd.DataFrame(image_loader_data)

    def parse_image_loader_split_zarr(self):
        pass

    def route_image_loader(self, root):
        """
        Directs the XML parsing process based on the image loader format specified in the XML.
        """
        format_node = root.find(".//ImageLoader")
        format_type = format_node.get("format")

        if "filemap" in format_type:
            return self.parse_image_loader_tiff(root)
        else:
            return self.parse_image_loader_zarr(root)

    def parse_view_setups(self, root):
        """
        Parses the view setups from an XML structure and constructs a DataFrame containing metadata for each view setup.
        """
        viewsetups_data = []

        for vs in root.findall(".//ViewSetup"):
            id_ = vs.find("id").text
            # name = vs.find("name").text
            name = vs.findtext("name")
            size = vs.find("size").text
            voxel_unit = vs.find(".//voxelSize/unit").text
            voxel_size = " ".join(vs.find(".//voxelSize/size").text.split())
            attributes = {attr.tag: attr.text for attr in vs.find("attributes")}
            viewsetups_data.append(
                {
                    "id": id_,
                    "name": name,
                    "size": size,
                    "voxel_unit": voxel_unit,
                    "voxel_size": voxel_size,
                    **attributes,
                }
            )
        return pd.DataFrame(viewsetups_data)

    def parse_view_registrations(self, root):
        """
        Parses view registrations from an XML structure and constructs a DataFrame containing registration metadata
        for each view.
        """
        viewregistrations_data = []
        for vr in root.findall(".//ViewRegistration"):
            timepoint = vr.get("timepoint")
            setup = vr.get("setup")

            for vt in vr.findall(".//ViewTransform"):
                affine_text = (
                    vt.find("affine").text.replace("\n", "").replace(" ", ", ")
                )
                viewregistrations_data.append(
                    {
                        "timepoint": timepoint,
                        "setup": setup,
                        "type": vt.get("type"),
                        "name": vt.find("Name").text.strip(),
                        "affine": affine_text,
                    }
                )
        return pd.DataFrame(viewregistrations_data)

    def parse_view_interest_points(self, root):
        """
        Parses interest points data from an XML structure and constructs a DataFrame containing metadata and paths
        for each set of interest points.
        """
        view_interest_points_data = []

        # if self.key == "detection":
        #     if len(root.findall(".//ViewInterestPointsFile")) != 0:
        #         raise Exception("There should be no interest points in this file yet.")

        for vip in root.findall(".//ViewInterestPointsFile"):
            timepoint = vip.get("timepoint")
            setup = vip.get("setup")
            label = vip.get("label")
            params = vip.get("params")
            path = vip.text.strip() if vip.text is not None else None
            view_interest_points_data.append(
                {
                    "timepoint": timepoint,
                    "setup": setup,
                    "label": label,
                    "params": params,
                    "path": path,
                }
            )
        return pd.DataFrame(view_interest_points_data)

    def check_labels(self, root):
        """
        Verifies the presence of required XML labels including bounding boxes, point spread functions,
        stitching results, and intensity adjustments.
        """
        labels = True
        if root.find(".//BoundingBoxes") is None:
            labels = False
        if root.find(".//PointSpreadFunctions") is None:
            labels = False
        if root.find(".//StitchingResults") is None:
            labels = False
        if root.find(".//IntensityAdjustments") is None:
            labels = False

        return labels

    def check_length(self, root):
        """
        Validates that the count of elements within the XML structure aligns with expected relationships
        between file mappings, view setups, and view registrations.
        """
        length = True
        if len(root.findall(".//ImageLoader/files/FileMapping")) != len(root.findall(".//ViewRegistration")) or \
           len(root.findall(".//ViewSetup")) != len(root.findall(".//ViewRegistration")) * (1 / 2):
            length = False  # Set to False if the relationships do not match expected counts
        return length

    def run(self):
        """
        Executes the entry point of the script.
        """
        root = ET.fromstring(self.xml_content)
        image_loader_df = self.route_image_loader(root)
        view_setups_df = self.parse_view_setups(root)
        view_registrations_df = self.parse_view_registrations(root)
        view_interest_points_df = self.parse_view_interest_points(root)

        return {
            "image_loader": image_loader_df,
            "view_setups": view_setups_df,
            "view_registrations": view_registrations_df,
            "view_interest_points": view_interest_points_df,
        }
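A minimal usage sketch for XMLToDataFrame; dataset.xml is a placeholder for a BigStitcher-style dataset XML, and the constructor is handed the XML text itself since run() parses it with ET.fromstring.

# Hypothetical file name; any dataset XML with an <ImageLoader> section in one of the supported formats should work.
with open("dataset.xml") as f:
    xml_text = f.read()

dataframes = XMLToDataFrame(xml_text).run()
print(dataframes["image_loader"].head())
print(dataframes["view_setups"][["id", "voxel_size"]].head())
print(dataframes["view_registrations"][["setup", "type", "affine"]].head())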
Rhapso/detection/advanced_refinement.py
ADDED
@@ -0,0 +1,203 @@
from collections import OrderedDict
from scipy.spatial import cKDTree
import numpy as np
from collections import defaultdict, OrderedDict

"""
Advanced Refinement is the final pass over detected interest points: it groups chunks by view/overlap interval, keeps only the
strongest points scaled to interval size, then merges and de-duplicates nearby points with a KD-tree.
"""

class AdvancedRefinement:
    def __init__(self, interest_points, combine_distance, dataframes, overlapping_area, max_interval_size, max_spots):
        self.interest_points = interest_points
        self.consolidated_data = {}
        self.combine_distance = combine_distance
        self.image_loader_df = dataframes['image_loader']
        self.overlapping_area = overlapping_area
        self.max_interval_size = max_interval_size
        self.max_spots = max_spots
        self.overlapping_only = True
        self.sorted_view_ids = None
        self.result = interest_points
        self.store_intensities = False
        self._max_spots = 0
        self.max_spots_per_overlap = False
        self.to_process = interest_points
        self.interest_points_per_view_id = {}
        self.intensities_per_view_id = {}
        self.intervals_per_view_id = {}

    def kd_tree(self, ips_lists_by_view, ints_lists_by_view):
        """
        KD-tree implementation to filter out duplicates. Merging into the tree per bound, per iteration
        """
        radius = float(self.combine_distance)
        out = OrderedDict()

        for view_id in sorted(ips_lists_by_view.keys()):
            ips_lists = ips_lists_by_view[view_id]
            ints_lists = ints_lists_by_view[view_id]

            my_ips: list = []
            my_ints: list = []

            for l, ips in enumerate(ips_lists):
                intens = ints_lists[l]

                # First list - accept all
                if not my_ips:
                    my_ips.extend(ips)
                    my_ints.extend(intens)
                    continue

                # Build KDTree from the CURRENT accepted points for this view
                base = np.asarray(my_ips, dtype=np.float32)
                tree = cKDTree(base)

                # Batch query all new points against the tree
                cand = np.asarray(ips, dtype=np.float32)

                if cand.size == 0:
                    continue

                dists, _ = tree.query(cand, k=1)

                # Keep only points farther than combineDistance
                mask = dists > radius
                if np.any(mask):
                    for p, val in zip(cand[mask], np.asarray(intens)[mask]):
                        my_ips.append(p.tolist())
                        my_ints.append(float(val))

            out[view_id] = list(zip(my_ips, my_ints))

        self.consolidated_data = out

    def size(self, interval):
        """
        Finds the number of voxels in a 3D interval
        """
        lb, ub = interval[0], interval[1]
        prod = 1
        for l, u in zip(lb, ub):
            prod *= (int(u) - int(l) + 1)
        return prod

    def contains(self, containing, contained):
        """
        Boolean check if the 3D interval `contained` lies fully inside the 3D interval `containing`
        """
        lc, uc = containing[0], containing[1]
        li, ui = contained[0], contained[1]
        return all(lc[d] <= li[d] and uc[d] >= ui[d] for d in range(3))

    def filter_lists(self, ips, intensities, my_max_spots):
        """
        Pick the top-N interest points by intensity and return them
        """
        if intensities is None or len(ips) == 0 or my_max_spots <= 0:
            return ips, intensities

        intens_arr = np.asarray(intensities)
        n = min(len(ips), intens_arr.shape[0])
        if n == 0:
            return ips, intensities

        # indices of top-N by descending intensity
        top_idx = np.argsort(intens_arr[:n])[::-1][:my_max_spots]

        if isinstance(ips, np.ndarray):
            ips_filtered = ips[top_idx]
        else:
            ips_filtered = [ips[i] for i in top_idx]

        intens_filtered = intens_arr[top_idx]
        if isinstance(intensities, list):
            intens_filtered = intens_filtered.tolist()

        return ips_filtered, intens_filtered

    def filter(self):
        """
        Merge all interest-point chunks that fall inside the requested overlap intervals,
        then keep only the strongest points per interval (scaled by interval size)
        """
        ips_lists_by_view = defaultdict(list)
        ints_lists_by_view = defaultdict(list)
        intervals_by_view = defaultdict(list)

        # Group incoming interest-point chunks by view
        for entry in self.interest_points:
            vid = entry["view_id"]
            ips = entry["interest_points"]
            intens = entry["intensities"]
            interval = entry["interval_key"]
            ips_lists_by_view[vid].append(ips)
            ints_lists_by_view[vid].append(intens)
            intervals_by_view[vid].append(interval)

        # Process each view from the image metadata table.
        for i, row_i in self.image_loader_df.iterrows():
            view_id = f"timepoint: {row_i['timepoint']}, setup: {row_i['view_setup']}"

            ips_list = ips_lists_by_view[view_id]
            intensities_list = ints_lists_by_view[view_id]
            interval_list = intervals_by_view[view_id]

            if not interval_list or not ips_list:
                continue

            interval_data = []

            # Build the set of intervals to process from overlap metadata.
            to_process = [
                {'view_id': vid, **d}
                for vid, lst in self.overlapping_area.items()
                for d in lst
            ]

            # Collect all chunks fully contained in each target interval
            for row in to_process:
                vid = row['view_id']
                lb = row['lower_bound']
                ub = row['upper_bound']
                if vid == view_id:
                    to_process_interval = (lb, ub)
                    ips_block = []
                    intensities_block = []

                    for i in range(len(ips_list)):
                        block_interval = interval_list[i]

                        # Merge all blocks that fall inside the target interval
                        if self.contains(to_process_interval, block_interval):
                            ips_block.extend(ips_list[i])
                            intensities_block.extend(intensities_list[i])

                    interval_data.append((to_process_interval, ips_block, intensities_block))

            ips_lists_by_view[view_id] = []
            ints_lists_by_view[view_id] = []

            # Cap the number of spots by interval size, then keep best
            for interval, ips, intensities in interval_data:
                size = self.size(interval)
                my_max_spots = int(round(self.max_spots * (size / self.max_interval_size)))

                if my_max_spots > 0 and my_max_spots < len(ips):
                    ips, intensities = self.filter_lists(ips, intensities, my_max_spots)

                ips_lists_by_view[view_id].append(ips)
                ints_lists_by_view[view_id].append(intensities)

        return ips_lists_by_view, ints_lists_by_view

    def run(self):
        """
        Executes the entry point of the script.
        """
        ips_lists_by_view, ints_lits_by_view = self.filter()
        self.kd_tree(ips_lists_by_view, ints_lits_by_view)

        return self.consolidated_data
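A small synthetic example of the inputs AdvancedRefinement expects, inferred from filter() and kd_tree() above; every value here is made up for illustration, and the view_id string must match the format built from the image_loader DataFrame.

import pandas as pd

# Hypothetical single-view inputs matching the structures the class reads.
view_id = "timepoint: 0, setup: 0"
image_loader_df = pd.DataFrame([{"timepoint": 0, "view_setup": 0}])

interest_points = [{
    "view_id": view_id,
    "interest_points": [[10.0, 10.0, 10.0], [11.0, 10.0, 10.0], [200.0, 50.0, 30.0]],
    "intensities": [0.9, 0.4, 0.7],
    "interval_key": ([0, 0, 0], [255, 255, 255]),  # chunk bounds (lower, upper)
}]
overlapping_area = {view_id: [{"lower_bound": [0, 0, 0], "upper_bound": [255, 255, 255]}]}

refinement = AdvancedRefinement(
    interest_points=interest_points,
    combine_distance=2.0,
    dataframes={"image_loader": image_loader_df},
    overlapping_area=overlapping_area,
    max_interval_size=256 ** 3,
    max_spots=100,
)
consolidated = refinement.run()  # OrderedDict: view_id -> [([x, y, z], intensity), ...]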