nmcp-precomputed 3.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,252 @@
1
+ import json
2
+ import logging
3
+ import pickle
4
+ from copy import deepcopy
5
+ from typing_extensions import List
6
+
7
+ from cloudvolume import CloudVolume
8
+ from cloudfiles import CloudFiles
9
+
10
+ from .nmcp_skeleton import (create_skeleton, vertex_attributes, create_skeleton_components,
11
+ SkeletonComponents)
12
+ from .segment_info import SegmentInfo, NmcpPropertyValues
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def ensure_bucket_folders(cloud_location: str):
    """
    Best-effort creation of the `full`, `axon`, and `dendrite` folders under `cloud_location`.

    A placeholder `temp.txt` object is touched under each prefix. Any failure is logged and otherwise
    ignored; nothing is raised to the caller.
    """
    try:
        cf = CloudFiles(cloud_location)
        for folder in ("full", "axon", "dendrite"):
            cf.touch(f"{folder}/temp.txt")
    except Exception as ex:
        # Pass the exception as the single lazy %-argument. A stray positional argument without a
        # matching placeholder makes the logging module fail while formatting and drop the record.
        logger.error("could not create bucket folders: %s", ex)
25
+
26
+
27
+ def create_from_json_files(json_files: [], cloud_location: str, info: dict | None = None):
28
+ """
29
+ Convenience function for a list of JSON neuron files. Primarily used for development and testing.
30
+ """
31
+ for json_file in json_files:
32
+ with open(json_file) as f:
33
+ data = json.load(f)
34
+ create_from_dict(data["neurons"][0], cloud_location, info)
35
+
36
+
37
+ def create_from_dict(neuron: dict, cloud_location: str, info: dict | None = None):
38
+ skeleton_id = None
39
+
40
+ if "idString" in neuron:
41
+ try:
42
+ skeleton_id = int(neuron["idString"][1:4])
43
+ except:
44
+ pass # Ok to fail for some unsupported skeleton id interpretation.
45
+
46
+ if skeleton_id is not None:
47
+ axon, dendrite = create_skeleton_components(neuron)
48
+ properties = extract_neuron_properties(neuron)
49
+ create_from_data(axon, dendrite, properties, cloud_location, skeleton_id, info)
50
+
51
+
52
def create_from_data(axon: SkeletonComponents, dendrite: SkeletonComponents, properties: NmcpPropertyValues,
                     cloud_location: str, skeleton_id: int, info: dict | None = None):
    """
    Add one neuron skeleton to the precomputed dataset.

    The dataset `info` is (re)committed, the skeleton built from `axon` and/or `dendrite` is uploaded, and
    the segment properties are rewritten with `properties` recorded for `skeleton_id`.

    Failures in any step after the dataset info is created are logged and end the call early; they are not
    raised to the caller. Errors from creating the dataset info itself (for example an invalid `info`)
    propagate.
    """
    cv = _create_dataset_info(cloud_location, info)

    # remove_skeleton(cloud_location, skeleton_id)

    # Every handler below passes the exception as the single lazy %-argument. A stray positional argument
    # without a matching placeholder makes the logging module fail while formatting and drop the record.
    try:
        cf = CloudFiles(cloud_location)
    except Exception as ex:
        logger.error("could not create cloud files: %s", ex)
        return

    try:
        existing = cf.get("segment_properties/info.pickle")
    except Exception as ex:
        logger.error("could not ask for get segment info: %s", ex)
        return

    try:
        if existing is not None:
            # NOTE(review): unpickling executes arbitrary code if the bucket content is not trusted.
            segment_info = pickle.loads(existing)
        else:
            segment_info = SegmentInfo()
    except Exception as ex:
        logger.error("could not get segment info: %s", ex)
        return

    try:
        skeleton = create_skeleton(skeleton_id, axon, dendrite)
    except Exception as ex:
        logger.error("could not create skeleton: %s", ex)
        return

    try:
        segment_info.append(skeleton_id, properties)
    except Exception as ex:
        logger.error("could not append segment info: %s", ex)
        return

    try:
        cv.skeleton.upload(skeleton)
    except Exception as ex:
        logger.error("could not upload skeleton: %s", ex)
        return

    try:
        # TODO: Could be left in an odd state if the skeleton is uploaded but writing the segment
        #  properties fails.
        _create_segment_properties(cloud_location, segment_info)
    except Exception:
        # A single record that includes the traceback.
        logger.exception("could not create segment properties %s", skeleton_id)
106
+
107
+
108
def remove_skeleton(cloud_location: str, skeleton_id: int) -> bool:
    """
    Remove `skeleton_id` from the stored segment properties and delete its skeleton object.

    Returns False when the dataset has no stored segment info pickle, True otherwise.
    """
    files = CloudFiles(cloud_location)
    pickled = files.get("segment_properties/info.pickle")

    if pickled is not None:
        stored_info = pickle.loads(pickled)
        stored_info.remove(skeleton_id)

        # Rewrite the segment properties first, then drop the skeleton object itself.
        _create_segment_properties(cloud_location, stored_info)
        files.delete(f"skeleton/{skeleton_id}")
        return True

    return False
125
+
126
+
127
def list_skeletons(cloud_location: str) -> List[int]:
    """
    Return the skeleton ids recorded in the dataset's stored segment info.

    An empty list is returned when no segment info pickle exists at `cloud_location`.
    """
    pickled = CloudFiles(cloud_location).get("segment_properties/info.pickle")
    return [] if pickled is None else pickle.loads(pickled).ids
138
+
139
+
140
def _default_cloudvolume_info() -> dict:
    """
    Build the CloudVolume `info` used when the caller does not supply one.

    The result additionally names the `segment_properties` folder.
    """
    settings = dict(
        num_channels=1,
        layer_type="segmentation",
        data_type="uint64",  # Channel images might be "uint8"
        # One of: raw, png, jpeg, compressed_segmentation, fpzip, compressed, zfpc, compresso, crackle
        encoding="raw",
        resolution=[1000, 1000, 1000],  # Voxel scaling, units are in nanometers
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        # mesh="mesh",
        skeletons="skeleton",
        # Powers of two are recommended for the chunk size; it doesn't need to cover the image exactly.
        chunk_size=[512, 512, 512],  # units are voxels
        volume_size=[13200, 8000, 11400],
    )

    defaults = CloudVolume.create_new_info(**settings)
    defaults["segment_properties"] = "segment_properties"

    return defaults
160
+
161
+
162
def _extract_resolution(info_dict: dict) -> List[float]:
    """
    Return the validated 3-value resolution from a CloudVolume `info` dictionary.

    The resolution of the first entry in `scales` is preferred; a top-level `resolution` key is used as a
    fallback.

    Raises:
        ValueError: if no 3-value list/tuple resolution is present, or if any value is not a finite number
            greater than zero (booleans are rejected as well).
    """
    import math  # local import so this helper does not depend on a module-level import being present

    resolution = None

    scales = info_dict.get("scales")
    if isinstance(scales, list) and scales and isinstance(scales[0], dict):
        resolution = scales[0].get("resolution")

    if resolution is None:
        resolution = info_dict.get("resolution")

    if not isinstance(resolution, (list, tuple)) or len(resolution) != 3:
        raise ValueError("CloudVolume info must define a 3-value resolution.")

    values = []
    for value in resolution:
        # bool is a subclass of int, so it has to be excluded explicitly.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise ValueError("CloudVolume resolution values must be numeric.")
        # NaN compares False against everything and +inf is "greater than zero"; neither is usable.
        if isinstance(value, float) and not math.isfinite(value):
            raise ValueError("CloudVolume resolution values must be finite.")
        if value <= 0:
            raise ValueError("CloudVolume resolution values must be greater than zero.")
        values.append(value)

    return values
184
+
185
+
186
+ def _prepare_cloudvolume_info(info: dict | None) -> dict:
187
+ if info is None:
188
+ return _default_cloudvolume_info()
189
+
190
+ if not isinstance(info, dict):
191
+ raise ValueError("CloudVolume info must be provided as a dict.")
192
+
193
+ prepared = deepcopy(info)
194
+
195
+ if "segment_properties" not in prepared:
196
+ prepared["segment_properties"] = "segment_properties"
197
+
198
+ _extract_resolution(prepared)
199
+
200
+ return prepared
201
+
202
+
203
def _create_dataset_info(cloud_location: str, info: dict | None = None) -> CloudVolume:
    """
    Open the CloudVolume for `cloud_location` and commit its dataset and skeleton info.

    Intended to run once per dataset. The skeleton transform is a pure scaling by the dataset resolution.
    """
    prepared_info = _prepare_cloudvolume_info(info)
    scale_x, scale_y, scale_z = _extract_resolution(prepared_info)

    full_location = f"precomputed://{cloud_location}"
    logger.info(f"creating CloudVolume at {full_location}")

    volume = CloudVolume(full_location, info=prepared_info, compress=False)

    skeleton_info = volume.skeleton.meta.default_info()
    # Twelve values: three rows of four, with the resolution on the diagonal.
    skeleton_info["transform"] = [
        scale_x, 0, 0, 0,
        0, scale_y, 0, 0,
        0, 0, scale_z, 0,
    ]
    skeleton_info["vertex_attributes"] = vertex_attributes

    volume.skeleton.meta.info = skeleton_info
    volume.skeleton.meta.commit_info()

    volume.commit_info()

    return volume
224
+
225
+
226
def extract_neuron_properties(data: dict) -> NmcpPropertyValues:
    """
    Collect the segment property values (label, strain, soma Allen id) for one neuron dictionary.

    The label is the neuron's `idString`. The strain falls back to "unknown" when the neuron has no sample
    or the sample carries no strain. The soma id is None when the soma has no `allenId`.
    """
    soma = data["soma"]
    soma_allen_id = soma["allenId"] if "allenId" in soma else None

    label = data["idString"]

    strain = "unknown"
    sample = data.get("sample")
    if sample is not None and "strain" in sample:
        recorded_strain = sample["strain"]
        if recorded_strain is not None:
            strain = recorded_strain

    return NmcpPropertyValues(label, strain, soma_allen_id)
241
+
242
+
243
def _create_segment_properties(cloud_location: str, segment_property_info: SegmentInfo):
    """
    Write the segment properties for the dataset: the `info` JSON and a pickle of the in-memory object.
    """
    store = CloudFiles(cloud_location)

    # The required precomputed segment properties info file.
    properties_json = segment_property_info.as_dict()
    store.put_json("segment_properties/info", properties_json)

    # Stash the internal representation of the segment properties info for additional context that would
    # need to be rebuilt if deserializing `info`.
    pickled_state = pickle.dumps(segment_property_info)
    store.put("segment_properties/info.pickle", pickled_state)
@@ -0,0 +1,142 @@
1
+ from dataclasses import dataclass, field
2
+ from typing_extensions import Self, List
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+ from cloudvolume import Skeleton
8
+
9
# Per-vertex attributes declared in the skeleton info and attached to every skeleton that is created.
# NOTE(review): `allenId` is stored as float32, which cannot represent every integer above 2**24 exactly —
# confirm that all structure ids in use stay below that, or that consumers tolerate the rounding.
vertex_attributes = [
    {
        "id": "radius",
        "data_type": "float32",
        "num_components": 1,
    },
    {
        "id": "allenId",
        "data_type": "float32",
        "num_components": 1,
    },
    {
        "id": "compartment",
        "data_type": "float32",
        "num_components": 1,
    },
]

# Empty arrays used as the starting value of each `SkeletonComponents` field.
_NP_EMPTY = np.empty(0, dtype=np.float32)
_NP_EMPTY_VERTEX = np.empty((0, 3), dtype=np.float32)
# Edges hold vertex indices, so the empty array is integer typed; a float array here would promote every
# edge array concatenated onto it to floating point.
_NP_EMPTY_EDGE = np.empty((0, 2), dtype=np.uint32)
31
+
32
+
33
@dataclass
class SkeletonComponents:
    """
    Parallel arrays describing one part of a neuron skeleton (for example the axon or the dendrite).

    `vertices` has one x/y/z row per node and `edges` one (child, parent) row of vertex indices per
    connection. `radii`, `ccf_ids`, and `compartments` hold one value per vertex.
    """
    # Each instance gets its own copy of the empty arrays so that an in-place change to one instance's
    # defaults can never leak into another instance.
    vertices: np.ndarray = field(default_factory=lambda: _NP_EMPTY_VERTEX.copy())
    edges: np.ndarray = field(default_factory=lambda: _NP_EMPTY_EDGE.copy())
    radii: np.ndarray = field(default_factory=lambda: _NP_EMPTY.copy())
    ccf_ids: np.ndarray = field(default_factory=lambda: _NP_EMPTY.copy())
    compartments: np.ndarray = field(default_factory=lambda: _NP_EMPTY.copy())

    @classmethod
    def create(cls, nodes: List[dict]):
        """Build a new instance populated from `nodes`."""
        skeleton = cls()
        skeleton.append(nodes)
        return skeleton

    def append(self, nodes: List[dict]):
        """
        Add segment data to the skeleton. This is presumed to be a continuation of existing skeleton part
        such as axon chunks being accumulated. See `concat` for merging axon and dendrite parts with
        adjustment for both containing a soma reference.

        Each node supplies `x`, `y`, `z`, `sampleNumber`, `parentNumber`, `radius`, and
        `structureIdentifier`; `allenId` is optional and missing values become 0. Nothing happens when
        `nodes` is None or empty.
        """
        if nodes is None or len(nodes) == 0:
            return

        df = pd.DataFrame(nodes)

        vertices = df[["x", "y", "z"]].to_numpy()

        # Sample numbers are shifted down by one to become vertex indices. The first row contributes no
        # edge. NOTE(review): the indices are not offset by the number of vertices already held —
        # presumably sample numbers continue across appended chunks; confirm against the data source.
        edges = df[["sampleNumber", "parentNumber"]].to_numpy()[1:] - 1

        radii = df["radius"].to_numpy().astype(np.float32)

        allen_ids = df["allenId"] if "allenId" in df.columns else None
        if allen_ids is None or allen_ids.isna().all():
            ccf_ids = np.zeros(len(df), dtype=np.float32)
        else:
            ccf_ids = allen_ids.fillna(0).to_numpy().astype(np.float32)

        compartments = df["structureIdentifier"].to_numpy().astype(np.float32)

        self.vertices = np.concatenate([self.vertices, vertices])
        self.edges = np.concatenate([self.edges, edges])
        self.radii = np.concatenate([self.radii, radii])
        self.ccf_ids = np.concatenate([self.ccf_ids, ccf_ids])
        self.compartments = np.concatenate([self.compartments, compartments])

    def concat(self, other: "SkeletonComponents") -> "SkeletonComponents":
        """
        Return a new instance holding this part followed by `other`, without modifying either.

        Assumed to be an axon with dendrite in that order. The first vertex of `other` is treated as a
        duplicate of this part's first vertex (the soma): it is dropped, and edges of `other` that pointed
        at it are redirected to vertex 0.

        Raises:
            TypeError: if `other` is not a `SkeletonComponents`.
        """
        if not isinstance(other, SkeletonComponents):
            raise TypeError("can only concatenate SkeletonComponents")

        # Get the current number of vertices to adjust edge indices
        existing_vertex_count = len(self.vertices)

        if existing_vertex_count == 0:
            # There is nothing to merge into and no soma to share, so dropping the first vertex of `other`
            # and shifting its edges would leave them pointing at index -1. Return a copy instead.
            return SkeletonComponents(
                vertices=other.vertices.copy(),
                edges=other.edges.copy(),
                radii=other.radii.copy(),
                ccf_ids=other.ccf_ids.copy(),
                compartments=other.compartments.copy()
            )

        # Vertex i (i >= 1) of `other` lands at existing_vertex_count + i - 1 because its vertex 0 is
        # dropped. The addition produces a new array, so `other.edges` is left untouched.
        adjusted_edges = other.edges + existing_vertex_count - 1

        if len(other.edges) > 0:
            # Map any dendrite edges that would have referenced the soma in the dendrite structure
            # to the axon soma vertex.
            adjusted_edges[adjusted_edges[:, 1] == existing_vertex_count - 1, 1] = 0

        return SkeletonComponents(
            vertices=np.concatenate([self.vertices, other.vertices[1:]]),
            edges=np.concatenate([self.edges, adjusted_edges]),
            radii=np.concatenate([self.radii, other.radii[1:]]),
            ccf_ids=np.concatenate([self.ccf_ids, other.ccf_ids[1:]]),
            compartments=np.concatenate([self.compartments, other.compartments[1:]])
        )
106
+
107
+
108
def create_skeleton_components(data: dict) -> "tuple[SkeletonComponents | None, SkeletonComponents | None]":
    """
    Build the axon and dendrite components of a neuron dictionary.

    Returns an `(axon, dendrite)` pair. An entry is None when the corresponding key is missing from `data`
    or holds no nodes.
    """
    # `.get` lets a neuron that only carries one of the two parts through instead of raising KeyError.
    axon_nodes = data.get("axon")
    dendrite_nodes = data.get("dendrite")

    axon = SkeletonComponents.create(axon_nodes) if axon_nodes else None
    dendrite = SkeletonComponents.create(dendrite_nodes) if dendrite_nodes else None

    return axon, dendrite
119
+
120
+
121
def create_skeleton(skeleton_id: int, axon: "SkeletonComponents | None",
                    dendrite: "SkeletonComponents | None") -> "Skeleton":
    """
    Build the `Skeleton` for `skeleton_id` from its axon and/or dendrite components.

    When both parts are given they are merged with `axon.concat(dendrite)`; otherwise the single available
    part is used as is.

    Raises:
        ValueError: if both `axon` and `dendrite` are None.
    """
    # Raised explicitly rather than asserted: assertions are stripped when Python runs with -O, which
    # would turn this into an attribute error on None further down.
    if axon is None and dendrite is None:
        raise ValueError("at least one of axon or dendrite is required to create a skeleton")

    if axon is None:
        output = dendrite
    elif dendrite is None:
        output = axon
    else:
        output = axon.concat(dendrite)

    sk = Skeleton(segid=skeleton_id)

    sk.vertices = output.vertices
    sk.edges = output.edges
    sk.radii = output.radii
    sk.extra_attributes = vertex_attributes

    # One attribute per entry in `vertex_attributes`, named after that entry's id.
    sk.radius = output.radii
    sk.allenId = output.ccf_ids
    sk.compartment = output.compartments

    return sk
@@ -0,0 +1,61 @@
1
+ from typing_extensions import NamedTuple
2
+
3
+ from .segment_tag_property import SomaSegmentTagProperty
4
+ from .segment_property import SegmentProperty
5
+
6
+
7
+ class NmcpPropertyValues(NamedTuple):
8
+ label: str
9
+ strain: str
10
+ soma_id: int
11
+
12
+
13
+ class SegmentInfo:
14
+ """
15
+ Create, update, and persist the `segment_properties` `info` file for Neuroglancer precomputed datasets with
16
+ properties specific to the NMCP reconstruction skeletons. It is intended to support datasets that change over time,
17
+ including the addition of new skeletons, the modification of existing skeletons, and removal of existing skeletons.
18
+ """
19
+
20
+ def __init__(self):
21
+ self.ids = list()
22
+ self.labels = SegmentProperty("label", "label", "filename")
23
+ self.strains = SegmentProperty("strain", "string", "mouse line used")
24
+ self.tags = SomaSegmentTagProperty("tags")
25
+
26
+ def append(self, segment_id: int, values: NmcpPropertyValues):
27
+ if segment_id not in self.ids:
28
+ self.ids.append(segment_id)
29
+ self.labels.append(values.label)
30
+ self.strains.append(values.strain)
31
+ self.tags.append_soma(values.soma_id)
32
+ return
33
+
34
+ index = self.ids.index(segment_id)
35
+ self.labels.update(index, values.label)
36
+ self.strains.update(index, values.strain)
37
+ self.tags.update_soma(index, values.soma_id)
38
+
39
+ def remove(self, segment_id: int):
40
+ if segment_id in self.ids:
41
+ index = self.ids.index(segment_id)
42
+ del self.ids[index]
43
+ self.labels.remove(index)
44
+ self.strains.remove(index)
45
+ self.tags.remove_soma(index)
46
+
47
+ def as_dict(self) -> dict:
48
+ """
49
+ Generates a JSON-serializable dictionary representation suitable for the `segment_properties/info` file.
50
+ """
51
+ return {
52
+ "@type": "neuroglancer_segment_properties",
53
+ "inline": {
54
+ "ids": [f'{s}' for s in self.ids],
55
+ "properties": [
56
+ self.labels.as_dict(),
57
+ self.strains.as_dict(),
58
+ self.tags.as_dict(),
59
+ ]
60
+ }
61
+ }
@@ -0,0 +1,33 @@
1
class SegmentProperty:
    """
    One property column of a segment properties `info` file: an id, a type, an optional description, and
    one value per segment.
    """

    def __init__(self, prop_id: str, prop_type: str, description: str = None, values=None):
        self.id, self.type, self.description = prop_id, prop_type, description
        # A missing or empty `values` argument starts the column with a fresh list.
        self.values = values if values else []

    def append(self, value) -> None:
        """Add `value` at the end of the column."""
        self.values.append(value)

    def remove(self, index: int) -> None:
        """Delete the value at `index`; an index past the end is ignored."""
        if index >= len(self.values):
            return
        del self.values[index]

    def update(self, index, value):
        """Replace the value at `index`; an index past the end is ignored."""
        if index >= len(self.values):
            return
        self.values[index] = value

    def as_dict(self) -> dict:
        """Return the property as a dictionary; `description` is included only when one was set."""
        optional = {} if self.description is None else {"description": self.description}

        return {
            "id": self.id,
            "type": self.type,
            **optional,
            "values": self._create_export_values(),
        }

    def _create_export_values(self) -> list:
        """Values as they should appear in the exported dictionary; subclasses may override."""
        return self.values
@@ -0,0 +1,85 @@
1
+ import numpy
2
+
3
+ from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache
4
+
5
+ from .segment_property import SegmentProperty
6
+
7
# Structure tree shared by all soma lookups in this module; stays None until the first lookup needs it.
_structure_id_lookup = None

# Acronym and name reported when a soma id is missing or has no matching structure.
_acronym_not_found = _name_not_found = "none"
12
+
13
+
14
class SegmentTagProperty(SegmentProperty):
    """
    A `tags` segment property: one tag per segment plus a description for each tag.

    `tags` and `tag_descriptions` hold the de-duplicated tag table. They are rebuilt every time the
    property is exported with `as_dict` and are None before the first export.
    """

    def __init__(self, prop_id: str):
        super().__init__(prop_id, "tags")

        # One description per entry in `values`, kept index-aligned with it.
        self.descriptions = list()

        self.tags = None
        self.tag_descriptions = None

    def append_tag(self, tag: str, tag_description: object):
        """Add a tag and its description for a new segment."""
        super().append(tag)

        self.descriptions.append(tag_description)

    def update_tag(self, index: int, tag: str, tag_description: object):
        """Replace the tag and description at `index`; an index past the end is ignored."""
        super().update(index, tag)

        # Guarded the same way as the value update so that both lists ignore an out-of-range index
        # instead of one of them raising IndexError.
        if index < len(self.descriptions):
            self.descriptions[index] = tag_description

    def remove_tag(self, index: int):
        """Delete the tag and description at `index`; an index past the end is ignored."""
        super().remove(index)

        if index < len(self.descriptions):
            del self.descriptions[index]

    def as_dict(self) -> dict:
        """Return the property dictionary extended with the `tags` and `tag_descriptions` tables."""
        # The base implementation calls `_create_export_values`, which refreshes the tag tables.
        property_desc = super().as_dict()

        if self.tags is not None:
            property_desc["tags"] = self.tags

        if self.tag_descriptions is not None:
            property_desc["tag_descriptions"] = self.tag_descriptions

        return property_desc

    def _create_export_values(self) -> list:
        """
        Rebuild the unique tag tables and return, per segment, a one-element list holding the index of its
        tag in `tags`.
        """
        unique_tags, first_seen, tag_index = numpy.unique(self.values, return_index=True, return_inverse=True)

        # `.tolist()` yields plain Python values, so the exported dictionary does not carry numpy scalars
        # that the standard JSON encoder rejects.
        self.tags = unique_tags.tolist()

        if len(self.descriptions) > 0:
            # The description of the first segment carrying each unique tag, taken from the original
            # objects rather than through a numpy array.
            self.tag_descriptions = [self.descriptions[i] for i in first_seen.tolist()]
        else:
            # Do not leave descriptions from an earlier export behind once there is nothing to describe.
            self.tag_descriptions = None

        return [[t] for t in tag_index.ravel().tolist()]
59
+
60
+
61
class SomaSegmentTagProperty(SegmentTagProperty):
    """Tag property whose tag and description are the acronym and name looked up for a soma id."""

    def __init__(self, prop_id: str):
        super().__init__(prop_id)

    def append_soma(self, soma_id: int | None):
        """Add the tag looked up for `soma_id`."""
        acronym, name = _use_soma_lookup(soma_id)
        self.append_tag(acronym, name)

    def update_soma(self, index: int, soma_id: int | None):
        """Replace the tag at `index` with the one looked up for `soma_id`."""
        acronym, name = _use_soma_lookup(soma_id)
        self.update_tag(index, acronym, name)

    def remove_soma(self, index: int):
        """Delete the tag at `index`."""
        self.remove_tag(index)
73
+
74
+
75
def _use_soma_lookup(soma_id: int | None) -> tuple[str, str]:
    """
    Return the `(acronym, name)` of the structure identified by `soma_id`.

    The not-found placeholders are returned when `soma_id` is None or the lookup yields None for it.
    """
    global _structure_id_lookup

    if soma_id is None:
        return _acronym_not_found, _name_not_found

    if _structure_id_lookup is None:
        # Built on first use and kept for the lifetime of the module.
        _structure_id_lookup = MouseConnectivityCache(resolution=10).get_structure_tree()

    structure = _structure_id_lookup.get_structures_by_id([soma_id])[0]
    if structure is None:
        return _acronym_not_found, _name_not_found

    return structure["acronym"], structure["name"]