PyPI - nmcp-precomputed - Versions diffs - 3.0.4__py3-none-any.whl - Mend

nmcp-precomputed 3.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

nmcp/__init__.py +4 -0
nmcp/__main__.py +13 -0
nmcp/data/__init__.py +2 -0
nmcp/data/precomputed_entry.py +10 -0
nmcp/data/remote_data_client.py +466 -0
nmcp/from_json.py +34 -0
nmcp/from_service.py +32 -0
nmcp/list_skeletons.py +24 -0
nmcp/precomputed/__init__.py +5 -0
nmcp/precomputed/nmcp_precomputed.py +252 -0
nmcp/precomputed/nmcp_skeleton.py +142 -0
nmcp/precomputed/segment_info.py +61 -0
nmcp/precomputed/segment_property.py +33 -0
nmcp/precomputed/segment_tag_property.py +85 -0
nmcp/precomputed_worker.py +273 -0
nmcp/remove_skeleton.py +21 -0
nmcp_precomputed-3.0.4.dist-info/METADATA +17 -0
nmcp_precomputed-3.0.4.dist-info/RECORD +19 -0
nmcp_precomputed-3.0.4.dist-info/WHEEL +4 -0

nmcp/precomputed/nmcp_precomputed.py ADDED Viewed

@@ -0,0 +1,252 @@
+import json
+import logging
+import pickle
+from copy import deepcopy
+from typing_extensions import List
+from cloudvolume import CloudVolume
+from cloudfiles import CloudFiles
+from .nmcp_skeleton import (create_skeleton, vertex_attributes, create_skeleton_components,
+                            SkeletonComponents)
+from .segment_info import SegmentInfo, NmcpPropertyValues
+logger = logging.getLogger(__name__)
+def ensure_bucket_folders(cloud_location: str):
+    try:
+        cf = CloudFiles(cloud_location)
+        cf.touch("full/temp.txt")
+        cf.touch("axon/temp.txt")
+        cf.touch("dendrite/temp.txt")
+    except Exception as ex:
+        logger.error("could not create bucket folders", None, exc_info=False)
+def create_from_json_files(json_files: [], cloud_location: str, info: dict | None = None):
+    """
+    Convenience function for a list of JSON neuron files.  Primarily used for development and testing.
+    """
+    for json_file in json_files:
+        with open(json_file) as f:
+            data = json.load(f)
+            create_from_dict(data["neurons"][0], cloud_location, info)
+def create_from_dict(neuron: dict, cloud_location: str, info: dict | None = None):
+    skeleton_id = None
+    if "idString" in neuron:
+        try:
+            skeleton_id = int(neuron["idString"][1:4])
+        except:
+            pass  # Ok to fail for some unsupported skeleton id interpretation.
+    if skeleton_id is not None:
+        axon, dendrite = create_skeleton_components(neuron)
+        properties = extract_neuron_properties(neuron)
+        create_from_data(axon, dendrite, properties, cloud_location, skeleton_id, info)
+def create_from_data(axon: SkeletonComponents, dendrite: SkeletonComponents, properties: NmcpPropertyValues,
+                     cloud_location: str, skeleton_id: int, info: dict | None = None):
+    """
+    Add one or more neurons to the precomputed dataset.
+    """
+    cv = _create_dataset_info(cloud_location, info)
+    # remove_skeleton(cloud_location, skeleton_id)
+    try:
+        cf = CloudFiles(cloud_location)
+    except Exception as ex:
+        logger.error("could not create cloud files", None, exc_info=False)
+        return
+    try:
+        existing = cf.get("segment_properties/info.pickle")
+    except Exception as ex:
+        logger.error("could not ask for get segment info", None, exc_info=False)
+        return
+    try:
+        if existing is not None:
+            segment_info = pickle.loads(existing)
+        else:
+            segment_info = SegmentInfo()
+    except Exception as ex:
+        logger.error("could not get segment info", None, exc_info=False)
+        return
+    try:
+        # TODO: Could be left in an odd state if the skeleton is created but segment_info append fails.
+        skeleton = create_skeleton(skeleton_id, axon, dendrite)
+    except Exception as ex:
+        logger.error("could not create skeleton", None, exc_info=False)
+        return
+    try:
+        segment_info.append(skeleton_id, properties)
+    except Exception as ex:
+        logger.error("could not append segment info", None, exc_info=False)
+        return
+    try:
+        cv.skeleton.upload(skeleton)
+    except Exception as ex:
+        logger.error("could not upload skeleton", None, exc_info=False)
+        return
+    try:
+        _create_segment_properties(cloud_location, segment_info)
+    except Exception as ex:
+        logger.error(f"could create segment properties {skeleton_id}", None, exc_info=True)
+        logger.exception(ex, exc_info=True)
+def remove_skeleton(cloud_location: str, skeleton_id: int) -> bool:
+    cf = CloudFiles(cloud_location)
+    existing = cf.get("segment_properties/info.pickle")
+    if existing is None:
+        return False
+    segment_info = pickle.loads(existing)
+    segment_info.remove(skeleton_id)
+    _create_segment_properties(cloud_location, segment_info)
+    cf.delete(f"skeleton/{skeleton_id}")
+    return True
+def list_skeletons(cloud_location: str) -> List[int]:
+    cf = CloudFiles(cloud_location)
+    existing = cf.get("segment_properties/info.pickle")
+    if existing is None:
+        return []
+    segment_info = pickle.loads(existing)
+    return segment_info.ids
+def _default_cloudvolume_info() -> dict:
+    info = CloudVolume.create_new_info(
+        num_channels=1,
+        layer_type="segmentation",
+        data_type="uint64",  # Channel images might be "uint8"
+        # raw, png, jpeg, compressed_segmentation, fpzip, compressed, zfpc, compresso, crackle
+        encoding="raw",
+        resolution=[1000, 1000, 1000],  # Voxel scaling, units are in nanometers
+        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
+        # mesh="mesh",
+        skeletons="skeleton",
+        # Pick a convenient size for your underlying chunk representation
+        # Powers of two are recommended, doesn't need to cover image exactly
+        chunk_size=[512, 512, 512],  # units are voxels
+        volume_size=[13200, 8000, 11400],  # e.g. a cubic millimeter dataset
+    )
+    info["segment_properties"] = "segment_properties"
+    return info
+def _extract_resolution(info_dict: dict) -> List[float]:
+    resolution = None
+    scales = info_dict.get("scales")
+    if isinstance(scales, list) and len(scales) > 0 and isinstance(scales[0], dict):
+        resolution = scales[0].get("resolution")
+    if resolution is None:
+        resolution = info_dict.get("resolution")
+    if not isinstance(resolution, (list, tuple)) or len(resolution) != 3:
+        raise ValueError("CloudVolume info must define a 3-value resolution.")
+    values = []
+    for value in resolution:
+        if isinstance(value, bool) or not isinstance(value, (int, float)):
+            raise ValueError("CloudVolume resolution values must be numeric.")
+        if value <= 0:
+            raise ValueError("CloudVolume resolution values must be greater than zero.")
+        values.append(value)
+    return values
+def _prepare_cloudvolume_info(info: dict | None) -> dict:
+    if info is None:
+        return _default_cloudvolume_info()
+    if not isinstance(info, dict):
+        raise ValueError("CloudVolume info must be provided as a dict.")
+    prepared = deepcopy(info)
+    if "segment_properties" not in prepared:
+        prepared["segment_properties"] = "segment_properties"
+    _extract_resolution(prepared)
+    return prepared
+def _create_dataset_info(cloud_location: str, info: dict | None = None) -> CloudVolume:
+    """ Once per dataset """
+    prepared_info = _prepare_cloudvolume_info(info)
+    resolution = _extract_resolution(prepared_info)
+    full_location = f"precomputed://{cloud_location}"
+    logger.info(f"creating CloudVolume at {full_location}")
+    cv = CloudVolume(full_location, info=prepared_info, compress=False)
+    sk_info = cv.skeleton.meta.default_info()
+    sk_info["transform"] = [resolution[0], 0, 0, 0, 0, resolution[1], 0, 0, 0, 0, resolution[2], 0]
+    sk_info["vertex_attributes"] = vertex_attributes
+    cv.skeleton.meta.info = sk_info
+    cv.skeleton.meta.commit_info()
+    cv.commit_info()
+    return cv
+def extract_neuron_properties(data: dict) -> NmcpPropertyValues:
+    if "allenId" in data["soma"]:
+        soma_allen_id = data["soma"]["allenId"]
+    else:
+        soma_allen_id = None
+    label = (data["idString"])
+    sample = data.get("sample")
+    if sample is not None and "strain" in sample and sample["strain"] is not None:
+        strain = sample["strain"]
+    else:
+        strain = "unknown"
+    return NmcpPropertyValues(label, strain, soma_allen_id)
+def _create_segment_properties(cloud_location: str, segment_property_info: SegmentInfo) -> None:
+    """
+    Write the segment properties of the dataset at `cloud_location`.
+
+    Two objects are stored: the JSON `segment_properties/info` file built from
+    `segment_property_info.as_dict()`, and a pickle of `segment_property_info` itself at
+    `segment_properties/info.pickle`.
+    """
+    cf = CloudFiles(cloud_location)
+    # The required precomputed segment properties info file.
+    cf.put_json("segment_properties/info", segment_property_info.as_dict())
+    # Stash the internal representation of the segment properties info for additional context that would need to be
+    # rebuilt if deserializing `info`.
+    cf.put("segment_properties/info.pickle", pickle.dumps(segment_property_info))

nmcp/precomputed/nmcp_skeleton.py ADDED Viewed

@@ -0,0 +1,142 @@
+from dataclasses import dataclass, field
+from typing_extensions import Self, List
+import numpy as np
+import pandas as pd
+from cloudvolume import Skeleton
+# Per-vertex attribute declarations shared by the skeleton info and by every skeleton built in this module.
+# Each entry gives the attribute id, its stored data type, and its number of components.
+# NOTE(review): float32 represents integers exactly only up to 2**24; confirm that `allenId` values stay
+# within that range or they will be rounded when stored.
+vertex_attributes = [
+    {
+        'id': 'radius',
+        'data_type': 'float32',
+        'num_components': 1
+    },
+    {
+        "id": "allenId",
+        "data_type": "float32",
+        "num_components": 1
+    },
+    {
+        "id": "compartment",
+        "data_type": "float32",
+        "num_components": 1
+    }
+]
+_NP_EMPTY = np.empty(0, dtype=np.float32)
+_NP_EMPTY_VERTEX = np.empty((0, 3), dtype=np.float32)
+_NP_EMPTY_EDGE = np.empty((0, 2), dtype=np.float32)
+@dataclass
+class SkeletonComponents:
+    vertices: np.ndarray = field(default_factory=lambda: _NP_EMPTY_VERTEX)
+    edges: np.ndarray = field(default_factory=lambda: _NP_EMPTY_EDGE)
+    radii: np.ndarray = field(default_factory=lambda: _NP_EMPTY)
+    ccf_ids: np.ndarray = field(default_factory=lambda: _NP_EMPTY)
+    compartments: np.ndarray = field(default_factory=lambda: _NP_EMPTY)
+    @classmethod
+    def create(cls, nodes: List[dict]):
+        skeleton = cls()
+        skeleton.append(nodes)
+        return skeleton
+    def append(self, nodes: List[dict]):
+        """
+        Add segment data to the skeleton.  This is presumed to be a continuation of existing skeleton part such as
+        axon chunks being accumulated.  See `concat` for merging axon and dendrite parts with adjustment for both
+        containing a soma reference.
+        """
+        if nodes is None or len(nodes) == 0:
+            return
+        df = pd.DataFrame(nodes)
+        vertices = df[["x", "y", "z"]].values
+        edges = df[["sampleNumber", "parentNumber"]].values[1:] - 1
+        radii = df["radius"].values.astype(np.float32)
+        if "allenId" in df.columns:
+            if df.allenId.isna().all():
+                df["allenId"] = 0
+            else:
+                # fill all the na values with 0
+                df["allenId"] = df["allenId"].fillna(0)
+            ccf_ids = df["allenId"].values.astype(np.float32)
+        else:
+            ccf_ids = np.full(len(nodes), 0, dtype=np.float32)
+        compartments = df["structureIdentifier"].values.astype(np.float32)
+        self.vertices = np.concatenate([self.vertices, vertices])
+        self.edges = np.concatenate([self.edges, edges])
+        self.radii = np.concatenate([self.radii, radii])
+        self.ccf_ids = np.concatenate([self.ccf_ids, ccf_ids])
+        self.compartments = np.concatenate([self.compartments, compartments])
+    def concat(self, other: Self) -> Self:
+        # Assumed to be an axon with dendrite in that order.
+        if not isinstance(other, SkeletonComponents):
+            raise TypeError("can only concatenate SkeletonComponents")
+        # Get the current number of vertices to adjust edge indices
+        existing_vertex_count = len(self.vertices)
+        # Adjust new edges to reference correct vertex indices
+        adjusted_edges = other.edges + existing_vertex_count - 1
+        if len(self.vertices) > 0 and len(other.edges) > 0:
+            # Map any dendrite edges that would have referenced the soma in the dendrite structure
+            # to the axon soma vertex.
+            adjusted_edges[adjusted_edges[:, 1] == self.vertices.shape[0] - 1, 1] = 0
+        return SkeletonComponents(
+            vertices=np.concatenate([self.vertices, other.vertices[1:]]),
+            edges=np.concatenate([self.edges, adjusted_edges]),
+            radii=np.concatenate([self.radii, other.radii[1:]]),
+            ccf_ids=np.concatenate([self.ccf_ids, other.ccf_ids[1:]]),
+            compartments=np.concatenate([self.compartments, other.compartments[1:]])
+        )
+def create_skeleton_components(data: dict) -> tuple[SkeletonComponents | None, SkeletonComponents | None]:
+    axon = None
+    dendrite = None
+    if data["axon"]:
+        axon = SkeletonComponents.create(data["axon"])
+    if data["dendrite"]:
+        dendrite = SkeletonComponents.create(data["dendrite"])
+    return axon, dendrite
+def create_skeleton(skeleton_id: int, axon: SkeletonComponents, dendrite: SkeletonComponents) -> Skeleton:
+    if axon is None:
+        assert dendrite is not None
+        output = dendrite
+    elif dendrite is None:
+        assert axon is not None
+        output = axon
+    else:
+        output = axon.concat(dendrite)
+    sk = Skeleton(segid=skeleton_id)
+    sk.vertices = output.vertices
+    sk.edges = output.edges
+    sk.radii = output.radii
+    sk.extra_attributes = vertex_attributes
+    sk.radius = output.radii
+    sk.allenId = output.ccf_ids
+    sk.compartment = output.compartments
+    return sk

nmcp/precomputed/segment_info.py ADDED Viewed

@@ -0,0 +1,61 @@
+from typing_extensions import NamedTuple
+from .segment_tag_property import SomaSegmentTagProperty
+from .segment_property import SegmentProperty
+class NmcpPropertyValues(NamedTuple):
+    """Values recorded per skeleton in the segment properties."""
+    # Stored in the `label` property.
+    label: str
+    # Stored in the `strain` property.
+    strain: str
+    # Passed to the soma tag lookup, which also accepts None for "no soma structure".
+    soma_id: int | None
+class SegmentInfo:
+    """
+    Create, update, and persist the `segment_properties` `info` file for Neuroglancer precomputed datasets with
+    properties specific to the NMCP reconstruction skeletons.  It is intended to support datasets that change over time,
+    including the addition of new skeletons, the modification of existing skeletons, and removal of existing skeletons.
+    """
+    def __init__(self):
+        self.ids = list()
+        self.labels = SegmentProperty("label", "label", "filename")
+        self.strains = SegmentProperty("strain", "string", "mouse line used")
+        self.tags = SomaSegmentTagProperty("tags")
+    def append(self, segment_id: int, values: NmcpPropertyValues):
+        if segment_id not in self.ids:
+            self.ids.append(segment_id)
+            self.labels.append(values.label)
+            self.strains.append(values.strain)
+            self.tags.append_soma(values.soma_id)
+            return
+        index = self.ids.index(segment_id)
+        self.labels.update(index, values.label)
+        self.strains.update(index, values.strain)
+        self.tags.update_soma(index, values.soma_id)
+    def remove(self, segment_id: int):
+        if segment_id in self.ids:
+            index = self.ids.index(segment_id)
+            del self.ids[index]
+            self.labels.remove(index)
+            self.strains.remove(index)
+            self.tags.remove_soma(index)
+    def as_dict(self) -> dict:
+        """
+        Generates a JSON-serializable dictionary representation suitable for the `segment_properties/info` file.
+        """
+        return {
+            "@type": "neuroglancer_segment_properties",
+            "inline": {
+                "ids": [f'{s}' for s in self.ids],
+                "properties": [
+                    self.labels.as_dict(),
+                    self.strains.as_dict(),
+                    self.tags.as_dict(),
+                ]
+            }
+        }

nmcp/precomputed/segment_property.py ADDED Viewed

@@ -0,0 +1,33 @@
+class SegmentProperty:
+    def __init__(self, prop_id: str, prop_type: str, description: str = None, values=None):
+        self.id = prop_id
+        self.type = prop_type
+        self.description = description
+        self.values = values or list()
+    def append(self, value) -> None:
+        self.values.append(value)
+    def remove(self, index: int) -> None:
+        if index < len(self.values):
+            del self.values[index]
+    def update(self, index, value):
+        if index < len(self.values):
+            self.values[index] = value
+    def as_dict(self) -> dict:
+        property_desc = {
+            "id": self.id,
+            "type": self.type
+        }
+        if self.description is not None:
+            property_desc["description"] = self.description
+        property_desc["values"] = self._create_export_values()
+        return property_desc
+    def _create_export_values(self) -> list:
+        return self.values

nmcp/precomputed/segment_tag_property.py ADDED Viewed

@@ -0,0 +1,85 @@
+import numpy
+from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache
+from .segment_property import SegmentProperty
+# Structure lookup shared by all soma tag properties; created on first use by `_use_soma_lookup`.
+_structure_id_lookup = None
+# Placeholder acronym and name returned when no structure can be resolved for a soma id.
+_acronym_not_found = "none"
+_name_not_found = "none"
+class SegmentTagProperty(SegmentProperty):
+    def __init__(self, prop_id: str):
+        super().__init__(prop_id, "tags")
+        self.descriptions = list()
+        self.tags = None
+        self.tag_descriptions = None
+    def append_tag(self, tag: str, tag_description: object):
+        super(SegmentTagProperty, self).append(tag)
+        self.descriptions.append(tag_description)
+    def update_tag(self, index: int, tag: str, tag_description: object):
+        super(SegmentTagProperty, self).update(index, tag)
+        self.descriptions[index] = tag_description
+    def remove_tag(self, index: int):
+        super(SegmentTagProperty, self).remove(index)
+        if index < len(self.descriptions):
+            del self.descriptions[index]
+    def as_dict(self) -> dict:
+        property_desc = super(SegmentTagProperty, self).as_dict()
+        if self.tags is not None:
+            property_desc["tags"] = self.tags
+        if self.tag_descriptions is not None:
+            property_desc["tag_descriptions"] = self.tag_descriptions
+        return property_desc
+    def _create_export_values(self) -> list:
+        unique_tags, unique_ref, tag_ref = numpy.unique(self.values, return_index=True, return_inverse=True)
+        self.tags = unique_tags.tolist()
+        if len(self.descriptions) > 0:
+            self.tag_descriptions = [s for s in numpy.array(self.descriptions)[unique_ref]]
+        return [[t] for t in tag_ref]
+class SomaSegmentTagProperty(SegmentTagProperty):
+    def __init__(self, prop_id: str):
+        super().__init__(prop_id)
+    def append_soma(self, soma_id: int | None):
+        self.append_tag(*_use_soma_lookup(soma_id))
+    def update_soma(self, index: int, soma_id: int | None):
+        self.update_tag(index, *_use_soma_lookup(soma_id))
+    def remove_soma(self, index: int):
+        self.remove_tag(index)
+def _use_soma_lookup(soma_id: int | None) -> (str, str):
+    global _structure_id_lookup
+    if soma_id is not None:
+        if _structure_id_lookup is None:
+            _structure_id_lookup = MouseConnectivityCache(resolution=10).get_structure_tree()
+        structure = _structure_id_lookup.get_structures_by_id([soma_id])[0]
+        if structure is not None:
+            return structure["acronym"], structure["name"]
+    return _acronym_not_found, _name_not_found