morphgen-rates 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
morphgen_rates/__init__.py CHANGED
@@ -1,3 +1,4 @@
  from .rates import compute_rates
  from .data import get_data
- __all__ = ["compute_rates", "get_data"]
+ from .init_count import compute_init_number_probs
+ __all__ = ["compute_rates", "get_data", "compute_init_number_probs"]
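With this release, all three public entry points are importable from the package root. A one-line sanity check of the new surface (nothing assumed here beyond the names exported in `__all__` above):

    # Confirm the expanded public API of morphgen-rates 0.5.0.
    from morphgen_rates import compute_rates, get_data, compute_init_number_probs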
morphgen_rates/data.py CHANGED
@@ -22,85 +22,81 @@ def _local_data_path(filename='morph_data', ext="csv"):
  return work_dir / f"{filename}.{ext}"


- def get_data(key):
+ def get_data(area, neuron_type):
  """
- Retrieve a dataset entry using a key-path of the form
- "<brain region>/<neuron class>/<subcellular section>".
+ Retrieve summary morphology statistics for a given brain area and neuron class.

- The argument `data_path` is interpreted as a slash-separated path of keys used
- to traverse a nested dataset dictionary. The selected dataset is expected to
- contain both Sholl-plot statistics and bifurcation statistics; when both are
- available, this function returns a standardized dictionary compatible with
- `compute_rates`.
+ This function loads a local CSV dataset, filters rows matching the requested
+ `area` and `neuron_type`, and aggregates statistics by `section_type`. The
+ output is a nested dictionary keyed by section type (e.g., soma, apical, basal),
+ containing:
+
+ - Summary statistics for bifurcation counts and total length
+ - Estimated number of primary neurites at the soma (Count0)
+ - Sholl plot summary statistics (bin size, mean counts, standard deviation)

  Parameters
  ----------
- key : str
- Dataset identifier expressed as a key path:
-
- "<brain region>/<neuron class>/<subcellular section>"
-
- Examples:
- - "CTX/pyr/apical"
- - "HPC/pyr/basal"
-
- Each component is used as a successive key lookup into the nested dataset
- container.
+ area : str
+ Brain region identifier used in the dataset (must match values in the
+ 'area' column of the CSV)
+ neuron_type : str
+ Neuron class identifier used in the dataset (must match values in the
+ 'neuron_type' column of the CSV)

  Returns
  -------
  dict
- If both Sholl and bifurcation information are present for the selected dataset,
- returns:
+ Nested dictionary structured as:

  data = {
- "sholl": {
- "bin_size": float,
- "mean": numpy.ndarray, # shape (K,)
- "var": numpy.ndarray, # shape (K,)
- },
- "bifurcations": {
- "mean": float,
- "var": float,
+ "<section_type>": {
+ "bifurcation_count": {"mean": ..., "std": ..., "min": ..., "max": ...},
+ "total_length": {"mean": ..., "std": ..., "min": ..., "max": ...},
+ "primary_count": {"mean": ..., "std": ..., "min": ..., "max": ...},
+ "sholl_plot": {
+ "bin_size": float,
+ "mean": list[float],
+ "std": list[float],
+ },
  },
+ ...
  }

- Where:
- - `data["sholl"]["bin_size"]` is the spatial bin size used to define Sholl shells
- - `data["sholl"]["mean"]` is the mean Sholl intersection count per radial bin
- - `data["sholl"]["var"]` is the variance of the Sholl intersection count per bin
- - `data["bifurcations"]["mean"]` is the mean bifurcation count
- - `data["bifurcations"]["var"]` is the variance of the bifurcation count
+ Notes on fields:
+ - `primary_count` corresponds to the row group labeled 'Count0'
+ - Sholl values are collected from rows whose metric name starts with 'Count'
+ (including 'Count0'); users may want to interpret/plot them as a function
+ of radial bin index multiplied by `bin_size`

  Raises
  ------
- KeyError
- If any key along `data_path` is missing (brain region, neuron class, or section)
- ValueError
- If the selected dataset does not contain both Sholl and bifurcation data, or
- if the provided arrays have incompatible shapes
+ AssertionError
+ If no rows match the requested `area` and `neuron_type`

  Notes
  -----
- - `data_path` is a *key path*, not a filesystem path
- - The function assumes the dataset entry referenced by `data_path` includes:
- - Sholl bin size, mean array, variance array
- - Bifurcation mean and variance
+ - The function expects the local CSV to include at least the following columns:
+ 'area', 'neuron_type', 'neuron_name', 'section_type', 'bin_size'
+ plus metric columns including:
+ - 'bifurcation_count'
+ - 'total_length'
+ - 'Count0', 'Count1', ... (Sholl counts per radial bin)
+ - Statistics are computed using `pandas.DataFrame.groupby(...).describe()`.
+ Only the summary columns 'mean', 'std', 'min', 'max' are retained.

  Examples
  --------
- >>> data = get("CTX/pyr/apical")
- >>> data["sholl"]["bin_size"]
+ >>> data = get_data("CTX", "pyr")
+ >>> data["apical"]["bifurcation_count"]["mean"]
+ 42.0
+ >>> data["apical"]["sholl_plot"]["bin_size"]
  50.0
- >>> data["bifurcations"]["mean"]
- 12.3
+ >>> len(data["apical"]["sholl_plot"]["mean"])
+ 20
  """
+
  data = {}

- # split the key
- parts = tuple(p.strip() for p in key.split("/") if p.strip())
- if len(parts) != 2:
- raise ValueError(f"Expected key like 'area/neuron_type', got: {key!r}")
- area, neuron_type = parts

  # load data
@@ -108,6 +104,9 @@ def get_data(key):

  # select specific area and neuron type
  df = df[(df['area'] == area) & (df['neuron_type'] == neuron_type)]
+
+ # ensure the requested area and neuron_type exist in the dataset
+ assert df.shape[0] > 0, f"The area {area} or neuron type {neuron_type} is not known"

  # neuron name unnecessary
  df.drop(['area', 'neuron_type', 'neuron_name'], axis=1, inplace=True)
@@ -122,8 +121,6 @@ def get_data(key):
  for section_type, row in df.iterrows():
  data[section_type] = {}

- print()
-
  # get statistics
  for data_type in ['bifurcation_count', 'total_length']:
  tmp = row.loc[row.index.get_level_values(0) == data_type, :]
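For orientation, here is a minimal sketch of how the reworked `get_data` might be used, following the nested structure documented above. The `"CTX"`/`"pyr"` identifiers and the printed fields are illustrative assumptions, not guaranteed contents of the bundled CSV:

    from morphgen_rates import get_data

    # Aggregated morphology statistics for one area/neuron class;
    # identifiers must match the CSV's 'area' and 'neuron_type' columns.
    data = get_data("CTX", "pyr")

    for section_type, stats in data.items():
        bif = stats["bifurcation_count"]   # {"mean", "std", "min", "max"}
        sholl = stats["sholl_plot"]        # {"bin_size", "mean", "std"}
        # Sholl radii: bin index times bin size, as suggested in the docstring.
        radii = [i * sholl["bin_size"] for i in range(len(sholl["mean"]))]
        print(section_type, bif["mean"], radii[:3])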
morphgen_rates/init_count.py ADDED
@@ -0,0 +1,208 @@
+ from __future__ import annotations
+
+ from typing import Dict, Optional, Sequence, Union
+
+ import numpy as np
+ import pyomo.environ as pyo
+
+
+ def compute_init_number_probs(
+ mean_primary_dendrites: float,
+ sd_primary_dendrites: float,
+ min_primary_dendrites: int,
+ max_primary_dendrites: int,
+ *,
+ support_values: Optional[Sequence[float]] = None,
+ epsilon: float = 1e-12,
+ slack_penalty: float = 1e-1,
+ use_variance_form: bool = True,
+ use_abs_slack: bool = False,
+ solver: str = "ipopt",
+ solver_options: Optional[Dict[str, Union[str, int, float]]] = None,
+ ) -> np.ndarray:
+ """
+ Maximum-entropy PMF for the (discrete) number of primary dendrites.
+
+ This returns a numpy array p of length n = max_primary_dendrites + 1, where:
+ - p[i] is the probability of observing i primary dendrites
+ - p[i] = 0 for i < min_primary_dendrites or i > max_primary_dendrites
+
+ The distribution is obtained by maximizing Shannon entropy:
+ H(p) = -sum_i p[i] * log(p[i])
+
+ Subject to:
+ - Normalization: sum_{i in [min,max]} p[i] = 1
+ - Soft mean constraint (with slack):
+ sum i*p[i] - mean_primary_dendrites = slack_mean
+ - Soft dispersion constraint (with slack):
+ If use_variance_form=True (recommended):
+ sum (i-mean)^2 * p[i] - (sd_primary_dendrites^2) = slack_disp
+ If use_variance_form=False:
+ sqrt( sum (i-mean)^2 * p[i] + tiny ) - sd_primary_dendrites = slack_disp
+
+ The objective is penalized to keep slacks small:
+ maximize H(p) - slack_penalty * (slack terms)
+
+ Parameters
+ ----------
+ mean_primary_dendrites : float
+ Target mean number of primary dendrites
+ sd_primary_dendrites : float
+ Target standard deviation (>= 0)
+ min_primary_dendrites : int
+ Minimum allowed dendrite count (inclusive)
+ max_primary_dendrites : int
+ Maximum allowed dendrite count (inclusive). Also sets array length n=max+1
+
+ Keyword-only parameters
+ -----------------------
+ support_values : Sequence[float] | None
+ Optional support for indices 0..max. If None, uses support=i (integers).
+ Keep this None if you truly mean "i is the dendrite count".
+ epsilon : float
+ Lower bound on active probabilities to avoid log(0)
+ slack_penalty : float
+ Larger values enforce closer moment matching
+ use_variance_form : bool
+ Recommended True: match variance to sd^2 (smoother than sqrt constraint)
+ use_abs_slack : bool
+ If True, use L1-like slack penalty via +/- variables; otherwise squared (smooth)
+ solver : str
+ Nonlinear solver name (typically "ipopt")
+ solver_options : dict | None
+ Passed to the solver (e.g., {"max_iter": 5000})
+
+ Returns
+ -------
+ np.ndarray
+ Probability vector p with length max_primary_dendrites + 1
+
+ Raises
+ ------
+ ValueError
+ For invalid inputs
+ RuntimeError
+ If the requested solver is not available
+ """
+ if max_primary_dendrites < 0:
+ raise ValueError("max_primary_dendrites must be >= 0")
+ if sd_primary_dendrites < 0:
+ raise ValueError("sd_primary_dendrites must be nonnegative")
+ if not (0 <= min_primary_dendrites <= max_primary_dendrites):
+ raise ValueError("Require 0 <= min_primary_dendrites <= max_primary_dendrites")
+ if slack_penalty <= 0:
+ raise ValueError("slack_penalty must be positive")
+ if epsilon <= 0:
+ raise ValueError("epsilon must be positive")
+
+ n = max_primary_dendrites + 1
+ active = list(range(min_primary_dendrites, max_primary_dendrites + 1))
+
+ # Support values for each index i (default: i itself)
+ if support_values is None:
+ support_values = list(range(n))
+ if len(support_values) != n:
+ raise ValueError("support_values must have length n = max_primary_dendrites + 1")
+
+ support = {i: float(support_values[i]) for i in range(n)}
+ mu = float(mean_primary_dendrites)
+ sd = float(sd_primary_dendrites)
+ target_var = sd * sd
+
+ # -----------------------------
+ # Pyomo model
+ # -----------------------------
+ m = pyo.ConcreteModel()
+ m.A = pyo.Set(initialize=active, ordered=True)
+
+ # Decision variables for active probabilities only
+ m.p = pyo.Var(m.A, domain=pyo.NonNegativeReals, bounds=(epsilon, 1.0))
+
+ # Normalization over active set
+ m.norm = pyo.Constraint(expr=sum(m.p[i] for i in m.A) == 1.0)
+
+ # Moment expressions
+ mean_expr = sum(support[i] * m.p[i] for i in m.A)
+ var_expr = sum((support[i] - mu) ** 2 * m.p[i] for i in m.A)
+
+ # Soft constraints with slack
+ if use_abs_slack:
+ # L1 slack via +/- decomposition
+ m.s_mean_pos = pyo.Var(domain=pyo.NonNegativeReals)
+ m.s_mean_neg = pyo.Var(domain=pyo.NonNegativeReals)
+ m.s_disp_pos = pyo.Var(domain=pyo.NonNegativeReals)
+ m.s_disp_neg = pyo.Var(domain=pyo.NonNegativeReals)
+
+ m.mean_soft = pyo.Constraint(expr=mean_expr - mu == m.s_mean_pos - m.s_mean_neg)
+
+ if use_variance_form:
+ m.disp_soft = pyo.Constraint(expr=var_expr - target_var == m.s_disp_pos - m.s_disp_neg)
+ else:
+ tiny = 1e-18
+ m.disp_soft = pyo.Constraint(
+ expr=pyo.sqrt(var_expr + tiny) - sd == m.s_disp_pos - m.s_disp_neg
+ )
+
+ slack_term = (m.s_mean_pos + m.s_mean_neg) + (m.s_disp_pos + m.s_disp_neg)
+
+ else:
+ # Smooth squared slacks
+ m.s_mean = pyo.Var(domain=pyo.Reals)
+ m.s_disp = pyo.Var(domain=pyo.Reals)
+
+ m.mean_soft = pyo.Constraint(expr=mean_expr - mu == m.s_mean)
+
+ if use_variance_form:
+ m.disp_soft = pyo.Constraint(expr=var_expr - target_var == m.s_disp)
+ else:
+ tiny = 1e-18
+ m.disp_soft = pyo.Constraint(expr=pyo.sqrt(var_expr + tiny) - sd == m.s_disp)
+
+ slack_term = m.s_mean**2 + m.s_disp**2
+
+ # Entropy objective (active probs only; inactive probs are exactly 0)
+ entropy = -sum(m.p[i] * pyo.log(m.p[i]) for i in m.A)
+ m.obj = pyo.Objective(expr=entropy - float(slack_penalty) * slack_term, sense=pyo.maximize)
+
+ # Solve
+ opt = pyo.SolverFactory(solver)
+ if opt is None or not opt.available():
+ raise RuntimeError(
+ f"Solver '{solver}' is not available. Install/configure it (e.g., ipopt) "
+ "or pass a different solver name."
+ )
+ if solver_options:
+ for k, v in solver_options.items():
+ opt.options[k] = v
+
+ res = opt.solve(m, tee=False)
+
+ # -----------------------------
+ # Extract solution into numpy array
+ # -----------------------------
+ p = np.zeros(n, dtype=float)
+ for i in active:
+ p[i] = float(pyo.value(m.p[i]))
+
+ # Optional: renormalize tiny numerical drift (keeps zeros outside band)
+ s = p.sum()
+ if s > 0:
+ p[active] /= s
+
+ return p
+
+
+ if __name__ == "__main__":
+ p = compute_init_number_probs(
+ mean_primary_dendrites=2.33,
+ sd_primary_dendrites=1.53,
+ min_primary_dendrites=1,
+ max_primary_dendrites=4,
+ slack_penalty=0.1,
+ use_variance_form=True,
+ use_abs_slack=False,
+ solver="ipopt",
+ )
+ print("p shape:", p.shape)
+ print("sum:", p.sum())
+ print(p)
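To see the maximum-entropy construction in action, the sketch below checks that the returned PMF is normalized, respects the [min, max] band, and roughly matches the requested moments. It assumes ipopt is installed, and the target values (mean 3.0, sd 1.2 over counts 1..6) are illustrative; the match is only approximate because the moment constraints are soft:

    import numpy as np
    from morphgen_rates import compute_init_number_probs

    # PMF over counts 0..6, constrained to the band [1, 6].
    p = compute_init_number_probs(
        mean_primary_dendrites=3.0,
        sd_primary_dendrites=1.2,
        min_primary_dendrites=1,
        max_primary_dendrites=6,
    )

    i = np.arange(p.size)
    mean = float((i * p).sum())
    sd = float(np.sqrt(((i - mean) ** 2 * p).sum()))

    assert abs(p.sum() - 1.0) < 1e-9   # normalization
    assert p[0] == 0.0                 # zero outside the allowed band
    print(f"mean={mean:.3f}, sd={sd:.3f}")  # near the soft targets 3.0 / 1.2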
{morphgen_rates-0.3.0.dist-info → morphgen_rates-0.5.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: morphgen-rates
- Version: 0.3.0
+ Version: 0.5.0
  Summary: Compute bifurcation and annihilation rates from morphology data
  Author-email: Francesco Cavarretta <fcavarretta@ualr.edu>
  Requires-Python: >=3.9
morphgen_rates-0.5.0.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+ morphgen_rates/__init__.py,sha256=UE8YWsulDIfeYhGb5GHdkakUIFx4j9H3ZkoKoaDCd_0,179
+ morphgen_rates/data.py,sha256=Onc2dRlB_QXpgScDzHCE7DRtg6PLtFld5W91QGuDkYo,4518
+ morphgen_rates/init_count.py,sha256=PhYlp0-CzRdf8opTKb-om3cFIKSv5M8eTcyKy1_IFMI,7283
+ morphgen_rates/rates.py,sha256=2Gn3Ew2uVJ7c_LdYJogxS-jAM9q-039y0maWi4CNpTM,6442
+ morphgen_rates-0.5.0.dist-info/licenses/LICENSE,sha256=VONsnKVXQRcWwCaHWHuwMtemIj9jNJSmpunazxlyvOk,670
+ morphgen_rates-0.5.0.dist-info/METADATA,sha256=xYYNva-7mn6Vk-iFKKJJUg3jw_phgW-iZvHeSd4z7gk,1178
+ morphgen_rates-0.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ morphgen_rates-0.5.0.dist-info/top_level.txt,sha256=UYPGC2dGp9xD_4iVxVVTkKaizBA4XeDNM7OBC_DCWRk,15
+ morphgen_rates-0.5.0.dist-info/RECORD,,
morphgen_rates-0.3.0.dist-info/RECORD DELETED
@@ -1,8 +0,0 @@
- morphgen_rates/__init__.py,sha256=p347dyzb_8MuKdh4YUIrZOmdctfd-9xEhUJU9XOOVdU,100
- morphgen_rates/data.py,sha256=yj_GT3ks6ukwtALfC4Bklcwu3MeTOr-2BGGo5W0ZxM0,4330
- morphgen_rates/rates.py,sha256=2Gn3Ew2uVJ7c_LdYJogxS-jAM9q-039y0maWi4CNpTM,6442
- morphgen_rates-0.3.0.dist-info/licenses/LICENSE,sha256=VONsnKVXQRcWwCaHWHuwMtemIj9jNJSmpunazxlyvOk,670
- morphgen_rates-0.3.0.dist-info/METADATA,sha256=4umattnyl1InefhNOyEE0KXkqhLt_Y9PSimPO0qimRk,1178
- morphgen_rates-0.3.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- morphgen_rates-0.3.0.dist-info/top_level.txt,sha256=UYPGC2dGp9xD_4iVxVVTkKaizBA4XeDNM7OBC_DCWRk,15
- morphgen_rates-0.3.0.dist-info/RECORD,,