nugap 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nugap-0.1.0/LICENSE +21 -0
- nugap-0.1.0/PKG-INFO +262 -0
- nugap-0.1.0/README.md +242 -0
- nugap-0.1.0/pyproject.toml +25 -0
- nugap-0.1.0/setup.cfg +4 -0
- nugap-0.1.0/src/nugap/__init__.py +29 -0
- nugap-0.1.0/src/nugap/fitting.py +267 -0
- nugap-0.1.0/src/nugap/metric.py +179 -0
- nugap-0.1.0/src/nugap/network.py +222 -0
- nugap-0.1.0/src/nugap/pipeline.py +106 -0
- nugap-0.1.0/src/nugap/replicates.py +231 -0
- nugap-0.1.0/src/nugap/systems.py +132 -0
- nugap-0.1.0/src/nugap/viz.py +183 -0
- nugap-0.1.0/src/nugap.egg-info/PKG-INFO +262 -0
- nugap-0.1.0/src/nugap.egg-info/SOURCES.txt +17 -0
- nugap-0.1.0/src/nugap.egg-info/dependency_links.txt +1 -0
- nugap-0.1.0/src/nugap.egg-info/requires.txt +13 -0
- nugap-0.1.0/src/nugap.egg-info/top_level.txt +1 -0
- nugap-0.1.0/tests/test_metric.py +87 -0
nugap-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 <COPYRIGHT HOLDER>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
nugap-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nugap
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Vinnicombe nu-gap metric and a pipeline for comparing time-course data across two conditions
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: numpy>=1.23
|
|
10
|
+
Requires-Dist: scipy>=1.9
|
|
11
|
+
Requires-Dist: pandas>=1.5
|
|
12
|
+
Provides-Extra: control
|
|
13
|
+
Requires-Dist: control>=0.9; extra == "control"
|
|
14
|
+
Provides-Extra: viz
|
|
15
|
+
Requires-Dist: matplotlib>=3.5; extra == "viz"
|
|
16
|
+
Requires-Dist: networkx>=2.6; extra == "viz"
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# nugap
|
|
22
|
+
|
|
23
|
+
A Python implementation of the **Vinnicombe nu-gap metric** (δν) and a
|
|
24
|
+
pipeline that uses it to find which variables change their *dynamics* between
|
|
25
|
+
two experimental conditions, from time-course data.
|
|
26
|
+
|
|
27
|
+
There is no nu-gap implementation in the standard Python control ecosystem —
|
|
28
|
+
it has lived almost exclusively in MATLAB's Robust Control Toolbox
|
|
29
|
+
(`gapmetric`). This package provides a tested, dependency-light one
|
|
30
|
+
(numpy + scipy + pandas only).
|
|
31
|
+
|
|
32
|
+
## What the nu-gap metric is
|
|
33
|
+
|
|
34
|
+
For two linear systems P1 and P2, δν(P1, P2) is a number in **[0, 1]**:
|
|
35
|
+
|
|
36
|
+
* **0** — identical dynamics,
|
|
37
|
+
* **near 1** — very different dynamics.
|
|
38
|
+
|
|
39
|
+
It is computed from the *chordal distance* between the two frequency responses,
|
|
40
|
+
gated by a winding-number (topological) condition. Unlike a naive comparison of
|
|
41
|
+
fitted parameters, it is a true metric, it is bounded, and it correctly treats
|
|
42
|
+
systems that look very different on paper but behave similarly (and vice versa).
|
|
43
|
+
|
|
44
|
+
Reference: G. Vinnicombe, *Frequency domain uncertainty and the graph
|
|
45
|
+
topology*, IEEE TAC 38 (1993) 1371–1383.
|
|
46
|
+
|
|
47
|
+
## The idea for your application
|
|
48
|
+
|
|
49
|
+
You have time-course data for thousands of variables under two conditions. The
|
|
50
|
+
analysis is a **pairwise dynamic network**: within each condition, every
|
|
51
|
+
variable is treated as a candidate input for every other variable, and a
|
|
52
|
+
first-order input->output model is fitted for each ordered pair (i -> j). Then
|
|
53
|
+
the nu-gap compares condition A's model with condition B's model for each edge.
|
|
54
|
+
Edges with a large nu-gap are interactions whose dynamics changed.
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from nugap import compare_network
|
|
58
|
+
|
|
59
|
+
# data_A, data_B: dict variable name -> array (n_replicates x n_timepoints)
|
|
60
|
+
edges = compare_network(
|
|
61
|
+
data_A, data_B, t,
|
|
62
|
+
order=1, # model poles: 1 = first-order, 2 = second-order
|
|
63
|
+
n_zeros=None, # numerator zeros (default 0 -> all-pole model)
|
|
64
|
+
n=256, # contour resolution; 256 is plenty for low order
|
|
65
|
+
min_r2=0.5, # only test pairs with a real relationship
|
|
66
|
+
gate="either", # how to combine the two conditions' fit quality
|
|
67
|
+
global_null=True, # pool within-condition nu-gaps -> p_global, q_global
|
|
68
|
+
)
|
|
69
|
+
# one row per edge (source, target, nu_gap, within_median, separation,
|
|
70
|
+
# max_r2, q_global), sorted by significance. Flag changes with q_global < 0.1.
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
**`order` / `n_zeros`** choose the per-edge model. `order=1, n_zeros=0`
|
|
74
|
+
(default) is the first-order K/(τs+1); `order=2` is a two-pole system, with
|
|
75
|
+
`n_zeros=1` if you want a zero (the discrete analogue of MATLAB
|
|
76
|
+
`tfest(data, 2, 1)`). Higher order needs more time points per trajectory.
|
|
77
|
+
|
|
78
|
+
**`gate`** controls the fit-quality gate across the two conditions:
|
|
79
|
+
`"either"` keeps an edge if the relationship is well fit in at least one
|
|
80
|
+
condition (so relationships that appear or disappear are tested); `"both"`
|
|
81
|
+
requires a good fit in both conditions; `"mean"` uses the mean R² over all
|
|
82
|
+
replicate fits.
|
|
83
|
+
|
|
84
|
+
### Two things that matter a lot here
|
|
85
|
+
|
|
86
|
+
1. **Gate on fit quality (`min_r2`).** Most variable pairs have *no* real
|
|
87
|
+
first-order relationship; those fits are meaningless and produce large,
|
|
88
|
+
high-variance nu-gaps that would swamp the null. `compare_network` only
|
|
89
|
+
tests an edge if a first-order relationship actually holds (R^2 above
|
|
90
|
+
`min_r2`) in at least one condition. This is essential — without it nothing
|
|
91
|
+
is detectable.
|
|
92
|
+
|
|
93
|
+
2. **Confounding.** Pairwise first-order identification assumes the i->j
|
|
94
|
+
relationship is approximately self-contained. In a densely coupled system
|
|
95
|
+
each output depends on many inputs, so a single pairwise model is
|
|
96
|
+
misspecified and the within-condition noise floor rises. This is a property
|
|
97
|
+
of the method (the same one you ran in MATLAB), not the metric. Sparse or
|
|
98
|
+
modular systems behave well; dense ones need care.
|
|
99
|
+
|
|
100
|
+
### Scale
|
|
101
|
+
|
|
102
|
+
N variables -> N*(N-1) ordered edges (a million at N=1000). Each fit is a
|
|
103
|
+
2-parameter least squares; the cost is the metric, kept cheap by the small
|
|
104
|
+
`n`. The per-edge work is independent, so wrap the edge loop in
|
|
105
|
+
`joblib.Parallel` / `multiprocessing` for real datasets, and/or pass
|
|
106
|
+
`include_pairs=` to test only a prescreened candidate set.
|
|
107
|
+
|
|
108
|
+
## Install
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
pip install -e .
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Usage
|
|
115
|
+
|
|
116
|
+
The metric on two systems directly:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from nugap import tf, nu_gap
|
|
120
|
+
|
|
121
|
+
P1 = tf([1.0], [1.0, 1.0]) # 1/(s+1), continuous
|
|
122
|
+
P2 = tf([1.0], [1.0, 1.2]) # 1/(s+1.2)
|
|
123
|
+
print(nu_gap(P1, P2)) # ~0.07
|
|
124
|
+
|
|
125
|
+
# discrete systems use dt; the metric uses the unit circle automatically
|
|
126
|
+
Pd = tf([0.5], [1.0, -0.5], dt=0.1)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
The full two-condition comparison:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from nugap import compare_conditions
|
|
133
|
+
|
|
134
|
+
# data_A, data_B: dict mapping variable name -> trajectory (sampled at t)
|
|
135
|
+
# u_A, u_B: the known stimulus, if you have one (else omit -> Prony fit)
|
|
136
|
+
df = compare_conditions(
|
|
137
|
+
data_A, data_B, t,
|
|
138
|
+
u_A=u, u_B=u, # drop these for output-only data
|
|
139
|
+
orders=range(1, 5), # candidate model orders (AIC-selected)
|
|
140
|
+
method="arx", # or "prony" (output-only), or "auto"
|
|
141
|
+
min_r2=0.9, # flag variables with poor fits
|
|
142
|
+
)
|
|
143
|
+
# df is sorted by nu_gap descending, with fit quality per condition
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
See `examples/demo_network.py` for a small validated example, and
|
|
147
|
+
`examples/demo_clock.py` for a full biological test case: synthetic circadian
|
|
148
|
+
RNA-seq for 50 genes (core clock genes, clock outputs, CLOCK-independent
|
|
149
|
+
rhythmic genes, and background) under wild-type vs **CLOCK knockout**. The KO
|
|
150
|
+
collapses the cell-autonomous oscillation (a Hopf bifurcation to a damped fixed
|
|
151
|
+
point) while CLOCK mRNA is still expressed; `compare_network` recovers the
|
|
152
|
+
collapse, flagging ~90% of clock/output edges as changed while leaving the
|
|
153
|
+
CLOCK-independent rhythmic edges (rhythmic in both conditions) and background
|
|
154
|
+
alone. The generator is `examples/clock_sim.py`.
|
|
155
|
+
|
|
156
|
+
### The null and short/flat conditions
|
|
157
|
+
|
|
158
|
+
`null_from_reliable_only=True` (default) builds the within-condition null only
|
|
159
|
+
from edges where a real relationship exists in that condition. This matters
|
|
160
|
+
whenever one condition loses dynamics (e.g. genes go flat in a knockout):
|
|
161
|
+
fitting noise-to-noise there produces large, meaningless within-condition
|
|
162
|
+
nu-gaps that would otherwise inflate the null and hide the real changes.
|
|
163
|
+
|
|
164
|
+
## Visualising results
|
|
165
|
+
|
|
166
|
+
`nugap.viz` (needs matplotlib + networkx: `pip install nugap[viz]`) provides
|
|
167
|
+
three views of a `compare_network` edge table:
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
from nugap.viz import volcano, hub_barplot, hub_network
|
|
171
|
+
|
|
172
|
+
volcano(df, q_thresh=0.1) # effect size (nu_gap) vs -log10 FDR; best overview
|
|
173
|
+
hub_barplot(df, top=20) # genes ranked by # of significant changed edges
|
|
174
|
+
hub_network(df, top_hubs=15, # directed graph of the most-rewired genes;
|
|
175
|
+
node_groups=classes) # nodes sized by degree, edges coloured by nu_gap
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
The volcano is the recommended default (scale-independent, shows everything at
|
|
179
|
+
once). `hub_barplot` gives the node-level summary that is usually the most
|
|
180
|
+
interpretable. `hub_network` draws the most-rewired genes and the significant
|
|
181
|
+
changed edges among them as a directed graph (pass `node_groups`, a dict
|
|
182
|
+
gene -> label, to colour by class); `top_hubs` controls how many genes appear,
|
|
183
|
+
so you can keep it readable on dense results. `changed_edge_counts(df)` returns
|
|
184
|
+
the per-gene counts behind both hub views. Each plotting function returns a
|
|
185
|
+
matplotlib Axes so you can compose or restyle. `examples/demo_clock.py` writes
|
|
186
|
+
`clock_viz.png` (volcano + hubs) and `clock_hub_network.png`.
|
|
187
|
+
|
|
188
|
+
## Modules
|
|
189
|
+
|
|
190
|
+
* `nugap.metric` — the nu-gap metric, chordal distance, winding condition.
|
|
191
|
+
* `nugap.systems` — lightweight SISO LTI type (`tf`, `from_zpk`,
|
|
192
|
+
`from_control`).
|
|
193
|
+
* `nugap.fitting` — identify discrete LTI models from data (ARX / Prony, with
|
|
194
|
+
AIC order selection). **Swap this out** to match your MATLAB procedure.
|
|
195
|
+
* `nugap.network` — **pairwise (input->output) network comparison across
|
|
196
|
+
conditions** with fit-quality gating and FDR; the main entry point for your
|
|
197
|
+
application. `compare_network`, `fit_first_order`.
|
|
198
|
+
* `nugap.pipeline` / `nugap.replicates` — single-variable comparison (one model
|
|
199
|
+
per variable), with and without replicates.
|
|
200
|
+
|
|
201
|
+
## Comparing conditions with replicates (recommended)
|
|
202
|
+
|
|
203
|
+
If you have replicates, use the replicate-aware pipeline. It fits a model to
|
|
204
|
+
every replicate, then uses *within*-condition nu-gaps (replicate vs replicate
|
|
205
|
+
of the same condition) as a noise floor and compares the *between*-condition
|
|
206
|
+
nu-gap against it:
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
from nugap import compare_conditions_replicates
|
|
210
|
+
|
|
211
|
+
# reps_A[var], reps_B[var]: 2D array (n_replicates x n_timepoints)
|
|
212
|
+
df = compare_conditions_replicates(
|
|
213
|
+
reps_A, reps_B, t, u=u,
|
|
214
|
+
orders=[1], # fix the order low for short series (see below)
|
|
215
|
+
method="arx",
|
|
216
|
+
global_null=True, # pool within-condition gaps across all variables
|
|
217
|
+
)
|
|
218
|
+
# columns include between_median, within_median, p_global, q_global (BH-FDR)
|
|
219
|
+
# sorted by q_global ascending; flag changes with e.g. q_global < 0.1
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
`global_null=True` pools the within-condition noise across all of your
|
|
223
|
+
variables into one well-estimated null, which is far more powerful than the
|
|
224
|
+
~3 within-pairs a single variable provides.
|
|
225
|
+
|
|
226
|
+
### The most important knob: model order
|
|
227
|
+
|
|
228
|
+
With few time points, **fix the model order low (1, sometimes 2)** rather than
|
|
229
|
+
letting AIC roam. A too-high order makes the per-replicate fit unstable, so
|
|
230
|
+
replicates of *identical* dynamics produce a large nu-gap — that variance lands
|
|
231
|
+
directly in your noise floor and destroys sensitivity. In the bundled demo,
|
|
232
|
+
order 2 on 14 points gives a within-condition median nu-gap of ~0.16 and
|
|
233
|
+
detects nothing; order 1 gives ~0.04 and recovers every true change at
|
|
234
|
+
FDR < 0.1.
|
|
235
|
+
|
|
236
|
+
**Diagnostic:** look at `within_median` / the within-condition null. If it is
|
|
237
|
+
large (say > ~0.1), your fits are too unstable — lower the order, average
|
|
238
|
+
replicates, or get more points before trusting the between-condition results.
|
|
239
|
+
|
|
240
|
+
### Matching your MATLAB `tfest` workflow
|
|
241
|
+
|
|
242
|
+
`tfest` needs an input and a response, and returns a *continuous* transfer
|
|
243
|
+
function; you then ran `gapmetric` on those. Here, fit with `method="arx"`
|
|
244
|
+
(input/output) and, if you want continuous-domain numbers to match MATLAB, map
|
|
245
|
+
each fitted model with `nugap.to_continuous` before comparing. The *ranking* of
|
|
246
|
+
variables is essentially the same in discrete or continuous form, so for
|
|
247
|
+
discovery you can stay discrete.
|
|
248
|
+
|
|
249
|
+
## Known limitations (v0.1)
|
|
250
|
+
|
|
251
|
+
* SISO only (one signal per variable). MIMO would need the determinant form of
|
|
252
|
+
the winding condition.
|
|
253
|
+
* Systems with poles *exactly* on the stability boundary (pure integrators /
|
|
254
|
+
undamped oscillators) are an edge case in the winding condition; fitted
|
|
255
|
+
models from real data essentially never hit this.
|
|
256
|
+
* The fitting layer is intentionally basic. For best results, match the model
|
|
257
|
+
class and order you used in MATLAB.
|
|
258
|
+
|
|
259
|
+
## License
|
|
260
|
+
|
|
261
|
+
MIT — see the [LICENSE](LICENSE) file. (Fill in the copyright holder in that
|
|
262
|
+
file before publishing.)
|
nugap-0.1.0/README.md
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# nugap
|
|
2
|
+
|
|
3
|
+
A Python implementation of the **Vinnicombe nu-gap metric** (δν) and a
|
|
4
|
+
pipeline that uses it to find which variables change their *dynamics* between
|
|
5
|
+
two experimental conditions, from time-course data.
|
|
6
|
+
|
|
7
|
+
There is no nu-gap implementation in the standard Python control ecosystem —
|
|
8
|
+
it has lived almost exclusively in MATLAB's Robust Control Toolbox
|
|
9
|
+
(`gapmetric`). This package provides a tested, dependency-light one
|
|
10
|
+
(numpy + scipy + pandas only).
|
|
11
|
+
|
|
12
|
+
## What the nu-gap metric is
|
|
13
|
+
|
|
14
|
+
For two linear systems P1 and P2, δν(P1, P2) is a number in **[0, 1]**:
|
|
15
|
+
|
|
16
|
+
* **0** — identical dynamics,
|
|
17
|
+
* **near 1** — very different dynamics.
|
|
18
|
+
|
|
19
|
+
It is computed from the *chordal distance* between the two frequency responses,
|
|
20
|
+
gated by a winding-number (topological) condition. Unlike a naive comparison of
|
|
21
|
+
fitted parameters, it is a true metric, it is bounded, and it correctly treats
|
|
22
|
+
systems that look very different on paper but behave similarly (and vice versa).
|
|
23
|
+
|
|
24
|
+
Reference: G. Vinnicombe, *Frequency domain uncertainty and the graph
|
|
25
|
+
topology*, IEEE TAC 38 (1993) 1371–1383.
|
|
26
|
+
|
|
27
|
+
## The idea for your application
|
|
28
|
+
|
|
29
|
+
You have time-course data for thousands of variables under two conditions. The
|
|
30
|
+
analysis is a **pairwise dynamic network**: within each condition, every
|
|
31
|
+
variable is treated as a candidate input for every other variable, and a
|
|
32
|
+
first-order input->output model is fitted for each ordered pair (i -> j). Then
|
|
33
|
+
the nu-gap compares condition A's model with condition B's model for each edge.
|
|
34
|
+
Edges with a large nu-gap are interactions whose dynamics changed.
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from nugap import compare_network
|
|
38
|
+
|
|
39
|
+
# data_A, data_B: dict variable name -> array (n_replicates x n_timepoints)
|
|
40
|
+
edges = compare_network(
|
|
41
|
+
data_A, data_B, t,
|
|
42
|
+
order=1, # model poles: 1 = first-order, 2 = second-order
|
|
43
|
+
n_zeros=None, # numerator zeros (default 0 -> all-pole model)
|
|
44
|
+
n=256, # contour resolution; 256 is plenty for low order
|
|
45
|
+
min_r2=0.5, # only test pairs with a real relationship
|
|
46
|
+
gate="either", # how to combine the two conditions' fit quality
|
|
47
|
+
global_null=True, # pool within-condition nu-gaps -> p_global, q_global
|
|
48
|
+
)
|
|
49
|
+
# one row per edge (source, target, nu_gap, within_median, separation,
|
|
50
|
+
# max_r2, q_global), sorted by significance. Flag changes with q_global < 0.1.
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**`order` / `n_zeros`** choose the per-edge model. `order=1, n_zeros=0`
|
|
54
|
+
(default) is the first-order K/(τs+1); `order=2` is a two-pole system, with
|
|
55
|
+
`n_zeros=1` if you want a zero (the discrete analogue of MATLAB
|
|
56
|
+
`tfest(data, 2, 1)`). Higher order needs more time points per trajectory.
|
|
57
|
+
|
|
58
|
+
**`gate`** controls the fit-quality gate across the two conditions:
|
|
59
|
+
`"either"` keeps an edge if the relationship is well fit in at least one
|
|
60
|
+
condition (so relationships that appear or disappear are tested); `"both"`
|
|
61
|
+
requires a good fit in both conditions; `"mean"` uses the mean R² over all
|
|
62
|
+
replicate fits.
|
|
63
|
+
|
|
64
|
+
### Two things that matter a lot here
|
|
65
|
+
|
|
66
|
+
1. **Gate on fit quality (`min_r2`).** Most variable pairs have *no* real
|
|
67
|
+
first-order relationship; those fits are meaningless and produce large,
|
|
68
|
+
high-variance nu-gaps that would swamp the null. `compare_network` only
|
|
69
|
+
tests an edge if a first-order relationship actually holds (R² above
|
|
70
|
+
`min_r2`) in at least one condition. This is essential — without it nothing
|
|
71
|
+
is detectable.
|
|
72
|
+
|
|
73
|
+
2. **Confounding.** Pairwise first-order identification assumes the i->j
|
|
74
|
+
relationship is approximately self-contained. In a densely coupled system
|
|
75
|
+
each output depends on many inputs, so a single pairwise model is
|
|
76
|
+
misspecified and the within-condition noise floor rises. This is a property
|
|
77
|
+
of the method (the same one you ran in MATLAB), not the metric. Sparse or
|
|
78
|
+
modular systems behave well; dense ones need care.
|
|
79
|
+
|
|
80
|
+
### Scale
|
|
81
|
+
|
|
82
|
+
N variables -> N*(N-1) ordered edges (about a million at N=1000). Each fit is a
|
|
83
|
+
2-parameter least squares; the dominant cost is the metric, kept cheap by the small
|
|
84
|
+
`n`. The per-edge work is independent, so wrap the edge loop in
|
|
85
|
+
`joblib.Parallel` / `multiprocessing` for real datasets, and/or pass
|
|
86
|
+
`include_pairs=` to test only a prescreened candidate set.
|
|
87
|
+
|
|
88
|
+
## Install
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pip install nugap    # or, from a source checkout: pip install -e .
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Usage
|
|
95
|
+
|
|
96
|
+
The metric on two systems directly:
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from nugap import tf, nu_gap
|
|
100
|
+
|
|
101
|
+
P1 = tf([1.0], [1.0, 1.0]) # 1/(s+1), continuous
|
|
102
|
+
P2 = tf([1.0], [1.0, 1.2]) # 1/(s+1.2)
|
|
103
|
+
print(nu_gap(P1, P2)) # ~0.09
|
|
104
|
+
|
|
105
|
+
# discrete systems use dt; the metric uses the unit circle automatically
|
|
106
|
+
Pd = tf([0.5], [1.0, -0.5], dt=0.1)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
The full two-condition comparison:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from nugap import compare_conditions
|
|
113
|
+
|
|
114
|
+
# data_A, data_B: dict mapping variable name -> trajectory (sampled at t)
|
|
115
|
+
# u_A, u_B: the known stimulus, if you have one (else omit -> Prony fit)
|
|
116
|
+
df = compare_conditions(
|
|
117
|
+
data_A, data_B, t,
|
|
118
|
+
u_A=u, u_B=u, # drop these for output-only data
|
|
119
|
+
orders=range(1, 5), # candidate model orders (AIC-selected)
|
|
120
|
+
method="arx", # or "prony" (output-only), or "auto"
|
|
121
|
+
min_r2=0.9, # flag variables with poor fits
|
|
122
|
+
)
|
|
123
|
+
# df is sorted by nu_gap descending, with fit quality per condition
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
See `examples/demo_network.py` (the `examples/` scripts are not included in this source distribution) for a small validated example, and
|
|
127
|
+
`examples/demo_clock.py` for a full biological test case: synthetic circadian
|
|
128
|
+
RNA-seq for 50 genes (core clock genes, clock outputs, CLOCK-independent
|
|
129
|
+
rhythmic genes, and background) under wild-type vs **CLOCK knockout**. The KO
|
|
130
|
+
collapses the cell-autonomous oscillation (a Hopf bifurcation to a damped fixed
|
|
131
|
+
point) while CLOCK mRNA is still expressed; `compare_network` recovers the
|
|
132
|
+
collapse, flagging ~90% of clock/output edges as changed while leaving the
|
|
133
|
+
CLOCK-independent rhythmic edges (rhythmic in both conditions) and background
|
|
134
|
+
alone. The generator is `examples/clock_sim.py`.
|
|
135
|
+
|
|
136
|
+
### The null and short/flat conditions
|
|
137
|
+
|
|
138
|
+
`null_from_reliable_only=True` (default) builds the within-condition null only
|
|
139
|
+
from edges where a real relationship exists in that condition. This matters
|
|
140
|
+
whenever one condition loses dynamics (e.g. genes go flat in a knockout):
|
|
141
|
+
fitting noise-to-noise there produces large, meaningless within-condition
|
|
142
|
+
nu-gaps that would otherwise inflate the null and hide the real changes.
|
|
143
|
+
|
|
144
|
+
## Visualising results
|
|
145
|
+
|
|
146
|
+
`nugap.viz` (needs matplotlib + networkx: `pip install nugap[viz]`) provides
|
|
147
|
+
three views of a `compare_network` edge table:
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from nugap.viz import volcano, hub_barplot, hub_network
|
|
151
|
+
|
|
152
|
+
volcano(df, q_thresh=0.1) # effect size (nu_gap) vs -log10 FDR; best overview
|
|
153
|
+
hub_barplot(df, top=20) # genes ranked by # of significant changed edges
|
|
154
|
+
hub_network(df, top_hubs=15, # directed graph of the most-rewired genes;
|
|
155
|
+
node_groups=classes) # nodes sized by degree, edges coloured by nu_gap
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
The volcano is the recommended default (scale-independent, shows everything at
|
|
159
|
+
once). `hub_barplot` gives the node-level summary that is usually the most
|
|
160
|
+
interpretable. `hub_network` draws the most-rewired genes and the significant
|
|
161
|
+
changed edges among them as a directed graph (pass `node_groups`, a dict
|
|
162
|
+
gene -> label, to colour by class); `top_hubs` controls how many genes appear,
|
|
163
|
+
so you can keep it readable on dense results. `changed_edge_counts(df)` returns
|
|
164
|
+
the per-gene counts behind both hub views. Each plotting function returns a
|
|
165
|
+
matplotlib Axes so you can compose or restyle. `examples/demo_clock.py` writes
|
|
166
|
+
`clock_viz.png` (volcano + hubs) and `clock_hub_network.png`.
|
|
167
|
+
|
|
168
|
+
## Modules
|
|
169
|
+
|
|
170
|
+
* `nugap.metric` — the nu-gap metric, chordal distance, winding condition.
|
|
171
|
+
* `nugap.systems` — lightweight SISO LTI type (`tf`, `from_zpk`,
|
|
172
|
+
`from_control`).
|
|
173
|
+
* `nugap.fitting` — identify discrete LTI models from data (ARX / Prony, with
|
|
174
|
+
AIC order selection). **Swap this out** to match your MATLAB procedure.
|
|
175
|
+
* `nugap.network` — **pairwise (input->output) network comparison across
|
|
176
|
+
conditions** with fit-quality gating and FDR; the main entry point for your
|
|
177
|
+
application. `compare_network`, `fit_first_order`.
|
|
178
|
+
* `nugap.pipeline` / `nugap.replicates` — single-variable comparison (one model
|
|
179
|
+
per variable), with and without replicates.
|
|
180
|
+
|
|
181
|
+
## Comparing conditions with replicates (recommended)
|
|
182
|
+
|
|
183
|
+
If you have replicates, use the replicate-aware pipeline. It fits a model to
|
|
184
|
+
every replicate, then uses *within*-condition nu-gaps (replicate vs replicate
|
|
185
|
+
of the same condition) as a noise floor and compares the *between*-condition
|
|
186
|
+
nu-gap against it:
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
from nugap import compare_conditions_replicates
|
|
190
|
+
|
|
191
|
+
# reps_A[var], reps_B[var]: 2D array (n_replicates x n_timepoints)
|
|
192
|
+
df = compare_conditions_replicates(
|
|
193
|
+
reps_A, reps_B, t, u=u,
|
|
194
|
+
orders=[1], # fix the order low for short series (see below)
|
|
195
|
+
method="arx",
|
|
196
|
+
global_null=True, # pool within-condition gaps across all variables
|
|
197
|
+
)
|
|
198
|
+
# columns include between_median, within_median, p_global, q_global (BH-FDR)
|
|
199
|
+
# sorted by q_global ascending; flag changes with e.g. q_global < 0.1
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
`global_null=True` pools the within-condition noise across all of your
|
|
203
|
+
variables into one well-estimated null, which is far more powerful than the
|
|
204
|
+
~3 within-pairs a single variable provides.
|
|
205
|
+
|
|
206
|
+
### The most important knob: model order
|
|
207
|
+
|
|
208
|
+
With few time points, **fix the model order low (1, sometimes 2)** rather than
|
|
209
|
+
letting AIC roam. A too-high order makes the per-replicate fit unstable, so
|
|
210
|
+
replicates of *identical* dynamics produce a large nu-gap — that variance lands
|
|
211
|
+
directly in your noise floor and destroys sensitivity. In the bundled demo,
|
|
212
|
+
order 2 on 14 points gives a within-condition median nu-gap of ~0.16 and
|
|
213
|
+
detects nothing; order 1 gives ~0.04 and recovers every true change at
|
|
214
|
+
FDR < 0.1.
|
|
215
|
+
|
|
216
|
+
**Diagnostic:** look at `within_median` / the within-condition null. If it is
|
|
217
|
+
large (say > ~0.1), your fits are too unstable — lower the order, average
|
|
218
|
+
replicates, or get more points before trusting the between-condition results.
|
|
219
|
+
|
|
220
|
+
### Matching your MATLAB `tfest` workflow
|
|
221
|
+
|
|
222
|
+
`tfest` needs an input and a response, and returns a *continuous* transfer
|
|
223
|
+
function; you then ran `gapmetric` on those. Here, fit with `method="arx"`
|
|
224
|
+
(input/output) and, if you want continuous-domain numbers to match MATLAB, map
|
|
225
|
+
each fitted model with `nugap.to_continuous` before comparing. The *ranking* of
|
|
226
|
+
variables is essentially the same in discrete or continuous form, so for
|
|
227
|
+
discovery you can stay discrete.
|
|
228
|
+
|
|
229
|
+
## Known limitations (v0.1)
|
|
230
|
+
|
|
231
|
+
* SISO only (one signal per variable). MIMO would need the determinant form of
|
|
232
|
+
the winding condition.
|
|
233
|
+
* Systems with poles *exactly* on the stability boundary (pure integrators /
|
|
234
|
+
undamped oscillators) are an edge case in the winding condition; fitted
|
|
235
|
+
models from real data essentially never hit this.
|
|
236
|
+
* The fitting layer is intentionally basic. For best results, match the model
|
|
237
|
+
class and order you used in MATLAB.
|
|
238
|
+
|
|
239
|
+
## License
|
|
240
|
+
|
|
241
|
+
MIT — see the [LICENSE](LICENSE) file. (Fill in the copyright holder in that
|
|
242
|
+
file before publishing.)
|
nugap-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "nugap"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Vinnicombe nu-gap metric and a pipeline for comparing time-course data across two conditions"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"numpy>=1.23",
|
|
15
|
+
"scipy>=1.9",
|
|
16
|
+
"pandas>=1.5",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.optional-dependencies]
|
|
20
|
+
control = ["control>=0.9"] # optional interop with python-control
|
|
21
|
+
viz = ["matplotlib>=3.5", "networkx>=2.6"] # required for nugap.viz
|
|
22
|
+
dev = ["pytest>=7"]
|
|
23
|
+
|
|
24
|
+
[tool.setuptools.packages.find]
|
|
25
|
+
where = ["src"]
|
nugap-0.1.0/setup.cfg
ADDED
nugap-0.1.0/src/nugap/__init__.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""nugap: the Vinnicombe nu-gap metric and a pipeline for comparing
|
|
2
|
+
time-course data across two conditions.
|
|
3
|
+
|
|
4
|
+
Quick start
|
|
5
|
+
-----------
|
|
6
|
+
from nugap import tf, nu_gap
|
|
7
|
+
d = nu_gap(tf([1],[1,1]), tf([1],[1,1.2])) # ~0.09
|
|
8
|
+
|
|
9
|
+
from nugap import compare_conditions
|
|
10
|
+
df = compare_conditions(data_A, data_B, t) # ranked table of changes
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from .systems import LTI, tf, from_zpk, from_control, to_continuous
|
|
14
|
+
from .metric import nu_gap, chordal_distance, winding_condition
|
|
15
|
+
from .fitting import fit_model, fit_prony, fit_arx, fit_arx_fast, fit_first_order, FitResult
|
|
16
|
+
from .pipeline import compare_conditions, compare_variable
|
|
17
|
+
from .replicates import compare_conditions_replicates, compare_variable_replicates
|
|
18
|
+
from .network import compare_network
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"LTI", "tf", "from_zpk", "from_control", "to_continuous",
|
|
22
|
+
"nu_gap", "chordal_distance", "winding_condition",
|
|
23
|
+
"fit_model", "fit_prony", "fit_arx", "fit_arx_fast", "fit_first_order", "FitResult",
|
|
24
|
+
"compare_conditions", "compare_variable",
|
|
25
|
+
"compare_conditions_replicates", "compare_variable_replicates",
|
|
26
|
+
"compare_network",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
__version__ = "0.1.0"
|