GLDF 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GLDF/__init__.py +2 -0
- GLDF/bridges/__init__.py +0 -0
- GLDF/bridges/causal_learn.py +185 -0
- GLDF/bridges/tigramite.py +143 -0
- GLDF/bridges/tigramite_plotting_modified.py +4764 -0
- GLDF/cit.py +274 -0
- GLDF/data_management.py +588 -0
- GLDF/data_processing.py +754 -0
- GLDF/frontend.py +537 -0
- GLDF/hccd.py +403 -0
- GLDF/hyperparams.py +205 -0
- GLDF/independence_atoms.py +78 -0
- GLDF/state_space_construction.py +288 -0
- GLDF/tutorials/01_preconfigured_quickstart.ipynb +302 -0
- GLDF/tutorials/02_detailed_configuration.ipynb +394 -0
- GLDF/tutorials/03_custom_patterns.ipynb +447 -0
- gldf-0.9.0.dist-info/METADATA +101 -0
- gldf-0.9.0.dist-info/RECORD +20 -0
- gldf-0.9.0.dist-info/WHEEL +4 -0
- gldf-0.9.0.dist-info/licenses/LICENSE +621 -0
GLDF/hccd.py
ADDED
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
from .data_management import CI_Identifier, CI_Identifier_TimeSeries
|
|
2
|
+
from .data_processing import ITestMarkedCI
|
|
3
|
+
import numpy as np
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
type abstract_cit_t = Callable[[CI_Identifier],bool] #: Signature of an abstract conditional-independence test (CIT): takes a :py:class:`CI_Identifier`, returns a bool. Consumed by CD-algorithms of type :py:type:`abstract_cd_t`; the :py:class:`Controller` supplies a callable that returns ``True`` for "dependent".
|
|
10
|
+
type graph_t = np.ndarray #: Annotates graphs stored as numpy arrays in tigramite-format.
|
|
11
|
+
type abstract_cd_t = Callable[[abstract_cit_t], graph_t] #: Signature of an abstract CD-algorithm: takes an :py:type:`abstract_cit_t`, returns a :py:type:`graph_t`. The :py:class:`Controller` invokes it with the keyword argument ``generalized_cit``.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class IProvideIndependenceAtoms:
    """
    Interface that (custom) independence-atom backends implement.

    A backend *provides* independence atoms rather than merely testing them:
    :py:mod:`data_management<GLDF.data_management>` and
    :py:mod:`data_processing<GLDF.data_processing>` functionality are bundled,
    so callers pass identifiers only and the data stays opaque.
    """

    def marked_independence(self, ci: CI_Identifier) -> ITestMarkedCI.Result:
        """Provide a marked-independence statement for ``ci``.

        .. seealso::
            Same functionality as
            :py:meth:`TestMarkedCI.marked_independence<GLDF.TestMarkedCI.marked_independence>`,
            but with the actual data made opaque.

        :param ci: conditional independence to test
        :type ci: CI_Identifier
        :return: marked CIT result
        :rtype: ITestMarkedCI.Result
        """
        raise NotImplementedError()

    def regime_implication(self, lhs: list[CI_Identifier], rhs: CI_Identifier) -> bool:
        """Provide a regime-implication statement ``lhs => rhs``.

        .. seealso::
            Same functionality as
            :py:meth:`ITestIndicatorImplications.is_implied_regime<GLDF.ITestIndicatorImplications.is_implied_regime>`,
            but with the actual data made opaque.

        :param lhs: left-hand side of the implication
        :type lhs: list[CI_Identifier]
        :param rhs: right-hand side of the implication
        :type rhs: CI_Identifier
        :return: test-result for the truth-value of the implication
        :rtype: bool
        """
        raise NotImplementedError()

    def found_globally_independent_for_some_Z(self, undirected_link: tuple) -> bool:
        """Report whether, among the test-results provided so far, any CIT on
        :py:obj:`undirected_link` (for any conditioning set Z) was reported
        globally independent.

        :param undirected_link: the undirected link to look up
        :type undirected_link: tuple
        :return: whether a globally independent outcome was encountered on this link
        :rtype: bool
        """
        raise NotImplementedError()

    def _extract_cache_id(self, fname: str, *args, **kwargs) -> tuple:
        """Build a cache key for a call to the method named ``fname``.

        Positional and keyword arguments are merged (keyword values appended in
        their given order). For ``"marked_independence"`` the key is the first
        argument itself; for any other name the first argument is sorted into a
        tuple and paired with the second argument.
        """
        merged = [*args, *kwargs.values()]
        if fname != "marked_independence":
            # "lhs" is order-insensitive, hence sorted; "rhs" is kept as-is.
            return (tuple(sorted(merged[0])), merged[1])
        return merged[0]  # "ci"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class IHandleExplicitTransitionToMCI:
    """
    Interface for independence-atom backends used with PCMCI-family [RNK+19]_
    algorithms. Such backends typically treat PC1 and MCI tests differently;
    through these callbacks the HCCD-controller notifies them about
    transitions between the two phases.
    """

    def enter_pc1(self) -> None:
        """Notification that the underlying cd-algorithm has (re)entered what
        the controller considers the PC1-phase.
        """
        raise NotImplementedError()

    def enter_mci(self) -> None:
        """Notification that the underlying cd-algorithm has (re)entered what
        the controller considers the MCI-phase.
        """
        raise NotImplementedError()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class IPresentResult:
    """
    Interface for exposing (custom) backend hccd-results.

    .. seealso::
        The :py:mod:`frontend<GLDF.frontend>` typically translates this into a more
        user-friendly :py:class:`frontend.Result<GLDF.frontend.Result>`.
    """

    def union_graph(self) -> graph_t:
        """Return the union-graph.

        :return: union-graph (tigramite encoded)
        :rtype: graph_t
        """
        raise NotImplementedError()

    def state_graphs(self) -> list[graph_t]:
        """Return the state-specific graphs.

        :return: list of state-specific graphs (tigramite encoded)
        :rtype: list[graph_t]
        """
        raise NotImplementedError()

    def model_indicators(self) -> list:
        """Return the model-indicators; their translation is specific to the
        state-space construction in use.

        :return: model-indicators
        :rtype: list
        """
        raise NotImplementedError()
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class IResolveRegimeStructure:
    """
    Interface for (custom) implementations of approximate regime-structure
    resolution used in post-processing.
    """

    def resolve_model_indicator(self, model_indicator) -> np.ndarray:
        """Resolve a single model-indicator.

        :param model_indicator: model-indicator
        :type model_indicator: state-space construction specific encoding
        :return: the indicator resolved in index-space
        :rtype: np.ndarray
        """
        raise NotImplementedError()
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class IRepresentState:
    """
    Interface for (custom) state-representations produced by state-space
    construction and consumed by :py:class:`Controller`.

    Implementations will typically carry further internal information; this
    interface only lists what must be exposed to the :py:class:`Controller`
    during HCCD.
    """

    def state_space(self) -> 'IRepresentStateSpace':
        """Return the state-space that contains this state.

        :return: state-space
        :rtype: IRepresentStateSpace
        """
        raise NotImplementedError()

    def overwrites_ci(self, ci: CI_Identifier) -> bool:
        """Tell whether this conditional independence is marked, i.e. whether
        this state supplies its own value for it.

        :param ci: conditional independence
        :type ci: CI_Identifier
        :return: truth-value of the CI being marked
        :rtype: bool
        """
        raise NotImplementedError()

    def get_ci_pseudo_value(self, ci: CI_Identifier) -> bool:
        """Return the (state-encoded) value for a marked conditional independence.

        :param ci: marked conditional independence
        :type ci: CI_Identifier
        :return: state-specific dependence-value (true for dependent)
        :rtype: bool
        """
        raise NotImplementedError()
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class IRepresentStateSpace:
    """
    Interface for (custom) state-space-representations produced by state-space
    construction and consumed by :py:class:`Controller`.

    Implementations will typically carry further internal information; this
    interface only lists what must be exposed to the :py:class:`Controller`
    during HCCD.
    """

    def states(self) -> list[IRepresentState]:
        """Return the states contained in this state-space.

        :return: list of states
        :rtype: list[IRepresentState]
        """
        raise NotImplementedError()

    #todo
    def finalize(self, graphs: dict[IRepresentState,graph_t]) -> IPresentResult:
        """Translate to model-properties and transfer information between
        states, producing a result-summary.

        :param graphs: state-specific graph for each state
        :type graphs: dict[IRepresentState,graph_t]
        :return: the summarized result
        :rtype: IPresentResult
        """
        # translate_and_transfer
        raise NotImplementedError()
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class IConstructStateSpace:
    """
    Interface for (custom) state-space-construction strategies usable with
    :py:class:`Controller`.
    """

    def construct_statespace(self, testing_backend: IProvideIndependenceAtoms, marked_tests: set[CI_Identifier], previous_graphs: dict[IRepresentState,graph_t]) -> IRepresentStateSpace:
        """Construct a state-space.

        :param testing_backend: the independence-atom backend
        :type testing_backend: IProvideIndependenceAtoms
        :param marked_tests: the set of marked conditional independencies
        :type marked_tests: set[CI_Identifier]
        :param previous_graphs: state-specific graph for each state found in the
            previous iteration
        :type previous_graphs: dict[IRepresentState,graph_t]
        :return: state-space
        :rtype: IRepresentStateSpace
        """
        raise NotImplementedError()
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class Controller:
    """
    The HCCD-controller. Coordinates repeated CD-algorithm runs with state-space construction.

    .. seealso::
        Specialized versions for PCMCI-family time-series algorithms :py:class:`ControllerTimeseriesMCI`
        and for LPCMCI :py:class:`ControllerTimeseriesLPCMCI` are available.
    """

    def __init__(self, universal_cd: abstract_cd_t, testing_backend: IProvideIndependenceAtoms, state_space_construction: IConstructStateSpace):
        """Construct from components.

        :param universal_cd: universal (underlying) CD-algorithm
        :type universal_cd: abstract_cd_t
        :param testing_backend: independence-testing backend to use
        :type testing_backend: IProvideIndependenceAtoms
        :param state_space_construction: state-space construction strategy to use
        :type state_space_construction: IConstructStateSpace
        """
        self.CD = universal_cd
        self.testing_backend = testing_backend
        self.state_space_construction = state_space_construction

    def get_marked_independence(self, ci):
        """Look up a marked-independence result from the testing backend.

        Extension hook: override this method to attach an observer to
        marked-independence lookups, see e.g. :py:class:`ControllerTimeseriesLPCMCI`.

        :param ci: conditional independence to look up
        :return: whatever the backend's ``marked_independence`` returns
        """
        return self.testing_backend.marked_independence(ci)

    def run_cd(self, state: IRepresentState) -> tuple[graph_t, set[CI_Identifier]]:
        """*Implements part of the "core-algorithm" Algo. 1 in* [RR25]_.

        Run underlying CD with a state-specific pseudo-cit.

        :param state: state considered active
        :type state: IRepresentState
        :return: tuple consisting of state-graph and set of (newly) marked tests.
        :rtype: tuple[graph_t, set[CI_Identifier]]
        """
        newly_marked_tests = set()

        def pseudo_cit_is_dependent(ci: CI_Identifier) -> bool:
            # Tests the state already overwrites are answered from the state.
            if state.overwrites_ci(ci):
                return state.get_ci_pseudo_value(ci)
            result = self.get_marked_independence(ci)
            if result.is_regime():
                # Not yet known to the state: record it and report "dependent".
                newly_marked_tests.add(ci)
                return True
            return result.is_globally_dependent()

        graph = self.CD( generalized_cit=pseudo_cit_is_dependent )
        return graph, newly_marked_tests

    def run_hccd(self, max_iterations: int=10) -> IPresentResult:
        """*Implements part of the "core-algorithm" Algo. 1 from* [RR25]_.

        Run HCCD: construct a state-space, run CD once per state, and repeat
        until :py:meth:`check_for_convergence` reports convergence.

        :param max_iterations: limit for iterations, defaults to 10
        :type max_iterations: int, optional
        :raises RuntimeError: if the run has not converged after
            ``max_iterations`` iterations. A run that converges on the last
            permitted iteration returns normally.
        :return: hccd result
        :rtype: IPresentResult
        """
        marked_tests = set()
        graphs = None

        while True:
            state_space = self.state_space_construction.construct_statespace(testing_backend=self.testing_backend, marked_tests=marked_tests, previous_graphs=graphs)
            graphs = {}
            all_newly_marked_tests = set()
            for state in state_space.states():
                graphs[state], newly_marked_tests_from_current_state = self.run_cd(state)
                all_newly_marked_tests.update(newly_marked_tests_from_current_state)

            # Check for convergence
            marked_tests, converged = self.check_for_convergence(previously_marked=marked_tests, newly_marked=all_newly_marked_tests, graphs=graphs)

            # Test convergence BEFORE the iteration budget, so a run that
            # converges on its final permitted iteration is not rejected.
            if converged:
                break

            # Bail if there is a serious convergence issue
            if max_iterations == 1:
                raise RuntimeError("run_regime_cd reached maximum number of iterations. Terminating with last state.")
            max_iterations -= 1

        return state_space.finalize(graphs=graphs)

    def check_for_convergence(self, previously_marked: set[CI_Identifier], newly_marked: set[CI_Identifier], graphs)\
            -> tuple[set[CI_Identifier], bool]:
        """Check if the core-algorithm has converged. May be overridden by more derived controllers.

        Convergence here means the union of previously and newly marked tests
        is no larger than the previously marked set.

        :param previously_marked: tests marked before this iteration
        :type previously_marked: set[CI_Identifier]
        :param newly_marked: tests marked during this iteration
        :type newly_marked: set[CI_Identifier]
        :param graphs: state-specific graphs of this iteration (unused here,
            available to overriding controllers)
        :return: tuple of (all marked tests, converged-flag)
        :rtype: tuple[set[CI_Identifier], bool]
        """
        previous_count = len(previously_marked)
        marked = set.union(previously_marked, newly_marked)
        return marked, (previous_count == len(marked))
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
class ControllerTimeseriesMCI(Controller):
    """Time-series specific controller, for PCMCI [RNK+19]_ family algorithms. See §B.6 in [RR25]_\\ .

    .. seealso::
        Details are described at :py:class:`Controller`.
    """

    def __init__(self, universal_cd: abstract_cd_t, testing_backend: IProvideIndependenceAtoms, state_space_construction: IConstructStateSpace):
        """Construct from components, requiring a transitionable backend.

        :param universal_cd: universal (underlying) CD-algorithm
        :type universal_cd: abstract_cd_t
        :param testing_backend: independence-testing backend; must also implement
            :py:class:`IHandleExplicitTransitionToMCI`
        :type testing_backend: IProvideIndependenceAtoms
        :param state_space_construction: state-space construction strategy to use
        :type state_space_construction: IConstructStateSpace
        :raises RuntimeError: if the backend is not an
            :py:class:`IHandleExplicitTransitionToMCI`
        """
        super().__init__(universal_cd, testing_backend, state_space_construction)
        if isinstance(testing_backend, IHandleExplicitTransitionToMCI):
            return
        raise RuntimeError( "Timeseries with MCI should use a transitionable backend, see 'independence_atoms.IndependenceAtoms_TimeSeries'." )
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
class ControllerTimeseriesLPCMCI(Controller):
    """LPCMCI [GR20]_ specific controller.

    .. seealso::
        Details are described at :py:class:`Controller`.
    """

    def __init__(self, universal_cd: abstract_cd_t, testing_backend: IProvideIndependenceAtoms, state_space_construction: IConstructStateSpace):
        """Construct from components, requiring a transitionable backend.

        :raises RuntimeError: if the backend is not an
            :py:class:`IHandleExplicitTransitionToMCI`
        """
        super().__init__(universal_cd, testing_backend, state_space_construction)
        if not isinstance(testing_backend, IHandleExplicitTransitionToMCI):
            raise RuntimeError( "Timeseries with MCI should use a transitionable backend, see 'independence_atoms.IndependenceAtoms_TimeSeries'." )
        # Filled in by the first call to check_for_convergence.
        self.union_graph = None

    def check_for_convergence(self, previously_marked: set[CI_Identifier], newly_marked: set[CI_Identifier], graphs: dict)\
            -> tuple[set[CI_Identifier], bool]:
        """
        Overridden from :py:class:`Controller` to reset after an initial iteration (union-graph discovery),
        cf :py:meth:`get_marked_independence`.

        On the first call the single graph in ``graphs`` is removed from the
        dict and stored as the union-graph; an empty marked-set and
        "not converged" are returned. Later calls defer to the base class.
        """
        if self.union_graph is not None:
            return super().check_for_convergence(previously_marked, newly_marked, graphs)
        assert len(graphs) == 1
        # NOTE(review): popitem() empties the caller's dict — presumably part of the intended reset; confirm.
        _, self.union_graph = graphs.popitem()
        return set(), False

    def lagged_parents(self, node: int, lag_shift: int=0) -> set[tuple[int,int]]:
        """Collect the lagged parents of ``node`` from the union-graph.

        Scans ``union_graph[:, node, 1:]`` for ``'-->'`` entries (presumably
        tigramite's (source, target, lag) layout — confirm) and returns each hit
        as ``(source, -(lag) + lag_shift)``.

        :param node: index of the node whose parents are requested
        :type node: int
        :param lag_shift: offset added to every (negative) parent lag, defaults to 0
        :type lag_shift: int, optional
        :return: set of (variable-index, lag) pairs
        :rtype: set[tuple[int,int]]
        """
        incoming = (self.union_graph[:,node,1:] == '-->')
        sources, lag_offsets = np.nonzero(incoming)
        # The slice starts at lag index 1, hence the +1.
        lags = -(lag_offsets + 1 - lag_shift)
        return set(zip(sources, lags))

    def contains_all_lagged_parents(self, ci: CI_Identifier_TimeSeries) -> bool:
        """Tell whether the conditioning set of ``ci`` contains all lagged
        parents (per the union-graph) of both ``ci.idx_x`` and ``ci.idx_y``.
        """
        # use union-graph to determine this; <= is "subset"-test on sets
        if not ( self.lagged_parents(*ci.idx_x) <= ci.conditioning_set() ):
            return False
        return self.lagged_parents(*ci.idx_y) <= ci.conditioning_set()

    def should_consider_mci(self, ci: CI_Identifier_TimeSeries) -> bool:
        """Tell whether ``ci`` should be treated as an MCI test: a union-graph
        is known and the conditioning set contains all lagged parents.
        """
        if self.union_graph is None:
            return False
        return self.contains_all_lagged_parents(ci)

    def get_marked_independence(self, ci):
        """
        Overridden from :py:class:`Controller` to attach logic for deciding
        if this test should be considered MCI based on the union-graph.
        The backend is switched to the matching phase before the lookup.
        """
        if self.should_consider_mci(ci):
            switch_phase = self.testing_backend.enter_mci
        else:
            switch_phase = self.testing_backend.enter_pc1
        switch_phase()
        return super().get_marked_independence(ci)
|
GLDF/hyperparams.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
from . import data_processing
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
r"""Submodule for hyperparameter helpers
|
|
5
|
+
------------------------------------
|
|
6
|
+
|
|
7
|
+
Typically the user should configure the marked-independence stages through these helpers.
|
|
8
|
+
To configure stage XYZ use ForXYZ.Configure\\ *subtype*\\ .
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from scipy.stats import binom
|
|
13
|
+
from scipy.optimize import root_scalar
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Hyperparams_HomogeneityBinomial_ParCorr(data_processing.IProvideHyperparamsForBinomial):
    """Helper to configure standard hyper-parameter sets for binomial homogeneity test when using partial correlation."""

    def __init__(self, alpha_error_control: float=0.05, min_block_count: int=5, min_effective_sample_count: int=5, regimes_are_large:bool=True):
        """Construct Hyper-parameter set for given parameters.

        :param alpha_error_control: Error-control target :math:`\\alpha`, defaults to 0.05
        :type alpha_error_control: float, optional
        :param min_block_count: Pick block-size to ensure minimum number of blocks, defaults to 5
        :type min_block_count: int, optional
        :param min_effective_sample_count: Pick block-size to ensure minimum effective sample-size per block (for partial correlation), defaults to 5
        :type min_effective_sample_count: int, optional
        :param regimes_are_large: Consider hyper-parameters for large regimes, defaults to True
        :type regimes_are_large: bool, optional
        """
        self.alpha = alpha_error_control
        self.min_block_count = min_block_count
        self.min_effective_sample_count = min_effective_sample_count
        self.tolerance = 1e-5  # slack allowed by _validate
        self.validate_choice = True  # run _validate on every computed parameter set
        self.regimes_are_large = regimes_are_large
        # Keyed by (N, dim_Z).
        self.cache : dict[tuple[int, int], data_processing.IProvideHyperparamsForBinomial.Hyperparams] | None = dict() #: Set to None to disable cache.

    def hyperparams_for_binomial(self, N: int, dim_Z: int) -> data_processing.IProvideHyperparamsForBinomial.Hyperparams:
        """Return the hyper-parameters for sample-size ``N`` and conditioning-set
        size ``dim_Z``, using the cache unless it is disabled (``cache is None``).

        :param N: Sample-size.
        :type N: int
        :param dim_Z: Conditioning-set size.
        :type dim_Z: int
        :return: Execution-parameters, see :py:meth:`compute_for_fixed_sample_count`.
        :rtype: data_processing.IProvideHyperparamsForBinomial.Hyperparams
        """
        if self.cache is None:
            return self.compute_for_fixed_sample_count(N=N, dim_Z=dim_Z)
        key = (N, dim_Z)
        result = self.cache.get(key)
        if result is None:
            result = self.compute_for_fixed_sample_count(N=N, dim_Z=dim_Z)
            self.cache[key] = result
        return result

    @staticmethod
    def get_opt_beta(alpha_homogeneity_err_control_requested: float, block_count: int, beta_start_value: float=0.1) -> tuple[float, int]:
        """
        The binomial test rejects at an integer count. To target a specific error-rate :math:`\\alpha`, this function modifies the quantile :math:`\\beta`-start-value such
        that the returned :math:`\\beta'\\geq\\beta` will realize the error-rate :math:`\\alpha` for the (also returned) integer cutoff-count.

        :param alpha_homogeneity_err_control_requested: Requested error-control :math:`\\alpha`.
        :type alpha_homogeneity_err_control_requested: float
        :param block_count: The total number of blocks.
        :type block_count: int
        :param beta_start_value: Initial value for :math:`\\beta`, defaults to 0.1
        :type beta_start_value: float, optional
        :raises RuntimeWarning: The solution for :math:`\\beta'` is found numerically. In case of convergence problems, an exception is raised.
        :return: Tuple (:math:`\\beta'`, cutoff-count).
        :rtype: tuple[float, int]
        """
        # Integer cutoff at the start value: the (1 - alpha)-quantile of Binomial(block_count, beta).
        discrete_cutoff = binom.ppf(1.0 - alpha_homogeneity_err_control_requested, n=block_count, p=beta_start_value)
        # Solve for beta' such that the tail beyond that fixed cutoff equals alpha.
        result = root_scalar(lambda beta: binom.sf(k=discrete_cutoff, n=block_count, p=beta) - alpha_homogeneity_err_control_requested, x0=beta_start_value)
        if not result.converged:
            raise RuntimeWarning(f"Convergence problem in hyperparam-choice for block count={block_count}, using beta={result.root}.")
        return result.root, int(discrete_cutoff)

    def _get_opt_blocksize_base(self, N: int) -> float:
        """Heuristic hyper-parameter choice for block-size :math:`B` based on sample-count :math:`N` for empty conditioning-set.

        :param N: Sample-size.
        :type N: int
        :return: Block-size (further processed by :py:meth:`get_opt_blocksize`, and rounded to an integer there).
        :rtype: float
        """
        if self.regimes_are_large:
            return 30
        return 5.0*np.log10(N)-3

    def _sanitize_blocksize(self, B: int, N: int, dim_Z: int) -> int:
        """Ensure a chosen block-size makes sense for use with partial correlation.

        :param B: Targeted block-size.
        :type B: int
        :param N: Sample-size.
        :type N: int
        :param dim_Z: Size of conditioning-set.
        :type dim_Z: int
        :return: Reasonable block-size to use.
        :rtype: int
        """
        # B < 1 can arise from the small-regime heuristic at tiny N; guarding it
        # avoids dividing by zero and routes it through the same fallback.
        if B < 1 or int(N/B) < self.min_block_count:
            B = int(N/self.min_block_count)
        # Blocks of size <= dim_Z + 3 are replaced by the configured minimum.
        if B <= dim_Z + 3:
            B = self.min_effective_sample_count + dim_Z + 3
        return B

    def get_opt_blocksize(self, N: int, dim_Z: int) -> int:
        """Get heuristic hyper-parameter choice for block-size :math:`B`.

        :param N: Sample-size.
        :type N: int
        :param dim_Z: Conditioning-set size.
        :type dim_Z: int
        :return: Recommended block-size to use.
        :rtype: int
        """
        base_value = self._get_opt_blocksize_base(N)
        result = int(base_value + 1.5 * dim_Z)
        return self._sanitize_blocksize(result, N, dim_Z)

    def _validate(self, k, n, beta):
        """Raise if the realized error-rate exceeds ``alpha`` by more than ``tolerance``.

        :param k: integer cutoff-count
        :param n: number of blocks
        :param beta: quantile parameter
        :raises ValueError: if ``binom.sf(k, n, beta) - alpha > tolerance``
        """
        if binom.sf(k=k, n=n, p=beta) - self.alpha > self.tolerance:
            raise ValueError(f"Hyperparameter-configuration failed to self-validate at tolerance={self.tolerance}")

    def compute_for_fixed_sample_count(self, N: int, dim_Z: int) -> data_processing.IProvideHyperparamsForBinomial.Hyperparams:
        """Obtain actual runtime parameters. Typically called only by implementation of the corresponding marked-independence stage.

        :param N: Sample-size.
        :type N: int
        :param dim_Z: Conditioning-set size.
        :type dim_Z: int
        :return: Execution-parameters for the corresponding algorithm.
        :rtype: data_processing.IProvideHyperparamsForBinomial.Hyperparams
        """
        B = self.get_opt_blocksize(N, dim_Z)
        block_count = int(N/B)
        beta, k0 = self.get_opt_beta(alpha_homogeneity_err_control_requested=self.alpha, block_count=block_count, beta_start_value=0.1)
        if self.validate_choice:
            self._validate(k0, block_count, beta)
        return data_processing.IProvideHyperparamsForBinomial.Hyperparams( B=B, alpha=self.alpha, beta=beta, max_acceptable_count=k0 )
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class Hyperparams_WeakInterval_ParCorr(data_processing.IProvideHyperparamsForAcceptanceInterval):
    """Helper to configure standard hyper-parameter sets for acceptance-interval weak-regime test when using partial correlation."""

    def __init__(self, alpha=0.05, regimes_are_large=True):
        """Configure hyper-parameter set based on given parameters.

        :param alpha: Error-control target :math:`\\alpha`, defaults to 0.05
        :type alpha: float, optional
        :param regimes_are_large: Consider hyper-parameters for large regimes, defaults to True
        :type regimes_are_large: bool, optional
        """
        self.alpha = alpha
        self.regimes_are_large = regimes_are_large

    def hyperparams_for_acceptance_interval(self, N: int, dim_Z: int) -> data_processing.IProvideHyperparamsForAcceptanceInterval.Hyperparams:
        """Interface entry point; forwards to :py:meth:`get_for_fixed_sample_count`."""
        return self.get_for_fixed_sample_count(N=N, dim_Z=dim_Z)

    def get_opt_blocksize(self, N: int, dim_Z: int) -> int:
        """Heuristic choice of hyper-parameter for block-size :math:`B`.

        Interpolates linearly in ``dim_Z`` between a value at ``dim_Z = 0`` and
        a value at ``dim_Z = 20``, then rounds. ``N`` is not used.

        :param N: Sample-size.
        :type N: int
        :param dim_Z: Conditioning-set size.
        :type dim_Z: int
        :return: Heuristic block-size choice.
        :rtype: int
        """
        # (value at dim_Z = 0, value at dim_Z = 20)
        at_zero, at_twenty = (31, 59) if self.regimes_are_large else (11, 43)
        return int(round(at_zero + dim_Z * (at_twenty - at_zero) / 20))

    def get_opt_cutoff(self, N: int, dim_Z: int) -> float:
        """Heuristic choice of hyper-parameter for cutoff :math:`c`.

        Constant for large regimes; otherwise interpolated linearly in
        ``dim_Z``. ``N`` is not used.

        :param N: Sample-size.
        :type N: int
        :param dim_Z: Conditioning-set size.
        :type dim_Z: int
        :return: Heuristic cutoff choice.
        :rtype: float
        """
        if not self.regimes_are_large:
            return 0.275 + dim_Z * (0.25-0.275) / 20
        return 0.2

    def get_for_fixed_sample_count(self, N: int, dim_Z: int) -> data_processing.IProvideHyperparamsForAcceptanceInterval.Hyperparams:
        """Obtain actual runtime parameters. Typically called only by implementation of the corresponding marked-independence stage.

        :param N: Sample-size.
        :type N: int
        :param dim_Z: Size of the conditioning set.
        :type dim_Z: int
        :return: Execution-parameters for the corresponding algorithm.
        :rtype: data_processing.IProvideHyperparamsForAcceptanceInterval.Hyperparams
        """
        block_size = int(self.get_opt_blocksize(N=N, dim_Z=dim_Z))
        cutoff = self.get_opt_cutoff(N=N, dim_Z=dim_Z)
        return data_processing.IProvideHyperparamsForAcceptanceInterval.Hyperparams(
            B=block_size,
            alpha=self.alpha,
            cutoff=cutoff
        )
|
|
205
|
+
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from .hccd import IProvideIndependenceAtoms, IHandleExplicitTransitionToMCI
|
|
2
|
+
from .data_processing import ITestMarkedCI, ITestIndicatorImplications
|
|
3
|
+
from .data_management import CI_Identifier, CI_Identifier_TimeSeries, IManageData
|
|
4
|
+
|
|
5
|
+
class IndependenceAtoms_Backend(IProvideIndependenceAtoms):
    """
    Integrate a data-provider with mCIT and implication-testing into an
    independence-atom backend for use in :py:mod:`hccd<GLDF.hccd>`.
    """

    def __init__(self, data_manager: IManageData, m_cit: ITestMarkedCI, implication_test: ITestIndicatorImplications=None):
        """Construct from data-manager, mCIT and implication-test.

        :param data_manager: data-manager
        :type data_manager: IManageData
        :param m_cit: mCIT
        :type m_cit: ITestMarkedCI
        :param implication_test: indicator implication test, defaults to None
        :type implication_test: ITestIndicatorImplications, optional
        """
        self.data_manager = data_manager
        self.m_cit = m_cit
        self.implication_test = implication_test
        # Undirected links for which some test came back globally independent.
        self._found_globally_independent_for_some_Z = set()

    def marked_independence(self, ci: CI_Identifier) -> ITestMarkedCI.Result:
        """Run the mCIT on the data selected by ``ci``; remember the link if the
        outcome is globally independent.

        :param ci: conditional independence to test
        :type ci: CI_Identifier
        :return: marked CIT result
        :rtype: ITestMarkedCI.Result
        """
        patterned_data = self.data_manager.get_patterned_data(ci)
        outcome = self.m_cit.marked_independence(patterned_data)
        if outcome.is_globally_independent():
            self._found_globally_independent_for_some_Z.add( ci.undirected_link() )
        return outcome

    def regime_implication(self, lhs: list[CI_Identifier], rhs: CI_Identifier) -> bool:
        """Run the implication test on the data selected by ``lhs`` and ``rhs``.

        :param lhs: lhs of the implication
        :type lhs: list[CI_Identifier]
        :param rhs: rhs of the implication
        :type rhs: CI_Identifier
        :return: test-result for truth-value of the implication
        :rtype: bool
        """
        is_implied = self.implication_test.is_implied_regime
        lhs_data = [self.data_manager.get_patterned_data(ci) for ci in lhs]
        rhs_data = self.data_manager.get_patterned_data(rhs)
        return is_implied(lhs_data, rhs_data)

    def found_globally_independent_for_some_Z(self, undirected_link: tuple) -> bool:
        """Tell whether :py:meth:`marked_independence` has so far recorded a
        globally independent outcome on ``undirected_link``.
        """
        return undirected_link in self._found_globally_independent_for_some_Z
|
|
40
|
+
|
|
41
|
+
class IndependenceAtoms_TimeSeries(IProvideIndependenceAtoms, IHandleExplicitTransitionToMCI):
    """Dual-phase/transitionable backend to account for different configurations in PC1 and MCI
    phases of PCMCI-family algorithms.
    """

    def __init__(self, independence_atoms_pc1: IProvideIndependenceAtoms, independence_atoms_mci: IProvideIndependenceAtoms):
        """Construct from two separate backends for PC1 and MCI phases.
        The PC1 backend is active initially.

        :param independence_atoms_pc1: backend for PC1 phase
        :type independence_atoms_pc1: IProvideIndependenceAtoms
        :param independence_atoms_mci: backend for MCI phase
        :type independence_atoms_mci: IProvideIndependenceAtoms
        """
        self.independence_atoms_pc1 = independence_atoms_pc1
        self.independence_atoms_mci = independence_atoms_mci
        self._active_backend = independence_atoms_pc1

    def marked_independence(self, ci: CI_Identifier_TimeSeries) -> ITestMarkedCI.Result:
        """Forward to the currently active backend."""
        return self._active_backend.marked_independence(ci=ci)

    def regime_implication(self, lhs: list[CI_Identifier_TimeSeries], rhs: CI_Identifier_TimeSeries) -> bool:
        """Forward to the currently active backend."""
        return self._active_backend.regime_implication(lhs, rhs)

    def found_globally_independent_for_some_Z(self, undirected_link: tuple) -> bool:
        """Tell whether either backend (MCI is asked first) recorded a globally
        independent outcome on ``undirected_link``.
        """
        return ( self.independence_atoms_mci.found_globally_independent_for_some_Z(undirected_link)
            or self.independence_atoms_pc1.found_globally_independent_for_some_Z(undirected_link) )

    def enter_pc1(self) -> None:
        """Make the PC1 backend the active one."""
        self._active_backend = self.independence_atoms_pc1

    def enter_mci(self) -> None:
        """Make the MCI backend the active one."""
        self._active_backend = self.independence_atoms_mci

    def _extract_cache_id(self, fname: str, *args, **kwargs) -> tuple:
        """Build a cache key that includes the active backend.

        Accepts positional as well as keyword arguments, matching the signature
        declared by :py:class:`IProvideIndependenceAtoms`; both are forwarded
        unchanged to the active backend's ``_extract_cache_id``.

        :param fname: name of the method being cached
        :type fname: str
        :return: tuple of (active backend, key produced by the active backend)
        :rtype: tuple
        """
        # In principle can also cache the multi-mode backend if tracking
        # state (active backend). Usually not necessary, if individual
        # backends are cached (cf. frontend).
        return (self._active_backend, self._active_backend._extract_cache_id(fname, *args, **kwargs))
|
|
78
|
+
|