compiled-knowledge 4.0.0a20__cp312-cp312-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of compiled-knowledge might be problematic.
- ck/__init__.py +0 -0
- ck/circuit/__init__.py +17 -0
- ck/circuit/_circuit_cy.c +37525 -0
- ck/circuit/_circuit_cy.cpython-312-darwin.so +0 -0
- ck/circuit/_circuit_cy.pxd +32 -0
- ck/circuit/_circuit_cy.pyx +768 -0
- ck/circuit/_circuit_py.py +836 -0
- ck/circuit/tmp_const.py +74 -0
- ck/circuit_compiler/__init__.py +2 -0
- ck/circuit_compiler/circuit_compiler.py +26 -0
- ck/circuit_compiler/cython_vm_compiler/__init__.py +1 -0
- ck/circuit_compiler/cython_vm_compiler/_compiler.c +19826 -0
- ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-312-darwin.so +0 -0
- ck/circuit_compiler/cython_vm_compiler/_compiler.pyx +380 -0
- ck/circuit_compiler/cython_vm_compiler/cython_vm_compiler.py +121 -0
- ck/circuit_compiler/interpret_compiler.py +223 -0
- ck/circuit_compiler/llvm_compiler.py +388 -0
- ck/circuit_compiler/llvm_vm_compiler.py +546 -0
- ck/circuit_compiler/named_circuit_compilers.py +57 -0
- ck/circuit_compiler/support/__init__.py +0 -0
- ck/circuit_compiler/support/circuit_analyser/__init__.py +13 -0
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +10620 -0
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-312-darwin.so +0 -0
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.pyx +98 -0
- ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_py.py +93 -0
- ck/circuit_compiler/support/input_vars.py +148 -0
- ck/circuit_compiler/support/llvm_ir_function.py +234 -0
- ck/example/__init__.py +53 -0
- ck/example/alarm.py +366 -0
- ck/example/asia.py +28 -0
- ck/example/binary_clique.py +32 -0
- ck/example/bow_tie.py +33 -0
- ck/example/cancer.py +37 -0
- ck/example/chain.py +38 -0
- ck/example/child.py +199 -0
- ck/example/clique.py +33 -0
- ck/example/cnf_pgm.py +39 -0
- ck/example/diamond_square.py +68 -0
- ck/example/earthquake.py +36 -0
- ck/example/empty.py +10 -0
- ck/example/hailfinder.py +539 -0
- ck/example/hepar2.py +628 -0
- ck/example/insurance.py +504 -0
- ck/example/loop.py +40 -0
- ck/example/mildew.py +38161 -0
- ck/example/munin.py +22982 -0
- ck/example/pathfinder.py +53747 -0
- ck/example/rain.py +39 -0
- ck/example/rectangle.py +161 -0
- ck/example/run.py +30 -0
- ck/example/sachs.py +129 -0
- ck/example/sprinkler.py +30 -0
- ck/example/star.py +44 -0
- ck/example/stress.py +64 -0
- ck/example/student.py +43 -0
- ck/example/survey.py +46 -0
- ck/example/triangle_square.py +54 -0
- ck/example/truss.py +49 -0
- ck/in_out/__init__.py +3 -0
- ck/in_out/parse_ace_lmap.py +216 -0
- ck/in_out/parse_ace_nnf.py +322 -0
- ck/in_out/parse_net.py +480 -0
- ck/in_out/parser_utils.py +185 -0
- ck/in_out/pgm_pickle.py +42 -0
- ck/in_out/pgm_python.py +268 -0
- ck/in_out/render_bugs.py +111 -0
- ck/in_out/render_net.py +177 -0
- ck/in_out/render_pomegranate.py +184 -0
- ck/pgm.py +3475 -0
- ck/pgm_circuit/__init__.py +1 -0
- ck/pgm_circuit/marginals_program.py +352 -0
- ck/pgm_circuit/mpe_program.py +237 -0
- ck/pgm_circuit/pgm_circuit.py +79 -0
- ck/pgm_circuit/program_with_slotmap.py +236 -0
- ck/pgm_circuit/slot_map.py +35 -0
- ck/pgm_circuit/support/__init__.py +0 -0
- ck/pgm_circuit/support/compile_circuit.py +83 -0
- ck/pgm_circuit/target_marginals_program.py +103 -0
- ck/pgm_circuit/wmc_program.py +323 -0
- ck/pgm_compiler/__init__.py +2 -0
- ck/pgm_compiler/ace/__init__.py +1 -0
- ck/pgm_compiler/ace/ace.py +299 -0
- ck/pgm_compiler/factor_elimination.py +395 -0
- ck/pgm_compiler/named_pgm_compilers.py +63 -0
- ck/pgm_compiler/pgm_compiler.py +19 -0
- ck/pgm_compiler/recursive_conditioning.py +231 -0
- ck/pgm_compiler/support/__init__.py +0 -0
- ck/pgm_compiler/support/circuit_table/__init__.py +17 -0
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +16398 -0
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cpython-312-darwin.so +0 -0
- ck/pgm_compiler/support/circuit_table/_circuit_table_cy.pyx +332 -0
- ck/pgm_compiler/support/circuit_table/_circuit_table_py.py +304 -0
- ck/pgm_compiler/support/clusters.py +568 -0
- ck/pgm_compiler/support/factor_tables.py +406 -0
- ck/pgm_compiler/support/join_tree.py +332 -0
- ck/pgm_compiler/support/named_compiler_maker.py +43 -0
- ck/pgm_compiler/variable_elimination.py +91 -0
- ck/probability/__init__.py +0 -0
- ck/probability/empirical_probability_space.py +50 -0
- ck/probability/pgm_probability_space.py +32 -0
- ck/probability/probability_space.py +622 -0
- ck/program/__init__.py +3 -0
- ck/program/program.py +137 -0
- ck/program/program_buffer.py +180 -0
- ck/program/raw_program.py +67 -0
- ck/sampling/__init__.py +0 -0
- ck/sampling/forward_sampler.py +211 -0
- ck/sampling/marginals_direct_sampler.py +113 -0
- ck/sampling/sampler.py +62 -0
- ck/sampling/sampler_support.py +232 -0
- ck/sampling/uniform_sampler.py +72 -0
- ck/sampling/wmc_direct_sampler.py +171 -0
- ck/sampling/wmc_gibbs_sampler.py +153 -0
- ck/sampling/wmc_metropolis_sampler.py +165 -0
- ck/sampling/wmc_rejection_sampler.py +115 -0
- ck/utils/__init__.py +0 -0
- ck/utils/iter_extras.py +163 -0
- ck/utils/local_config.py +270 -0
- ck/utils/map_list.py +128 -0
- ck/utils/map_set.py +128 -0
- ck/utils/np_extras.py +51 -0
- ck/utils/random_extras.py +64 -0
- ck/utils/tmp_dir.py +94 -0
- ck_demos/__init__.py +0 -0
- ck_demos/ace/__init__.py +0 -0
- ck_demos/ace/copy_ace_to_ck.py +15 -0
- ck_demos/ace/demo_ace.py +49 -0
- ck_demos/all_demos.py +88 -0
- ck_demos/circuit/__init__.py +0 -0
- ck_demos/circuit/demo_circuit_dump.py +22 -0
- ck_demos/circuit/demo_derivatives.py +43 -0
- ck_demos/circuit_compiler/__init__.py +0 -0
- ck_demos/circuit_compiler/compare_circuit_compilers.py +32 -0
- ck_demos/circuit_compiler/show_llvm_program.py +26 -0
- ck_demos/pgm/__init__.py +0 -0
- ck_demos/pgm/demo_pgm_dump.py +18 -0
- ck_demos/pgm/demo_pgm_dump_stress.py +18 -0
- ck_demos/pgm/demo_pgm_string_rendering.py +15 -0
- ck_demos/pgm/show_examples.py +25 -0
- ck_demos/pgm_compiler/__init__.py +0 -0
- ck_demos/pgm_compiler/compare_pgm_compilers.py +63 -0
- ck_demos/pgm_compiler/demo_compiler_dump.py +60 -0
- ck_demos/pgm_compiler/demo_factor_elimination.py +47 -0
- ck_demos/pgm_compiler/demo_join_tree.py +25 -0
- ck_demos/pgm_compiler/demo_marginals_program.py +53 -0
- ck_demos/pgm_compiler/demo_mpe_program.py +55 -0
- ck_demos/pgm_compiler/demo_pgm_compiler.py +38 -0
- ck_demos/pgm_compiler/demo_recursive_conditioning.py +33 -0
- ck_demos/pgm_compiler/demo_variable_elimination.py +33 -0
- ck_demos/pgm_compiler/demo_wmc_program.py +29 -0
- ck_demos/pgm_compiler/time_fe_compiler.py +93 -0
- ck_demos/pgm_inference/__init__.py +0 -0
- ck_demos/pgm_inference/demo_inferencing_basic.py +188 -0
- ck_demos/pgm_inference/demo_inferencing_mpe_cancer.py +45 -0
- ck_demos/pgm_inference/demo_inferencing_wmc_and_mpe_sprinkler.py +154 -0
- ck_demos/pgm_inference/demo_inferencing_wmc_student.py +110 -0
- ck_demos/programs/__init__.py +0 -0
- ck_demos/programs/demo_program_buffer.py +24 -0
- ck_demos/programs/demo_program_multi.py +24 -0
- ck_demos/programs/demo_program_none.py +19 -0
- ck_demos/programs/demo_program_single.py +23 -0
- ck_demos/programs/demo_raw_program_interpreted.py +21 -0
- ck_demos/programs/demo_raw_program_llvm.py +21 -0
- ck_demos/sampling/__init__.py +0 -0
- ck_demos/sampling/check_sampler.py +71 -0
- ck_demos/sampling/demo_marginal_direct_sampler.py +40 -0
- ck_demos/sampling/demo_uniform_sampler.py +38 -0
- ck_demos/sampling/demo_wmc_direct_sampler.py +40 -0
- ck_demos/utils/__init__.py +0 -0
- ck_demos/utils/compare.py +120 -0
- ck_demos/utils/convert_network.py +45 -0
- ck_demos/utils/sample_model.py +216 -0
- ck_demos/utils/stop_watch.py +384 -0
- compiled_knowledge-4.0.0a20.dist-info/METADATA +50 -0
- compiled_knowledge-4.0.0a20.dist-info/RECORD +178 -0
- compiled_knowledge-4.0.0a20.dist-info/WHEEL +6 -0
- compiled_knowledge-4.0.0a20.dist-info/licenses/LICENSE.txt +21 -0
- compiled_knowledge-4.0.0a20.dist-info/top_level.txt +2 -0
@@ -0,0 +1,622 @@
+"""
+An abstract class for objects providing probabilities.
+"""
+import math
+from abc import ABC, abstractmethod
+from itertools import chain
+from typing import Sequence, Tuple, Iterable, Callable
+
+import numpy as np
+
+from ck.pgm import Indicator, RandomVariable, rv_instances_as_indicators, number_of_states, rv_instances, Instance
+from ck.utils.iter_extras import combos as _combos
+from ck.utils.map_set import MapSet
+from ck.utils.np_extras import dtype_for_number_of_states, NDArrayFloat64, DTypeStates, NDArrayNumeric
+
+# Type defining a condition.
+Condition = None | Indicator | Iterable[Indicator]
+
+
+class ProbabilitySpace(ABC):
+    """
+    An abstract mixin class for classes providing probabilities over a state space defined by random variables.
+    Each possible world of the state space is referred to as an 'instance'.
+    """
+    __slots__ = ()
+
+    @property
+    @abstractmethod
+    def rvs(self) -> Sequence[RandomVariable]:
+        """
+        Return the random variables that define the state space.
+        Each random variable, rv, has a length len(rv) which
+        is the number of states, and rv[i] is the 'indicator' for
+        the ith state of the random variable. Indicators must
+        be unique across all rvs as rv[i] indicates the
+        condition 'rv == i'.
+        """
+
+    @abstractmethod
+    def wmc(self, *condition: Condition) -> float:
+        """
+        Return the weight of instances matching the given condition.
+
+        If multiple indicators of the same random variable appear in
+        the parameter 'condition' then they are interpreted as
+        a disjunction, otherwise indicators are interpreted as
+        a conjunction. E.g.: X=0, Y=1, Y=3 means X=0 and (Y=1 or Y=3).
+
+        Args:
+            condition: zero or more indicators that specify a condition.
+        """
+
+    @property
+    @abstractmethod
+    def z(self) -> float:
+        """
+        Return the summed weight of all instances.
+        This is equivalent to self.wmc(), with no arguments.
+        """
+
+    def probability(self, *indicators: Indicator, condition: Condition = ()) -> float:
+        """
+        Return the joint probability of the given indicators,
+        conditioned on any conditions, and
+        marginalised over any unmentioned random variables.
+
+        If multiple indicators of the same random variable appear in
+        the parameters 'indicators' or 'condition' then they are interpreted as
+        a disjunction, otherwise indicators are interpreted as
+        a conjunction. E.g.: X=0, Y=1, Y=3 means X=0 and (Y=1 or Y=3).
+
+        Args:
+            indicators: Indicators that specify which set of instances to compute the probability for.
+            condition: Indicators that specify conditions for a conditional probability.
+        Returns:
+            the probability of the given indicators, conditioned on the given conditions.
+        """
+        condition: Tuple[Indicator, ...] = check_condition(condition)
+
+        if len(condition) == 0:
+            z = self.z
+            if z <= 0:
+                return np.nan
+        else:
+            z = self.wmc(*condition)
+            if z <= 0:
+                return np.nan
+
+        # Combine the indicators with the condition
+        # If a variable is mentioned in both the indicators and condition, then
+        # we need to take the intersection, and check for contradictions.
+        # If a variable is mentioned in the condition but not indicators, then
+        # the rv condition needs to be added to the indicators.
+        indicator_groups: MapSet[int, Indicator] = _group_indicators(indicators)
+        condition_groups: MapSet[int, Indicator] = _group_indicators(condition)
+
+        for rv_idx, indicators in condition_groups.items():
+            indicator_group = indicator_groups.get(rv_idx)
+            if indicator_group is None:
+                indicator_groups.add_all(rv_idx, indicators)
+            else:
+                indicator_group.intersection_update(indicators)
+                if len(indicator_group) == 0:
+                    # A contradiction between the indicators and conditions
+                    return 0.0
+
+        # Collect all the indicators from the updated indicator_groups
+        indicators = chain(*indicator_groups.values())
+
+        return self.wmc(*indicators) / z
+
+    def marginal_distribution(self, *rvs: RandomVariable, condition: Condition = ()) -> NDArrayNumeric:
+        """
+        What is the marginal probability distribution over the states of the given random variables.
+        Assumes that no indicators of rv in rvs appear in the conditions (if supplied).
+
+        When multiple rvs are supplied, the order of instantiations is as per
+        `rv_instances_as_indicators(*rvs)`.
+
+        If multiple indicators of the same random variable appear in
+        the parameter 'condition' then they are interpreted as
+        a disjunction, otherwise indicators are interpreted as
+        a conjunction. E.g.: X=0, Y=1, Y=3 means X=0 and (Y=1 or Y=3).
+
+        This is not an efficient implementation as it will call self.probability(...)
+        for each possible state of the given random variable. If efficient marginal
+        probability calculations are required, consider using a different method.
+
+        Warning:
+            If the probability of each state of rv (given the condition) is
+            zero, then the marginal distribution is ill-defined and the returned probabilities will
+            all be NaN.
+
+        Args:
+            rvs: Random variables to compute the marginal distribution over.
+            condition: Indicators that specify conditions for conditional probability.
+
+        Returns:
+            marginal probability distribution as an array co-indexed with `rv_instances_as_indicators(*rvs)`.
+        """
+        condition = check_condition(condition)
+
+        # We have to be careful of the situation where indicators of rvs appear in condition.
+        # If an RV has at least 1 indicator in condition then it must match it to have non-zero probability.
+        wmc = self._get_wmc_for_marginals(rvs, condition)
+
+        result: NDArrayFloat64 = np.fromiter(
+            (wmc(indicators) for indicators in rv_instances_as_indicators(*rvs)),
+            count=number_of_states(*rvs),
+            dtype=np.float64
+        )
+        _normalise_marginal(result)
+        return result
+
+    def map(self, *rvs: RandomVariable, condition: Condition = ()) -> Tuple[float, Instance]:
+        """
+        Determine the maximum a posteriori probability (MAP).
+
+        If there are tied solutions, one solution is returned, which
+        is selected arbitrarily.
+
+        If multiple indicators of the same random variable appear in
+        the parameter 'condition' then they are interpreted as
+        a disjunction, otherwise indicators are interpreted as
+        a conjunction. E.g.: X=0, Y=1, Y=3 means X=0 and (Y=1 or Y=3).
+
+        Warning:
+            This is not an efficient implementation as it will call `self.wmc`
+            for each possible state of the given random variables. If efficient MAP
+            probability calculations are required, consider using a different method.
+
+        Args:
+            rvs: random variables to find the MAP over.
+            condition: any conditioning indicators.
+
+        Returns:
+            (probability, instance) where
+            probability: is the MAP probability
+            instance: is the MAP state (co-indexed with the given rvs).
+        """
+        condition: Sequence[Indicator] = check_condition(condition)
+
+        rv_indexes = set(rv.idx for rv in rvs)
+        assert len(rv_indexes) == len(rvs), 'duplicated random variables not allowed'
+
+        # Group conditioning indicators by random variable.
+        conditions_by_rvs = _group_states(condition)
+
+        # See if any MAP random variable is also conditioned.
+        # Reduce the state space of any conditioned MAP rv.
+        loop_rvs = []
+        reduced_space = False
+        for rv in rvs:
+            states = conditions_by_rvs.get(rv.idx)
+            if states is None:
+                loop_rvs.append(rv)
+            else:
+                loop_rvs.append([rv[i] for i in sorted(states)])
+                reduced_space = True
+
+        # If the random variables we are looping over do not have any conditions
+        # then it is expected to be faster to use computed marginal probabilities.
+        if not reduced_space:
+            prs = self.marginal_distribution(*rvs, condition=condition)
+            best_probability = float('-inf')
+            best_states = None
+            for probability, inst in zip(prs, rv_instances(*rvs)):
+                if probability > best_probability:
+                    best_probability = probability
+                    best_states = inst
+            return best_probability, best_states
+
+        else:
+            # Remove any condition indicators with rv in rvs.
+            new_conditions = tuple(ind for ind in condition if ind.rv_idx not in rv_indexes)
+
+            # Loop over the state space of the 'loop' rvs
+            best_probability = float('-inf')
+            best_states = None
+            indicators: Tuple[Indicator, ...]
+            for indicators in _combos(loop_rvs):
+                probability = self.wmc(*(indicators + new_conditions))
+                if probability > best_probability:
+                    best_probability = probability
+                    best_states = tuple(ind.state_idx for ind in indicators)
+            condition_probability = self.wmc(*condition)
+            return best_probability / condition_probability, best_states
+
+    def correlation(self, indicator1: Indicator, indicator2: Indicator, condition: Condition = ()) -> float:
+        """
+        What is the correlation between the two given indicators, r(indicator1, indicator2).
+
+        Args:
+            indicator1: a first random variable and its state.
+            indicator2: a second random variable and its state.
+            condition: any conditioning indicators.
+
+        Returns:
+            correlation between the two given indicators.
+        """
+        condition = check_condition(condition)
+
+        p1 = self.probability(indicator1, condition=condition)
+        p2 = self.probability(indicator2, condition=condition)
+        p12 = self._joint_probability(indicator1, indicator2, condition=condition)
+        d = p1 * (1.0 - p1) * p2 * (1.0 - p2)
+        if d == 0.0:
+            # As any marginal probability approaches zero, correlation approaches zero
+            return 0.0
+        else:
+            return (p12 - p1 * p2) / math.sqrt(d)
+
+    def entropy(self, rv: RandomVariable, condition: Condition = ()) -> float:
+        """
+        Calculate the entropy of the given random variable, H(rv).
+
+        Args:
+            rv: random variable to calculate the entropy for.
+            condition: any conditioning indicators.
+
+        Returns:
+            entropy of the given random variable.
+        """
+        condition = check_condition(condition)
+        e = 0.0
+        for ind in rv:
+            p = self.probability(ind, condition=condition)
+            if p > 0.0:
+                e -= p * math.log2(p)
+        return e
+
+    def conditional_entropy(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
+        """
+        Calculate the conditional entropy, H(rv1 | rv2).
+
+        Args:
+            rv1: random variable to calculate the entropy for.
+            rv2: the conditioning random variable for entropy calculation.
+            condition: any conditioning indicators to restrict the state space.
+
+        Returns:
+            entropy of rv1, conditioned on rv2.
+        """
+        condition = check_condition(condition)
+        e = 0.0
+        for ind1 in rv1:
+            for ind2 in rv2:
+                p = self._joint_probability(ind1, ind2, condition=condition)
+                if p > 0.0:
+                    # if p > 0 then p2 > 0, as p <= p2
+                    p2 = self.probability(ind2, condition=condition)
+                    e -= p * math.log2(p / p2)
+        return e
+
+    def joint_entropy(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
+        """
+        Calculate the joint entropy of the two random variables, H(rv1; rv2).
+
+        Args:
+            rv1: a first random variable to calculate joint entropy.
+            rv2: a second random variable to calculate joint entropy.
+            condition: any conditioning indicators to restrict the state space.
+        Returns:
+            joint entropy of the given random variables.
+        """
+        condition = check_condition(condition)
+        e = 0.0
+        for ind1 in rv1:
+            for ind2 in rv2:
+                p = self._joint_probability(ind1, ind2, condition=condition)
+                if p > 0.0:
+                    e -= p * math.log2(p)
+        return e
+
+    def mutual_information(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
+        """
+        Calculate the mutual information between two random variables, I(rv1; rv2).
+
+        Args:
+            rv1: a first random variable
+            rv2: a second random variable
+            condition: indicators to specify a condition restricting the state space.
+        Returns:
+            mutual information between the given random variables.
+        """
+        condition = check_condition(condition)
+        p1s = self.marginal_distribution(rv1, condition=condition)
+        p2s = self.marginal_distribution(rv2, condition=condition)
+        info = 0.0
+        for ind1, p1 in zip(rv1, p1s):
+            for ind2, p2 in zip(rv2, p2s):
+                p12 = self._joint_probability(ind1, ind2, condition=condition)
+                if p12 > 0.0:
+                    info += p12 * math.log2(p12 / p1 / p2)
+        return info
+
+    def total_correlation(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
+        """
+        Calculate the 'total correlation' measure.
+        total_correlation = I(rv1; rv2) / min(H(rv1), H(rv2)).
+        This is a normalised mutual information between two random variables.
+        0 => no mutual information.
+        1 => perfect mutual information.
+
+        Args:
+            rv1: a first random variable
+            rv2: a second random variable
+            condition: indicators to specify a condition restricting the state space.
+        Returns:
+            total correlation between the given random variables.
+        """
+        condition = check_condition(condition)
+        denominator = min(self.entropy(rv1, condition=condition), self.entropy(rv2, condition=condition))
+        return self._normalised_mutual_information(rv1, rv2, denominator, condition=condition)
+
+    def uncertainty(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
+        """
+        Calculate the 'uncertainty' measure, C, between two random variables
+        C(rv1, rv2) = I(rv1; rv2) / H(rv2)
+        This is a normalised mutual information between two random variables.
+        Note that it is not a symmetric measure; in general C(rv1, rv2) does not equal C(rv2, rv1).
+        0 => no mutual information.
+        1 => perfect mutual information.
+
+        Args:
+            rv1: a first random variable
+            rv2: a second random variable
+            condition: indicators to specify a condition restricting the state space.
+        Returns:
+            uncertainty between the given random variables.
+        """
+        condition = check_condition(condition)
+        denominator = self.entropy(rv2, condition=condition)
+        return self._normalised_mutual_information(rv1, rv2, denominator, condition=condition)
+
+    def symmetric_uncertainty(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
+        """
+        Calculate the 'symmetric uncertainty' measure.
+        symmetric_uncertainty = 2 * I(rv1; rv2) / (H(rv1) + H(rv2)).
+        This is the harmonic mean of the two uncertainty coefficients,
+        C(rv1, rv2) = I(rv1; rv2) / H(rv2) and C(rv2, rv1) = I(rv1; rv2) / H(rv1).
+        This is a normalised mutual information between two random variables.
+        0 => no mutual information.
+        1 => perfect mutual information.
+
+        Args:
+            rv1: a first random variable
+            rv2: a second random variable
+            condition: indicators to specify a condition restricting the state space.
+        Returns:
+            symmetric uncertainty between the given random variables.
+        """
+        condition = check_condition(condition)
+        denominator = self.entropy(rv1, condition=condition) + self.entropy(rv2, condition=condition)
+        return 2.0 * self._normalised_mutual_information(rv1, rv2, denominator, condition=condition)
+
+    def iqr(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
+        """
+        Calculate the Information Quality Ratio (IQR).
+        IQR = I(rv1; rv2) / H(rv1; rv2).
+        Also known as 'dual total correlation'.
+        This is a normalised mutual information between two random variables.
+        0 => no mutual information.
+        1 => perfect mutual information.
+
+        Args:
+            rv1: a first random variable
+            rv2: a second random variable
+            condition: indicators to specify a condition restricting the state space.
+        Returns:
+            Information Quality Ratio between the given random variables.
+        """
+        condition = check_condition(condition)
+        denominator = self.joint_entropy(rv1, rv2, condition=condition)
+        return self._normalised_mutual_information(rv1, rv2, denominator, condition=condition)
+
+    def covariant_normalised_mutual_information(self, rv1: RandomVariable, rv2: RandomVariable,
+                                                condition: Condition = ()) -> float:
+        """
+        Calculate the covariant normalised mutual information
+        = I(rv1; rv2) / sqrt(H(rv1) * H(rv2)).
+        This is a normalised mutual information between two random variables.
+        0 => no mutual information.
+        1 => perfect mutual information.
+
+        Args:
+            rv1: a first random variable
+            rv2: a second random variable
+            condition: indicators to specify a condition restricting the state space.
+        Returns:
+            covariant normalised mutual information between the given random variables.
+        """
+        condition = check_condition(condition)
+        denominator = math.sqrt(self.entropy(rv1, condition=condition) * self.entropy(rv2, condition=condition))
+        return self._normalised_mutual_information(rv1, rv2, denominator, condition=condition)
+
+    def _normalised_mutual_information(
+            self,
+            rv1: RandomVariable,
+            rv2: RandomVariable,
+            denominator: float,
+            condition: Tuple[Indicator, ...],
+    ) -> float:
+        """
+        Helper function for normalised mutual information calculations.
+
+        Args:
+            rv1: a first random variable
+            rv2: a second random variable
+            denominator: the normalisation factor
+            condition: indicators to specify a condition restricting the state space.
+        Returns:
+            mutual_information(rv1, rv2) / denominator
+        """
+        if denominator == 0.0:
+            return 0.0
+        else:
+            return self.mutual_information(rv1, rv2, condition) / denominator
+
+    def _joint_probability(
+            self,
+            indicator1: Indicator,
+            indicator2: Indicator,
+            condition: Tuple[Indicator, ...],
+    ) -> float:
+        """
+        Helper function to correctly calculate a joint probability even if the two indicators
+        are from the same random variable.
+
+        If the indicators are from different random variables then
+        probability(indicator1 and indicator2 | condition).
+
+        If the indicators are from the same random variable then the joint
+        probability is zero unless they are the same indicator.
+
+        Args:
+            indicator1: a first Indicator.
+            indicator2: a second Indicator.
+            condition: indicators to specify a condition restricting the state space.
+        Returns:
+            joint probability of the two indicators, given the condition.
+        """
+        if indicator1 == indicator2:
+            # Ensure correct behaviour, same random variable and same states
+            return self.probability(indicator1, condition=condition)
+        elif indicator1.rv_idx == indicator2.rv_idx:
+            # Efficiency shortcut, same random variable but different states
+            return 0.0
+        else:
+            # General case, two different random variables
+            return self.probability(indicator1, indicator2, condition=condition)
+
+    def _get_wmc_for_marginals(
+            self,
+            rvs: Sequence[RandomVariable],
+            condition: Tuple[Indicator, ...],
+    ) -> Callable[[Sequence[Indicator]], float]:
+        """
+        Return a wmc function that is suitable for calculating marginal distributions.
+
+        This implementation is careful of the situation where indicators of rvs appear in condition.
+        If an RV has at least 1 indicator in condition then it must match it to have non-zero probability.
+
+        Args:
+            rvs: random variables to calculate marginal distributions for.
+            condition: indicators to specify a condition restricting the state space.
+        Returns:
+            A function from a condition, specified as a sequence of indicators, to a weighted model count.
+        """
+        if len(condition) > 0:
+            check_sets = []
+            overlap_detected = False
+            cond_set = set(condition)
+            for rv in rvs:
+                in_condition = set()
+                for ind in rv:
+                    if ind in cond_set:
+                        in_condition.add(ind)
+                        cond_set.discard(ind)
+                        overlap_detected = True
+                if len(in_condition) == 0:
+                    in_condition.update(rv)
+                check_sets.append(in_condition)
+
+            if overlap_detected:
+                __wmc__condition = tuple(cond_set)
+
+                def wmc(indicators: Sequence[Indicator]) -> float:
+                    for indicator, check_set in zip(indicators, check_sets):
+                        if indicator not in check_set:
+                            return 0.0
+                    full_condition = tuple(indicators) + __wmc__condition
+                    return self.wmc(*full_condition)
+            else:
+                __wmc__condition = tuple(condition)
+
+                def wmc(indicators: Sequence[Indicator]) -> float:
+                    full_condition = tuple(indicators) + __wmc__condition
+                    return self.wmc(*full_condition)
+        else:
+            def wmc(indicators: Sequence[Indicator]) -> float:
+                return self.wmc(*indicators)
+
+        return wmc
+
+
+def check_condition(condition: Condition) -> Tuple[Indicator, ...]:
+    """
+    Make the best effort to interpret the given condition.
+
+    Args:
+        condition: a relaxed specification of a condition.
+    Returns:
+        a formal specification of the condition as a tuple of indicators with no duplicates.
+    """
+    if condition is None:
+        return ()
+    elif isinstance(condition, Indicator):
+        return (condition,)
+    else:
+        return tuple(set(condition))
+
+
+def dtype_for_state_indexes(rvs: Iterable[RandomVariable]) -> DTypeStates:
+    """
+    Infer a numpy dtype to hold any state index from any given random variable.
+
+    Args:
+        rvs: some random variables.
+    Returns:
+        a numpy dtype.
+    """
+    return dtype_for_number_of_states(max((len(rv) for rv in rvs), default=0))
+
+
+def _group_indicators(indicators: Iterable[Indicator]) -> MapSet[int, Indicator]:
+    """
+    Group the given indicators by rv_idx.
+
+    Args:
+        indicators: the indicators to group.
+
+    Returns:
+        A mapping from rv_idx to set of indicators.
+    """
+    groups: MapSet[int, Indicator] = MapSet()
+    for indicator in indicators:
+        groups.add(indicator.rv_idx, indicator)
+    return groups
+
+
+def _group_states(indicators: Iterable[Indicator]) -> MapSet[int, int]:
+    """
+    Group the given indicator states by rv_idx.
+
+    Args:
+        indicators: the indicators to group.
+
+    Returns:
+        A mapping from rv_idx to set of state indexes.
+    """
+    groups: MapSet[int, int] = MapSet()
+    for indicator in indicators:
+        groups.add(indicator.rv_idx, indicator.state_idx)
+    return groups
+
+
+def _normalise_marginal(distribution: NDArrayFloat64) -> None:
+    """
+    Update the values in the given distribution to
+    properly represent a marginal distribution.
+
+    The update is made in-place.
+
+    Args:
+        distribution: a 1D numpy array of likelihoods.
+    """
+    total = np.sum(distribution)
+    if total <= 0:
+        distribution[:] = np.nan
+    elif total != 1:
+        distribution /= total
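For orientation, the core of this file is the ProbabilitySpace mixin: a subclass supplies rvs, wmc and z, and inherits probability, marginal_distribution, map and the entropy and mutual-information measures. The sketch below is a minimal, hypothetical illustration of the documented wmc and probability contract; it deliberately avoids the package's own RandomVariable and Indicator classes (their constructors are not shown in this diff) and models an indicator as a plain (rv_idx, state_idx) tuple.

from itertools import product
from typing import Dict, Set, Tuple
import math

# An "indicator" here is a plain (rv_idx, state_idx) pair, standing for the
# condition "variable rv_idx is in state state_idx".
Indicator = Tuple[int, int]


class ToyProbabilitySpace:
    """Brute-force weighted model counting over an explicit table of instance weights."""

    def __init__(self, weights: Dict[Tuple[int, ...], float]):
        # Maps a full instance (one state index per variable) to its weight.
        self.weights = weights

    def wmc(self, *condition: Indicator) -> float:
        # Indicators of the same variable form a disjunction (a set of allowed
        # states); indicators of different variables combine as a conjunction.
        allowed: Dict[int, Set[int]] = {}
        for rv_idx, state_idx in condition:
            allowed.setdefault(rv_idx, set()).add(state_idx)
        return sum(
            weight for inst, weight in self.weights.items()
            if all(inst[rv_idx] in states for rv_idx, states in allowed.items())
        )

    @property
    def z(self) -> float:
        return self.wmc()

    def probability(self, *indicators: Indicator, condition: Tuple[Indicator, ...] = ()) -> float:
        z = self.wmc(*condition)
        if z <= 0:
            return math.nan
        # Mirror ProbabilitySpace.probability: intersect the query with the
        # condition per variable; an empty intersection is a contradiction.
        query: Dict[int, Set[int]] = {}
        for rv_idx, state_idx in indicators:
            query.setdefault(rv_idx, set()).add(state_idx)
        cond_groups: Dict[int, Set[int]] = {}
        for rv_idx, state_idx in condition:
            cond_groups.setdefault(rv_idx, set()).add(state_idx)
        for rv_idx, states in cond_groups.items():
            if rv_idx in query:
                query[rv_idx] &= states
                if not query[rv_idx]:
                    return 0.0
            else:
                query[rv_idx] = set(states)
        flat = tuple((rv_idx, s) for rv_idx, states in query.items() for s in states)
        return self.wmc(*flat) / z


# Two variables: X (rv 0) with 2 states, Y (rv 1) with 3 states.
# Weights need not be normalised; z plays the role of the normalising constant.
space = ToyProbabilitySpace(dict(zip(product(range(2), range(3)), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0])))

assert space.z == 21.0
assert abs(space.probability((0, 0), (1, 1), (1, 2)) - 5 / 21) < 1e-12   # P(X=0 and (Y=1 or Y=2))
assert abs(space.probability((1, 0), condition=((0, 1),)) - 4 / 15) < 1e-12   # P(Y=0 | X=1)

A real subclass would presumably obtain wmc from a compiled circuit (for example, via the programs under ck/pgm_circuit/ in the listing above) rather than enumerating instances as this toy does.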
ck/program/__init__.py
ADDED