compiled-knowledge 4.0.0a20__cp313-cp313-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (178) hide show
  1. ck/__init__.py +0 -0
  2. ck/circuit/__init__.py +17 -0
  3. ck/circuit/_circuit_cy.c +37525 -0
  4. ck/circuit/_circuit_cy.cpython-313-darwin.so +0 -0
  5. ck/circuit/_circuit_cy.pxd +32 -0
  6. ck/circuit/_circuit_cy.pyx +768 -0
  7. ck/circuit/_circuit_py.py +836 -0
  8. ck/circuit/tmp_const.py +74 -0
  9. ck/circuit_compiler/__init__.py +2 -0
  10. ck/circuit_compiler/circuit_compiler.py +26 -0
  11. ck/circuit_compiler/cython_vm_compiler/__init__.py +1 -0
  12. ck/circuit_compiler/cython_vm_compiler/_compiler.c +19826 -0
  13. ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-313-darwin.so +0 -0
  14. ck/circuit_compiler/cython_vm_compiler/_compiler.pyx +380 -0
  15. ck/circuit_compiler/cython_vm_compiler/cython_vm_compiler.py +121 -0
  16. ck/circuit_compiler/interpret_compiler.py +223 -0
  17. ck/circuit_compiler/llvm_compiler.py +388 -0
  18. ck/circuit_compiler/llvm_vm_compiler.py +546 -0
  19. ck/circuit_compiler/named_circuit_compilers.py +57 -0
  20. ck/circuit_compiler/support/__init__.py +0 -0
  21. ck/circuit_compiler/support/circuit_analyser/__init__.py +13 -0
  22. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +10620 -0
  23. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-313-darwin.so +0 -0
  24. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.pyx +98 -0
  25. ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_py.py +93 -0
  26. ck/circuit_compiler/support/input_vars.py +148 -0
  27. ck/circuit_compiler/support/llvm_ir_function.py +234 -0
  28. ck/example/__init__.py +53 -0
  29. ck/example/alarm.py +366 -0
  30. ck/example/asia.py +28 -0
  31. ck/example/binary_clique.py +32 -0
  32. ck/example/bow_tie.py +33 -0
  33. ck/example/cancer.py +37 -0
  34. ck/example/chain.py +38 -0
  35. ck/example/child.py +199 -0
  36. ck/example/clique.py +33 -0
  37. ck/example/cnf_pgm.py +39 -0
  38. ck/example/diamond_square.py +68 -0
  39. ck/example/earthquake.py +36 -0
  40. ck/example/empty.py +10 -0
  41. ck/example/hailfinder.py +539 -0
  42. ck/example/hepar2.py +628 -0
  43. ck/example/insurance.py +504 -0
  44. ck/example/loop.py +40 -0
  45. ck/example/mildew.py +38161 -0
  46. ck/example/munin.py +22982 -0
  47. ck/example/pathfinder.py +53747 -0
  48. ck/example/rain.py +39 -0
  49. ck/example/rectangle.py +161 -0
  50. ck/example/run.py +30 -0
  51. ck/example/sachs.py +129 -0
  52. ck/example/sprinkler.py +30 -0
  53. ck/example/star.py +44 -0
  54. ck/example/stress.py +64 -0
  55. ck/example/student.py +43 -0
  56. ck/example/survey.py +46 -0
  57. ck/example/triangle_square.py +54 -0
  58. ck/example/truss.py +49 -0
  59. ck/in_out/__init__.py +3 -0
  60. ck/in_out/parse_ace_lmap.py +216 -0
  61. ck/in_out/parse_ace_nnf.py +322 -0
  62. ck/in_out/parse_net.py +480 -0
  63. ck/in_out/parser_utils.py +185 -0
  64. ck/in_out/pgm_pickle.py +42 -0
  65. ck/in_out/pgm_python.py +268 -0
  66. ck/in_out/render_bugs.py +111 -0
  67. ck/in_out/render_net.py +177 -0
  68. ck/in_out/render_pomegranate.py +184 -0
  69. ck/pgm.py +3475 -0
  70. ck/pgm_circuit/__init__.py +1 -0
  71. ck/pgm_circuit/marginals_program.py +352 -0
  72. ck/pgm_circuit/mpe_program.py +237 -0
  73. ck/pgm_circuit/pgm_circuit.py +79 -0
  74. ck/pgm_circuit/program_with_slotmap.py +236 -0
  75. ck/pgm_circuit/slot_map.py +35 -0
  76. ck/pgm_circuit/support/__init__.py +0 -0
  77. ck/pgm_circuit/support/compile_circuit.py +83 -0
  78. ck/pgm_circuit/target_marginals_program.py +103 -0
  79. ck/pgm_circuit/wmc_program.py +323 -0
  80. ck/pgm_compiler/__init__.py +2 -0
  81. ck/pgm_compiler/ace/__init__.py +1 -0
  82. ck/pgm_compiler/ace/ace.py +299 -0
  83. ck/pgm_compiler/factor_elimination.py +395 -0
  84. ck/pgm_compiler/named_pgm_compilers.py +63 -0
  85. ck/pgm_compiler/pgm_compiler.py +19 -0
  86. ck/pgm_compiler/recursive_conditioning.py +231 -0
  87. ck/pgm_compiler/support/__init__.py +0 -0
  88. ck/pgm_compiler/support/circuit_table/__init__.py +17 -0
  89. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +16398 -0
  90. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cpython-313-darwin.so +0 -0
  91. ck/pgm_compiler/support/circuit_table/_circuit_table_cy.pyx +332 -0
  92. ck/pgm_compiler/support/circuit_table/_circuit_table_py.py +304 -0
  93. ck/pgm_compiler/support/clusters.py +568 -0
  94. ck/pgm_compiler/support/factor_tables.py +406 -0
  95. ck/pgm_compiler/support/join_tree.py +332 -0
  96. ck/pgm_compiler/support/named_compiler_maker.py +43 -0
  97. ck/pgm_compiler/variable_elimination.py +91 -0
  98. ck/probability/__init__.py +0 -0
  99. ck/probability/empirical_probability_space.py +50 -0
  100. ck/probability/pgm_probability_space.py +32 -0
  101. ck/probability/probability_space.py +622 -0
  102. ck/program/__init__.py +3 -0
  103. ck/program/program.py +137 -0
  104. ck/program/program_buffer.py +180 -0
  105. ck/program/raw_program.py +67 -0
  106. ck/sampling/__init__.py +0 -0
  107. ck/sampling/forward_sampler.py +211 -0
  108. ck/sampling/marginals_direct_sampler.py +113 -0
  109. ck/sampling/sampler.py +62 -0
  110. ck/sampling/sampler_support.py +232 -0
  111. ck/sampling/uniform_sampler.py +72 -0
  112. ck/sampling/wmc_direct_sampler.py +171 -0
  113. ck/sampling/wmc_gibbs_sampler.py +153 -0
  114. ck/sampling/wmc_metropolis_sampler.py +165 -0
  115. ck/sampling/wmc_rejection_sampler.py +115 -0
  116. ck/utils/__init__.py +0 -0
  117. ck/utils/iter_extras.py +163 -0
  118. ck/utils/local_config.py +270 -0
  119. ck/utils/map_list.py +128 -0
  120. ck/utils/map_set.py +128 -0
  121. ck/utils/np_extras.py +51 -0
  122. ck/utils/random_extras.py +64 -0
  123. ck/utils/tmp_dir.py +94 -0
  124. ck_demos/__init__.py +0 -0
  125. ck_demos/ace/__init__.py +0 -0
  126. ck_demos/ace/copy_ace_to_ck.py +15 -0
  127. ck_demos/ace/demo_ace.py +49 -0
  128. ck_demos/all_demos.py +88 -0
  129. ck_demos/circuit/__init__.py +0 -0
  130. ck_demos/circuit/demo_circuit_dump.py +22 -0
  131. ck_demos/circuit/demo_derivatives.py +43 -0
  132. ck_demos/circuit_compiler/__init__.py +0 -0
  133. ck_demos/circuit_compiler/compare_circuit_compilers.py +32 -0
  134. ck_demos/circuit_compiler/show_llvm_program.py +26 -0
  135. ck_demos/pgm/__init__.py +0 -0
  136. ck_demos/pgm/demo_pgm_dump.py +18 -0
  137. ck_demos/pgm/demo_pgm_dump_stress.py +18 -0
  138. ck_demos/pgm/demo_pgm_string_rendering.py +15 -0
  139. ck_demos/pgm/show_examples.py +25 -0
  140. ck_demos/pgm_compiler/__init__.py +0 -0
  141. ck_demos/pgm_compiler/compare_pgm_compilers.py +63 -0
  142. ck_demos/pgm_compiler/demo_compiler_dump.py +60 -0
  143. ck_demos/pgm_compiler/demo_factor_elimination.py +47 -0
  144. ck_demos/pgm_compiler/demo_join_tree.py +25 -0
  145. ck_demos/pgm_compiler/demo_marginals_program.py +53 -0
  146. ck_demos/pgm_compiler/demo_mpe_program.py +55 -0
  147. ck_demos/pgm_compiler/demo_pgm_compiler.py +38 -0
  148. ck_demos/pgm_compiler/demo_recursive_conditioning.py +33 -0
  149. ck_demos/pgm_compiler/demo_variable_elimination.py +33 -0
  150. ck_demos/pgm_compiler/demo_wmc_program.py +29 -0
  151. ck_demos/pgm_compiler/time_fe_compiler.py +93 -0
  152. ck_demos/pgm_inference/__init__.py +0 -0
  153. ck_demos/pgm_inference/demo_inferencing_basic.py +188 -0
  154. ck_demos/pgm_inference/demo_inferencing_mpe_cancer.py +45 -0
  155. ck_demos/pgm_inference/demo_inferencing_wmc_and_mpe_sprinkler.py +154 -0
  156. ck_demos/pgm_inference/demo_inferencing_wmc_student.py +110 -0
  157. ck_demos/programs/__init__.py +0 -0
  158. ck_demos/programs/demo_program_buffer.py +24 -0
  159. ck_demos/programs/demo_program_multi.py +24 -0
  160. ck_demos/programs/demo_program_none.py +19 -0
  161. ck_demos/programs/demo_program_single.py +23 -0
  162. ck_demos/programs/demo_raw_program_interpreted.py +21 -0
  163. ck_demos/programs/demo_raw_program_llvm.py +21 -0
  164. ck_demos/sampling/__init__.py +0 -0
  165. ck_demos/sampling/check_sampler.py +71 -0
  166. ck_demos/sampling/demo_marginal_direct_sampler.py +40 -0
  167. ck_demos/sampling/demo_uniform_sampler.py +38 -0
  168. ck_demos/sampling/demo_wmc_direct_sampler.py +40 -0
  169. ck_demos/utils/__init__.py +0 -0
  170. ck_demos/utils/compare.py +120 -0
  171. ck_demos/utils/convert_network.py +45 -0
  172. ck_demos/utils/sample_model.py +216 -0
  173. ck_demos/utils/stop_watch.py +384 -0
  174. compiled_knowledge-4.0.0a20.dist-info/METADATA +50 -0
  175. compiled_knowledge-4.0.0a20.dist-info/RECORD +178 -0
  176. compiled_knowledge-4.0.0a20.dist-info/WHEEL +6 -0
  177. compiled_knowledge-4.0.0a20.dist-info/licenses/LICENSE.txt +21 -0
  178. compiled_knowledge-4.0.0a20.dist-info/top_level.txt +2 -0
@@ -0,0 +1,622 @@
1
+ """
2
+ An abstract class for object providing probabilities.
3
+ """
4
+ import math
5
+ from abc import ABC, abstractmethod
6
+ from itertools import chain
7
+ from typing import Sequence, Tuple, Iterable, Callable
8
+
9
+ import numpy as np
10
+
11
+ from ck.pgm import Indicator, RandomVariable, rv_instances_as_indicators, number_of_states, rv_instances, Instance
12
+ from ck.utils.iter_extras import combos as _combos
13
+ from ck.utils.map_set import MapSet
14
+ from ck.utils.np_extras import dtype_for_number_of_states, NDArrayFloat64, DTypeStates, NDArrayNumeric
15
+
16
+ # Type defining a condition.
17
+ Condition = None | Indicator | Iterable[Indicator]
18
+
19
+
20
+ class ProbabilitySpace(ABC):
21
+ """
22
+ An abstract mixin class for a class providing probabilities over a state space defined by random variables.
23
+ Each possible world of the state space is referred to as an 'instance'.
24
+ """
25
+ __slots__ = ()
26
+
27
@property
@abstractmethod
def rvs(self) -> Sequence[RandomVariable]:
    """The random variables that define the state space.

    Each random variable, rv, has len(rv) states, and rv[i] is the
    Indicator for the i-th state, representing the condition 'rv == i'.
    Indicators must be unique across all rvs.
    """
38
+
39
@abstractmethod
def wmc(self, *condition: Condition) -> float:
    """Return the summed weight of the instances matching the given condition.

    Multiple indicators of the same random variable are interpreted as a
    disjunction; indicators of different random variables as a conjunction.
    E.g.: X=0, Y=1, Y=3 means X=0 and (Y=1 or Y=3).

    Args:
        condition: zero or more indicators that specify a condition.
    """
52
+
53
@property
@abstractmethod
def z(self) -> float:
    """The summed weight of all instances.

    Equivalent to self.wmc() called with no arguments.
    """
60
+
61
def probability(self, *indicators: Indicator, condition: Condition = ()) -> float:
    """Return the joint probability of the given indicators.

    The probability is conditioned on `condition` and marginalised over any
    unmentioned random variables. Multiple indicators of the same random
    variable (in either `indicators` or `condition`) are interpreted as a
    disjunction, otherwise as a conjunction.
    E.g.: X=0, Y=1, Y=3 means X=0 and (Y=1 or Y=3).

    Args:
        indicators: Indicators specifying the set of instances whose probability is computed.
        condition: Indicators specifying conditions for a conditional probability.
    Returns:
        the probability of the given indicators given the condition,
        or NaN when the conditioning weight is not positive.
    """
    cond: Tuple[Indicator, ...] = check_condition(condition)

    # Normalising constant: the weight of the conditioning set (or everything).
    z = self.z if len(cond) == 0 else self.wmc(*cond)
    if z <= 0:
        return np.nan

    # Merge the query indicators with the condition, per random variable:
    # a variable mentioned in both takes the intersection (checking for
    # contradictions); a variable only in the condition contributes its
    # condition indicators to the query.
    query_groups: MapSet[int, Indicator] = _group_indicators(indicators)
    cond_groups: MapSet[int, Indicator] = _group_indicators(cond)

    for rv_idx, cond_inds in cond_groups.items():
        existing = query_groups.get(rv_idx)
        if existing is None:
            query_groups.add_all(rv_idx, cond_inds)
        else:
            existing.intersection_update(cond_inds)
            if len(existing) == 0:
                # The query contradicts the condition.
                return 0.0

    # All indicators from the merged groups form the final query.
    merged = chain(*query_groups.values())
    return self.wmc(*merged) / z
111
+
112
def marginal_distribution(self, *rvs: RandomVariable, condition: Condition = ()) -> NDArrayNumeric:
    """Return the marginal probability distribution over the states of the given rvs.

    When multiple rvs are supplied, the order of instantiations is as per
    `rv_instances_as_indicators(*rvs)`.

    Multiple indicators of the same random variable in `condition` are
    interpreted as a disjunction, otherwise as a conjunction.
    E.g.: X=0, Y=1, Y=3 means X=0 and (Y=1 or Y=3).

    This is not an efficient implementation: one weighted model count is
    performed per possible state of the given random variables.

    Warning:
        If every state of the rvs has zero probability (given the condition),
        the marginal distribution is ill-defined and all returned values are NaN.

    Args:
        rvs: Random variables to compute the marginal distribution over.
        condition: Indicators that specify conditions for conditional probability.

    Returns:
        marginal probability distribution as an array co-indexed with
        `rv_instances_as_indicators(*rvs)`.
    """
    cond = check_condition(condition)

    # The wmc callable correctly handles condition indicators that mention
    # one of the marginal rvs (they must match to have non-zero weight).
    wmc = self._get_wmc_for_marginals(rvs, cond)

    likelihoods: NDArrayFloat64 = np.fromiter(
        (wmc(inds) for inds in rv_instances_as_indicators(*rvs)),
        count=number_of_states(*rvs),
        dtype=np.float64,
    )
    _normalise_marginal(likelihoods)
    return likelihoods
154
+
155
def map(self, *rvs: RandomVariable, condition: Condition = ()) -> Tuple[float, Instance]:
    """Determine the maximum apriori probability (MAP).

    If there are tied solutions, one solution is returned, which
    is selected arbitrarily.

    If multiple indicators of the same random variable appear in
    the parameter 'condition' then they are interpreted as
    a disjunction, otherwise indicators are interpreted as
    a conjunction. E.g.: X=0, Y=1, Y=3 means X=0 and (Y=1 or Y=3)

    Warning:
        This is not an efficient implementation as it will call `self.wmc`
        for each possible state of the given random variables. If efficient MAP
        probability calculations are required, consider using a different method.

    Args:
        rvs: random variables to find the MAP over.
        condition: any conditioning indicators.

    Returns:
        (probability, instance) where
            probability: is the MAP probability (NaN if the condition has zero weight)
            instance: is the MAP state (co-indexed with the given rvs).
    """
    condition: Sequence[Indicator] = check_condition(condition)

    rv_indexes = set(rv.idx for rv in rvs)
    assert len(rv_indexes) == len(rvs), 'duplicated random variables not allowed'

    # Group conditioning indicators by random variable.
    conditions_by_rvs = _group_states(condition)

    # See if any MAP random variable is also conditioned.
    # Reduce the state space of any conditioned MAP rv.
    loop_rvs = []
    reduced_space = False
    for rv in rvs:
        states = conditions_by_rvs.get(rv.idx)
        if states is None:
            loop_rvs.append(rv)
        else:
            loop_rvs.append([rv[i] for i in sorted(states)])
            reduced_space = True

    if not reduced_space:
        # No MAP rv appears in the condition: using computed marginal
        # probabilities is expected to be faster.
        prs = self.marginal_distribution(*rvs, condition=condition)
        best_probability = float('-inf')
        best_states = None
        for probability, inst in zip(prs, rv_instances(*rvs)):
            if probability > best_probability:
                best_probability = probability
                best_states = inst
        return best_probability, best_states

    else:
        # Remove any condition indicators with rv in rvs.
        new_conditions = tuple(ind for ind in condition if ind.rv_idx not in rv_indexes)

        # Loop over the (reduced) state space of the 'loop' rvs.
        best_probability = float('-inf')
        best_states = None
        indicators: Tuple[Indicator, ...]
        for indicators in _combos(loop_rvs):
            probability = self.wmc(*(indicators + new_conditions))
            if probability > best_probability:
                best_probability = probability
                best_states = tuple(ind.state_idx for ind in indicators)

        condition_probability = self.wmc(*condition)
        if condition_probability <= 0:
            # Zero-weight condition: the conditional MAP probability is
            # undefined. Return NaN, consistent with `probability()`,
            # instead of raising ZeroDivisionError.
            return np.nan, best_states
        return best_probability / condition_probability, best_states
228
+
229
def correlation(self, indicator1: Indicator, indicator2: Indicator, condition: Condition = ()) -> float:
    """What is the correlation between the two given indicators, r(indicator1, indicator2).

    Args:
        indicator1: a first random variable and its state.
        indicator2: a second random variable and its state.
        condition: any conditioning indicators.

    Returns:
        correlation between the two given indicators.
    """
    cond = check_condition(condition)

    p1 = self.probability(indicator1, condition=cond)
    p2 = self.probability(indicator2, condition=cond)
    p12 = self._joint_probability(indicator1, indicator2, condition=cond)

    variance_product = p1 * (1.0 - p1) * p2 * (1.0 - p2)
    if variance_product == 0.0:
        # As any marginal probability approaches zero, correlation approaches zero.
        return 0.0
    return (p12 - p1 * p2) / math.sqrt(variance_product)
252
+
253
def entropy(self, rv: RandomVariable, condition: Condition = ()) -> float:
    """Calculate the entropy of the given random variable, H(rv).

    Args:
        rv: random variable to calculate the entropy for.
        condition: any conditioning indicators.

    Returns:
        entropy of the given random variable.
    """
    cond = check_condition(condition)
    # Shannon entropy: -sum(p * log2(p)), skipping zero-probability states.
    total = 0.0
    for indicator in rv:
        p = self.probability(indicator, condition=cond)
        if p > 0.0:
            total -= p * math.log2(p)
    return total
271
+
272
def conditional_entropy(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
    """Calculate the conditional entropy, H(rv1 | rv2).

    Args:
        rv1: random variable to calculate the entropy for.
        rv2: the conditioning random variable for entropy calculation.
        condition: any conditioning indicators to restrict the state space.

    Returns:
        entropy of rv1, conditioned on rv2.
    """
    cond = check_condition(condition)
    total = 0.0
    for ind1 in rv1:
        for ind2 in rv2:
            p12 = self._joint_probability(ind1, ind2, condition=cond)
            if p12 > 0.0:
                # p12 > 0 implies p2 > 0, since p12 <= p2.
                p2 = self.probability(ind2, condition=cond)
                total -= p12 * math.log2(p12 / p2)
    return total
294
+
295
def joint_entropy(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
    """Calculate the joint entropy of the two random variables, H(rv1; rv2).

    Args:
        rv1: a first random variable to calculate joint entropy.
        rv2: a second random variable to calculate joint entropy.
        condition: any conditioning indicators to restrict the state space.
    Returns:
        joint entropy of the given random variables.
    """
    cond = check_condition(condition)
    total = 0.0
    for ind1 in rv1:
        for ind2 in rv2:
            p12 = self._joint_probability(ind1, ind2, condition=cond)
            if p12 > 0.0:
                total -= p12 * math.log2(p12)
    return total
314
+
315
def mutual_information(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
    """Calculate the mutual information between two random variables, I(rv1; rv2).

    Args:
        rv1: a first random variable
        rv2: a second random variable
        condition: indicators to specify a condition restricting the state space.
    Returns:
        the mutual information between rv1 and rv2.
    """
    cond = check_condition(condition)
    marginals1 = self.marginal_distribution(rv1, condition=cond)
    marginals2 = self.marginal_distribution(rv2, condition=cond)
    total = 0.0
    for ind1, p1 in zip(rv1, marginals1):
        for ind2, p2 in zip(rv2, marginals2):
            p12 = self._joint_probability(ind1, ind2, condition=cond)
            if p12 > 0.0:
                total += p12 * math.log2(p12 / p1 / p2)
    return total
336
+
337
def total_correlation(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
    """Calculate the 'total correlation' measure.

    total_correlation = I(rv1; rv2) / min(H(rv1), H(rv2)).
    This is a normalised mutual information between two random variables.
    0 => no mutual information.
    1 => perfect mutual information.

    Args:
        rv1: a first random variable
        rv2: a second random variable
        condition: indicators to specify a condition restricting the state space.
    Returns:
        total correlation between the given random variables.
    """
    condition = check_condition(condition)
    # Both entropies must be conditioned, consistent with the conditioned
    # mutual-information numerator (previously H(rv1) ignored the condition,
    # unlike covariant_normalised_mutual_information which conditions both).
    denominator = min(
        self.entropy(rv1, condition=condition),
        self.entropy(rv2, condition=condition),
    )
    return self._normalised_mutual_information(rv1, rv2, denominator, condition=condition)
355
+
356
def uncertainty(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
    """Calculate the 'uncertainty' measure, C, between two random variables.

    C(rv1, rv2) = I(rv1; rv2) / H(rv2)
    This is a normalised mutual information between two random variables.
    Note that it is not a symmetric measure; in general C(rv1, rv2) does not equal C(rv2, rv1).
    0 => no mutual information.
    1 => perfect mutual information.

    Args:
        rv1: a first random variable
        rv2: a second random variable
        condition: indicators to specify a condition restricting the state space.
    Returns:
        uncertainty between the given random variables.
    """
    cond = check_condition(condition)
    h2 = self.entropy(rv2, condition=cond)
    return self._normalised_mutual_information(rv1, rv2, h2, condition=cond)
375
+
376
def symmetric_uncertainty(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
    """Calculate the 'symmetric uncertainty' measure.

    symmetric_uncertainty = 2 * I(rv1, rv2) / (H(rv1) + H(rv2)).
    This is the harmonic mean of the two uncertainty coefficients,
    C(rv1, rv2) = I(rv1; rv2) / H(rv2) and C(rv2, rv1) = I(rv1; rv2) / H(rv1).
    This is a normalised mutual information between two random variables.
    0 => no mutual information.
    1 => perfect mutual information.

    Args:
        rv1: a first random variable
        rv2: a second random variable
        condition: indicators to specify a condition restricting the state space.
    Returns:
        symmetric uncertainty between the given random variables.
    """
    condition = check_condition(condition)
    # Both entropies must be conditioned, consistent with the conditioned
    # mutual-information numerator (previously H(rv1) ignored the condition,
    # unlike covariant_normalised_mutual_information which conditions both).
    denominator = (
        self.entropy(rv1, condition=condition)
        + self.entropy(rv2, condition=condition)
    )
    return 2.0 * self._normalised_mutual_information(rv1, rv2, denominator, condition=condition)
396
+
397
def iqr(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
    """Calculate the Information Quality Ratio (IQR).

    IQR = I(rv1; rv2) / H(rv1; rv2).
    Also known as 'dual total correlation'.
    This is a normalised mutual information between two random variables.
    0 => no mutual information.
    1 => perfect mutual information.

    Args:
        rv1: a first random variable
        rv2: a second random variable
        condition: indicators to specify a condition restricting the state space.
    Returns:
        Information Quality Ratio between the given random variables.
    """
    cond = check_condition(condition)
    h12 = self.joint_entropy(rv1, rv2, condition=cond)
    return self._normalised_mutual_information(rv1, rv2, h12, condition=cond)
416
+
417
def covariant_normalised_mutual_information(self, rv1: RandomVariable, rv2: RandomVariable,
                                            condition: Condition = ()) -> float:
    """Calculate the covariant normalised mutual information.

    = I(rv1; rv2) / sqrt(H(rv1) * H(rv2)).
    This is a normalised mutual information between two random variables.
    0 => no mutual information.
    1 => perfect mutual information.

    Args:
        rv1: a first random variable
        rv2: a second random variable
        condition: indicators to specify a condition restricting the state space.
    Returns:
        covariant normalised mutual information between the given random variables.
    """
    cond = check_condition(condition)
    h1 = self.entropy(rv1, condition=cond)
    h2 = self.entropy(rv2, condition=cond)
    return self._normalised_mutual_information(rv1, rv2, math.sqrt(h1 * h2), condition=cond)
436
+
437
def _normalised_mutual_information(
        self,
        rv1: RandomVariable,
        rv2: RandomVariable,
        denominator: float,
        condition: Tuple[Indicator, ...],
) -> float:
    """Helper function for normalised mutual information calculations.

    Args:
        rv1: a first random variable
        rv2: a second random variable
        denominator: the normalisation factor
        condition: indicators to specify a condition restricting the state space.
    Returns:
        mutual_information(rv1, rv2) / denominator, or 0.0 when the denominator is zero.
    """
    if denominator == 0.0:
        # Zero denominator: by convention, report no mutual information.
        return 0.0
    return self.mutual_information(rv1, rv2, condition) / denominator
459
+
460
def _joint_probability(
        self,
        indicator1: Indicator,
        indicator2: Indicator,
        condition: Tuple[Indicator, ...],
) -> float:
    """Helper to correctly calculate a joint probability even when the two
    indicators are from the same random variable.

    If the indicators are from different random variables:
        probability(indicator1 and indicator2 | condition).
    If the indicators are from the same random variable:
        probability(indicator1 or indicator2 | condition).

    Args:
        indicator1: a first Indicator.
        indicator2: a second Indicator
        condition: indicators to specify a condition restricting the state space.
    Returns:
        joint probability of the two indicators, given the condition.
    """
    if indicator1 == indicator2:
        # Same random variable, same state: just the single-indicator probability.
        return self.probability(indicator1, condition=condition)
    if indicator1.rv_idx == indicator2.rv_idx:
        # Same random variable, different states: mutually exclusive.
        return 0.0
    # General case: two different random variables.
    return self.probability(indicator1, indicator2, condition=condition)
492
+
493
def _get_wmc_for_marginals(
        self,
        rvs: Sequence[RandomVariable],
        condition: Tuple[Indicator, ...],
) -> Callable[[Sequence[Indicator]], float]:
    """Return a wmc function suitable for calculating marginal distributions.

    This implementation is careful of the situation where indicators of rvs
    appear in condition: if an RV has at least one indicator in the condition
    then an instantiation must match it to have non-zero probability.

    Args:
        rvs: random variables to calculate marginal distributions for.
        condition: indicators to specify a condition restricting the state space.
    Returns:
        A function from a condition, specified as a sequence of indicators,
        to a weighted model count.
    """
    if len(condition) == 0:
        # Unconditioned: the weighted model count of the instantiation alone.
        def wmc(indicators: Sequence[Indicator]) -> float:
            return self.wmc(*indicators)

        return wmc

    # For each marginal rv, collect the condition indicators that mention it
    # (removing them from the residual condition). An rv with no condition
    # indicators may take any of its states.
    residual = set(condition)
    allowed_per_rv = []
    any_overlap = False
    for rv in rvs:
        allowed = set()
        for ind in rv:
            if ind in residual:
                allowed.add(ind)
                residual.discard(ind)
                any_overlap = True
        if len(allowed) == 0:
            allowed.update(rv)
        allowed_per_rv.append(allowed)

    if any_overlap:
        residual_condition = tuple(residual)

        def wmc(indicators: Sequence[Indicator]) -> float:
            # An instantiation contradicting the condition has zero weight.
            for indicator, allowed in zip(indicators, allowed_per_rv):
                if indicator not in allowed:
                    return 0.0
            return self.wmc(*(tuple(indicators) + residual_condition))
    else:
        fixed_condition = tuple(condition)

        def wmc(indicators: Sequence[Indicator]) -> float:
            return self.wmc(*(tuple(indicators) + fixed_condition))

    return wmc
545
+
546
+
547
def check_condition(condition: Condition) -> Tuple[Indicator, ...]:
    """Make the best effort to interpret the given condition.

    Args:
        condition: a relaxed specification of a condition.
    Returns:
        a formal specification of the condition as a tuple of indicators
        with no duplicates.
    """
    if condition is None:
        return ()
    if isinstance(condition, Indicator):
        return (condition,)
    # An iterable of indicators: de-duplicate (order is not significant).
    return tuple(set(condition))
562
+
563
+
564
def dtype_for_state_indexes(rvs: Iterable[RandomVariable]) -> DTypeStates:
    """Infer a numpy dtype able to hold any state index of the given random variables.

    Args:
        rvs: some random variables.
    Returns:
        a numpy dtype.
    """
    max_states = max((len(rv) for rv in rvs), default=0)
    return dtype_for_number_of_states(max_states)
574
+
575
+
576
def _group_indicators(indicators: Iterable[Indicator]) -> MapSet[int, Indicator]:
    """Group the given indicators by rv_idx.

    Args:
        indicators: the indicators to group.

    Returns:
        A mapping from rv_idx to set of indicators.
    """
    grouped: MapSet[int, Indicator] = MapSet()
    for ind in indicators:
        grouped.add(ind.rv_idx, ind)
    return grouped
590
+
591
+
592
def _group_states(indicators: Iterable[Indicator]) -> MapSet[int, int]:
    """Group the given indicator states by rv_idx.

    Args:
        indicators: the indicators to group.

    Returns:
        A mapping from rv_idx to set of state indexes.
    """
    grouped: MapSet[int, int] = MapSet()
    for ind in indicators:
        grouped.add(ind.rv_idx, ind.state_idx)
    return grouped
606
+
607
+
608
+ def _normalise_marginal(distribution: NDArrayFloat64) -> None:
609
+ """
610
+ Update the values in the given distribution to
611
+ properly represent a marginal distribution.
612
+
613
+ The update is made in-place.
614
+
615
+ Args:
616
+ a 1D numpy array of likelihoods.
617
+ """
618
+ total = np.sum(distribution)
619
+ if total <= 0:
620
+ distribution[:] = np.nan
621
+ elif total != 1:
622
+ distribution /= total
ck/program/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .raw_program import RawProgram
2
+ from .program_buffer import ProgramBuffer
3
+ from .program import Program