PyPI - compiled-knowledge - Versions diffs - 4.1.0a1__cp312-cp312-macosx_11_0_arm64.whl → 4.1.0a3__cp312-cp312-macosx_11_0_arm64.whl - Mend

compiled-knowledge 4.1.0a1__cp312-cp312-macosx_11_0_arm64.whl → 4.1.0a3__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of compiled-knowledge might be problematic. Click here for more details.

Files changed (36)

ck/circuit/_circuit_cy.c +1 -1
ck/circuit/_circuit_cy.cpython-312-darwin.so +0 -0
ck/circuit_compiler/cython_vm_compiler/_compiler.c +152 -152
ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-312-darwin.so +0 -0
ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c +1 -1
ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-312-darwin.so +0 -0
ck/dataset/cross_table.py +143 -79
ck/dataset/dataset.py +143 -38
ck/dataset/dataset_builder.py +519 -0
ck/dataset/dataset_from_crosstable.py +21 -2
ck/dataset/dataset_from_csv.py +5 -1
ck/learning/coalesce_cross_tables.py +395 -0
ck/learning/model_from_cross_tables.py +242 -0
ck/learning/parameters.py +117 -0
ck/learning/train_generative_bn.py +198 -0
ck/pgm.py +10 -8
ck/pgm_circuit/marginals_program.py +5 -0
ck/pgm_circuit/wmc_program.py +5 -0
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c +1 -1
ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cpython-312-darwin.so +0 -0
ck/probability/divergence.py +226 -0
ck/probability/probability_space.py +43 -19
ck_demos/dataset/__init__.py +0 -0
ck_demos/dataset/demo_dataset_builder.py +37 -0
ck_demos/dataset/demo_dataset_from_sampler.py +18 -0
ck_demos/learning/__init__.py +0 -0
ck_demos/learning/demo_bayesian_network_from_cross_tables.py +71 -0
ck_demos/learning/demo_simple_learning.py +55 -0
ck_demos/sampling/demo_wmc_direct_sampler.py +2 -2
{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/METADATA +2 -1
{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/RECORD +35 -24
ck/learning/train_generative.py +0 -149
/ck/{dataset/cross_table_probabilities.py → probability/cross_table_probability_space.py} +0 -0
{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/WHEEL +0 -0
{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/licenses/LICENSE.txt +0 -0
{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/top_level.txt +0 -0

ck/probability/probability_space.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import math
 from abc import ABC, abstractmethod
 from itertools import chain
@@ -203,16 +205,19 @@ class ProbabilitySpace(ABC):
                 loop_rvs.append([rv[i] for i in sorted(states)])
                 reduced_space = True
+        best_probability = float('-inf')
+        best_states = None
         # If the random variables we are looping over do not have any conditions
         # then it is expected to be faster to use computed marginal probabilities.
         if not reduced_space:
             prs = self.marginal_distribution(*rvs, condition=condition)
-            best_probability = float('-inf')
-            best_states = None
             for probability, inst in zip(prs, rv_instances(*rvs)):
                 if probability > best_probability:
                     best_probability = probability
                     best_states = inst
+            if best_states is None:
+                return _NAN, ()
             return best_probability, best_states
         else:
@@ -220,8 +225,6 @@ class ProbabilitySpace(ABC):
             new_conditions = tuple(ind for ind in condition if ind.rv_idx not in rv_indexes)
             # Loop over the state space of the 'loop' rvs
-            best_probability = float('-inf')
-            best_states = None
             indicators: Tuple[Indicator, ...]
             for indicators in _combos(loop_rvs):
                 probability = self.wmc(*(indicators + new_conditions))
@@ -229,6 +232,8 @@ class ProbabilitySpace(ABC):
                     best_probability = probability
                     best_states = tuple(ind.state_idx for ind in indicators)
             condition_probability = self.wmc(*condition)
+            if best_states is None:
+                return _NAN, ()
             return best_probability / condition_probability, best_states
     def correlation(self, indicator1: Indicator, indicator2: Indicator, condition: Condition = ()) -> float:
@@ -245,6 +250,20 @@ class ProbabilitySpace(ABC):
         """
         condition = check_condition(condition)
+        if indicator1.rv_idx == indicator2.rv_idx:
+            # Special case - same random variable
+            condition_groups: MapSet[int, Indicator] = _group_indicators(condition)
+            rv_idx: int = indicator1.rv_idx
+            if indicator1 not in condition_groups.get(rv_idx, (indicator1,)):
+                return _NAN
+            if indicator1 == indicator2:
+                return 1
+            else:
+                if indicator2 not in condition_groups.get(rv_idx, (indicator2,)):
+                    return _NAN
+                else:
+                    return 0
         p1 = self.probability(indicator1, condition=condition)
         p2 = self.probability(indicator2, condition=condition)
         p12 = self._joint_probability(indicator1, indicator2, condition=condition)
@@ -267,12 +286,7 @@ class ProbabilitySpace(ABC):
             entropy of the given random variable.
         """
         condition = check_condition(condition)
-        e = 0.0
-        for ind in rv:
-            p = self.probability(ind, condition=condition)
-            if p > 0.0:
-                e -= p * math.log2(p)
-        return e
+        return -sum(plogp(self.probability(ind, condition=condition)) for ind in rv)
     def conditional_entropy(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
         """
@@ -309,13 +323,11 @@ class ProbabilitySpace(ABC):
             joint entropy of the given random variables.
         """
         condition = check_condition(condition)
-        e = 0.0
-        for ind1 in rv1:
-            for ind2 in rv2:
-                p = self._joint_probability(ind1, ind2, condition=condition)
-                if p > 0.0:
-                    e -= p * math.log2(p)
-        return e
+        return -sum(
+            plogp(self._joint_probability(ind1, ind2, condition=condition))
+            for ind1 in rv1
+            for ind2 in rv2
+        )
     def mutual_information(self, rv1: RandomVariable, rv2: RandomVariable, condition: Condition = ()) -> float:
         """
@@ -419,8 +431,12 @@ class ProbabilitySpace(ABC):
         denominator = self.joint_entropy(rv1, rv2, condition=condition)
         return self._normalised_mutual_information(rv1, rv2, denominator, condition=condition)
-    def covariant_normalised_mutual_information(self, rv1: RandomVariable, rv2: RandomVariable,
-                                                condition: Condition = ()) -> float:
+    def covariant_normalised_mutual_information(
+            self,
+            rv1: RandomVariable,
+            rv2: RandomVariable,
+            condition: Condition = (),
+    ) -> float:
         """
         Calculate the covariant normalised mutual information
         = I(rv1; rv2) / sqrt(H(rv1) * H(rv2)).
@@ -549,6 +565,14 @@ class ProbabilitySpace(ABC):
         return wmc
+def plogp(p: float) -> float:
+    """
+    Returns:
+        p * log2(p) if p > 0, otherwise 0.
+    """
+    return p * math.log2(p) if p > 0 else 0
 def check_condition(condition: Condition) -> Tuple[Indicator, ...]:
     """
     Make the best effort to interpret the given condition.

ck_demos/dataset/__init__.py ADDED Viewed

File without changes

ck_demos/dataset/demo_dataset_builder.py ADDED Viewed

@@ -0,0 +1,37 @@
+from ck.dataset import HardDataset, SoftDataset
+from ck.dataset.dataset_builder import DatasetBuilder, soft_dataset_from_builder, hard_dataset_from_builder
+from ck.pgm import PGM
+def main() -> None:
+    pgm = PGM()
+    x = pgm.new_rv('x', (True, False))
+    y = pgm.new_rv('y', ('yes', 'no', 'maybe'))
+    builder = DatasetBuilder([x, y])
+    builder.append()
+    builder.append(1, 2).weight = 3
+    builder.append(None, [0.7, 0.1, 0.2])
+    builder.append().set_states(True, 'maybe')
+    print('DatasetBuilder dump')
+    builder.dump()
+    print()
+    print('DatasetBuilder dump, showing states and custom missing values')
+    builder.dump(as_states=True, missing='?')
+    print()
+    print('HardDataset dump')
+    dataset: HardDataset = hard_dataset_from_builder(builder, missing=99)
+    dataset.dump()
+    print()
+    print('SoftDataset dump')
+    dataset: SoftDataset = soft_dataset_from_builder(builder)
+    dataset.dump()
+    print()
+if __name__ == '__main__':
+    main()

ck_demos/dataset/demo_dataset_from_sampler.py ADDED Viewed

@@ -0,0 +1,18 @@
+from ck import example
+from ck.dataset.sampled_dataset import dataset_from_sampler
+from ck.pgm import PGM
+from ck.pgm_circuit.wmc_program import WMCProgram
+from ck.pgm_compiler import DEFAULT_PGM_COMPILER
+from ck.sampling.sampler import Sampler
+def main() -> None:
+    pgm: PGM = example.Student()
+    sampler: Sampler = WMCProgram(DEFAULT_PGM_COMPILER(pgm)).sample_direct()
+    dataset = dataset_from_sampler(sampler, 10)
+    dataset.dump()
+if __name__ == '__main__':
+    main()

ck_demos/learning/__init__.py ADDED Viewed

File without changes

ck_demos/learning/demo_bayesian_network_from_cross_tables.py ADDED Viewed

@@ -0,0 +1,71 @@
+from typing import List, Set
+from ck import example
+from ck.dataset import HardDataset
+from ck.dataset.cross_table import CrossTable, cross_table_from_hard_dataset
+from ck.dataset.sampled_dataset import dataset_from_sampler
+from ck.learning.model_from_cross_tables import model_from_cross_tables
+from ck.pgm import PGM, RandomVariable
+from ck.pgm_circuit.wmc_program import WMCProgram
+from ck.pgm_compiler import DEFAULT_PGM_COMPILER
+from ck.probability import divergence
+EXCLUDE_UNNECESSARY_CROSS_TABLES = True
+def main() -> None:
+    number_of_samples: int = 10000  # How many instances to make for the model dataset
+    # Create a dataset based on model which is an example PGM
+    model: PGM = example.Student()
+    model_dataset = dataset_from_sampler(
+        WMCProgram(DEFAULT_PGM_COMPILER(model)).sample_direct(),
+        number_of_samples,
+    )
+    # Clone the model, without factors, and transport the dataset to the new PGM
+    pgm = PGM()
+    dataset = HardDataset(weights=model_dataset.weights)
+    for model_rv in model.rvs:
+        rv = pgm.new_rv(model_rv.name, model_rv.states)
+        dataset.add_rv_from_state_idxs(rv, model_dataset.state_idxs(model_rv))
+    # Which model rvs have a child
+    model_rvs_with_children: Set[RandomVariable] = set()
+    for model_factor in model.factors:
+        for parent_rv in model_factor.rvs[1:]:
+            model_rvs_with_children.add(parent_rv)
+    # Construct cross-tables from the dataset
+    cross_tables: List[CrossTable] = []
+    for model_factor in model.factors:
+        if (
+                EXCLUDE_UNNECESSARY_CROSS_TABLES
+                and len(model_factor.rvs) == 1
+                and model_factor.rvs[0] in model_rvs_with_children
+        ):
+            # The factor relates to a single random variable (has
+            # no parents) but it does have children.
+            # No need to include a cross-table as it is inferable from
+            # cross-tables of its children.
+            continue
+        rvs = tuple(pgm.rvs[model_rv.idx] for model_rv in model_factor.rvs)
+        cross_tables.append(cross_table_from_hard_dataset(dataset, rvs))
+        print('cross-table:', *rvs)
+    # Train the PGM
+    model_from_cross_tables(pgm, cross_tables)
+    # Show results
+    print()
+    pgm.dump(show_function_values=True)
+    print()
+    model_space = WMCProgram(DEFAULT_PGM_COMPILER(model))
+    pgm_space = WMCProgram(DEFAULT_PGM_COMPILER(pgm))
+    print('HI', divergence.hi(model_space, pgm_space))
+    print('KL', divergence.kl(model_space, pgm_space))
+if __name__ == '__main__':
+    main()

ck_demos/learning/demo_simple_learning.py ADDED Viewed

@@ -0,0 +1,55 @@
+from ck.dataset.dataset_from_csv import hard_dataset_from_csv
+from ck.learning.train_generative_bn import train_generative_bn
+from ck.pgm import PGM
+def main() -> None:
+    pgm = PGM('Student')
+    difficult = pgm.new_rv('difficult', ['y', 'n'])
+    intelligent = pgm.new_rv('intelligent', ['y', 'n'])
+    grade = pgm.new_rv('grade', ['low', 'medium', 'high'])
+    award = pgm.new_rv('award', ['y', 'n'])
+    letter = pgm.new_rv('letter', ['y', 'n'])
+    pgm.new_factor(difficult)
+    pgm.new_factor(intelligent)
+    pgm.new_factor(grade, intelligent, difficult)
+    pgm.new_factor(award, intelligent)
+    pgm.new_factor(letter, grade)
+    rvs = (difficult, intelligent, grade, award, letter)
+    csv = """
+    0,1,2,0,1
+    1,1,2,0,1
+    1,1,2,0,1
+    0,0,2,0,0
+    0,1,1,1,0
+    1,1,1,1,1
+    1,1,0,0,0
+    1,1,0,0,1
+    1,0,0,0,0
+    """
+    dataset = hard_dataset_from_csv(rvs, csv.splitlines())
+    # Learn parameter values for `pgm` using the training data `dataset`.
+    # This updates the PGM's potential functions.
+    train_generative_bn(pgm, dataset)
+    show_pgm_factors(pgm)
+    print('Done.')
+def show_pgm_factors(pgm: PGM) -> None:
+    for factor in pgm.factors:
+        potential_function = factor.function
+        print(f'Factor: {factor} {type(potential_function)}')
+        for instance, _, param_value in potential_function.keys_with_param:
+            print(f'Factor{instance} = {param_value}')
+        print()
+if __name__ == '__main__':
+    main()

ck_demos/sampling/demo_wmc_direct_sampler.py CHANGED Viewed

@@ -2,7 +2,7 @@ import random
 from ck import example
 from ck.pgm import PGM
-from ck.pgm_compiler import factor_elimination
+from ck.pgm_compiler import DEFAULT_PGM_COMPILER
 from ck.pgm_circuit import PGMCircuit
 from ck.pgm_circuit.wmc_program import WMCProgram
 from ck.probability.empirical_probability_space import EmpiricalProbabilitySpace
@@ -18,7 +18,7 @@ def main():
     pgm: PGM = example.Rain()
-    pgm_cct: PGMCircuit = factor_elimination.compile_pgm(pgm)
+    pgm_cct: PGMCircuit = DEFAULT_PGM_COMPILER(pgm)
     wmc = WMCProgram(pgm_cct)
     sampler = wmc.sample_direct()

{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compiled-knowledge
-Version: 4.1.0a1
+Version: 4.1.0a3
 Summary: A Python package for compiling and querying discrete probabilistic graphical models.
 Author-email: Barry Drake <barry@compiledknowledge.org>
 License-Expression: MIT
@@ -13,6 +13,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 Requires-Dist: llvmlite
 Requires-Dist: numpy
+Requires-Dist: scipy
 Dynamic: license-file
 Compiled Knowledge

{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,17 @@
 ck_demos/all_demos.py,sha256=tqnMFbW6t1F4ksErf6QYTz9XtvbfayWl35lD3Bjm47E,2468
 ck_demos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ck_demos/dataset/demo_dataset_from_sampler.py,sha256=N2UDctHWePuUfJNWDnsd-UOSqeRfio6YQI21ZvyYhts,485
+ck_demos/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ck_demos/dataset/demo_dataset_builder.py,sha256=a9o-rw8PzpLq_5wtwjH0L15-eacbELlc7tfLrREJBqM,987
+ck_demos/learning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ck_demos/learning/demo_simple_learning.py,sha256=CZPzcsnTD8TK7nzGg3XUsx4exqggQXOT6UVwrV0ScF8,1483
+ck_demos/learning/demo_bayesian_network_from_cross_tables.py,sha256=sUpeUFI8WjveRo8Evz4woWyarrVN84E1DzYcM-2NOy0,2579
 ck_demos/circuit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ck_demos/circuit/demo_derivatives.py,sha256=6VwnW_Dbm2MWQFfJ46UQQFecV56QdfGpL7srthw5Py0,1143
 ck_demos/circuit/demo_circuit_dump.py,sha256=85x7UJV6cg6XVYU-PPsuKQVTBw5WZBfkhi6Avo9XbOs,436
 ck_demos/sampling/demo_uniform_sampler.py,sha256=zY5Kz97r43b1YvFz_4xNAeXvSpd7Kc2l0geZhWrz2no,924
 ck_demos/sampling/check_sampler.py,sha256=9Xy7oS3KnlNzcbdIU3bLnWlQ1SNo6S9hEp3TWoSM6C8,2035
-ck_demos/sampling/demo_wmc_direct_sampler.py,sha256=USz7vynHOEYUQgu5dJY-dG_Z_zNEDAfoYJ3VtX6uFmk,1073
+ck_demos/sampling/demo_wmc_direct_sampler.py,sha256=zLwygZ-LNZ_L47XM5czdhCDkj8m8dcq7eZyie-dtmiM,1065
 ck_demos/sampling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ck_demos/sampling/demo_marginal_direct_sampler.py,sha256=nv4smqYl1VhpB6pkF4L_aqnpVgVMcv3FrSvUkJ0EJz0,1109
 ck_demos/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -52,21 +58,28 @@ ck_demos/pgm_inference/demo_inferencing_mpe_cancer.py,sha256=hS9U2kyqjFgJ8jnVBtT
 ck_demos/pgm_inference/demo_inferencing_wmc_and_mpe_sprinkler.py,sha256=-q4Z1Fzf7-BuwVFTFXdGRY-zUNrY-SAU7ooaov2o_lM,5128
 ck_demos/getting_started/simple_demo.py,sha256=hiYscNnfkEwHCQ3ymXAswAYO5jAKR7cseb36pjzuus8,650
 ck_demos/getting_started/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ck/pgm.py,sha256=PsB2DboRtuiOrnbYGbYNOB-R2k94iET2o02UalKFy3I,117611
+compiled_knowledge-4.1.0a3.dist-info/RECORD,,
+compiled_knowledge-4.1.0a3.dist-info/WHEEL,sha256=V1loQ6TpxABu1APUg0MoTRBOzSKT5xVc3skizX-ovCU,136
+compiled_knowledge-4.1.0a3.dist-info/top_level.txt,sha256=Cf8DAfd2vcnLiA7HlxoduOzV0Q-8surE3kzX8P9qdks,12
+compiled_knowledge-4.1.0a3.dist-info/METADATA,sha256=GsnkOBTsYHaifQhlQzBzQdrpKIBNWLPhSNuC6sUjET8,1808
+compiled_knowledge-4.1.0a3.dist-info/licenses/LICENSE.txt,sha256=-LmkmqXKYojmS3zDxXAeTbsA82fnHA0KaRvpfIoEdjA,1068
+ck/pgm.py,sha256=EwKTWuYV9-0OfgJQfBw59MfGDLtxFe3wlgbYlkTqj1Y,117703
 ck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ck/pgm_circuit/target_marginals_program.py,sha256=qWz9FkAFzt8YHLZJzPkpRnvDH76BXm-dcEWhoqCkrOw,3665
 ck/pgm_circuit/slot_map.py,sha256=pqN0t5ElmUjR7SzvzldQwnO-jjRIz1rNZHH1PzE-V88,822
 ck/pgm_circuit/mpe_program.py,sha256=uDOykbBIbvvDQtxXOgBj6gzoehq1AfaQzZIWW3rMZnY,9990
 ck/pgm_circuit/program_with_slotmap.py,sha256=31Rgk4WoY7KW09L3TGySf1teYnf-ItvICTYEC17zB1w,7808
 ck/pgm_circuit/__init__.py,sha256=FctIFEYdL1pwxFMMEEu5Rwgq3kjPar-vJTqAmgIqb-I,36
-ck/pgm_circuit/marginals_program.py,sha256=E-L-4Rc2YLs3ndXIfXpTxUYGEFJG1_BkaZVDBs9gcgQ,14434
-ck/pgm_circuit/wmc_program.py,sha256=Btq7jUot-PodWXrgDFaE6zhUtr6GPUNF217CVLTaB70,12376
+ck/pgm_circuit/marginals_program.py,sha256=SOc31sxk_hNL0QgNQAbdYjVYRf0aOwsiHTh6CSyVsiM,14782
+ck/pgm_circuit/wmc_program.py,sha256=v7DLS2oq34uW5v99fvtadk8CbRSu7gipLA--DxtGSYo,12724
 ck/pgm_circuit/pgm_circuit.py,sha256=XBXANPREwp5Cl8P0x5XuG9besOJV5DjVxtNkqyt2DK8,3287
 ck/pgm_circuit/support/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ck/pgm_circuit/support/compile_circuit.py,sha256=XJFzi-BdFNTsdozRv0EHBM8cJ0SUZpbQwuTWONUzGck,3125
-ck/probability/probability_space.py,sha256=TTNSe6z40hs94kLBR_YHNjjRvBGVI86tza-CU2FKd9M,25482
+ck/probability/probability_space.py,sha256=fn_z3KWcRyBMF9XqoIE89Kij8-jpcmIjytGdnoNg2os,26125
+ck/probability/cross_table_probability_space.py,sha256=exaAVxzpQkqTmGIQx6ui64p6QTcy66IRYi5eWz6DFiE,1944
 ck/probability/pgm_probability_space.py,sha256=9al9sZk2LGvnTITvxS8x_ntabHKhaliUW-6JUeAEEl4,1231
 ck/probability/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ck/probability/divergence.py,sha256=l9mhHmCJQWNtY6Xf67ZCBeW1nry0B7-Jec6Tb99DP08,8258
 ck/probability/empirical_probability_space.py,sha256=Lp7_N_uNYq-W_S5caUC5ub9sTqaL-Vn4hudF0WYXPdU,2088
 ck/example/survey.py,sha256=ubjM8EP7aQMQbx7XFMaXvSYBOPuUDHeyG6wZIlRDqD8,1565
 ck/example/pathfinder.py,sha256=rQckvasnbzBYYESxngE_xbhyXxoJlELeiYc6Ghh7iFk,2257125
@@ -100,22 +113,25 @@ ck/example/diamond_square.py,sha256=ic8adEomQHMFlGQ3gMYGEja0LxEla8KEQKhET0XpULs,
 ck/example/rain.py,sha256=kLTU_9f_-_yy0ymPnS9-cbFVT6fYyCanDgszk3vQOgc,1187
 ck/example/cancer.py,sha256=-FnLbfb9yxriLl97N5BDZ0VrDZ5UnOWlT-Ep_tzO6QI,1698
 ck/dataset/dataset_compute.py,sha256=Bdxjl4c_0OttHgVWx-C3WdOI-imgupUQnnQVzNesPCw,5705
-ck/dataset/cross_table.py,sha256=KXhkAsMZBKc50V-kHDdj3sxGbEIiqdj72XqvGLCi6cs,9474
+ck/dataset/cross_table.py,sha256=-uBlzapzZ5SKB3Y2OdUs51syZZp4x9805NM3yfLJfk8,13014
 ck/dataset/__init__.py,sha256=QXCZWPHusMfXtl9bLPrIJP89ZnqWMz9KfdxScVrB3UQ,55
-ck/dataset/dataset.py,sha256=929eC2I8x4Nc3OF3sSsbeZ4xFQ-6CaMKPwv7emAVzjo,21121
-ck/dataset/dataset_from_crosstable.py,sha256=f-H9Q9G5HF6RRT1ltReuqg69HhnDcrKn8vJrAviyMkA,1278
+ck/dataset/dataset_builder.py,sha256=ewsz6znW_GtBvwsw6k9uXHT8yh_u6zQI5PFBZ_ykXlM,18873
+ck/dataset/dataset.py,sha256=iQGOqVrNll6QMPcRcV2phUbe0fCfpVmUVbcBIaqYx0s,25531
+ck/dataset/dataset_from_crosstable.py,sha256=rOdDFfb_2rnUJT3iZrLbZkeQcRJa5EzFVBs0hSdE57U,2281
 ck/dataset/sampled_dataset.py,sha256=Vcz2WN6IKdmK8DsFeXLten7Id3Kc2epC6qs2ZW7mvWU,3261
-ck/dataset/dataset_from_csv.py,sha256=wzDjGtMEgvrfrMRdddYhJJQ53QhMM6t9_nhrA7EpB3I,5551
-ck/dataset/cross_table_probabilities.py,sha256=exaAVxzpQkqTmGIQx6ui64p6QTcy66IRYi5eWz6DFiE,1944
-ck/learning/train_generative.py,sha256=_mXWcslgW1Tqfv1o0HhHDnU4CI7_KOUMdpxypQD3tQs,5551
+ck/dataset/dataset_from_csv.py,sha256=q4qjOsJFYAmw22xDaHcS6XC3nwqmkT-RoOaRNr_zJ8I,5802
+ck/learning/model_from_cross_tables.py,sha256=nB4dCsGgFtTMS-WaNvlJjzKCvRbs7uTUUxFQr5EcGG4,9083
 ck/learning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ck/circuit/_circuit_cy.c,sha256=EGE0-TkFq-i7AbXZEqs04yxFwdez8u-RmSy-j7Siskk,1704292
+ck/learning/parameters.py,sha256=x5yP-zxkpm0HfBusOxK5vImUnYanUJeZUjxgOwKNVAc,4388
+ck/learning/coalesce_cross_tables.py,sha256=dQcHXasQU1bEmTCLuiuCVDHEIRt77BNbFp1E5y-qfaI,12926
+ck/learning/train_generative_bn.py,sha256=hwmhbg4RKh3JvDlG7xOJm1apScXJ1Mmfgu4nasM-cwQ,8019
+ck/circuit/_circuit_cy.c,sha256=aUtvxUkF49UVCUYoFIuvdVRUgW9iBIBorKLciPlRaMA,1704292
 ck/circuit/_circuit_cy.pyx,sha256=mER1HK5yyf4UAj9ibn7fUQNyXwoxwxp7PClULPhY9B4,26995
 ck/circuit/__init__.py,sha256=B1jwDE_Xb6hOQE8DecjaTVotOnDxJaT7jsvPfGDXqCU,401
 ck/circuit/_circuit_cy.pxd,sha256=ZcW8xjw4oGQqD5gwz73GXc1H8NxpdAswFWzc2CUWWcA,1025
 ck/circuit/_circuit_py.py,sha256=hADjCFDC1LJKUdyiKZzNLFt7ZkUNJ0IYwEYRj594K4g,27495
 ck/circuit/tmp_const.py,sha256=q01bkIvTEg1l-qFcfl2B8NrSzKlqcWU7McNm4HKv7bU,2300
-ck/circuit/_circuit_cy.cpython-312-darwin.so,sha256=vgIJ6FY91q20TS23uceX0XnuEtp8r2_pzxyhys0xyNc,335296
+ck/circuit/_circuit_cy.cpython-312-darwin.so,sha256=vHyDzjVhhIfFWG3m4vFqZkdh09lq25c22SUOcIVrDtE,335296
 ck/sampling/wmc_metropolis_sampler.py,sha256=jfXb-MG0jAoMyepgq9zel2amqK-gmYrCtKuxJStl8VY,6305
 ck/sampling/wmc_direct_sampler.py,sha256=Pkv-u4GjN3npBrcQ92raoTrEIel1vpiDoE8LrlcfYJE,7094
 ck/sampling/sampler_support.py,sha256=AD47QPXlXSTiy7Jm-adD6US3cYeSwBimOY2jB5b2qn4,9534
@@ -148,8 +164,8 @@ ck/pgm_compiler/support/named_compiler_maker.py,sha256=Qz8a9gwY46Q3dtRCZEZ2czq5z
 ck/pgm_compiler/support/circuit_table/__init__.py,sha256=1kWjAZR5Rj6PYNdbCEbuyE2VtIDQU4Qf-3HPFzBlezs,562
 ck/pgm_compiler/support/circuit_table/_circuit_table_cy.pyx,sha256=Fsjw8P9clKQioqlLyr1JirUK5oYkeotpDMy5sMo7Khk,11683
 ck/pgm_compiler/support/circuit_table/_circuit_table_py.py,sha256=OZJC-JGX3ovCSv7nJtNYq7735KZ2eb4TQOlZdZbhPmk,10983
-ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c,sha256=wLdeh90m24Mp9Vo_-rHra8HRlIRg_-AIzlr1vof5abE,714044
-ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cpython-312-darwin.so,sha256=7CKogglc_wM9o6w6t4UlrPGeA2pPq-qPC-RqM2V6-IY,165096
+ck/pgm_compiler/support/circuit_table/_circuit_table_cy.c,sha256=Ga_LmT5hlGc_mn8muLefX9zdt3vXEMiZDUxtMOLTKFU,714044
+ck/pgm_compiler/support/circuit_table/_circuit_table_cy.cpython-312-darwin.so,sha256=AupPKDwtObM-KjI_dNhVC46hsCE3lKCNU86ArZJKbFo,165096
 ck/pgm_compiler/ace/ace.py,sha256=An83dHxE_gQFcEs6H5qgm0PlNFnJSGGuvLJNC2H3hGU,10098
 ck/pgm_compiler/ace/__init__.py,sha256=5HWep-yL1Mr6z5VWEaIYpLumCdeso85J-l_-hQaVusM,96
 ck/program/raw_program.py,sha256=U7kLBCSLtP1CfG09RrzmGo7E3sZdNr7wr2V1qkTfVGc,4106
@@ -165,15 +181,15 @@ ck/circuit_compiler/llvm_vm_compiler.py,sha256=rM_6F5st3k9X5K1_MwzKJwDhQo1794voo
 ck/circuit_compiler/cython_vm_compiler/cython_vm_compiler.py,sha256=GdtBkipud8vylXYArOJvZ-10U9L_PL0oJrkyrnFGH2Q,4345
 ck/circuit_compiler/cython_vm_compiler/__init__.py,sha256=ks0sISOJ-XHIHgHnESyFsheNWvcSJQkbsrj1wVlnzTE,48
 ck/circuit_compiler/cython_vm_compiler/_compiler.pyx,sha256=RssdkoAcB3Ahes8xisqFy0PQyOPmC3GLEC2xR-miQaE,12898
-ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-312-darwin.so,sha256=y4HfSLn8gXs9J5nG85cRYRTVSoTiCDG4XUe4DIOkeng,163488
-ck/circuit_compiler/cython_vm_compiler/_compiler.c,sha256=GWW9MMB1dCN2Mc4YBszRtsmAxsTuJ3Mf1TxzuSuByLE,857789
+ck/circuit_compiler/cython_vm_compiler/_compiler.cpython-312-darwin.so,sha256=2wyk0kvPM3Fh5jhKb4vYxEjNXmugaqPSDMwsVA7Ss9c,163488
+ck/circuit_compiler/cython_vm_compiler/_compiler.c,sha256=BQYbYiuXvCtEBoKnQZIRfX7pMcCsZ0zTg1BVrGxMDc0,857789
 ck/circuit_compiler/support/llvm_ir_function.py,sha256=sMLKfwz90YcsrVyxsuY0Ymo1ibFOcul4Qiwdake-VkI,8321
 ck/circuit_compiler/support/input_vars.py,sha256=EZrvyhD9XVtf5GuDBluFNWhAOVixP7-_ETxAHLTpBcs,4664
 ck/circuit_compiler/support/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.pyx,sha256=a0fKmkwRNscJmy6qoO2AOqJYmHYptrQmkRSrDg3G-wg,3233
 ck/circuit_compiler/support/circuit_analyser/__init__.py,sha256=WhNwfg7GHVeI4k_m7owPGWxX0MyZg_wtcp2MA07qbWg,523
-ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-312-darwin.so,sha256=h-xNWP4PJgtB6E_CsiYvP9seLXYkdFit_sixZvmJpEA,104936
-ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c,sha256=MQoSbJVg537-Uexx2X-9WGPs3xt4xO07ehW5NZQ_2R8,438223
+ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.cpython-312-darwin.so,sha256=uJDX2rKQ7KKz5JJ2SHRfAH2c8ZA_fJIXn_l688fFUAo,104936
+ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_cy.c,sha256=R9IDuwbPkxUv9Xne8j_ulU0Dd3eXhY2hq5I1XmjnbUk,438223
 ck/circuit_compiler/support/circuit_analyser/_circuit_analyser_py.py,sha256=CMdXV6Rot5CCoK1UsurQdGK0UOx_09B6V7mCc_6-gfI,2993
 ck/in_out/render_net.py,sha256=VePvN6aYWuzEkW-Hv-qGT9QneOvsnrBMmS_KYueuj2I,4970
 ck/in_out/render_bugs.py,sha256=c39KbaD4gEiauFsZq2KUhDEEa-3cuY5kuvz97pEWVpw,3272
@@ -185,8 +201,3 @@ ck/in_out/render_pomegranate.py,sha256=tU7iDHkLWTJyFrxPa2LbZnD06qia8mG2FGi0aZAKu
 ck/in_out/parse_ace_nnf.py,sha256=faTCrCeuc-m_EUHNJFAg9IItZJ77-bvPzSbLYBeDFaw,13028
 ck/in_out/pgm_pickle.py,sha256=UhGCDHGVNEDtTwZjcCWyUWw00YXVgBGxek4fbBvLExs,962
 ck/in_out/pgm_python.py,sha256=6dnF9gzVdMrY0kkdsPg-ryVBwfmAo4bKoTwx17ociGg,9824
-compiled_knowledge-4.1.0a1.dist-info/RECORD,,
-compiled_knowledge-4.1.0a1.dist-info/WHEEL,sha256=V1loQ6TpxABu1APUg0MoTRBOzSKT5xVc3skizX-ovCU,136
-compiled_knowledge-4.1.0a1.dist-info/top_level.txt,sha256=Cf8DAfd2vcnLiA7HlxoduOzV0Q-8surE3kzX8P9qdks,12
-compiled_knowledge-4.1.0a1.dist-info/METADATA,sha256=2MW6jH05yr21k1jcKhFjlBQs4bVg4GtByq6zrhtzzDU,1787
-compiled_knowledge-4.1.0a1.dist-info/licenses/LICENSE.txt,sha256=-LmkmqXKYojmS3zDxXAeTbsA82fnHA0KaRvpfIoEdjA,1068

ck/learning/train_generative.py DELETED Viewed

@@ -1,149 +0,0 @@
-from dataclasses import dataclass
-from typing import Dict, Tuple, List
-import numpy as np
-from ck.dataset import SoftDataset, HardDataset
-from ck.dataset.cross_table import CrossTable, cross_table_from_dataset
-from ck.pgm import PGM, Instance, DensePotentialFunction, Shape, natural_key_idx, SparsePotentialFunction
-from ck.utils.iter_extras import multiply
-from ck.utils.np_extras import NDArrayFloat64
-@dataclass
-class ParameterValues:
-    """
-    A ParameterValues object represents learned parameter values of a PGM.
-    """
-    pgm: PGM
-    """
-    The PGM that the parameter values pertains to.
-    """
-    cpts: List[Dict[Instance, NDArrayFloat64]]
-    """
-    A list of CPTs co-indexed with `pgm.factors`. Each CPT is a dict
-    mapping from instances of the parent random variables (of the factors)
-    to the child conditional probability distribution (CPD).
-    """
-    def set_zero(self) -> None:
-        """
-        Set the potential function of each PGM factor to zero.
-        """
-        for factor in self.pgm.factors:
-            factor.set_zero()
-    def set_cpt(self) -> None:
-        """
-        Set the potential function of each PGM factor to a CPTPotentialFunction,
-        using our parameter values.
-        """
-        for factor, cpt in zip(self.pgm.factors, self.cpts):
-            factor.set_cpt().set(*cpt.items())
-    def set_dense(self) -> None:
-        """
-        Set the potential function of each PGM factor to a DensePotentialFunction,
-        using our parameter values.
-        """
-        for factor, cpt in zip(self.pgm.factors, self.cpts):
-            pot_function: DensePotentialFunction = factor.set_dense()
-            parent_shape: Shape = factor.shape[1:]
-            child_state: int
-            value: float
-            if len(parent_shape) == 0:
-                cpd: NDArrayFloat64 = cpt[()]
-                for child_state, value in enumerate(cpd):
-                    pot_function[child_state] = value
-            else:
-                parent_space: int = multiply(parent_shape)
-                parent_states: Instance
-                cpd: NDArrayFloat64
-                for parent_states, cpd in cpt.items():
-                    idx: int = natural_key_idx(parent_shape, parent_states)
-                    for value in cpd:
-                        pot_function[idx] = value
-                        idx += parent_space
-    def set_sparse(self) -> None:
-        """
-        Set the potential function of each PGM factor to a SparsePotentialFunction,
-        using our parameter values.
-        """
-        for factor, cpt in zip(self.pgm.factors, self.cpts):
-            pot_function: SparsePotentialFunction = factor.set_sparse()
-            parent_states: Instance
-            child_state: int
-            cpd: NDArrayFloat64
-            value: float
-            for parent_states, cpd in cpt.items():
-                for child_state, value in enumerate(cpd):
-                    key = (child_state,) + parent_states
-                    pot_function[key] = value
-def train_generative_bn(
-        pgm: PGM,
-        dataset: HardDataset | SoftDataset,
-        *,
-        dirichlet_prior: float = 0,
-        check_bayesian_network: bool = True,
-) -> ParameterValues:
-    """
-    Maximum-likelihood, generative training for a Bayesian network.
-    Args:
-        pgm: the probabilistic graphical model defining the model structure.
-            Potential function values are ignored and need not be set.
-        dataset: a dataset of random variable states.
-        dirichlet_prior: a real number >= 0. See `CrossTable` for an explanation.
-        check_bayesian_network: if true and not pgm.is_structure_bayesian an exception will be raised.
-    Returns:
-        a  ParameterValues object that can be used to update the parameters of the given PGM.
-    Raises:
-        ValueError: if the given PGM does not have a Bayesian network structure, and check_bayesian_network is True.
-    """
-    if check_bayesian_network and not pgm.is_structure_bayesian:
-        raise ValueError('the given PGM is not a Bayesian network')
-    cpts: List[Dict[Instance, NDArrayFloat64]] = [
-        cpt_from_crosstab(cross_table_from_dataset(dataset, factor.rvs, dirichlet_prior=dirichlet_prior))
-        for factor in pgm.factors
-    ]
-    return ParameterValues(pgm, cpts)
-def cpt_from_crosstab(crosstab: CrossTable) -> Dict[Instance, NDArrayFloat64]:
-    """
-    Make a conditional probability table (CPT) from a cross-table.
-    Args:
-        crosstab: a CrossTable representing the weight of unique instances.
-    Returns:
-        a mapping from instances of the parent random variables to the child
-        conditional probability distribution (CPD).
-    Assumes:
-        the first random variable in `crosstab.rvs` is the child random variable.
-    """
-    # Number of states for the child random variable.
-    child_size: int = len(crosstab.rvs[0])
-    # Get distribution over child states for seen parent states
-    parents_weights: Dict[Instance, NDArrayFloat64] = {}
-    for state, weight in crosstab.items():
-        parent_state: Tuple[int, ...] = state[1:]
-        child_state: int = state[0]
-        parent_weights = parents_weights.get(parent_state)
-        if parent_weights is None:
-            parents_weights[parent_state] = parent_weights = np.zeros(child_size, dtype=np.float64)
-        parent_weights[child_state] += weight
-    # Normalise
-    for parent_state, parent_weights in parents_weights.items():
-        parent_weights /= parent_weights.sum()
-    return parents_weights

/ck/{dataset/cross_table_probabilities.py → probability/cross_table_probability_space.py} RENAMED Viewed

File without changes

{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/WHEEL RENAMED Viewed

File without changes

{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

{compiled_knowledge-4.1.0a1.dist-info → compiled_knowledge-4.1.0a3.dist-info}/top_level.txt RENAMED Viewed

File without changes