PyPI - pygms - Versions diffs - 0.4.0__tar.gz - Mend

pygms 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

pygms-0.4.0/.gitignore +71 -0
pygms-0.4.0/PKG-INFO +45 -0
pygms-0.4.0/README.md +28 -0
pygms-0.4.0/pygms/.RData +0 -0
pygms-0.4.0/pygms/.Rhistory +8 -0
pygms-0.4.0/pygms/.swo +0 -0
pygms-0.4.0/pygms/__init__.py +28 -0
pygms-0.4.0/pygms/_notes.txt +130 -0
pygms-0.4.0/pygms/_profile.py +40 -0
pygms-0.4.0/pygms/_rbm.py +321 -0
pygms-0.4.0/pygms/_todo.txt +131 -0
pygms-0.4.0/pygms/causal.py +42 -0
pygms-0.4.0/pygms/csp.py +385 -0
pygms-0.4.0/pygms/data/__init__.py +23 -0
pygms-0.4.0/pygms/data/__init__.py.bak +23 -0
pygms-0.4.0/pygms/data/catalog.py +234 -0
pygms-0.4.0/pygms/data/catalog.py.bak +234 -0
pygms-0.4.0/pygms/data/sources.json +9 -0
pygms-0.4.0/pygms/decisions.py +199 -0
pygms-0.4.0/pygms/development.py +39 -0
pygms-0.4.0/pygms/draw.py +366 -0
pygms-0.4.0/pygms/factor.py +15 -0
pygms-0.4.0/pygms/factorGauss.py +344 -0
pygms-0.4.0/pygms/factorNumpy.py +718 -0
pygms-0.4.0/pygms/factorSparse.py +698 -0
pygms-0.4.0/pygms/factorTorch.py +758 -0
pygms-0.4.0/pygms/filetypes.py +771 -0
pygms-0.4.0/pygms/graphmodel.py +798 -0
pygms-0.4.0/pygms/indexedheap.py +109 -0
pygms-0.4.0/pygms/ising.py +698 -0
pygms-0.4.0/pygms/jupyter.py +223 -0
pygms-0.4.0/pygms/learning.py +246 -0
pygms-0.4.0/pygms/messagepass.py +226 -0
pygms-0.4.0/pygms/misc.py +358 -0
pygms-0.4.0/pygms/montecarlo.py +513 -0
pygms-0.4.0/pygms/regiongraph.py +223 -0
pygms-0.4.0/pygms/search.py +282 -0
pygms-0.4.0/pygms/search1.py +680 -0
pygms-0.4.0/pygms/search2.py +324 -0
pygms-0.4.0/pygms/search_sum.py +402 -0
pygms-0.4.0/pygms/varset_py.py +86 -0
pygms-0.4.0/pygms/varset_py2.py +152 -0
pygms-0.4.0/pygms/weighted.py +380 -0
pygms-0.4.0/pygms/wmb.py +733 -0
pygms-0.4.0/pygms/wogm.py +312 -0
pygms-0.4.0/pyproject.toml +38 -0

pygms-0.4.0/.gitignore ADDED Viewed

@@ -0,0 +1,71 @@
+notes.txt
+*.swp
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+#Ipython Notebook
+.ipynb_checkpoints
+#VSCode
+.vscode
+# Mac osX meta data
+.DS_Store

pygms-0.4.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,45 @@
+Metadata-Version: 2.4
+Name: pygms
+Version: 0.4.0
+Summary: Python Graphical Models Toolbox
+Author-email: Alexander Ihler <ihler@ics.uci.edu>
+License-Expression: BSD-2-Clause
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Requires-Python: >=3.6
+Requires-Dist: matplotlib>=3.2
+Requires-Dist: networkx>=2.5
+Requires-Dist: numpy>=1.18
+Requires-Dist: scipy>=1.4
+Requires-Dist: sortedcontainers>=1.5.7
+Description-Content-Type: text/markdown
+pyGMs : A Python toolbox for Graphical Models
+================
+This code provides a simple Python-based interface for defining probabilistic
+graphical models (Bayesian networks, factor graphs, etc.) over discrete random
+variables, along with a number of routines for approximate inference.  It is
+being developed for use in teaching, as well as prototyping for research.
+The code currently uses [NumPy](http://www.numpy.org/) for representing and
+operating on the table-based representation of discrete factors, and
+[SortedContainers](https://pypi.python.org/pypi/sortedcontainers) for some
+internal representations.  Smaller portions use [networkx](https://networkx.org/)
+and [scipy](https://www.scipy.org/) as well.
+## Installation
+Simply download or clone the repository to a directory *pyGMs*, and add its
+parent directory to your Python path, either:
+```
+$ export PYTHONPATH=${PYTHONPATH}:/directory/containing/
+```
+or in Python
+```
+import sys
+sys.path.append('/directory/containing/')
+```

pygms-0.4.0/README.md ADDED Viewed

@@ -0,0 +1,28 @@
+pyGMs : A Python toolbox for Graphical Models
+================
+This code provides a simple Python-based interface for defining probabilistic
+graphical models (Bayesian networks, factor graphs, etc.) over discrete random
+variables, along with a number of routines for approximate inference.  It is
+being developed for use in teaching, as well as prototyping for research.
+The code currently uses [NumPy](http://www.numpy.org/) for representing and
+operating on the table-based representation of discrete factors, and
+[SortedContainers](https://pypi.python.org/pypi/sortedcontainers) for some
+internal representations.  Smaller portions use [networkx](https://networkx.org/)
+and [scipy](https://www.scipy.org/) as well.
+## Installation
+Simply download or clone the repository to a directory *pyGMs*, and add its
+parent directory to your Python path, either:
+```
+$ export PYTHONPATH=${PYTHONPATH}:/directory/containing/
+```
+or in Python
+```
+import sys
+sys.path.append('/directory/containing/')
+```

pygms-0.4.0/pygms/.RData ADDED Viewed

Binary file

pygms-0.4.0/pygms/.Rhistory ADDED Viewed

@@ -0,0 +1,8 @@
+# Interactive session history: a single query to the 'causaleffect' package.
+# Load the graph and causal-inference libraries, hiding their startup messages.
+suppressMessages(library('igraph'))
+suppressMessages(library('causaleffect'))
+# Build a directed graph over W, X, Y, R; simplify=FALSE keeps the paired W<->X and W<->Y edges.
+s <- graph.formula(W -+ X,X -+ W,W -+ Y,Y -+ W,W -+ R,R -+ X,X -+ Y, simplify=FALSE)
+# Tag edges 1..4 with description 'U' (presumably marks the paired edges as latent confounders -- confirm).
+s <- set_edge_attr(s, 'description', 1:4, 'U')
+# Ask causaleffect for the causal effect of X on Y in graph s.
+causal.effect('Y', c("X"), G = s)
+quit()

pygms-0.4.0/pygms/.swo ADDED Viewed

Binary file

pygms-0.4.0/pygms/__init__.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""
+pyGMs: Python Graphical Model code
+A simple graphical model class for learning about, testing, and developing algorithms
+for graphical models.
+Version 0.4.0 (2026-03-31)
+(c) 2015-2026 Alexander Ihler under the FreeBSD license; see license.txt for details.
+"""
+from sortedcontainers import SortedSet as sset;
+from .factor import *
+#from .factorSparse import *
+from .graphmodel import *
+from .filetypes import *
+from .misc import *
+from .draw import *
+# Package metadata exposed as module-level attributes (version matches the module docstring).
+__title__ = 'pygms'
+__version__ = '0.4.0'
+__author__ = 'Alexander Ihler'
+__license__ = 'BSD'
+__copyright__ = '2015-2026, Alexander Ihler'

pygms-0.4.0/pygms/_notes.txt ADDED Viewed

@@ -0,0 +1,130 @@
+General principles:
+(1) GraphModel & its inheritors are containers for collections of factors (functions over a few variables each)
+(2) Variables are assumed to be X0...Xn, although some may be unused.  When the dimension of Xi is known/not required,
+    the index itself can be used instead.
+(3) Configurations (x0...xn) can be represented in different ways, depending on the circumstances:
+    * A map {0:x0, 1:x1, ...}, with unspecified values of a partial configuration left out
+    * An nparray, tuple, or list [x0,x1,...xn], with missing values (if allowed) set to NaN
+    * A collection of data X=[xa,xb,xc...] with xa=[xa0,...xan], so that X[i] is the ith data point
+      * This can be a list of lists or tuples, or a 2D numpy array
+    * We can detect whether a single data point or multiple are specified using try: next(iter(next(iter(X))))
+    * Some functions only accept single data (should check & error), others expect multiple, or can take either.
+(4) All data are expected to take values 0...d-1, for discrete variables with d states, even e.g. "Ising" models.
+(5) "isLog" indicates whether the model consists of log-factors: G(x) = \sum_a g_a(x_a),
+    or exp-factors: F(x) = \prod_a f_a(x_a).  Functions which make use of the joint value (log-likelihood, etc.)
+    or multiple factors in combination (e.g., variable elimination) use this in computing their quantities.
+    The model representation may be switched using "toLog" and "toExp".
+(6) Sampling functions should return config-value pairs:  x,lnq where x \sim q(X) and lnq = log(q(x)).
+(7) Training functions take the form "fit_method" for methods that estimate graph structure, and "refit_method"
+    for methods that preserve the current factors/cliques and simply update the parameters.
+(8) By default, GraphModel makes a copy of each factor during construction.  This is because some functions
+    (such as refit or reparameterization inference) may change the values of the factor tables.  If this is not
+    desired, "gm = GraphModel(factors, copy=False)" will use references to the factors (useful for memory sharing,
+    for example), but set gm.lock = True to indicate that functions should not alter these factors.
+    The function "gm.copy()" returns a new copy of the model with non-const factors.
+(9) Some functions (conditioning, manual factor additions, etc.) may alter the structure / cliques of the model as
+    well. Iterative methods (message passing inference, etc.) can check "gm.sig" to make sure the structure has
+    not changed between iterations to ensure validity, and may raise an exception if it does.
+    (TODO: add another lock flag for structure; functions should check before altering structure.)
+===============================================================================
+*** Version that can use torch tensors?
+   * Allow for "direct" optimization of various quantities?
+*** Data representation !!!
+   * Want x[i] to be data point i?
+   * Want to be able to pass lists of tuples, or a single tuple?  (No single tuples?) (Convert to arrays?)
+     * If "not 2D" convert before operation?  (Note: some operations can only take single tuples)
+   * Also want to be able to access X[:,j] = {x_j for all data}
+   * Want consistent with standard torch forms
+*** Exact inference
+    * Sensitivity analysis?
+      * Exp family form: dLogZ/dTheta = E[X]
+      * "BN" form: specify p(A=a|E=e) and x=p(Xi=xi|Xpai=xpai) ?
+         * all-to-one version requires BN form and bnOrder, and "one" p(A=a|E=e)
+         * one-to-all version requires p(Xi=xi|Xpai=xpai); BN can be unnormalized
+    * BN specialized functions?  Evidence pruning?
+    * CSP specialized functions?
+*** Sampling
+    * sampling function returns  config, logP = sample( [partial?] )
+    * MCMC returns config, logP = sample( startcfg )
+    *** Maybe make a generator object? "yield"?
+    * Method to "aggregate" samples? ("Query" object...)  => generic "estimate marginal" f'ns, etc?
+      * Useful for repeated / cts improvement inference  (save state)
+      * QueryMarginals (list cliques); QueryExpectations (list f'ns); QueryHistory (log all)
+      * Basic Gibbs (two versions)
+      ** Structured gibbs? Sets to sample; generate conditioned sub-models; VE-sample? (In-place slices: efficient?)
+      * MH: any common proposals?
+    TODO?
+      * Importance sampling (several? WMB, Tree BP, MF?)
+      * Annealed IS
+      * Estimators? Discriminance sampling?
+*** Search
+    * Pseudo-tree object
+    * Heuristic f'n: takes partial config, returns cost-to-go of internal PTree given config
+    * Next variable f'n: given partial config, what are the next vars to condition on?
+    * Node priority f'n: when adding node, when should we re-examine?
+    * Nodes link to heuristic, priority f'n so they can modify / be dynamic?  PFunc can use heuristic?
+    * Search algos:
+        * Basic DFS
+          * A/O DFS? (Rotating?)
+        * Best-First / A*
+          * A/O A*? Mem limited A*?
+*** Variational
+    * Various forms of DD?  (SoftArc done/easy; others?)
+    * Basic BP algo?  Fancier (scheduling, etc)?  Region models?
+    * NMF done; structured MF?
+    * WMB:
+       * Basic incremental MB: build; msg pass, merge; msg pass, merge; ...
+       * Sholeh algorithm?
+??? Iterative algorithms, use yield or other structures to run iterations?
+    * Can verify no structural changes, etc;
+    * Reparameterization vs message forms
+    ** "General" outer loop that checks for timeout, various convergence conditions?  Or make for each algo?
+*** Learning
+    * CRF representation? (More general factor representations?)
+    * EM? Stochastic EM?
+*** Structure learning
+   * Several Ising methods (clean up)
+   * Independence tests
+   * Non-ising group lasso, etc.
+   * BN stochastic search
+   * BN ILP method
+   * ...
+*** Special models: Ising models (clean?), RBM/CRBM,
+*** Other people's algorithms?
+    * Vibhav's: sample, assemble sparse JTree until memory limit, solve
+      * Need to "decompose" proposal along graph structure?
+    * Maua's: solve preserving lists of factors / configs?
+    * Model conversions? Binary, pairwise, etc?

pygms-0.4.0/pygms/_profile.py ADDED Viewed

@@ -0,0 +1,40 @@
+## Useful code for profiling execution speed, etc.
+try:
+    from line_profiler import LineProfiler
+    def do_profile(follow=[]):
+        def inner(func):
+            def profiled_func(*args, **kwargs):
+                try:
+                    profiler = LineProfiler()
+                    profiler.add_function(func)
+                    for f in follow:
+                        profiler.add_function(f)
+                    profiler.enable_by_count()
+                    return func(*args, **kwargs)
+                finally:
+                    profiler.print_stats()
+            return profiled_func
+        return inner
+except ImportError:
+    def do_profile(follow=[]):
+        "Helpful if you accidentally leave in production!"
+        def inner(func):
+            def nothing(*args, **kwargs):
+                return func(*args, **kwargs)
+            return nothing
+        return inner
+def get_number():
+    for x in xrange(5000000):
+        yield x
+# To profile the function, decorate e.g.:
+#@do_profile(follow=[get_number])
+#def __init__(self,model,elimOrder,force_or=False,max_width=None):

pygms-0.4.0/pygms/_rbm.py ADDED Viewed

@@ -0,0 +1,321 @@
+import numpy as np
+import copy
+from .base import classifier
+from .base import regressor
+from .utils import toIndex, fromIndex, to1ofK, from1ofK
+from numpy import asarray as arr
+from numpy import atleast_2d as twod
+from numpy import asmatrix as mat
+from scipy.special import expit
+import matplotlib.pyplot as plt
+# TODO:
+#  (1) Gibbs / CD variants (multiple chains, averaging, rao-blackwell estimates
+#  (2) logZ estimates: brute force enumeration, AIS estimate, loopyBP estimate, others?
+#  (3) loss functions: reconstruction errors (mse, logP, etc.); (approx) data likelihood; FE difference; others?
+#  (4) helpers? logsumexp? see wei & ruslan's code?
+#  (5) deep versions; variable numbers of layers?  (specialize first)
+#       - simple if use functions taking Wvh, bv+Wvx*X, bh+Whx*X?  each layer has Wlx, bl terms, + Wll' terms?
+################################################################################
+## BASIC RBM    ################################################################
+################################################################################
+def _add1(X):
+    return np.hstack( (np.ones((X.shape[0],1)),X) )
+def _sigma(z):
+    return expit(z);
+    #return 1.0/(1.0+np.exp(-z))
+class crbm(object):
+    """A restricted Boltzmann machine
+    Attributes:
+    """
+    def __init__(self, nV,nH,nX, Wvh=None,bh=None,bv=None, Wvx=None,Whx=None):
+        """Constructor for a (conditional) restricted Boltzmann machine
+        Parameters:
+	  nV : # of visible nodes (observable data)
+	  nH : # of hidden nodes (latent variables)
+	  nX : # of always-observed conditioning variables
+	  Wvh, Wvx, Whx : pairwise weights (default: initialize randomly)
+	  bh,bv : bias parameters (default: initialize to zero)
+        """
+        if Wvh is None: Wvh = np.random.rand(nV,nH) * .001
+        if Wvx is None: Wvx = np.random.rand(nV,nX) * .001
+        if Whx is None: Whx = np.random.rand(nH,nX) * .001
+        if bh  is None: bh  = np.zeros((nH,))
+        if bv  is None: bv  = np.zeros((nV,))
+        self.Wvh = Wvh
+        self.Wvx = Wvx
+        self.Whx = Whx
+        self.bv = bv
+        self.bh = bh
+    def __repr__(self):
+        to_return = 'Restricted Boltzmann machine, VxH={}x{}'.format(self.Wvh.shape[0],self.Wvh.shape[1])
+        return to_return
+    def __str__(self):
+        to_return = 'Restricted Boltzmann machine, VxH={}x{}'.format(self.W.shape[0],self.W.shape[1])
+        return to_return
+    def nLayers(self):
+        return 1
+    @property
+    def layers(self):
+        """Return list of layer sizes, [N,H1,H2,...]
+          N = # of input features ("V")
+          Hi = # of hidden nodes in layer i ("H")
+        """
+        layers = [self.Wvh.shape[0], self.Wvh.shape[1]]
+        #if len(self.wts):
+        #    layers = [self.W.shape[0], self.W.shape[1]]
+        #    #layers = [self.wts[l].shape[1] for l in range(len(self.wts))]
+        #    #layers.append( self.wts[-1].shape[0] )
+        #else:
+        #    layers = []
+        return layers
+    @layers.setter
+    def layers(self, layers):
+        raise NotImplementedError
+    # adapt / change size of weight matrices (?)
+## CORE METHODS ################################################################
+    # todo:  CD, BP; persistent CD?  make BP persistent?  others?
+    #     :  estimate marginal likelihood in various ways?
+    def marginals():
+        raise NotImplementedError
+    def marg_h(self, v, bh=None):
+        if bh is None: bh = self.bh
+        th = _sigma( v.dot(self.Wvh) + bh )  ## !!! regular rbm vs crbm?
+        return th
+    #@profile
+    def marg_bp(self, maxiter=100, bv=None,bh=None,stoptol=1e-6):
+        '''Estimate the singleton & pairwise marginals using belief propagation'''
+        Wvh = self.Wvh        # pass in bv, bh to enable Whx etc?
+        if bv is None: bv = self.bv
+        if bh is None: bh = self.bh
+        Mvh = np.empty(Wvh.shape); Mvh.fill(0.5);
+        Mhv = Mvh.T.copy()
+        tv, th = _sigma(self.bv), _sigma(self.bh)
+        tvOld = 0*tv;
+        for t in range(maxiter):
+          # h to v:
+          Lvh1 = (1 - Mhv).T * th   #Lvh1 = (1 - Mhv).T.dot( np.diag(th) )
+          Lvh2 = Mhv.T * ( 1-th )   #Lvh2 = Mhv.T.dot( np.diag( 1-th ) )
+          Mvh = _sigma( np.log( (np.exp(Wvh)*Lvh1 + Lvh2)/(Lvh1+Lvh2) ) )
+          tv  = _sigma( bv + np.log( Mvh/(1-Mvh) ).sum(1) )
+          if np.max(np.abs(tv-tvOld)) < stoptol: break;
+          # v to h:
+          Lhv1 = (1 - Mvh).T * tv  #Lhv1 = (1 - Mvh).T.dot( np.diag(tv) )
+          Lhv2 = Mvh.T * (1-tv)    #Lhv2 = Mvh.T.dot( np.diag(1-tv) )
+          Mhv  = _sigma( np.log( (np.exp(Wvh.T)*Lhv1+Lhv2)/(Lhv1+Lhv2) ) )
+          th   = _sigma( bh + np.log( Mhv/(1-Mhv) ).sum(1) )
+        Gsum = np.outer( 1-tv, 1-th ) * Mvh * Mhv.T
+        Gsum+= np.outer( tv, 1-th)*(1-Mvh)*Mhv.T
+        Gsum+= np.outer(1-tv,th)*Mvh*(1-Mhv.T)
+        G    = np.exp(Wvh)*np.outer(tv,th)*(1-Mvh)*(1-Mhv.T)
+        G   /= (Gsum+G)
+        return G,tv,th
+    def marg_cd(self, nstep=1,vinit=None, bv=None,bh=None, nchains=1):
+        '''Estimate the singleton & pairwise marginals using gibbs sampling (for contrastive divergence)'''
+        Wvh = self.Wvh        # pass in bv, bh to enable Whx etc?
+        if bv is None: bv = self.bv
+        if bh is None: bh = self.bh
+        if vinit is None: raise NotImplementedError;  # todo: init using p(v)
+        G,tv,th = 0,0,0
+        for c in range(nchains):
+          v = vinit;
+          for s in range(nstep):
+            ph = 1 / (1+np.exp(-v.dot(Wvh)-bh));
+            h  = (np.random.rand(*ph.shape) < ph);
+            pv = 1 / (1+np.exp(-Wvh.dot(h)-bv));
+            v  = (np.random.rand(*pv.shape) < pv);
+          tv += v; th += h; G += np.outer(v,h);
+        return G/nchains,tv/nchains,th/nchains
+        # TODO: variants: use p(h|v), or use all K samples
+    def nll_gap(self, Xtr,Ytr, Xva,Yva):
+        fe = np.mean( np.sum(Ytr*(self.bv + Xtr.dot(self.Wvx.T)),1) +
+               np.sum(np.log(1.0+np.exp( Ytr.dot(self.Wvh) + Xtr.dot(self.Whx.T) + self.bh ) ),1) )
+        fe-= np.mean( np.sum(Yva*(self.bv + Xva.dot(self.Wvx.T)),1) +
+               np.sum(np.log(1.0+np.exp( Yva.dot(self.Wvh) + Xva.dot(self.Whx.T) + self.bh ) ),1) )
+        return fe
+    def err(self,X,Y):
+        Y    = arr( Y )
+        Yhat = arr( self.predict(X) )
+        return np.mean(Yhat.reshape(Y.shape) != Y)
+    def nll(self,X,Y):
+        # TODO: fix; evaluate/estimate actual NLL?
+        P = self.predictSoft(X);
+        J = -np.mean( Y*np.log(P) + (1-Y)*np.log(1-P) );
+        return J
+    def predictLBP(self, X):
+        if len(X.shape)==1: X = X.reshape(1,-1)
+        Y = np.zeros((X.shape[0],self.Wvx.shape[0]));
+        for j in range(X.shape[0]):
+            bxh = self.bh + self.Whx.dot(X[j,:].T)
+            bxv = self.bv + self.Wvx.dot(X[j,:].T)
+            mu = marg_h(self, Y[j,:],bxh)
+            G,tv,th = marg_bp(self, 5, bxv, bxh)
+            Y[j,:] = tv;
+        return Y
+    def predictGibbs(self, X):
+        if len(X.shape)==1: X = X.reshape(1,-1)
+        Y = np.zeros((X.shape[0],self.Wvx.shape[0]));
+        for j in range(X.shape[0]):
+            bxh = self.bh + self.Whx.dot(X[j,:].T)
+            bxv = self.bv + self.Wvx.dot(X[j,:].T)
+            mu = marg_h(self, Y[j,:],bxh)
+            G,tv,th = marg_cd(self, 15, np.random.rand(Y[j,:].shape[0]), bxv, bxh)
+            Y[j,:] = tv;
+        return Y
+    def predict(self, X):
+        # Hard prediction.  TODO: create sampling function, MAP prediction function
+        return self.predictSoft(X) > 0.5;
+    def predictSoft(self, X):
+        """Make 'soft' (per-class confidence) predictions of the rbm on data X.
+        Args:
+          X : MxN numpy array containing M data points with N features each
+        Returns:
+          P : MxC numpy array of C class probabilities for each of the M data
+        """
+        Y = np.zeros((X.shape[0],self.Wvx.shape[0]));
+        for j in range(X.shape[0]):
+            bxh = self.bh + self.Whx.dot(X[j,:].T)
+            bxv = self.bv + self.Wvx.dot(X[j,:].T)
+            mu = self.marg_h(Y[j,:],bxh)
+            G,tv,th = self.marg_bp(5, bxv, bxh)
+            Y[j,:] = tv;
+        return Y
+    # TODO: add momentum for learning update
+    def train(self, X, Y, Xv=None,Yv=None, stepsize=.01, stopGap=0.1, stopEpoch=10):
+        """Train the (c)RBM
+        Args:
+          X : MxNx numpy array containing M data points with N features each
+          Y : MxNv numpy array of targets (visible units) for each data point in X
+          stepsize : scalar
+              The stepsize for gradient descent (decreases as 1 / iter).
+          stopTol : scalar
+              Tolerance for stopping criterion.
+          stopIter : int
+              The maximum number of steps before stopping.
+          activation : str
+              'logistic', 'htangent', or 'custom'. Sets the activation functions.
+        """
+        # TODO: Shape & argument checking
+        # outer loop of (mini-batch) stochastic gradient descent
+        it, j = 1, 0                                # iteration number & data index
+        nextPrint = 1                               # next time to print info
+        done = 0                                    # end of loop flag
+        nBatch = 40
+        while not done:
+            step_i = 3.0*stepsize / (2.0+it)        # step size evolution; classic 1/t decrease
+            dWvh, dWvx, dWhx, dbv, dbh = 0.0, 0.0, 0.0, 0.0, 0.0
+            # stochastic gradient update (one pass)
+            for jj in range(nBatch):
+                #print('j={}; jj={};'.format(j,jj));
+                j += 1
+                if j >= Y.shape[0]: j=0; it+=1;
+                # compute conditional model & required probabilities
+                bxh = self.bh + self.Whx.dot(X[j,:].T)
+                bxv = self.bv + self.Wvx.dot(X[j,:].T)
+                mu = self.marg_h(Y[j,:],bxh)
+                G,tv,th = self.marg_cd( 1, Y[j,:], bxv, bxh, 1)
+                #G,tv,th = self.marg_bp( min(4+it,50), bxv, bxh )
+                if (jj==1): #(np.random.rand() < .1):
+                    plt.figure(1);
+                    plt.subplot(221); plt.imshow(X[j,:].reshape(28,28)); plt.title('Observed X'); plt.draw();
+                    plt.subplot(222); plt.imshow(tv.reshape(28,28)); plt.title('Model Prob'); plt.draw();
+                    plt.subplot(223); plt.imshow(Y[j,:].reshape(28,28)); plt.title('Visible Y'); plt.draw();
+                    plt.pause(.01);
+                # take gradient step:
+                dWvh += (np.outer(Y[j,:], mu) - G)
+                dWvx += (np.outer(Y[j,:], X[j,:]) - np.outer(tv,X[j,:]))
+                dWhx += (np.outer(mu, X[j,:]) - np.outer(th,X[j,:]))
+                dbv  += (Y[j,:] - tv)
+                dbh  += (mu - th)
+            self.Wvh += step_i * dWvh / nBatch
+            self.Wvx += step_i * dWvx / nBatch
+            self.Whx += step_i * dWhx / nBatch
+            self.bv  += step_i * dbv / nBatch
+            self.bh  += step_i * dbh / nBatch
+            print('it {} : Gap = {}'.format(it,self.nll_gap(X,Y,Xv,Yv)));
+            print('  {} {} {} {} {}'.format(np.mean(self.Wvx**2),np.mean(self.Whx**2),np.mean(self.Wvh**2),np.mean(self.bv**2),np.mean(self.bh**2)));
+            Jtr,Jva = 0,0 #self.nll(X,Y),self.nll(Xv,Yv);
+            if it >= nextPrint:
+                print('it {} : Gap = {}'.format(it,self.nll_gap(X,Y,Xv,Yv)));
+                print('  {} {} {} {} {}'.format(np.mean(self.Wvx**2),np.mean(self.Whx**2),np.mean(self.Wvh**2),np.mean(self.bv**2),np.mean(self.bh**2)));
+                #print('it {} : Jtr = {} / Jva = {}'.format(it,Jtr,Jva))
+                nextPrint += 1; #*= 2
+            # check if finished
+            done = (it > 1) and ((Jva - Jtr) > stopGap) or it >= stopEpoch
+            #it += 1   # counting epochs elsewhere now
+    #def err_k(self, X, Y):
+    #    """Compute misclassification error rate. Assumes Y in 1-of-k form.  """
+    #    return self.err(X, from1ofK(Y,self.classes).ravel())
+    #
+    #
+    #def mse(self, X, Y):
+    #    """Compute mean squared error of predictor 'obj' on test data (X,Y).  """
+    #    return mse_k(X, to1ofK(Y))
+    #
+    #
+    #def mse_k(self, X, Y):
+    #    """Compute mean squared error of predictor; assumes Y is in 1-of-k format.  """
+    #    return np.power(Y - self.predictSoft(X), 2).sum(1).mean(0)
+## MUTATORS ####################################################################
+################################################################################
+################################################################################
+################################################################################