pysp-learn 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (421)
  1. pysp_learn-0.2.0/LICENSE +21 -0
  2. pysp_learn-0.2.0/MANIFEST.in +1 -0
  3. pysp_learn-0.2.0/NOTICE +21 -0
  4. pysp_learn-0.2.0/PKG-INFO +483 -0
  5. pysp_learn-0.2.0/README.md +424 -0
  6. pysp_learn-0.2.0/pyproject.toml +137 -0
  7. pysp_learn-0.2.0/pysp/__init__.py +1 -0
  8. pysp_learn-0.2.0/pysp/arithmetic.py +103 -0
  9. pysp_learn-0.2.0/pysp/data/__init__.py +20 -0
  10. pysp_learn-0.2.0/pysp/data/dataframe.py +75 -0
  11. pysp_learn-0.2.0/pysp/data/graph_data.py +257 -0
  12. pysp_learn-0.2.0/pysp/data/rdd_sampler.py +46 -0
  13. pysp_learn-0.2.0/pysp/doe/__init__.py +91 -0
  14. pysp_learn-0.2.0/pysp/doe/bayesopt.py +333 -0
  15. pysp_learn-0.2.0/pysp/doe/constrained.py +210 -0
  16. pysp_learn-0.2.0/pysp/doe/designs.py +182 -0
  17. pysp_learn-0.2.0/pysp/doe/multiobjective.py +128 -0
  18. pysp_learn-0.2.0/pysp/doe/optimal.py +236 -0
  19. pysp_learn-0.2.0/pysp/doe/optimizer.py +170 -0
  20. pysp_learn-0.2.0/pysp/engines/__init__.py +119 -0
  21. pysp_learn-0.2.0/pysp/engines/base.py +87 -0
  22. pysp_learn-0.2.0/pysp/engines/numpy_engine.py +107 -0
  23. pysp_learn-0.2.0/pysp/engines/precision.py +170 -0
  24. pysp_learn-0.2.0/pysp/engines/symbolic_engine.py +529 -0
  25. pysp_learn-0.2.0/pysp/engines/symbolic_export.py +243 -0
  26. pysp_learn-0.2.0/pysp/engines/torch_engine.py +267 -0
  27. pysp_learn-0.2.0/pysp/infer/__init__.py +560 -0
  28. pysp_learn-0.2.0/pysp/infer/backends.py +127 -0
  29. pysp_learn-0.2.0/pysp/infer/diagnostics.py +96 -0
  30. pysp_learn-0.2.0/pysp/models/__init__.py +78 -0
  31. pysp_learn-0.2.0/pysp/models/dependence.py +193 -0
  32. pysp_learn-0.2.0/pysp/models/dpm.py +326 -0
  33. pysp_learn-0.2.0/pysp/models/gaussian_process.py +238 -0
  34. pysp_learn-0.2.0/pysp/models/grammar.py +200 -0
  35. pysp_learn-0.2.0/pysp/models/knowledge_graph.py +198 -0
  36. pysp_learn-0.2.0/pysp/models/neural.py +320 -0
  37. pysp_learn-0.2.0/pysp/models/pomdp.py +318 -0
  38. pysp_learn-0.2.0/pysp/models/random_graph.py +314 -0
  39. pysp_learn-0.2.0/pysp/planner.py +1847 -0
  40. pysp_learn-0.2.0/pysp/ppl/__init__.py +837 -0
  41. pysp_learn-0.2.0/pysp/ppl/autograd.py +537 -0
  42. pysp_learn-0.2.0/pysp/ppl/benchmark.py +152 -0
  43. pysp_learn-0.2.0/pysp/ppl/benchmark_vs.py +254 -0
  44. pysp_learn-0.2.0/pysp/ppl/core.py +1636 -0
  45. pysp_learn-0.2.0/pysp/ppl/diagnostics.py +194 -0
  46. pysp_learn-0.2.0/pysp/ppl/dynamics.py +198 -0
  47. pysp_learn-0.2.0/pysp/ppl/inference.py +1705 -0
  48. pysp_learn-0.2.0/pysp/ppl/pde.py +341 -0
  49. pysp_learn-0.2.0/pysp/ppl/regression.py +529 -0
  50. pysp_learn-0.2.0/pysp/ppl/statespace.py +96 -0
  51. pysp_learn-0.2.0/pysp/ppl/training_data.py +138 -0
  52. pysp_learn-0.2.0/pysp/ppl/vmp.py +490 -0
  53. pysp_learn-0.2.0/pysp/stats/__init__.py +1879 -0
  54. pysp_learn-0.2.0/pysp/stats/bayes/__init__.py +1 -0
  55. pysp_learn-0.2.0/pysp/stats/bayes/catdirichlet.py +173 -0
  56. pysp_learn-0.2.0/pysp/stats/bayes/dirichlet.py +985 -0
  57. pysp_learn-0.2.0/pysp/stats/bayes/dpm.py +743 -0
  58. pysp_learn-0.2.0/pysp/stats/bayes/hdpm.py +666 -0
  59. pysp_learn-0.2.0/pysp/stats/bayes/mvngamma.py +211 -0
  60. pysp_learn-0.2.0/pysp/stats/bayes/normgamma.py +195 -0
  61. pysp_learn-0.2.0/pysp/stats/bayes/normwishart.py +232 -0
  62. pysp_learn-0.2.0/pysp/stats/bayes/pitman_yor.py +372 -0
  63. pysp_learn-0.2.0/pysp/stats/bayes/symdirichlet.py +141 -0
  64. pysp_learn-0.2.0/pysp/stats/combinator/__init__.py +1 -0
  65. pysp_learn-0.2.0/pysp/stats/combinator/censored.py +286 -0
  66. pysp_learn-0.2.0/pysp/stats/combinator/composite.py +1075 -0
  67. pysp_learn-0.2.0/pysp/stats/combinator/conditional.py +1487 -0
  68. pysp_learn-0.2.0/pysp/stats/combinator/exponential_tilt.py +547 -0
  69. pysp_learn-0.2.0/pysp/stats/combinator/finite_stochastic_transform.py +314 -0
  70. pysp_learn-0.2.0/pysp/stats/combinator/ignored.py +287 -0
  71. pysp_learn-0.2.0/pysp/stats/combinator/null_dist.py +457 -0
  72. pysp_learn-0.2.0/pysp/stats/combinator/optional.py +773 -0
  73. pysp_learn-0.2.0/pysp/stats/combinator/record.py +586 -0
  74. pysp_learn-0.2.0/pysp/stats/combinator/select.py +794 -0
  75. pysp_learn-0.2.0/pysp/stats/combinator/sequence.py +1193 -0
  76. pysp_learn-0.2.0/pysp/stats/combinator/transform.py +558 -0
  77. pysp_learn-0.2.0/pysp/stats/combinator/truncated.py +287 -0
  78. pysp_learn-0.2.0/pysp/stats/combinator/weighted.py +451 -0
  79. pysp_learn-0.2.0/pysp/stats/compute/__init__.py +1 -0
  80. pysp_learn-0.2.0/pysp/stats/compute/backend.py +62 -0
  81. pysp_learn-0.2.0/pysp/stats/compute/capabilities.py +110 -0
  82. pysp_learn-0.2.0/pysp/stats/compute/declarations.py +1454 -0
  83. pysp_learn-0.2.0/pysp/stats/compute/encoded.py +93 -0
  84. pysp_learn-0.2.0/pysp/stats/compute/fused_kernels.py +1364 -0
  85. pysp_learn-0.2.0/pysp/stats/compute/gradient.py +622 -0
  86. pysp_learn-0.2.0/pysp/stats/compute/kernel.py +634 -0
  87. pysp_learn-0.2.0/pysp/stats/compute/pdist.py +1018 -0
  88. pysp_learn-0.2.0/pysp/stats/compute/stacked.py +650 -0
  89. pysp_learn-0.2.0/pysp/stats/compute/torch_mixture.py +269 -0
  90. pysp_learn-0.2.0/pysp/stats/exp_family.py +421 -0
  91. pysp_learn-0.2.0/pysp/stats/graph/__init__.py +1 -0
  92. pysp_learn-0.2.0/pysp/stats/graph/chow_liu_tree.py +713 -0
  93. pysp_learn-0.2.0/pysp/stats/graph/erdos_renyi_graph.py +380 -0
  94. pysp_learn-0.2.0/pysp/stats/graph/grammar.py +682 -0
  95. pysp_learn-0.2.0/pysp/stats/graph/icltree.py +729 -0
  96. pysp_learn-0.2.0/pysp/stats/graph/int_markovchain.py +1377 -0
  97. pysp_learn-0.2.0/pysp/stats/graph/mallows.py +352 -0
  98. pysp_learn-0.2.0/pysp/stats/graph/markov_chain.py +2006 -0
  99. pysp_learn-0.2.0/pysp/stats/graph/markov_transform.py +896 -0
  100. pysp_learn-0.2.0/pysp/stats/graph/matching.py +330 -0
  101. pysp_learn-0.2.0/pysp/stats/graph/plackett_luce.py +482 -0
  102. pysp_learn-0.2.0/pysp/stats/graph/rdpg.py +274 -0
  103. pysp_learn-0.2.0/pysp/stats/graph/spanning_tree.py +362 -0
  104. pysp_learn-0.2.0/pysp/stats/graph/sparse_markov_transform.py +980 -0
  105. pysp_learn-0.2.0/pysp/stats/graph/spearman_rho.py +650 -0
  106. pysp_learn-0.2.0/pysp/stats/graph/stochastic_block_graph.py +596 -0
  107. pysp_learn-0.2.0/pysp/stats/latent/__init__.py +1 -0
  108. pysp_learn-0.2.0/pysp/stats/latent/_hmm_numba_kernels.py +288 -0
  109. pysp_learn-0.2.0/pysp/stats/latent/dirac_length.py +1023 -0
  110. pysp_learn-0.2.0/pysp/stats/latent/heterogeneous_mixture.py +1149 -0
  111. pysp_learn-0.2.0/pysp/stats/latent/heterogeneous_pcfg.py +1245 -0
  112. pysp_learn-0.2.0/pysp/stats/latent/hidden_association.py +950 -0
  113. pysp_learn-0.2.0/pysp/stats/latent/hidden_markov.py +2861 -0
  114. pysp_learn-0.2.0/pysp/stats/latent/hidden_markov_ind_pi.py +1878 -0
  115. pysp_learn-0.2.0/pysp/stats/latent/hmixture.py +1251 -0
  116. pysp_learn-0.2.0/pysp/stats/latent/ibp.py +633 -0
  117. pysp_learn-0.2.0/pysp/stats/latent/int_hidden_association.py +1522 -0
  118. pysp_learn-0.2.0/pysp/stats/latent/int_plsi.py +1354 -0
  119. pysp_learn-0.2.0/pysp/stats/latent/jmixture.py +1103 -0
  120. pysp_learn-0.2.0/pysp/stats/latent/lda.py +1596 -0
  121. pysp_learn-0.2.0/pysp/stats/latent/llda.py +1773 -0
  122. pysp_learn-0.2.0/pysp/stats/latent/look_back_hmm.py +1232 -0
  123. pysp_learn-0.2.0/pysp/stats/latent/mixture.py +1594 -0
  124. pysp_learn-0.2.0/pysp/stats/latent/mvnmixture.py +840 -0
  125. pysp_learn-0.2.0/pysp/stats/latent/ppca.py +318 -0
  126. pysp_learn-0.2.0/pysp/stats/latent/quantized_hmm.py +924 -0
  127. pysp_learn-0.2.0/pysp/stats/latent/segmental_hmm.py +706 -0
  128. pysp_learn-0.2.0/pysp/stats/latent/ss_mixture.py +1070 -0
  129. pysp_learn-0.2.0/pysp/stats/latent/tree_hmm.py +2536 -0
  130. pysp_learn-0.2.0/pysp/stats/leaf/__init__.py +1 -0
  131. pysp_learn-0.2.0/pysp/stats/leaf/bernoulli.py +398 -0
  132. pysp_learn-0.2.0/pysp/stats/leaf/beta.py +419 -0
  133. pysp_learn-0.2.0/pysp/stats/leaf/binomial.py +1073 -0
  134. pysp_learn-0.2.0/pysp/stats/leaf/birth_death.py +332 -0
  135. pysp_learn-0.2.0/pysp/stats/leaf/cat_multinomial.py +1023 -0
  136. pysp_learn-0.2.0/pysp/stats/leaf/categorical.py +994 -0
  137. pysp_learn-0.2.0/pysp/stats/leaf/exgaussian.py +328 -0
  138. pysp_learn-0.2.0/pysp/stats/leaf/exponential.py +646 -0
  139. pysp_learn-0.2.0/pysp/stats/leaf/gamma.py +745 -0
  140. pysp_learn-0.2.0/pysp/stats/leaf/gaussian.py +774 -0
  141. pysp_learn-0.2.0/pysp/stats/leaf/geometric.py +747 -0
  142. pysp_learn-0.2.0/pysp/stats/leaf/gumbel.py +302 -0
  143. pysp_learn-0.2.0/pysp/stats/leaf/half_normal.py +360 -0
  144. pysp_learn-0.2.0/pysp/stats/leaf/inhomogeneous_poisson.py +303 -0
  145. pysp_learn-0.2.0/pysp/stats/leaf/int_multinomial.py +1132 -0
  146. pysp_learn-0.2.0/pysp/stats/leaf/int_range.py +1046 -0
  147. pysp_learn-0.2.0/pysp/stats/leaf/int_spike.py +797 -0
  148. pysp_learn-0.2.0/pysp/stats/leaf/inverse_gamma.py +415 -0
  149. pysp_learn-0.2.0/pysp/stats/leaf/inverse_gaussian.py +437 -0
  150. pysp_learn-0.2.0/pysp/stats/leaf/laplace.py +296 -0
  151. pysp_learn-0.2.0/pysp/stats/leaf/log_gaussian.py +767 -0
  152. pysp_learn-0.2.0/pysp/stats/leaf/logistic.py +273 -0
  153. pysp_learn-0.2.0/pysp/stats/leaf/logseries.py +351 -0
  154. pysp_learn-0.2.0/pysp/stats/leaf/negative_binomial.py +389 -0
  155. pysp_learn-0.2.0/pysp/stats/leaf/pareto.py +356 -0
  156. pysp_learn-0.2.0/pysp/stats/leaf/point_mass.py +251 -0
  157. pysp_learn-0.2.0/pysp/stats/leaf/poisson.py +771 -0
  158. pysp_learn-0.2.0/pysp/stats/leaf/rayleigh.py +296 -0
  159. pysp_learn-0.2.0/pysp/stats/leaf/skellam.py +266 -0
  160. pysp_learn-0.2.0/pysp/stats/leaf/student_t.py +299 -0
  161. pysp_learn-0.2.0/pysp/stats/leaf/tweedie.py +294 -0
  162. pysp_learn-0.2.0/pysp/stats/leaf/uniform.py +292 -0
  163. pysp_learn-0.2.0/pysp/stats/leaf/von_mises.py +410 -0
  164. pysp_learn-0.2.0/pysp/stats/leaf/weibull.py +355 -0
  165. pysp_learn-0.2.0/pysp/stats/multivariate/__init__.py +1 -0
  166. pysp_learn-0.2.0/pysp/stats/multivariate/dmvn.py +837 -0
  167. pysp_learn-0.2.0/pysp/stats/multivariate/mvn.py +907 -0
  168. pysp_learn-0.2.0/pysp/stats/multivariate/mvt.py +418 -0
  169. pysp_learn-0.2.0/pysp/stats/multivariate/vmf.py +760 -0
  170. pysp_learn-0.2.0/pysp/stats/sets/__init__.py +1 -0
  171. pysp_learn-0.2.0/pysp/stats/sets/int_edit_setdist.py +997 -0
  172. pysp_learn-0.2.0/pysp/stats/sets/int_edit_stepsetdist.py +987 -0
  173. pysp_learn-0.2.0/pysp/stats/sets/int_setdist.py +540 -0
  174. pysp_learn-0.2.0/pysp/stats/sets/setdist.py +900 -0
  175. pysp_learn-0.2.0/pysp/tests/api_naming_aliases_test.py +227 -0
  176. pysp_learn-0.2.0/pysp/tests/auto_precision_test.py +88 -0
  177. pysp_learn-0.2.0/pysp/tests/automatic_gof_test.py +41 -0
  178. pysp_learn-0.2.0/pysp/tests/automatic_lognormal_test.py +64 -0
  179. pysp_learn-0.2.0/pysp/tests/automatic_mixture_test.py +37 -0
  180. pysp_learn-0.2.0/pysp/tests/automatic_model_weights_test.py +49 -0
  181. pysp_learn-0.2.0/pysp/tests/automatic_scientific_test.py +421 -0
  182. pysp_learn-0.2.0/pysp/tests/automatic_studentt_test.py +39 -0
  183. pysp_learn-0.2.0/pysp/tests/automatic_test.py +212 -0
  184. pysp_learn-0.2.0/pysp/tests/backend_scoring_test.py +1828 -0
  185. pysp_learn-0.2.0/pysp/tests/base_dist_test.py +445 -0
  186. pysp_learn-0.2.0/pysp/tests/bayes_streaming_test.py +387 -0
  187. pysp_learn-0.2.0/pysp/tests/bayes_test.py +879 -0
  188. pysp_learn-0.2.0/pysp/tests/birth_death_test.py +57 -0
  189. pysp_learn-0.2.0/pysp/tests/categorical_expfamily_test.py +86 -0
  190. pysp_learn-0.2.0/pysp/tests/categorical_test.py +52 -0
  191. pysp_learn-0.2.0/pysp/tests/censored_test.py +100 -0
  192. pysp_learn-0.2.0/pysp/tests/chow_liu_tree_test.py +209 -0
  193. pysp_learn-0.2.0/pysp/tests/compute_kernel_test.py +767 -0
  194. pysp_learn-0.2.0/pysp/tests/compute_metadata_test.py +1830 -0
  195. pysp_learn-0.2.0/pysp/tests/conftest.py +128 -0
  196. pysp_learn-0.2.0/pysp/tests/continuous_cdf_test.py +147 -0
  197. pysp_learn-0.2.0/pysp/tests/coupled_multiset_enum_test.py +224 -0
  198. pysp_learn-0.2.0/pysp/tests/dask_encoded_data_test.py +174 -0
  199. pysp_learn-0.2.0/pysp/tests/dataframe_adapter_test.py +169 -0
  200. pysp_learn-0.2.0/pysp/tests/density_rank_test.py +302 -0
  201. pysp_learn-0.2.0/pysp/tests/dirac_length_engine_test.py +40 -0
  202. pysp_learn-0.2.0/pysp/tests/distribution_additions_test.py +418 -0
  203. pysp_learn-0.2.0/pysp/tests/doe_bayesopt_test.py +201 -0
  204. pysp_learn-0.2.0/pysp/tests/doe_constrained_test.py +93 -0
  205. pysp_learn-0.2.0/pysp/tests/doe_designs_test.py +134 -0
  206. pysp_learn-0.2.0/pysp/tests/doe_multiobjective_test.py +66 -0
  207. pysp_learn-0.2.0/pysp/tests/doe_optimal_test.py +132 -0
  208. pysp_learn-0.2.0/pysp/tests/doe_optimizer_test.py +108 -0
  209. pysp_learn-0.2.0/pysp/tests/em_nonfinite_guard_test.py +109 -0
  210. pysp_learn-0.2.0/pysp/tests/em_strategies_test.py +331 -0
  211. pysp_learn-0.2.0/pysp/tests/encoded_data_backend_registry_test.py +66 -0
  212. pysp_learn-0.2.0/pysp/tests/engine_accumulate_parity_test.py +90 -0
  213. pysp_learn-0.2.0/pysp/tests/engine_test.py +205 -0
  214. pysp_learn-0.2.0/pysp/tests/enumeration_test.py +591 -0
  215. pysp_learn-0.2.0/pysp/tests/enumerator_coverage_test.py +173 -0
  216. pysp_learn-0.2.0/pysp/tests/estimator_stability_test.py +104 -0
  217. pysp_learn-0.2.0/pysp/tests/exgaussian_test.py +112 -0
  218. pysp_learn-0.2.0/pysp/tests/exp_family_fisher_test.py +53 -0
  219. pysp_learn-0.2.0/pysp/tests/exp_family_test.py +236 -0
  220. pysp_learn-0.2.0/pysp/tests/exponential_tilt_test.py +177 -0
  221. pysp_learn-0.2.0/pysp/tests/finite_stochastic_transform_test.py +118 -0
  222. pysp_learn-0.2.0/pysp/tests/fisher_view_test.py +728 -0
  223. pysp_learn-0.2.0/pysp/tests/fused_em_association_test.py +209 -0
  224. pysp_learn-0.2.0/pysp/tests/fused_em_hmm_family_test.py +222 -0
  225. pysp_learn-0.2.0/pysp/tests/fused_em_mixtures_test.py +188 -0
  226. pysp_learn-0.2.0/pysp/tests/fused_em_test.py +176 -0
  227. pysp_learn-0.2.0/pysp/tests/fused_em_variational_test.py +164 -0
  228. pysp_learn-0.2.0/pysp/tests/gaussian_process_matern_test.py +80 -0
  229. pysp_learn-0.2.0/pysp/tests/gaussian_process_monotone_test.py +65 -0
  230. pysp_learn-0.2.0/pysp/tests/generated_kernel_parity_test.py +160 -0
  231. pysp_learn-0.2.0/pysp/tests/gradient_fit_test.py +612 -0
  232. pysp_learn-0.2.0/pysp/tests/graph_distribution_test.py +218 -0
  233. pysp_learn-0.2.0/pysp/tests/graph_engine_test.py +51 -0
  234. pysp_learn-0.2.0/pysp/tests/gumbel_test.py +61 -0
  235. pysp_learn-0.2.0/pysp/tests/half_normal_test.py +67 -0
  236. pysp_learn-0.2.0/pysp/tests/heterogeneous_pcfg_test.py +240 -0
  237. pysp_learn-0.2.0/pysp/tests/hidden_association_engine_test.py +80 -0
  238. pysp_learn-0.2.0/pysp/tests/hidden_association_keys_test.py +93 -0
  239. pysp_learn-0.2.0/pysp/tests/hmixture_engine_test.py +57 -0
  240. pysp_learn-0.2.0/pysp/tests/hmm_engine_test.py +193 -0
  241. pysp_learn-0.2.0/pysp/tests/hmm_keys_test.py +36 -0
  242. pysp_learn-0.2.0/pysp/tests/hmm_numba_parity_test.py +63 -0
  243. pysp_learn-0.2.0/pysp/tests/hmm_sampler_batching_test.py +217 -0
  244. pysp_learn-0.2.0/pysp/tests/hvis_test.py +1320 -0
  245. pysp_learn-0.2.0/pysp/tests/ibp_test.py +117 -0
  246. pysp_learn-0.2.0/pysp/tests/ind_pi_engine_test.py +85 -0
  247. pysp_learn-0.2.0/pysp/tests/infer_backends_test.py +177 -0
  248. pysp_learn-0.2.0/pysp/tests/infer_facade_test.py +186 -0
  249. pysp_learn-0.2.0/pysp/tests/infer_parallel_chains_test.py +52 -0
  250. pysp_learn-0.2.0/pysp/tests/inhomogeneous_poisson_test.py +57 -0
  251. pysp_learn-0.2.0/pysp/tests/int_hidden_association_engine_test.py +63 -0
  252. pysp_learn-0.2.0/pysp/tests/int_hidden_association_test.py +117 -0
  253. pysp_learn-0.2.0/pysp/tests/int_plsi_engine_test.py +47 -0
  254. pysp_learn-0.2.0/pysp/tests/integer_categorical_expfamily_test.py +71 -0
  255. pysp_learn-0.2.0/pysp/tests/inverse_gamma_test.py +76 -0
  256. pysp_learn-0.2.0/pysp/tests/jmixture_engine_test.py +56 -0
  257. pysp_learn-0.2.0/pysp/tests/kernels_ext_test.py +375 -0
  258. pysp_learn-0.2.0/pysp/tests/kernels_test.py +258 -0
  259. pysp_learn-0.2.0/pysp/tests/key_validation_test.py +59 -0
  260. pysp_learn-0.2.0/pysp/tests/lda_engine_test.py +64 -0
  261. pysp_learn-0.2.0/pysp/tests/lda_len_test.py +145 -0
  262. pysp_learn-0.2.0/pysp/tests/leaf_engine_test.py +80 -0
  263. pysp_learn-0.2.0/pysp/tests/lightning_encoded_data_test.py +55 -0
  264. pysp_learn-0.2.0/pysp/tests/llda_alpha_test.py +222 -0
  265. pysp_learn-0.2.0/pysp/tests/llda_engine_test.py +90 -0
  266. pysp_learn-0.2.0/pysp/tests/local_parallel_chunks_test.py +79 -0
  267. pysp_learn-0.2.0/pysp/tests/logseries_test.py +72 -0
  268. pysp_learn-0.2.0/pysp/tests/lookback_hmm_engine_test.py +106 -0
  269. pysp_learn-0.2.0/pysp/tests/lookback_lag0_test.py +280 -0
  270. pysp_learn-0.2.0/pysp/tests/mallows_test.py +82 -0
  271. pysp_learn-0.2.0/pysp/tests/markov_transform_engine_test.py +85 -0
  272. pysp_learn-0.2.0/pysp/tests/matching_test.py +70 -0
  273. pysp_learn-0.2.0/pysp/tests/mcmc_autograd_test.py +110 -0
  274. pysp_learn-0.2.0/pysp/tests/mcmc_convergence_test.py +125 -0
  275. pysp_learn-0.2.0/pysp/tests/mcmc_test.py +688 -0
  276. pysp_learn-0.2.0/pysp/tests/mixture_stability_test.py +203 -0
  277. pysp_learn-0.2.0/pysp/tests/model_helpers_test.py +191 -0
  278. pysp_learn-0.2.0/pysp/tests/mvt_test.py +71 -0
  279. pysp_learn-0.2.0/pysp/tests/numerics_test.py +423 -0
  280. pysp_learn-0.2.0/pysp/tests/nuts_mass_adaptation_test.py +67 -0
  281. pysp_learn-0.2.0/pysp/tests/nuts_torch_test.py +77 -0
  282. pysp_learn-0.2.0/pysp/tests/objective_resolution_test.py +84 -0
  283. pysp_learn-0.2.0/pysp/tests/objectives_test.py +498 -0
  284. pysp_learn-0.2.0/pysp/tests/parallel_test.py +222 -0
  285. pysp_learn-0.2.0/pysp/tests/pareto_expfamily_test.py +59 -0
  286. pysp_learn-0.2.0/pysp/tests/pcfg_engine_test.py +68 -0
  287. pysp_learn-0.2.0/pysp/tests/pde_adjoint_test.py +53 -0
  288. pysp_learn-0.2.0/pysp/tests/pde_nonlinear_test.py +66 -0
  289. pysp_learn-0.2.0/pysp/tests/pitman_yor_test.py +101 -0
  290. pysp_learn-0.2.0/pysp/tests/placement_test.py +391 -0
  291. pysp_learn-0.2.0/pysp/tests/plackett_luce_partial_mle_test.py +57 -0
  292. pysp_learn-0.2.0/pysp/tests/plackett_luce_partial_test.py +59 -0
  293. pysp_learn-0.2.0/pysp/tests/plackett_luce_test.py +73 -0
  294. pysp_learn-0.2.0/pysp/tests/ppca_test.py +77 -0
  295. pysp_learn-0.2.0/pysp/tests/ppl_composite_sampling_test.py +207 -0
  296. pysp_learn-0.2.0/pysp/tests/ppl_constraints_test.py +232 -0
  297. pysp_learn-0.2.0/pysp/tests/ppl_core_test.py +281 -0
  298. pysp_learn-0.2.0/pysp/tests/ppl_engine_test.py +52 -0
  299. pysp_learn-0.2.0/pysp/tests/ppl_hetero_regression_test.py +64 -0
  300. pysp_learn-0.2.0/pysp/tests/ppl_inference_test.py +267 -0
  301. pysp_learn-0.2.0/pysp/tests/ppl_lda_test.py +33 -0
  302. pysp_learn-0.2.0/pysp/tests/ppl_leaf_families_test.py +214 -0
  303. pysp_learn-0.2.0/pysp/tests/ppl_loo_stacking_test.py +44 -0
  304. pysp_learn-0.2.0/pysp/tests/ppl_model_comparison_test.py +132 -0
  305. pysp_learn-0.2.0/pysp/tests/ppl_pde_test.py +130 -0
  306. pysp_learn-0.2.0/pysp/tests/ppl_regression_test.py +125 -0
  307. pysp_learn-0.2.0/pysp/tests/ppl_semimix_test.py +48 -0
  308. pysp_learn-0.2.0/pysp/tests/ppl_soft_constraints_test.py +132 -0
  309. pysp_learn-0.2.0/pysp/tests/ppl_statespace_test.py +38 -0
  310. pysp_learn-0.2.0/pysp/tests/ppl_training_data_test.py +47 -0
  311. pysp_learn-0.2.0/pysp/tests/ppl_vector_params_test.py +158 -0
  312. pysp_learn-0.2.0/pysp/tests/ppl_vmp_test.py +148 -0
  313. pysp_learn-0.2.0/pysp/tests/quantization_test.py +486 -0
  314. pysp_learn-0.2.0/pysp/tests/quantized_hmm_test.py +339 -0
  315. pysp_learn-0.2.0/pysp/tests/quantized_index_test.py +373 -0
  316. pysp_learn-0.2.0/pysp/tests/random_graph_models_test.py +73 -0
  317. pysp_learn-0.2.0/pysp/tests/ray_encoded_data_test.py +64 -0
  318. pysp_learn-0.2.0/pysp/tests/rdpg_test.py +62 -0
  319. pysp_learn-0.2.0/pysp/tests/sampler_accuracy_test.py +215 -0
  320. pysp_learn-0.2.0/pysp/tests/sampler_batching_test.py +89 -0
  321. pysp_learn-0.2.0/pysp/tests/sampler_seed_test.py +453 -0
  322. pysp_learn-0.2.0/pysp/tests/segmental_engine_test.py +49 -0
  323. pysp_learn-0.2.0/pysp/tests/segmental_hmm_test.py +126 -0
  324. pysp_learn-0.2.0/pysp/tests/serialization_test.py +339 -0
  325. pysp_learn-0.2.0/pysp/tests/skellam_test.py +78 -0
  326. pysp_learn-0.2.0/pysp/tests/spanning_tree_test.py +93 -0
  327. pysp_learn-0.2.0/pysp/tests/spark_encoded_data_test.py +110 -0
  328. pysp_learn-0.2.0/pysp/tests/sparse_markov_engine_test.py +61 -0
  329. pysp_learn-0.2.0/pysp/tests/sparse_markov_transform_test.py +117 -0
  330. pysp_learn-0.2.0/pysp/tests/spearman_rho_test.py +72 -0
  331. pysp_learn-0.2.0/pysp/tests/ss_mixture_engine_test.py +55 -0
  332. pysp_learn-0.2.0/pysp/tests/stats_bayes_beta_group_test.py +144 -0
  333. pysp_learn-0.2.0/pysp/tests/stats_bayes_dirichlet_group_test.py +214 -0
  334. pysp_learn-0.2.0/pysp/tests/stats_bayes_dpm_test.py +331 -0
  335. pysp_learn-0.2.0/pysp/tests/stats_bayes_gamma_group_test.py +206 -0
  336. pysp_learn-0.2.0/pysp/tests/stats_bayes_gaussian_test.py +96 -0
  337. pysp_learn-0.2.0/pysp/tests/stats_bayes_markov_test.py +228 -0
  338. pysp_learn-0.2.0/pysp/tests/stats_bayes_mixture_test.py +197 -0
  339. pysp_learn-0.2.0/pysp/tests/stats_bayes_mvgaussian_group_test.py +222 -0
  340. pysp_learn-0.2.0/pysp/tests/stats_bayes_setdist_test.py +136 -0
  341. pysp_learn-0.2.0/pysp/tests/stats_bayes_wrappers_test.py +299 -0
  342. pysp_learn-0.2.0/pysp/tests/streaming_estimation_test.py +272 -0
  343. pysp_learn-0.2.0/pysp/tests/symbolic_export_test.py +140 -0
  344. pysp_learn-0.2.0/pysp/tests/torch_engine_ext_test.py +372 -0
  345. pysp_learn-0.2.0/pysp/tests/torch_engine_test.py +241 -0
  346. pysp_learn-0.2.0/pysp/tests/torchrun_encoded_data_test.py +238 -0
  347. pysp_learn-0.2.0/pysp/tests/tree_hmm_engine_test.py +66 -0
  348. pysp_learn-0.2.0/pysp/tests/tree_hmm_len_test.py +121 -0
  349. pysp_learn-0.2.0/pysp/tests/tree_hmm_sampler_guard_test.py +72 -0
  350. pysp_learn-0.2.0/pysp/tests/truncated_distribution_test.py +78 -0
  351. pysp_learn-0.2.0/pysp/tests/truncation_bound_test.py +90 -0
  352. pysp_learn-0.2.0/pysp/tests/tweedie_test.py +62 -0
  353. pysp_learn-0.2.0/pysp/tests/utils_test.py +66 -0
  354. pysp_learn-0.2.0/pysp/tests/vmf_test.py +123 -0
  355. pysp_learn-0.2.0/pysp/tests/von_mises_test.py +79 -0
  356. pysp_learn-0.2.0/pysp/tests/wave_bayes1_test.py +294 -0
  357. pysp_learn-0.2.0/pysp/tests/wave_bayes2_test.py +299 -0
  358. pysp_learn-0.2.0/pysp/tests/wave_bayes3_test.py +276 -0
  359. pysp_learn-0.2.0/pysp/tests/wave_bayes4_test.py +188 -0
  360. pysp_learn-0.2.0/pysp/tests/wave_core_test.py +239 -0
  361. pysp_learn-0.2.0/pysp/tests/wave_hmmlegacy_test.py +270 -0
  362. pysp_learn-0.2.0/pysp/tests/wave_latent_test.py +298 -0
  363. pysp_learn-0.2.0/pysp/tests/wave_lookback_test.py +162 -0
  364. pysp_learn-0.2.0/pysp/tests/wave_markov_test.py +294 -0
  365. pysp_learn-0.2.0/pysp/tests/wave_multinomial_enum_test.py +221 -0
  366. pysp_learn-0.2.0/pysp/tests/wave_mvn_test.py +267 -0
  367. pysp_learn-0.2.0/pysp/tests/wave_select_test.py +297 -0
  368. pysp_learn-0.2.0/pysp/tests/wave_setdist_test.py +189 -0
  369. pysp_learn-0.2.0/pysp/tests/zero_count_estimate_test.py +123 -0
  370. pysp_learn-0.2.0/pysp/utils/__init__.py +18 -0
  371. pysp_learn-0.2.0/pysp/utils/aliasing.py +95 -0
  372. pysp_learn-0.2.0/pysp/utils/automatic/__init__.py +30 -0
  373. pysp_learn-0.2.0/pysp/utils/automatic/factories.py +449 -0
  374. pysp_learn-0.2.0/pysp/utils/automatic/profiling.py +1926 -0
  375. pysp_learn-0.2.0/pysp/utils/builder.py +53 -0
  376. pysp_learn-0.2.0/pysp/utils/density_rank.py +728 -0
  377. pysp_learn-0.2.0/pysp/utils/em.py +729 -0
  378. pysp_learn-0.2.0/pysp/utils/enumeration.py +1134 -0
  379. pysp_learn-0.2.0/pysp/utils/estimation.py +1056 -0
  380. pysp_learn-0.2.0/pysp/utils/evaluation.py +133 -0
  381. pysp_learn-0.2.0/pysp/utils/fisher.py +2143 -0
  382. pysp_learn-0.2.0/pysp/utils/fit.py +642 -0
  383. pysp_learn-0.2.0/pysp/utils/hvis/__init__.py +295 -0
  384. pysp_learn-0.2.0/pysp/utils/hvis/affinity.py +725 -0
  385. pysp_learn-0.2.0/pysp/utils/hvis/embed.py +380 -0
  386. pysp_learn-0.2.0/pysp/utils/hvis/neighbors.py +341 -0
  387. pysp_learn-0.2.0/pysp/utils/hvis/tsne.py +797 -0
  388. pysp_learn-0.2.0/pysp/utils/mcmc/__init__.py +79 -0
  389. pysp_learn-0.2.0/pysp/utils/mcmc/conjugate.py +128 -0
  390. pysp_learn-0.2.0/pysp/utils/mcmc/gradients.py +91 -0
  391. pysp_learn-0.2.0/pysp/utils/mcmc/nuts_numba.py +245 -0
  392. pysp_learn-0.2.0/pysp/utils/mcmc/nuts_torch.py +249 -0
  393. pysp_learn-0.2.0/pysp/utils/mcmc/parameter_bridge.py +467 -0
  394. pysp_learn-0.2.0/pysp/utils/mcmc/proposals.py +380 -0
  395. pysp_learn-0.2.0/pysp/utils/mcmc/samplers.py +854 -0
  396. pysp_learn-0.2.0/pysp/utils/metrics.py +170 -0
  397. pysp_learn-0.2.0/pysp/utils/objectives.py +762 -0
  398. pysp_learn-0.2.0/pysp/utils/optional_deps.py +60 -0
  399. pysp_learn-0.2.0/pysp/utils/optsutil.py +199 -0
  400. pysp_learn-0.2.0/pysp/utils/parallel/__init__.py +1 -0
  401. pysp_learn-0.2.0/pysp/utils/parallel/lightning_data.py +138 -0
  402. pysp_learn-0.2.0/pysp/utils/parallel/mpi.py +205 -0
  403. pysp_learn-0.2.0/pysp/utils/parallel/multiprocessing.py +284 -0
  404. pysp_learn-0.2.0/pysp/utils/parallel/ray_data.py +159 -0
  405. pysp_learn-0.2.0/pysp/utils/parallel/torchrun.py +250 -0
  406. pysp_learn-0.2.0/pysp/utils/priors.py +274 -0
  407. pysp_learn-0.2.0/pysp/utils/pvalues.py +116 -0
  408. pysp_learn-0.2.0/pysp/utils/quantization/__init__.py +1 -0
  409. pysp_learn-0.2.0/pysp/utils/quantization/core.py +616 -0
  410. pysp_learn-0.2.0/pysp/utils/quantization/parallel.py +189 -0
  411. pysp_learn-0.2.0/pysp/utils/quantization/semiring.py +290 -0
  412. pysp_learn-0.2.0/pysp/utils/serialization.py +402 -0
  413. pysp_learn-0.2.0/pysp/utils/special.py +214 -0
  414. pysp_learn-0.2.0/pysp/utils/streaming.py +240 -0
  415. pysp_learn-0.2.0/pysp/utils/vector.py +590 -0
  416. pysp_learn-0.2.0/pysp_learn.egg-info/PKG-INFO +483 -0
  417. pysp_learn-0.2.0/pysp_learn.egg-info/SOURCES.txt +419 -0
  418. pysp_learn-0.2.0/pysp_learn.egg-info/dependency_links.txt +1 -0
  419. pysp_learn-0.2.0/pysp_learn.egg-info/requires.txt +47 -0
  420. pysp_learn-0.2.0/pysp_learn.egg-info/top_level.txt +1 -0
  421. pysp_learn-0.2.0/setup.cfg +4 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023, Lawrence Livermore National Security, LLC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ include README.md
@@ -0,0 +1,21 @@
1
+ This work was produced under the auspices of the U.S. Department of
2
+ Energy by Lawrence Livermore National Laboratory under Contract
3
+ DE-AC52-07NA27344.
4
+
5
+ This work was prepared as an account of work sponsored by an agency of
6
+ the United States Government. Neither the United States Government nor
7
+ Lawrence Livermore National Security, LLC, nor any of their employees
8
+ makes any warranty, expressed or implied, or assumes any legal liability
9
+ or responsibility for the accuracy, completeness, or usefulness of any
10
+ information, apparatus, product, or process disclosed, or represents that
11
+ its use would not infringe privately owned rights.
12
+
13
+ Reference herein to any specific commercial product, process, or service
14
+ by trade name, trademark, manufacturer, or otherwise does not necessarily
15
+ constitute or imply its endorsement, recommendation, or favoring by the
16
+ United States Government or Lawrence Livermore National Security, LLC.
17
+
18
+ The views and opinions of authors expressed herein do not necessarily
19
+ state or reflect those of the United States Government or Lawrence
20
+ Livermore National Security, LLC, and shall not be used for advertising
21
+ or product endorsement purposes.
@@ -0,0 +1,483 @@
1
+ Metadata-Version: 2.4
2
+ Name: pysp-learn
3
+ Version: 0.2.0
4
+ Summary: A package for estimating heterogeneous probability density functions.
5
+ Author-email: Grant Boquet <grant.boquet@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/gmboquet/pysparkplug
8
+ Project-URL: Repository, https://github.com/gmboquet/pysparkplug
9
+ Keywords: machine learning,density estimation,statistics,heterogeneous data
10
+ Classifier: Programming Language :: Python
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Development Status :: 4 - Beta
16
+ Classifier: Intended Audience :: Developers
17
+ Requires-Python: >=3.10
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ License-File: NOTICE
21
+ Requires-Dist: numpy
22
+ Requires-Dist: scipy
23
+ Requires-Dist: pandas
24
+ Requires-Dist: mpmath
25
+ Requires-Dist: networkx
26
+ Requires-Dist: tqdm
27
+ Provides-Extra: numba
28
+ Requires-Dist: numba; extra == "numba"
29
+ Requires-Dist: tbb; extra == "numba"
30
+ Provides-Extra: spark
31
+ Requires-Dist: pyspark; extra == "spark"
32
+ Provides-Extra: dask
33
+ Requires-Dist: dask; extra == "dask"
34
+ Requires-Dist: distributed; extra == "dask"
35
+ Provides-Extra: torch
36
+ Requires-Dist: torch; extra == "torch"
37
+ Provides-Extra: mpi
38
+ Requires-Dist: mpi4py; extra == "mpi"
39
+ Provides-Extra: umap
40
+ Requires-Dist: umap-learn; extra == "umap"
41
+ Provides-Extra: sympy
42
+ Requires-Dist: sympy; extra == "sympy"
43
+ Provides-Extra: test
44
+ Requires-Dist: pytest>=8; extra == "test"
45
+ Requires-Dist: pytest-xdist; extra == "test"
46
+ Provides-Extra: lint
47
+ Requires-Dist: ruff==0.15.17; extra == "lint"
48
+ Requires-Dist: mypy; extra == "lint"
49
+ Provides-Extra: all
50
+ Requires-Dist: numba; extra == "all"
51
+ Requires-Dist: tbb; extra == "all"
52
+ Requires-Dist: pyspark; extra == "all"
53
+ Requires-Dist: dask; extra == "all"
54
+ Requires-Dist: distributed; extra == "all"
55
+ Requires-Dist: torch; extra == "all"
56
+ Requires-Dist: mpi4py; extra == "all"
57
+ Requires-Dist: umap-learn; extra == "all"
58
+ Dynamic: license-file
59
+
60
+ <p align="left">
61
+ <img src="pysparkplug_logo.png" alt="pysparkplug" width="120"/>
62
+ </p>
63
+
64
+ # pysparkplug
65
+
66
+ ![python](https://img.shields.io/badge/python-3.10%2B-blue)
67
+ ![license](https://img.shields.io/badge/license-MIT-green)
68
+ ![tests](https://img.shields.io/badge/tests-1900%2B-brightgreen)
69
+
70
+ **Composable, distributed density estimation for messy, mixed-type records** — tuples of
71
+ categories, counts, reals, vectors, sets, sequences, and trees. Specify a probabilistic model in a
72
+ few lines and fit it with EM — locally (vectorized NumPy + Numba) or at scale on Spark, Dask, or
73
+ Torch (GPU).
74
+
75
+ - **Mixed-type & composable** — a Gaussian and a categorical compose into a tuple model, tuple
76
+ models become mixture components, mixtures become HMM emissions. Nest to any depth.
77
+ - **One interface, everywhere** — every family shares the same five parts (distribution · sampler ·
78
+ estimator · accumulator · encoder), so sampling, scoring, and estimation work uniformly.
79
+ - **Fit anywhere** — the same `optimize(...)` call runs local NumPy/Numba or distributed Spark /
80
+ Dask / Torch by swapping one argument.
81
+ - **Frequentist *or* Bayesian** — MLE, MAP, conjugate posteriors, and variational mixtures
82
+ (Dirichlet processes) selected by a single `prior=` switch.
83
+ - **A PPL surface** — [`pysp.ppl`](#probabilistic-programming-pyspppl): put `free` or another
84
+ distribution in any parameter slot, then `.fit().sample().posterior()`.
85
+
86
+ ## Contents
87
+
88
+ [Installation](#installation) · [Quickstart](#quickstart) · [Core concepts](#core-concepts) ·
89
+ [Distribution catalog](#distribution-catalog) · [Probabilistic programming](#probabilistic-programming-pyspppl) ·
90
+ [Frequentist & Bayesian](#frequentist--bayesian--one-switch) · [Engines & orchestration](#engines--orchestration) ·
91
+ [Enumeration & ranking](#enumeration--ranking) · [Beyond fitting](#beyond-fitting) ·
92
+ [Examples & notebooks](#examples--notebooks) · [Tests](#tests) · [License](#license)
93
+
94
+ ## Installation
95
+
96
+ Python 3.10+ (developed on 3.12). The base install (numpy, scipy, pandas, mpmath, networkx, tqdm) covers every
97
+ distribution and local estimation. The package is published on PyPI as `pysp-learn` (the import
98
+ name is `pysp`):
99
+
100
+ ```sh
101
+ pip install pysp-learn
102
+ ```
103
+
104
+ Back-ends are opt-in extras — `numba` (JIT estimation), `spark` / `dask` / `mpi` (distributed),
105
+ `torch` (GPU/autograd), `umap`, `sympy` (symbolic export), or `all` (everything except `sympy`):
106
+
107
+ ```sh
108
+ pip install "pysp-learn[all]"
109
+ ```
110
+
111
+ To install the latest unreleased code straight from source:
112
+
113
+ ```sh
114
+ pip install "pysp-learn[all] @ git+https://github.com/gmboquet/pysparkplug.git"
115
+ ```
116
+
117
+ Without extras, numba-flagged paths run as pure Python (correct, slower) and Spark/Dask inputs are
118
+ unavailable. For development: `git clone` then `pip install -e ".[all]"`.
119
+
120
+ ## Quickstart
121
+
122
+ Fit a two-component mixture over heterogeneous `(category, real, variable-length count sequence)`
123
+ records:
124
+
125
+ ```python
126
+ import numpy as np
127
+ from pysp.stats import *
128
+ from pysp.utils.estimation import optimize
129
+
130
+ component = lambda mu, p: CompositeDistribution((
131
+ CategoricalDistribution({'a': p, 'b': 1.0 - p}),
132
+ GaussianDistribution(mu, 1.0),
133
+ SequenceDistribution(PoissonDistribution(mu + 5.0),
134
+ len_dist=CategoricalDistribution({2: 0.5, 3: 0.5})),
135
+ ))
136
+ truth = MixtureDistribution([component(0.0, 0.8), component(5.0, 0.2)], [0.6, 0.4])
137
+ data = truth.sampler(seed=1).sample(2000) # data[0] -> ('a', -0.3, [5, 4, 6])
138
+
139
+ # Estimators mirror the distribution structure
140
+ est = MixtureEstimator([CompositeEstimator((
141
+ CategoricalEstimator(),
142
+ GaussianEstimator(),
143
+ SequenceEstimator(PoissonEstimator(), len_estimator=CategoricalEstimator()),
144
+ ))] * 2)
145
+
146
+ model = optimize(data, est, max_its=100, rng=np.random.RandomState(1))
147
+ print(model.w) # ≈ [0.6, 0.4]
148
+ ```
149
+
150
+ For the same model in a more concise dialect, see [`pysp.ppl`](#probabilistic-programming-pyspppl).
151
+
152
+ ## Core concepts
153
+
154
+ Each model family implements five cooperating pieces:
155
+
156
+ | Piece | Role |
157
+ | ----------------- | -------------------------------------------------------------------------- |
158
+ | `...Distribution` | Parameters + `log_density(x)` / vectorized `seq_log_density(enc)` |
159
+ | `...Sampler` | Draw samples (`dist.sampler(seed).sample(size)`) |
160
+ | `...Estimator` | Specifies the model to fit; closed-form M-step via `estimate()` |
161
+ | `...Accumulator` | Collects sufficient statistics (E-step), mergeable across partitions |
162
+ | `...DataEncoder` | `seq_encode(data)` flattens raw Python data into NumPy for the fast path |
163
+
164
+ `optimize(data, est)` (in `pysp.utils.estimation`) ties these together — EM to convergence locally
165
+ (vectorized NumPy/Numba), scaling out to Spark/Dask/Torch/MPI by swapping one argument (see
166
+ [Engines & orchestration](#engines--orchestration)).
167
+
168
+ Also available: `best_of` (random restarts), `StreamingEstimator` / `IncrementalEstimator`
169
+ (online EM), `fit_mle` / `fit_map` (autograd fitting with typed priors), `RecordDistribution` /
170
+ `field(...)` (named dict/DataFrame observations), and `pysp.utils.automatic.get_estimator(data)`
171
+ (infer an estimator straight from raw data).
172
+
173
+ ## Distribution catalog
174
+
175
+ ~90 composable families live in `pysp.stats`, grouped into subpackages (`leaf`, `multivariate`,
176
+ `combinator`, `sets`, `latent`, `graph`, `bayes`, `compute`) but all re-exported at the top level —
177
+ `from pysp.stats import GaussianDistribution` works regardless of where the file lives.
178
+
179
+ - **Scalar / basic:** Gaussian, Student-t / Cauchy, Logistic, LogGaussian, Laplace, Uniform,
180
+ Exponential, Gamma, Inverse Gamma, Inverse Gaussian, Half-Normal, Gumbel, Beta, Weibull, Rayleigh,
181
+ Pareto, Poisson, Bernoulli, Geometric, Binomial, Negative Binomial, Log-Series, von Mises, Dirichlet,
182
+ Categorical, plus multivariate / diagonal Gaussian, von Mises–Fisher, and multivariate Student-t.
183
+ - **Combinators:** `CompositeDistribution` (tuples), `RecordDistribution` (named fields),
184
+ `SequenceDistribution`, `OptionalDistribution` (missing data), `TransformDistribution`,
185
+ `ConditionalDistribution`, `WeightedDistribution`.
186
+ - **Latent structure:** mixtures (plain, heterogeneous, hierarchical, joint, semi-supervised), LDA,
187
+ PLSI, probabilistic PCA, HMMs (standard, segmental, lookback, tree, quantized), PCFGs, Markov chains,
188
+ hidden associations, IBP, Pitman-Yor processes, Bernoulli sets.
189
+ - **Permutations & graphs:** Mallows and Plackett-Luce rankings, matchings, spanning trees, random
190
+ graphs (Erdős–Rényi, stochastic block, random dot-product), Spearman ranking.
191
+ - **Processes:** a general linear birth-death-sampling process (`BirthDeathSamplingDistribution` —
192
+ fossilized birth-death is the positive-`sampling_rate` case).
193
+ - **Bayesian:** conjugate priors (NormalGamma, NormalWishart, MvnGamma, Dirichlet, SymmetricDirichlet)
194
+ and variational Dirichlet-process / hierarchical-DP mixtures.
195
+
196
+ Estimators accept `pseudo_count` (regularization), `prior` (a conjugate prior — `None` gives MLE),
197
+ and `keys` (tying statistics across model parts). HMM-family models take `use_numba=True` for
198
+ parallel Numba kernels (the first call pays a cached JIT cost).
199
+
200
+ **API naming.** One stem per family (`<Stem>Distribution` / `Estimator` / `Sampler` /
201
+ `Accumulator`, …) and descriptive argument names. Legacy spellings stay as aliases (prefer `weights`
202
+ over `w`, `covariance` over `covar`, `max_iter` over `max_its`, …); passing both raises `TypeError`.
203
+
204
+ ## Probabilistic programming (`pysp.ppl`)
205
+
206
+ `pysp.ppl` is a concise, optional dialect over the same distributions. **One rule:** any parameter
207
+ slot is a value, the token `free` (estimate it), or another distribution (a prior — random /
208
+ hierarchical):
209
+
210
+ ```python
211
+ from pysp.ppl import Normal, Mix, Markov, free
212
+
213
+ Normal(0.0, 1.0) # value — fixed parameter
214
+ Normal(free, free) # free — estimate mean & sd
215
+ Normal(Normal(0, 10), 1.0) # distribution — a prior on the mean (hierarchical)
216
+ ```
217
+
218
+ Build a model, `.fit(data)`, then query with `.sample` / `.log_prob` / `.posterior` / `.params`:
219
+
220
+ ```python
221
+ m = Mix([Normal(free, free), Normal(free, free)]).fit(data) # 2-component Gaussian mixture
222
+ m.posterior(data) # responsibilities
223
+ Markov(Normal(free, free), states=2).fit(sequences) # 2-state Gaussian HMM (k-means++ seeded)
224
+ ```
225
+
226
+ A `free` coefficient times a `Field` turns the same surface into generalized linear models:
227
+
228
+ ```python
229
+ from pysp.ppl import Normal, Bernoulli, Poisson, Field, free
230
+
231
+ Normal(free * Field("x") + free * Field("z") + free, free).fit(y, given={"x": x, "z": z}) # linear
232
+ Bernoulli(free * Field("x") + free).fit(y, given={"x": x}) # logistic
233
+ Poisson(free * Field("x") + free).fit(y, given={"x": x}) # Poisson
234
+ ```
235
+
236
+ `how=` selects the inference method — `auto` (default) takes an exact route when one exists, else falls back to
237
+ EM / gradient / sampling:
238
+
239
+ ```python
240
+ Mix([Normal(free, free), Normal(free, free)]).fit(data, how="nuts")
241
+ Markov(Normal(free, free), states=2).fit(seqs, how="ensemble", chains=4, parallel=True) # R̂ + pooled ESS
242
+ # how = auto | conjugate | conjugate_mixture | em | map | vi | vmp | mcmc | hmc | nuts | ensemble
243
+ ```
244
+
245
+ **Constraints** among random variables are plain comparisons (combine with `& | ~`); they shape
246
+ both inference and sampling:
247
+
248
+ ```python
249
+ a, b = Normal(0, 10, name="a"), Normal(0, 10, name="b")
250
+ Mix([Normal(a, 1), Normal(b, 1)]).fit(data, constraints=a < b) # ordered means break label-switching
251
+ constrain(2*a - b >= 1).sample(100) # draw from the truncated joint
252
+ ```
253
+
254
+ ```python
255
+ # model constructors: Mix · Seq · Markov · LDA · MVN · DiagGaussian · LocalLevel · AR1 · Graph
256
+ compare([model_a, model_b], data) # rank fitted models, best first
257
+ ```
258
+
259
+ **PDE-constrained state-space models** (`pysp.ppl.pde`) fit a latent spatial field that evolves by a
260
+ PDE (method-of-lines discretization → linear/nonlinear transition) from noisy spatiotemporal
261
+ snapshots, via multivariate Kalman/RTS + EM or the autograd adjoint:
262
+
263
+ ```python
264
+ from pysp.ppl.pde import fit_diffusivity, fit_reaction_diffusion
265
+
266
+ fit_diffusivity(snapshots, dx=dx, dt=dt) # infer a 1-D diffusion coefficient + noise
267
+ fit_reaction_diffusion(snapshots, dx=dx, dt=dt) # nonlinear Fisher-KPP: du/dt = D u_xx + r u(1-u)
268
+ ```
269
+
270
+ It's a thin surface — the `pysp.stats` classes underneath are untouched.
271
+
272
+ **Head-to-head speed** (`python -m pysp.ppl.benchmark_vs`, same machine/data/model vs the actual
273
+ competing PPLs):
274
+
275
+ | task | pysp.ppl | competitor | result |
276
+ | ---- | -------- | ---------- | ------ |
277
+ | Poisson-Gamma posterior, N=200k | **5 ms** (exact, 1 pass) | NumPyro NUTS 5690 ms | **~1000× faster**, identical |
278
+ | Beta-Bernoulli posterior, N=100k | **3 ms** (exact) | NumPyro NUTS 3619 ms | **~1400× faster**, identical |
279
+ | Gaussian MLE, N=500k | **45 ms** (EM) | Pyro SVI 11778 ms | **~260× faster**, same answer |
280
+ | Gaussian posterior (ESS/sec) | **8945** (`how='ensemble'`) | emcee 7883 · NumPyro 624 | **highest mixing throughput** |
281
+
282
+ For conjugate / exponential-family / mixture models pysp returns the *exact* posterior with no
283
+ sampling; for general posteriors the ensemble sampler leads on ESS/sec. See
284
+ [`pysp/ppl/BENCHMARKS.md`](pysp/ppl/BENCHMARKS.md).
285
+
286
+ ## Frequentist & Bayesian — one switch
287
+
288
+ The prior is the single switch — no prior is maximum likelihood, a conjugate `prior=` makes the
289
+ same machinery Bayesian:
290
+
291
+ ```python
292
+ from pysp.utils.priors import NormalGammaPrior
293
+
294
+ GaussianEstimator() # MLE
295
+ GaussianEstimator(prior=NormalGammaPrior()) # closed-form conjugate posterior — same EM call
296
+ ```
297
+
298
+ `optimize` / `fit` auto-select the objective from the model (MLE, MAP, or variational ELBO; force it
299
+ with `objective=`); `fit(...)` returns the posterior, `BayesianStreamingEstimator` carries it across
300
+ batches, and `pysp.stats.dpm` / `hdpm` add (hierarchical) Dirichlet-process mixtures. Gradient MAP
301
+ with typed priors is first-class too:
302
+
303
+ ```python
304
+ from pysp.engines import TorchEngine
305
+ from pysp.utils.fit import fit_map
306
+ from pysp.utils.priors import DirichletPrior, MixturePrior, NormalGammaPrior
307
+
308
+ enc = model.dist_to_encoder().seq_encode(data)
309
+ fitted, objective = fit_map(enc, model, engine=TorchEngine(device="cpu", dtype="float64"),
310
+ priors=MixturePrior(
311
+ components=[NormalGammaPrior(mu0=-2.0), NormalGammaPrior(mu0=2.0)],
312
+ weights=DirichletPrior([2.0, 2.0])))
313
+ ```
314
+
315
+ ## Engines & orchestration
316
+
317
+ Distributions own the likelihood and sufficient-statistic math; **compute engines** supply the
318
+ array ops, device, and precision — so the same EM contract runs unchanged on NumPy, Numba, Torch
319
+ (CPU / GPU / multi-device), or a symbolic backend.
320
+
321
+ ```python
322
+ from pysp.engines import TorchEngine
323
+
324
+ optimize(data, est, engine=TorchEngine(device="cuda", dtype="float32")) # GPU
325
+ optimize(data, est, engine=TorchEngine(mesh=mesh, shard="components")) # multi-GPU (DTensor)
326
+ ```
327
+
328
+ **Precision is data-aware** — `precision='auto'` picks float32/float64 from the data and engine, and
329
+ sufficient statistics always accumulate in float64, so reduced precision stays safe:
330
+
331
+ ```python
332
+ optimize(data, est, precision="auto")
333
+ ```
334
+
335
+ **Scale by swapping the back-end, not the model** — local and distributed go through identical math:
336
+
337
+ ```python
338
+ optimize(data, est) # local NumPy / Numba
339
+ optimize(data, est, backend="mp", num_workers=8) # multiprocessing
340
+ optimize(rdd, est, backend="spark") # Spark
341
+ optimize(data, est, backend="dask", client=client) # Dask
342
+ optimize(data, est, backend="mpi", comm=comm) # MPI / torchrun
343
+ optimize(data, est, backend="ray") # Ray
344
+ optimize(data, est, backend="lightning") # PyTorch Lightning
345
+ ```
346
+
347
+ New frameworks plug in by registering a factory (`register_encoded_data_backend`) — the same
348
+ "register, don't branch" pattern as the engines, so `ray` and `lightning` were added without
349
+ editing the dispatch. For a purely local speedup, `encoded_data(..., parallel_chunks=True)` folds
350
+ resident chunks across threads (bit-identical to serial).
351
+
352
+ **The planner** (`pysp.planner`) turns a hardware budget into a memory-aware *placement* — chunking,
353
+ device assignment, and (on Torch) model sharding — that you compute once and reuse:
354
+
355
+ ```python
356
+ from pysp.planner import plan, Resources
357
+
358
+ placement = plan(data, model=model, estimator=est, resources=Resources.local(num_cpus=8))
359
+ optimize(data, est, placement=placement)
360
+ optimize(data, est, resources=Resources.local(num_cpus=8)) # or let optimize plan it for you
361
+ ```
362
+
363
+ `Resources.{single_cpu, local, from_spark, from_dask, from_mpi, from_specs}` describe the hardware.
364
+
365
+ **Symbolic export.** The `SymbolicEngine` runs a distribution's density through SymPy, so a model can
366
+ emit its closed-form log-density as LaTeX / SymPy / Sage:
367
+
368
+ ```python
369
+ from pysp.engines import SYMBOLIC_ENGINE, to_latex
370
+
371
+ x = SYMBOLIC_ENGINE.symbol("x")
372
+ to_latex(GaussianDistribution(0.0, 1.0).backend_seq_log_density(x, SYMBOLIC_ENGINE))
373
+ # '- 0.5 x^{2} - 0.918938533204673'
374
+ ```
375
+
376
+ ## Enumeration & ranking
377
+
378
+ Discrete and structured models can **enumerate their support in descending-probability order** and
379
+ answer exact **rank / cumulative-probability** queries — even when the support is enormous or
380
+ unbounded.
381
+
382
+ ```python
383
+ from pysp.utils.density_rank import density_rank, count_dp_seek
384
+
385
+ dist.enumerator().top_k(5) # the 5 most probable (value, log_prob), in order
386
+ dist.enumerator().top_p(0.95) # smallest set covering 95% of the mass (discrete nucleus)
387
+ density_rank(dist, value) # exact-head + sampling rank & CDF of an observation
388
+ count_dp_seek(dist, index=10_000) # the ~10,000th most probable value, by structural count-DP
389
+ ```
390
+
391
+ `Composite` / `Record` also support **conditional enumeration** — most-probable completions given
392
+ some fields, best-first:
393
+
394
+ ```python
395
+ record.conditional_enumerator({"country": "US"}).top_k(5) # 5 likeliest records with country=US
396
+ ```
397
+
398
+ For decomposable families (`Composite` / `Record` / `Sequence` / `MarkovChain`), rank↔value is an
399
+ exact count dynamic program at any depth (`count_dp_rank`, `count_dp_seek`, `cumulative_probability`,
400
+ `mixture_cross_rank`, and `count_dp_top_p`, which gives the nucleus *size* without enumerating it). For very large
401
+ or infinite supports, **budget-bounded quantized indexes** seek and unrank over just the
402
+ most-probable region without enumerating everything:
403
+
404
+ ```python
405
+ index = dist.count_budget_index(budget_bits=20) # index the top ~2**20 values
406
+ for value, log_prob in dist.count_budget_distinct(budget_bits=20):
407
+ ...
408
+ ```
409
+
410
+ `pysp.utils.enumeration` provides the shared machinery (bounded best-first union, quantization,
411
+ Kronecker-substitution count convolution).
412
+
413
+ **Continuous families** realize the same operations through the CDF and its inverse. Every univariate
414
+ continuous leaf has an exact `cdf(x)` (the "index of `x`") and `quantile(q)` (the value at
415
+ cumulative-probability `q`); multivariate Gaussian and von Mises–Fisher expose an exact
416
+ probability-ordered cumulative plus `density_quantile(q)` (a representative point on the `q`-HDR
417
+ contour) — both surfaced via `density_rank` as method `exact-analytic`. Any other samplable family
418
+ falls back to a Monte-Carlo `density_rank` / `density_quantile(q)` / `density_enumeration(n)`, so all
419
+ four operations are reachable everywhere — exact where the support is countable or has a closed-form
420
+ density quantile, stochastic representatives otherwise.
421
+
422
+ ## Beyond fitting
423
+
424
+ - **Inference & analysis** — `pysp.utils.mcmc` (Metropolis–Hastings / HMC / VMP), `pysp.utils.em`
425
+ (hard, annealed, ECM, Monte-Carlo, variational, online, restart EM), `pysp.utils.fisher`
426
+ (Fisher-geometry views), and `pysp.utils.hvis` (model-based embeddings — t-SNE / UMAP).
427
+ - **Engine-agnostic inference facade** — `pysp.infer` runs NUTS or ADVI on an *arbitrary*
428
+ differentiable target (bring your own `value_and_grad`) and dispatches to a registered backend
429
+ (NumPy / Numba / Torch / JAX). Multiple chains run in parallel (`parallel="thread"|"process"`,
430
+ with R̂ + pooled ESS). The underlying `pysp.utils.mcmc` NUTS does dual-averaging step-size and
431
+ optional diagonal mass-matrix adaptation (`adapt_mass=True`).
432
+ - **Design of experiments & Bayesian optimization** — `pysp.doe` provides classical designs
433
+ (`latin_hypercube`, `maximin_latin_hypercube`, `full_factorial`, `random_design`) and sequential
434
+ GP-EI Bayesian optimization (`minimize`, `propose_next`, `expected_improvement`).
435
+ - **Non-iid models** — `pysp.models` holds GP regression, neural regressors, random graphs,
436
+ grammars, and knowledge graphs.
437
+
438
+ ## Examples & notebooks
439
+
440
+ Worked tutorials live in the companion
441
+ [**pysparkplug-notebooks**](https://github.com/gmboquet/pysparkplug-notebooks) repo.
442
+
443
+ Runnable scripts ship in [examples/](examples/) — `examples_pysp/` (core), `examples_bayes/`
444
+ (Bayesian), `examples_spark/`, `examples_mp/`, and `examples_mpi/`:
445
+
446
+ ```sh
447
+ cd examples/examples_pysp
448
+ python mixture_example.py
449
+ python hidden_markov_example.py
450
+ ```
451
+
452
+ Every script is self-contained — it samples from a known model, then refits and recovers it (no
453
+ downloads). The `gallery_*_example.py` scripts tour the families in bulk; the rest focus on
454
+ individual models end to end.
455
+
456
+ **Running on Spark.** PySpark 4.x needs a JVM (Java 17/21), and workers must use the driver's Python:
457
+
458
+ ```sh
459
+ export JAVA_HOME=$(/usr/libexec/java_home -v 17)  # macOS helper; on Linux/Windows set JAVA_HOME to your JDK 17/21 directory
460
+ export PYSPARK_PYTHON=/path/to/venv/bin/python
461
+ export PYSPARK_DRIVER_PYTHON=$PYSPARK_PYTHON
462
+ python examples/examples_spark/mixture_example.py
463
+ ```
464
+
465
+ Estimation helpers detect RDD inputs automatically, so a model fit locally and one fit on a cluster
466
+ go through identical math.
467
+
468
+ ## Tests
469
+
470
+ ```sh
471
+ python -m pytest -m fast # quick correctness gate
472
+ python -m pytest -m "not optional and not benchmark" # full local suite
473
+ ```
474
+
475
+ Tests use `unittest.TestCase` internally with pytest markers / CI tiers (see
476
+ [`pysp/tests/README.md`](pysp/tests/README.md)). `base_dist_test.py` checks each distribution
477
+ end-to-end: sampler repeatability, `str`/`eval` round-trips, vectorized-vs-scalar densities, and
478
+ EM convergence.
479
+
480
+ ## License
481
+
482
+ MIT — see [LICENSE](LICENSE) and [NOTICE](NOTICE). Originally developed at Lawrence Livermore
483
+ National Laboratory (LLNL-CODE-844837).