bivpoispy 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bivpoispy-0.1.0/LICENSE +21 -0
- bivpoispy-0.1.0/PKG-INFO +19 -0
- bivpoispy-0.1.0/README.md +0 -0
- bivpoispy-0.1.0/pyproject.toml +31 -0
- bivpoispy-0.1.0/setup.cfg +4 -0
- bivpoispy-0.1.0/src/bivpoispy/__init__.py +1 -0
- bivpoispy-0.1.0/src/bivpoispy/bivpoispy.py +460 -0
- bivpoispy-0.1.0/src/bivpoispy.egg-info/PKG-INFO +19 -0
- bivpoispy-0.1.0/src/bivpoispy.egg-info/SOURCES.txt +10 -0
- bivpoispy-0.1.0/src/bivpoispy.egg-info/dependency_links.txt +1 -0
- bivpoispy-0.1.0/src/bivpoispy.egg-info/requires.txt +4 -0
- bivpoispy-0.1.0/src/bivpoispy.egg-info/top_level.txt +1 -0
bivpoispy-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ilyan Alexey Cortés Miranda
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
bivpoispy-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bivpoispy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Tools for bivariate Poisson distribution: density, sampling, MLE, and goodness-of-fit
|
|
5
|
+
Author-email: Ilyan Alexey Cortés Miranda <alexey.cortes13@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: bivpois,bivariate poisson
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: numpy>=1.20
|
|
16
|
+
Requires-Dist: scipy>=1.7
|
|
17
|
+
Requires-Dist: matplotlib>=3.5
|
|
18
|
+
Requires-Dist: numdifftools>=0.9
|
|
19
|
+
Dynamic: license-file
|
|
File without changes
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools >= 61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "bivpoispy"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Tools for bivariate Poisson distribution: density, sampling, MLE, and goodness-of-fit"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
authors = [
|
|
11
|
+
{name = "Ilyan Alexey Cortés Miranda", email = "alexey.cortes13@gmail.com"}
|
|
12
|
+
]
|
|
13
|
+
keywords = ["bivpois", "bivariate poisson"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
]
|
|
20
|
+
license = "MIT"
|
|
21
|
+
|
|
22
|
+
requires-python = ">= 3.9"
|
|
23
|
+
dependencies = [
|
|
24
|
+
"numpy >= 1.20",
|
|
25
|
+
"scipy >= 1.7",
|
|
26
|
+
"matplotlib >= 3.5",
|
|
27
|
+
"numdifftools >= 0.9",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[tool.setuptools.packages.find]
|
|
31
|
+
where = ["src"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .bivpoispy import logp, random, contour, mle, gof, lambda3_profile, BivpoisMle
|
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
from matplotlib import gridspec, cm
|
|
4
|
+
from mpl_toolkits.axes_grid1 import make_axes_locatable
|
|
5
|
+
from scipy.special import gammaln, logsumexp
|
|
6
|
+
from scipy.stats import chi2, norm
|
|
7
|
+
from scipy.optimize import minimize
|
|
8
|
+
import numdifftools as nd
|
|
9
|
+
from numpy.typing import ArrayLike, NDArray
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
|
|
12
|
+
__all__ = ['logp', 'random', 'contour', 'mle', 'gof', 'lambda3_profile', 'BivpoisMle']
|
|
13
|
+
|
|
14
|
+
def _log_choose(n: np.ndarray, k: np.ndarray) -> np.ndarray:
|
|
15
|
+
"""This function computes the log of the binomial coefficients of a polynomial of degree n
|
|
16
|
+
also known as n choose k.
|
|
17
|
+
|
|
18
|
+
Parameters
|
|
19
|
+
----------
|
|
20
|
+
n : numpy array
|
|
21
|
+
Total number of elements.
|
|
22
|
+
k : numpy array
|
|
23
|
+
Number of elements to choose.
|
|
24
|
+
|
|
25
|
+
Returns
|
|
26
|
+
-------
|
|
27
|
+
numpy array
|
|
28
|
+
An array containing the values of the log n choose k of each values of n and k.
|
|
29
|
+
Returns -inf when k > n or k < 0, and 0 when n = 0 or k = 0.
|
|
30
|
+
|
|
31
|
+
"""
|
|
32
|
+
modif_n = np.where((n < k) | (n ==0), k, n)
|
|
33
|
+
modif_k = np.where(k == 0, 1, k)
|
|
34
|
+
|
|
35
|
+
result = gammaln(modif_n+1) - gammaln(modif_n-modif_k+1) - gammaln(modif_k+1)
|
|
36
|
+
result[(n < k)| (k < 0)] = -np.inf
|
|
37
|
+
result[(n == 0) | (k == 0)] = 0
|
|
38
|
+
return result
|
|
39
|
+
|
|
40
|
+
def _make_llf(mx, my, n, sly1, sly2, z, f2a, indexes):
|
|
41
|
+
"""Build the negative log-likelihood function for lam3."""
|
|
42
|
+
def llf(lam3):
|
|
43
|
+
"""Negative log-likelihood as a function of lam3, with lam1 and lam2
|
|
44
|
+
derived from the sample means as lam1 = mx - lam3, lam2 = my - lam3."""
|
|
45
|
+
|
|
46
|
+
lam1 = mx - lam3
|
|
47
|
+
lam2 = my - lam3
|
|
48
|
+
if lam1 <= 0 or lam2 <= 0 or lam3 < 0:
|
|
49
|
+
return np.inf
|
|
50
|
+
|
|
51
|
+
lam = lam3 / (lam1 * lam2)
|
|
52
|
+
llf_value = -n * (lam1 + lam2 + lam3) - sly1 - sly2 + n * mx * np.log(lam1) + n * my * np.log(lam2)
|
|
53
|
+
|
|
54
|
+
if lam3 != 0:
|
|
55
|
+
log_lam_term = np.where(indexes == 0, 0, np.log(lam) * indexes)
|
|
56
|
+
sum_terms = f2a + log_lam_term
|
|
57
|
+
sum_terms = np.where(z[None, :] < indexes, -np.inf, sum_terms)
|
|
58
|
+
last_term = logsumexp(sum_terms, axis = 0)
|
|
59
|
+
llf_value += last_term.sum()
|
|
60
|
+
|
|
61
|
+
return -llf_value
|
|
62
|
+
return llf
|
|
63
|
+
|
|
64
|
+
def logp(value: ArrayLike, lam1: float, lam2: float, lam3: float, logged: bool = True) -> np.ndarray:
    r"""Compute the (log) probability mass function of the bivariate Poisson distribution.

    It is defined as:

    \(\log(p(x, y) ) = -(\lambda_1+\lambda_2+\lambda_3) + x \log({\lambda_1}) - \log(x!) + y \log(\lambda_2) -
    \log(y!) + \log \left( \sum_{i = 0}^{min(x,y)}{\binom xi \binom yi i! \left(\frac{\lambda_3}{\lambda_1\lambda_2}\right)^i} \right)\)

    Parameters
    ----------
    value : array like
        The observed count values. The last axis holds the pair ``(x, y)``;
        all leading axes are treated as batch dimensions.
    lam1 : float
        The value of the parameter $\lambda_1$. Must be > 0.
    lam2 : float
        The value of the parameter $\lambda_2$. Must be > 0.
    lam3 : float
        The value of the parameter $\lambda_3$. Must be >= 0.
    logged : boolean, optional
        If True (default), returns the log-density.
        If False, returns the density on the original scale.

    Returns
    -------
    numpy array
        The (log) probability of each observed pair given
        $(\lambda_1, \lambda_2, \lambda_3)$, with the batch shape of ``value``.
        An empty batch yields an empty array.

    Raises
    ------
    ValueError
        If ``lam1 <= 0``, ``lam2 <= 0`` or ``lam3 < 0``.

    """
    value = np.asarray(value)

    if lam1 <= 0 or lam2 <= 0:
        raise ValueError(f"lam1 and lam2 must be > 0, got lam1={lam1}, lam2={lam2}")
    if lam3 < 0:
        raise ValueError(f"lam3 must be >= 0, got lam3={lam3}")

    x = value[..., 0]
    y = value[..., 1]
    batch_shape = x.shape
    lam = lam3 / (lam1 * lam2)

    # Product of the two independent Poisson parts (everything outside the sum).
    first_term = (- (lam1 + lam2 + lam3)
                  + x * np.log(lam1) - gammaln(x + 1)
                  + y * np.log(lam2) - gammaln(y + 1))
    logp_flat = np.ravel(first_term)

    # With lam3 == 0 the sum equals 1 (log 0), so it is skipped. An empty
    # batch is skipped too, because np.max has no identity on zero-size input.
    if lam3 != 0 and logp_flat.size > 0:
        x_flat = x.ravel()
        y_flat = y.ravel()
        lam_flat = np.broadcast_to(lam, batch_shape).ravel()
        z = np.minimum(x_flat, y_flat)
        # Keep at least the i = 0 row so the reduction axis is never empty,
        # even when every minimum is negative.
        m = max(int(np.max(z)), 0)
        indexes = np.arange(m + 1)[:, None]

        sum_terms = (
            _log_choose(x_flat[None, :], indexes)
            + _log_choose(y_flat[None, :], indexes)
            + gammaln(indexes + 1)
            + np.log(lam_flat[None, :]) * indexes
        )

        # Each observation only sums up to its own min(x, y).
        sum_terms[z[None, :] < indexes] = -np.inf
        logp_flat = logp_flat + logsumexp(sum_terms, axis=0)

    result = logp_flat.reshape(batch_shape)

    return result if logged else np.exp(result)
|
|
130
|
+
|
|
131
|
+
def random(lam1: float, lam2: float, lam3: float, size: int) -> np.ndarray:
    """Draw samples from the bivariate Poisson distribution with parameters lam1, lam2, lam3.

    Each pair is built as ``(X1 + X3, X2 + X3)`` where X1, X2 and X3 are
    Poisson draws with means lam1, lam2 and lam3 taken from NumPy's global
    random state.

    Parameters
    ----------
    lam1 : float
        The value of the parameter lambda_1.
    lam2 : float
        The value of the parameter lambda_2.
    lam3 : float
        The value of the parameter lambda_3.
    size : int
        The sample size.

    Returns
    -------
    numpy array
        An array with ``size`` rows and two columns, one sampled pair per row.

    """
    # Draw order (lam1, lam2, lam3) is kept so seeded runs reproduce.
    own_first = np.random.poisson(lam1, size=size)
    own_second = np.random.poisson(lam2, size=size)
    shared = np.random.poisson(lam3, size=size)

    stacked = np.array([own_first + shared, own_second + shared])
    return stacked.T
|
|
156
|
+
|
|
157
|
+
def contour(value: ArrayLike, lam1: float, lam2: float, lam3: float, points: bool = True):
    """Plot the contour of the bivariate Poisson with parameters (lam1, lam2, lam3).

    The density is evaluated on an integer grid that extends three units
    beyond the observed range on each side, with the lower edge clamped at
    zero. The figure shows the filled contour together with the densities
    summed along each grid axis (labelled ``p(x)`` and ``p(y)``).

    Parameters
    ----------
    value : array like
        The observed count values; the last axis holds the pair ``(x, y)``.
    lam1 : float
        The value of the parameter lambda_1. Must be > 0.
    lam2 : float
        The value of the parameter lambda_2. Must be > 0.
    lam3 : float
        The value of the parameter lambda_3. Must be >= 0.
    points : boolean
        If True (default), plot the observed points in value.
        If False, don't plot the points saved in value.

    Returns
    -------
    matplotlib figure
        The figure holding the contour, the two marginal plots and the colorbar.

    Raises
    ------
    ValueError
        If ``lam1 <= 0``, ``lam2 <= 0`` or ``lam3 < 0``.

    """
    if lam1 <= 0 or lam2 <= 0:
        raise ValueError(f"lam1 and lam2 must be > 0, got lam1={lam1}, lam2={lam2}")
    if lam3 < 0:
        raise ValueError(f"lam3 must be >= 0, got lam3={lam3}")

    # Accept any array-like (lists, tuples), as logp does.
    value = np.asarray(value)
    x = value[..., 0]
    y = value[..., 1]

    # Use the builtin max here: np.max(v, 0) would read 0 as the axis
    # argument, so the lower bound would never be clamped at zero.
    x_low = max(np.min(x) - 3, 0)
    y_low = max(np.min(y) - 3, 0)
    z1, z2 = np.mgrid[
        x_low:(np.max(x) + 3),
        y_low:(np.max(y) + 3)
    ]
    z = logp(np.dstack((z1, z2)), lam1, lam2, lam3, logged=False)

    fig = plt.figure(figsize=(7, 7))
    gs = gridspec.GridSpec(
        2, 2, width_ratios=[2, 1], height_ratios=[2, 1])
    gs.update(wspace=0.25, hspace=0.25)
    fig.suptitle('Marginal distributions', y=0.93)

    ## bivariate
    ax1 = fig.add_subplot(gs[0])
    con = ax1.contourf(z1, z2, z, levels=10, cmap=cm.YlGnBu)
    if points:
        ax1.scatter(x, y, color='red', s=1)
    ax1.set_xlabel('$x$', fontsize=13)

    ## plot y: summing over the x axis of the grid leaves the density in y
    marginal_y = z.sum(axis=0)
    ax2 = fig.add_subplot(gs[1], sharey=ax1)
    ax2.plot(marginal_y, z2[0, :], 'o-', markersize=4, label='$p(y)$')
    plt.setp(ax2.get_yticklabels(), visible=False)
    ax2.legend(loc=0)
    ax2.set_ylabel('$y$', fontsize=13)
    ax2.set_xlabel('density', fontsize=13)

    ## plot x: summing over the y axis of the grid leaves the density in x
    marginal_x = z.sum(axis=1)
    ax3 = fig.add_subplot(gs[2], sharex=ax1)
    ax3.plot(z1[:, 0], marginal_x, 'o-', markersize=4, label='$p(x)$')
    plt.setp(ax3.get_xticklabels(), visible=False)
    ax3.legend(loc=0)
    ax3.set_ylabel('density', fontsize=13)
    ax3.yaxis.set_label_position('right')

    # density colorbar, drawn in the otherwise hidden fourth cell
    ax4 = fig.add_subplot(gs[3])
    ax4.set_visible(False)
    divider = make_axes_locatable(ax4)
    cax = divider.append_axes('left', size='20%', pad=0.05)
    cbar = fig.colorbar(con, cax=cax)
    cbar.ax.set_ylabel('density: $p(x, y)$', fontsize=12)
    return fig
|
|
228
|
+
|
|
229
|
+
@dataclass
class BivpoisMle:
    """Container for the output of the maximum likelihood fit.

    Attributes
    ----------
    lam1 : float
        The value of the parameter lambda1.
    lam2 : float
        The value of the parameter lambda2.
    lam3 : float
        The value of the parameter lambda3.
    rho : float
        Pearson correlation coefficient between the two variables.
    ci_obs : ArrayLike of shape (2,)
        Observed confidence interval for lambda3, as [lower, upper].
    ci_asymp : ArrayLike of shape (2,)
        Asymptotic confidence interval for lambda3, as [lower, upper].
    loglik : ArrayLike of shape (2,)
        Log-likelihood values, as [restricted, unrestricted].
    pvalue : ArrayLike of shape (3,)
        P-values for the hypothesis tests.

    """
    lam1: float
    lam2: float
    lam3: float
    rho: float
    ci_obs: ArrayLike
    ci_asymp: ArrayLike
    loglik: ArrayLike
    pvalue: ArrayLike

    def __str__(self) -> str:
        """Return a six-line, human-readable summary of the fit."""
        obs_low, obs_high = self.ci_obs[0], self.ci_obs[1]
        asy_low, asy_high = self.ci_asymp[0], self.ci_asymp[1]
        rows = [
            f"lambda: [{self.lam1}, {self.lam2}, {self.lam3}]",
            f"rho: {self.rho}",
            f"observed ci: [{obs_low}, {obs_high}]",
            f"asymptotic ci: [{asy_low}, {asy_high}]",
            f"loglik: [{self.loglik[0]}, {self.loglik[1]}]",
            f"pvalues: [{self.pvalue[0]}, {self.pvalue[1]}, {self.pvalue[2]}]",
        ]
        return "\n".join(rows)
|
|
271
|
+
|
|
272
|
+
def mle(value: ArrayLike) -> BivpoisMle:
    """Compute the maximum likelihood estimates of the bivariate Poisson parameters.

    Only lam3 is optimised numerically; lam1 and lam2 follow from the sample
    means as ``lam1 = mean(x) - lam3`` and ``lam2 = mean(y) - lam3``.

    Parameters
    ----------
    value : ArrayLike of shape (n, 2)
        Observed count pairs (x, y). Each row is one observation.

    Returns
    -------
    BivpoisMle
        MLE estimates of lam1, lam2, lam3 along with the correlation coefficient,
        confidence intervals, log-likelihood values, and p-values.

    Raises
    ------
    ValueError
        If ``value`` does not have shape (n, 2) or contains negative entries.

    """
    value = np.asarray(value)
    if value.ndim != 2 or value.shape[1] != 2:
        raise ValueError(f"value must have shape (n, 2), got {value.shape}")
    if np.any(value < 0):
        raise ValueError("All count values must be >= 0")

    x = value[..., 0]
    y = value[..., 1]

    # Quantities that do not depend on lam3 are computed once, outside the objective.
    n = x.shape[0]
    mx, my = np.mean(value, axis = 0)
    sly1 = gammaln(x + 1).sum()
    sly2 = gammaln(y + 1).sum()
    z = np.minimum(x, y)
    m = int(np.max(z))
    indexes = np.arange(m + 1)[:, None]
    f2a = _log_choose(x[None, :], indexes) + _log_choose(y[None, :], indexes) + gammaln(indexes + 1)

    # Minimise the negative log-likelihood over lam3 in [0, min(mx, my)],
    # starting from the sample covariance.
    # NOTE(review): the sample covariance can be negative or exceed the upper
    # bound, and llf returns inf at the upper bound itself; confirm the
    # optimiser copes with such a starting point.
    llf = _make_llf(mx, my, n, sly1, sly2, z, f2a, indexes)
    res = minimize(fun=llf,
                   x0=np.cov(x, y)[0][1],
                   bounds=[(0, np.min([mx,my]))],
                   method='L-BFGS-B'
                   )
    lam3 = res.x[0]

    # llf is inf for lam3 < 0, so a one-sided (forward) difference is used
    # when the estimate sits exactly on the lower bound.
    if lam3 == 0:
        hess = nd.Hessian(llf, step=1e-4, method='forward')

    else:
        hess = nd.Hessian(llf, step=1e-4, method='central')

    # Variance of lam3 from the inverse of the numerical second derivative.
    # NOTE(review): the [0,0][0] indexing depends on the array shape returned
    # by numdifftools for this llf; verify against the installed version.
    v1 = 1 / hess(lam3)[0,0][0]
    lam1 = mx - lam3
    lam2 = my - lam3
    llf0 = - llf(0)       # log-likelihood with lam3 fixed at 0
    llf1 = - res.fun      # log-likelihood at the optimum
    rho = lam3 / np.sqrt(mx * my)

    # tau = sum over r, s >= 1 of p(r-1, s-1)^2 / p(r, s), truncated 20 units
    # beyond the largest observed count.
    r_vals = np.arange(1, np.max([x, y]) + 21)
    s_vals = np.arange(1, np.max([x, y]) + 21)
    R, S = np.meshgrid(r_vals, s_vals, indexing='ij')
    grid = np.stack((R, S), axis=-1)
    num = logp(grid -1, lam1, lam2, lam3, logged=False) ** 2
    den = logp(grid, lam1, lam2, lam3, logged=False)
    tau = (num / den).sum()

    # Second variance estimate for lam3, built from tau.
    delta2 = - (lam1 + lam2) + (mx * my - lam3 **2) * (tau -1)
    asymp_var = (lam1 * lam2 + lam3 * (lam1 + lam2) * (lam3* (tau - 1) -1)) / ( n * delta2)

    # Standardised estimates under each of the two variance estimates.
    t1 = lam3 / np.sqrt(v1)
    t2 = lam3 / np.sqrt(asymp_var)

    # Two-sided 95% normal intervals (Z95 is the 0.975 standard normal quantile).
    Z95 = 1.959964
    ci_obs = [lam3 - Z95 * np.sqrt(v1), lam3 + Z95 * np.sqrt(v1)]
    ci_asymp = [lam3 - Z95 * np.sqrt(asymp_var), lam3 + Z95 * np.sqrt(asymp_var)]
    loglik = [llf0, llf1]
    # p-values: chi-square (1 df) on twice the log-likelihood gain over
    # lam3 = 0, then the upper normal tails of t1 and t2.
    pvalue = [chi2.sf(2 * (llf1 - llf0), df=1), norm.cdf(-t1), norm.cdf(-t2)]

    return BivpoisMle(lam1, lam2, lam3, rho, ci_obs, ci_asymp, loglik, pvalue)
|
|
347
|
+
|
|
348
|
+
def gof(value: ArrayLike, R: int = 1000) -> float:
    """Run a Monte Carlo goodness-of-fit test for the bivariate Poisson distribution.

    The index-of-dispersion statistic of the sample is compared with the same
    statistic computed on ``R`` samples simulated from the fitted model.

    Parameters
    ----------
    value : array like
        Observed count pairs (x, y). Each row is one observation.
    R : int
        Number of Monte Carlo replicates. Default is 1000.

    Returns
    -------
    float
        The p-value of the index of dispersion test using Monte Carlo.

    Raises
    ------
    ValueError
        If ``value`` does not have shape (n, 2).

    """
    value = np.asarray(value)
    if not (value.ndim == 2 and value.shape[1] == 2):
        raise ValueError(f"value must have shape (n, 2), got {value.shape}")

    x, y = value[..., 0], value[..., 1]
    n = x.shape[0]
    mx, my = np.mean(value, axis=0)

    # Observed statistic, from the unbiased variances and covariance.
    vx = ((x * x).sum() - n * (mx * mx)) / (n - 1)
    vy = ((y * y).sum() - n * (my * my)) / (n - 1)
    cov_xy = np.mean((x - mx) * (y - my)) * n / (n - 1)
    r_obs = cov_xy / np.sqrt(vx * vy)
    r_obs_sq = r_obs * r_obs
    disp_x = vx / mx
    disp_y = vy / my
    observed_stat = (n / (1 - r_obs_sq)) * (
        disp_x - 2 * r_obs_sq * np.sqrt(disp_x * disp_y) + disp_y
    )

    # Fitted parameters used to simulate the reference distribution.
    fitted_lam3 = mle(value).lam3
    fitted_lam1 = mx - fitted_lam3
    fitted_lam2 = my - fitted_lam3

    # One column per replicate. The shared component is drawn first, then
    # the two individual components, to keep seeded runs reproducible.
    draw_shape = (n, R)
    shared = np.random.poisson(lam=fitted_lam3, size=draw_shape)
    sim_x = np.random.poisson(lam=fitted_lam1, size=draw_shape) + shared
    sim_y = np.random.poisson(lam=fitted_lam2, size=draw_shape) + shared

    mean_x = sim_x.sum(axis=0) / n
    mean_y = sim_y.sum(axis=0) / n
    var_x = ((sim_x * sim_x).sum(axis=0) - n * mean_x * mean_x) / (n - 1)
    var_y = ((sim_y * sim_y).sum(axis=0) - n * mean_y * mean_y) / (n - 1)

    cov_sim = np.sum((sim_x - mean_x) * (sim_y - mean_y), axis=0) / (n - 1)
    r_sim = cov_sim / np.sqrt(var_x * var_y)
    r_sim_sq = r_sim * r_sim
    ratio_x = var_x / mean_x
    ratio_y = var_y / mean_y
    simulated_stats = (n / (1 - r_sim_sq)) * (
        ratio_x - 2 * r_sim_sq * np.sqrt(ratio_x * ratio_y) + ratio_y
    )

    # Monte Carlo p-value with the usual +1 correction.
    exceedances = (simulated_stats > observed_stat).sum()
    return (exceedances + 1) / (R + 1)
|
|
400
|
+
|
|
401
|
+
def lambda3_profile(value: ArrayLike) -> np.ndarray:
    """Plot the profile log-likelihood of lambda3 with a 95% confidence interval.

    The log-likelihood is evaluated on a grid of 400 lambda3 values, with
    lam1 and lam2 tied to the sample means. The interval consists of the grid
    points whose log-likelihood lies within 1.920729 of the maximum.

    Parameters
    ----------
    value : array like
        Observed count pairs (x, y). Each row is one observation.

    Returns
    -------
    numpy array
        The confidence interval for lambda3, as [lower, upper].

    Raises
    ------
    ValueError
        If ``value`` does not have shape (n, 2).

    """
    # Accept any array-like and reject bad shapes up front, as mle and gof do.
    value = np.asarray(value)
    if value.ndim != 2 or value.shape[1] != 2:
        raise ValueError(f"value must have shape (n, 2), got {value.shape}")

    x = value[..., 0]
    y = value[..., 1]

    n = x.shape[0]
    mx, my = np.mean(value, axis=0)
    sly1 = gammaln(x + 1).sum()
    sly2 = gammaln(y + 1).sum()

    z = np.minimum(x, y)
    m = int(np.max(z))
    indexes = np.arange(m + 1)[:, None]
    f2a = _log_choose(x[None, :], indexes) + _log_choose(y[None, :], indexes) + gammaln(indexes + 1)

    # Maximise the log-likelihood over lam3; the upper bound stays 0.05 below
    # min(mx, my), where the objective becomes infinite.
    llf = _make_llf(mx, my, n, sly1, sly2, z, f2a, indexes)
    res = minimize(fun=llf,
                   x0=np.cov(x, y)[0][1],
                   bounds=[(0, np.min([mx, my]) - 0.05)],
                   method='L-BFGS-B'
                   )

    llf1 = -res.fun
    lam3 = res.x[0]
    rang = np.linspace(0, min(mx, my) - 0.1, num=400, endpoint=False)

    fig, ax = plt.subplots(figsize=(6, 6))
    fig.suptitle(r'$\lambda_3$ profile likelihood')

    values_lff = np.array([-llf(a) for a in rang])
    # Half the 0.95 quantile of the chi-square distribution with 1 df.
    CHI2_95_HALF = 1.920729
    cl = llf1 - CHI2_95_HALF
    # NOTE(review): the selection is empty (and min/max raise) if no grid
    # point reaches the cut-off, e.g. when the optimum lies beyond the grid.
    inside = rang[values_lff >= cl]
    a1 = inside.min()
    a2 = inside.max()

    ax.plot(rang, values_lff, color='black', label='Loglikelihood function')
    ax.axhline(llf1, color='blue')
    ax.axvline(lam3, color='blue')
    ax.axvspan(a1, a2, alpha=0.1, color='blue', label='Confidence interval')
    ax.plot(lam3, llf1, 'o', color='blue', label='Maximum')
    ax.set_xlabel(r'Values of $\lambda_3$')
    ax.set_ylabel('Log-likelihood')

    fig.legend(loc='center left', bbox_to_anchor=(0.9, 0.5))

    return np.array([a1, a2])
|
|
460
|
+
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bivpoispy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Tools for bivariate Poisson distribution: density, sampling, MLE, and goodness-of-fit
|
|
5
|
+
Author-email: Ilyan Alexey Cortés Miranda <alexey.cortes13@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: bivpois,bivariate poisson
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: numpy>=1.20
|
|
16
|
+
Requires-Dist: scipy>=1.7
|
|
17
|
+
Requires-Dist: matplotlib>=3.5
|
|
18
|
+
Requires-Dist: numdifftools>=0.9
|
|
19
|
+
Dynamic: license-file
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/bivpoispy/__init__.py
|
|
5
|
+
src/bivpoispy/bivpoispy.py
|
|
6
|
+
src/bivpoispy.egg-info/PKG-INFO
|
|
7
|
+
src/bivpoispy.egg-info/SOURCES.txt
|
|
8
|
+
src/bivpoispy.egg-info/dependency_links.txt
|
|
9
|
+
src/bivpoispy.egg-info/requires.txt
|
|
10
|
+
src/bivpoispy.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
bivpoispy
|