copulas 0.12.4.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- copulas/__init__.py +91 -0
- copulas/bivariate/__init__.py +175 -0
- copulas/bivariate/base.py +448 -0
- copulas/bivariate/clayton.py +163 -0
- copulas/bivariate/frank.py +170 -0
- copulas/bivariate/gumbel.py +144 -0
- copulas/bivariate/independence.py +81 -0
- copulas/bivariate/utils.py +19 -0
- copulas/datasets.py +214 -0
- copulas/errors.py +5 -0
- copulas/multivariate/__init__.py +8 -0
- copulas/multivariate/base.py +200 -0
- copulas/multivariate/gaussian.py +345 -0
- copulas/multivariate/tree.py +691 -0
- copulas/multivariate/vine.py +359 -0
- copulas/optimize/__init__.py +154 -0
- copulas/univariate/__init__.py +25 -0
- copulas/univariate/base.py +661 -0
- copulas/univariate/beta.py +48 -0
- copulas/univariate/gamma.py +38 -0
- copulas/univariate/gaussian.py +27 -0
- copulas/univariate/gaussian_kde.py +192 -0
- copulas/univariate/log_laplace.py +38 -0
- copulas/univariate/selection.py +36 -0
- copulas/univariate/student_t.py +31 -0
- copulas/univariate/truncated_gaussian.py +66 -0
- copulas/univariate/uniform.py +27 -0
- copulas/utils.py +248 -0
- copulas/visualization.py +345 -0
- copulas-0.12.4.dev3.dist-info/METADATA +215 -0
- copulas-0.12.4.dev3.dist-info/RECORD +34 -0
- copulas-0.12.4.dev3.dist-info/WHEEL +5 -0
- copulas-0.12.4.dev3.dist-info/licenses/LICENSE +106 -0
- copulas-0.12.4.dev3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,691 @@
|
|
|
1
|
+
"""Multivariate trees module."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import scipy
|
|
8
|
+
|
|
9
|
+
from copulas.bivariate.base import Bivariate
|
|
10
|
+
from copulas.multivariate.base import Multivariate
|
|
11
|
+
from copulas.utils import EPSILON, get_qualified_name
|
|
12
|
+
|
|
13
|
+
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TreeTypes(Enum):
    """The available types of trees."""

    # Each member is stored as the ``tree_type`` class attribute of exactly one
    # ``Tree`` subclass in this module.
    CENTER = 0
    DIRECT = 1
    REGULAR = 2
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Tree(Multivariate):
    """Helper class to instantiate a single tree in the vine model.

    ``fit`` relies on ``_build_first_tree`` and ``_build_kth_tree``, which are
    not defined here and must be supplied by the concrete subclass.
    """

    tree_type = None
    fitted = False

    def fit(self, index, n_nodes, tau_matrix, previous_tree, edges=None):
        """Fit this tree object.

        Args:
            index (int):
                index of the tree; the tree level is ``index + 1``.
            n_nodes (int):
                number of nodes in the tree.
            tau_matrix (numpy.array):
                kendall's tau matrix of the data, shape (n_nodes, n_nodes).
            previous_tree (Tree or numpy.array):
                tree object of previous level. For the first level
                (``index == 0``) the value is stored as the U matrix instead.
            edges (list[Edge] or None):
                edges to use as they are. When a non-empty list is given no
                edge is built and ``prepare_next_tree`` is not called.
        """
        self.level = index + 1
        self.n_nodes = n_nodes
        self.tau_matrix = tau_matrix
        self.previous_tree = previous_tree
        self.edges = edges or []

        if not self.edges:
            if self.level == 1:
                # On the first level ``previous_tree`` carries the U matrix.
                self.u_matrix = previous_tree
                self._build_first_tree()

            else:
                self._build_kth_tree()

            self.prepare_next_tree()

        self.fitted = True

    def _check_constraint(self, edge1, edge2):
        """Check if two edges satisfy vine constraint.

        The nodes and dependence sets of both edges are merged; the pair is
        accepted when the merged set has exactly ``self.level + 1`` elements.

        Args:
            edge1 (Edge):
                edge object representing edge1
            edge2 (Edge):
                edge object representing edge2

        Returns:
            bool:
                True if the two edges satisfy vine constraints
        """
        full_node = {edge1.L, edge1.R, edge2.L, edge2.R}
        full_node.update(edge1.D)
        full_node.update(edge2.D)
        return len(full_node) == (self.level + 1)

    def _get_constraints(self):
        """Get neighboring edges for each edge in the edges.

        The position of every adjacent edge is appended to ``edge.neighbors``.
        Existing entries are kept, so calling this twice records them twice.
        """
        for k, edge in enumerate(self.edges):
            for i, other in enumerate(self.edges):
                # add to constraints if i shared an edge with k
                if k != i and edge.is_adjacent(other):
                    edge.neighbors.append(i)

    def _sort_tau_by_y(self, y):
        """Sort tau matrix by dependence with variable y.

        Args:
            y (int):
                index of variable of interest

        Returns:
            numpy.ndarray:
                array of shape (n_nodes, 3) whose columns are the variable
                index, its tau with ``y`` and the absolute value of that tau.
                Rows are ordered by decreasing absolute tau. NaN values,
                including the entry of ``y`` itself, are replaced by -10.
        """
        # Work on a copy: slicing a column returns a view, and blanking the
        # self-dependence in a view would write NaN into ``self.tau_matrix``.
        tau_y = self.tau_matrix[:, y].copy()
        tau_y[y] = np.nan

        temp = np.empty([self.n_nodes, 3])
        temp[:, 0] = np.arange(self.n_nodes)
        temp[:, 1] = tau_y
        temp[:, 2] = abs(tau_y)
        # -10 is below any absolute value, so these rows sort last.
        temp[np.isnan(temp)] = -10
        order = temp[:, 2].argsort()[::-1]

        return temp[order]

    def get_tau_matrix(self):
        """Get tau matrix for adjacent pairs.

        For every edge with at least one neighbor, kendall's tau of the edge's
        two univariate samples is computed once and written to ``tau[i, j]``
        for each neighbor ``j``. All other cells are zero.

        Returns:
            tau (numpy.ndarray):
                tau matrix for the current tree
        """
        num_edges = len(self.edges)
        # Zero-initialised so that cells of non-adjacent pairs are well defined
        # instead of holding whatever was in memory.
        tau = np.zeros([num_edges, num_edges])

        for i, edge in enumerate(self.edges):
            if not edge.neighbors:
                continue

            if self.level == 1:
                left_u = self.u_matrix[:, edge.L]
                right_u = self.u_matrix[:, edge.R]

            else:
                left_parent, right_parent = edge.parents
                left_u, right_u = Edge.get_conditional_uni(left_parent, right_parent)

            # The samples do not depend on the neighbor, so tau is computed once.
            edge_tau, _pvalue = scipy.stats.kendalltau(left_u, right_u)
            for j in edge.neighbors:
                tau[i, j] = edge_tau

        return tau

    def get_adjacent_matrix(self):
        """Get adjacency matrix.

        Returns:
            numpy.ndarray:
                symmetric matrix with one more row and column than there are
                edges, holding 1 at ``[L, R]`` and ``[R, L]`` of every edge.
        """
        size = len(self.edges) + 1
        adj = np.zeros([size, size])

        for edge in self.edges:
            adj[edge.L, edge.R] = 1
            adj[edge.R, edge.L] = 1

        return adj

    def prepare_next_tree(self):
        """Prepare conditional U matrix for next tree.

        For each edge the two partial derivatives of its copula are evaluated
        and stored in ``edge.U`` as ``[left_given_right, right_given_left]``.
        """
        for edge in self.edges:
            if self.level == 1:
                left_u = self.u_matrix[:, edge.L]
                right_u = self.u_matrix[:, edge.R]

            else:
                left_parent, right_parent = edge.parents
                left_u, right_u = Edge.get_conditional_uni(left_parent, right_parent)

            # compute conditional cdfs C(i|j) = dC(i,j)/duj and dC(i,j)/du
            left_u = [x for x in left_u if x is not None]
            right_u = [x for x in right_u if x is not None]
            X_left_right = np.array([[x, y] for x, y in zip(left_u, right_u)])
            X_right_left = np.array([[x, y] for x, y in zip(right_u, left_u)])

            copula = Bivariate(copula_type=edge.name)
            copula.theta = edge.theta
            left_given_right = copula.partial_derivative(X_left_right)
            right_given_left = copula.partial_derivative(X_right_left)

            # correction of 0 or 1: move exact boundary values inside (0, 1)
            left_given_right[left_given_right == 0] = EPSILON
            right_given_left[right_given_left == 0] = EPSILON
            left_given_right[left_given_right == 1] = 1 - EPSILON
            right_given_left[right_given_left == 1] = 1 - EPSILON
            edge.U = np.array([left_given_right, right_given_left])

    def get_likelihood(self, uni_matrix):
        """Compute likelihood of the tree given an U matrix.

        Args:
            uni_matrix (numpy.array):
                univariate matrix to evaluate likelihood on.

        Returns:
            tuple[float, numpy.array]:
                sum of the log of every edge likelihood, and the next level
                conditional univariate matrix. Only the ``[L, R]`` and
                ``[R, L]`` cells of the edges are written in that matrix; the
                remaining cells are left uninitialised.
        """
        uni_dim = uni_matrix.shape[1]
        values = np.zeros([1, len(self.edges)])
        new_uni_matrix = np.empty([uni_dim, uni_dim])

        for i, edge in enumerate(self.edges):
            value, left_u, right_u = edge.get_likelihood(uni_matrix)
            new_uni_matrix[edge.L, edge.R] = left_u.item()
            new_uni_matrix[edge.R, edge.L] = right_u.item()
            values[0, i] = np.log(value)

        return np.sum(values), new_uni_matrix

    def __str__(self):
        """Produce printable representation of the class."""
        template = 'L:{} R:{} D:{} Copula:{} Theta:{}'
        return '\n'.join([
            template.format(edge.L, edge.R, edge.D, edge.name, edge.theta) for edge in self.edges
        ])

    def _serialize_previous_tree(self):
        """Return ``previous_tree`` as a list on the first level, else None."""
        if self.level == 1:
            return self.previous_tree.tolist()

        return None

    @classmethod
    def _deserialize_previous_tree(cls, tree_dict, previous):
        """Rebuild ``previous_tree`` from the dict on level 1, else use ``previous``."""
        if tree_dict['level'] == 1:
            return np.array(tree_dict['previous_tree'])

        return previous

    def to_dict(self):
        """Return a `dict` with the parameters to replicate this Tree.

        Returns:
            dict:
                Parameters of this Tree. An unfitted tree only reports
                ``tree_type``, ``type`` and ``fitted``.
        """
        fitted = self.fitted
        result = {'tree_type': self.tree_type, 'type': get_qualified_name(self), 'fitted': fitted}

        if not fitted:
            return result

        result.update({
            'level': self.level,
            'n_nodes': self.n_nodes,
            'tau_matrix': self.tau_matrix.tolist(),
            'previous_tree': self._serialize_previous_tree(),
            'edges': [edge.to_dict() for edge in self.edges],
        })

        return result

    @classmethod
    def from_dict(cls, tree_dict, previous=None):
        """Create a new instance from a parameters dictionary.

        Args:
            tree_dict (dict):
                Parameters of the Tree, in the same format as the one
                returned by the ``to_dict`` method.
            previous (Tree or None):
                Tree of the previous level. It is used as ``previous_tree``
                unless ``tree_dict['level']`` is 1.

        Returns:
            Tree:
                Instance of the tree defined on the parameters.
        """
        instance = get_tree(tree_dict['tree_type'])

        fitted = tree_dict['fitted']
        instance.fitted = fitted
        if fitted:
            instance.level = tree_dict['level']
            instance.n_nodes = tree_dict['n_nodes']
            instance.tau_matrix = np.array(tree_dict['tau_matrix'])
            instance.previous_tree = cls._deserialize_previous_tree(tree_dict, previous)
            instance.edges = [Edge.from_dict(edge) for edge in tree_dict['edges']]

        return instance
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class CenterTree(Tree):
    """Tree for a C-vine copula."""

    tree_type = TreeTypes.CENTER

    def _build_first_tree(self):
        """Build first level tree.

        Variable 0 is paired with every other variable, strongest absolute
        tau first, and a copula is selected for each pair.
        """
        ranking = self._sort_tau_by_y(0)
        for position, row in enumerate(ranking[: self.n_nodes - 1]):
            partner = int(row[0])
            selected = Bivariate.select_copula(self.u_matrix[:, (0, partner)])

            edge = Edge(position, 0, partner, selected.copula_type, selected.theta)
            edge.tau = self.tau_matrix[0, partner]
            self.edges.append(edge)

    def _build_kth_tree(self):
        """Build k-th level tree.

        The anchor edge of the previous tree is combined with the other
        edges, strongest absolute tau first, to create the child edges.
        """
        anchor = self.get_anchor()
        ranking = self._sort_tau_by_y(anchor)
        parent_edges = self.previous_tree.edges

        for position, row in enumerate(ranking[: self.n_nodes - 1]):
            candidates = [parent_edges[anchor], parent_edges[int(row[0])]]
            left_parent, right_parent = Edge.sort_edge(candidates)
            child = Edge.get_child_edge(position, left_parent, right_parent)
            child.tau = row[1]
            self.edges.append(child)

    def get_anchor(self):
        """Find anchor variable with highest sum of dependence with the rest.

        Returns:
            int:
                Anchor variable.
        """
        # NOTE(review): the summed dependences are computed but never ranked;
        # the first row is returned, so the anchor is always 0.
        scores = np.empty([self.n_nodes, 2])
        scores[:, 0] = np.arange(self.n_nodes, dtype=int)
        scores[:, 1] = np.sum(abs(self.tau_matrix), 1)
        return int(scores[0, 0])
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
class DirectTree(Tree):
    """DirectTree class.

    The first level chains the variables into a single path: every edge links
    two consecutive entries of that path.
    """

    tree_type = TreeTypes.DIRECT

    def _build_first_tree(self):
        """Build first level tree.

        The path starts as ``[a, 0, b]`` where ``a`` and ``b`` are the two
        variables with the highest absolute tau with variable 0. It is then
        extended at whichever end has the larger remaining tau, and a copula
        is selected for every pair of consecutive variables.
        """
        # find the pair of maximum tau
        tau_sorted = self._sort_tau_by_y(0)
        left_ind = tau_sorted[0, 0]
        right_ind = tau_sorted[1, 0]
        T1 = np.array([left_ind, 0, right_ind]).astype(int)
        tau_T1 = tau_sorted[:2, 1]

        # Selected variables are masked out below; use a private copy so the
        # tau matrix handed over by the caller is left untouched.
        tau_matrix = self.tau_matrix.copy()

        # replace tau matrix of the selected variables as a negative number
        tau_matrix[:, T1] = -10
        for _ in range(2, self.n_nodes - 1):
            left_row = tau_matrix[T1[0], :]
            right_row = tau_matrix[T1[-1], :]
            left = np.argmax(left_row)
            right = np.argmax(right_row)
            valL = left_row[left]
            valR = right_row[right]

            if valL > valR:
                # add nodes to the left
                T1 = np.append(int(left), T1)
                tau_T1 = np.append(valL, tau_T1)
                tau_matrix[:, left] = -10

            else:
                # add node to the right
                T1 = np.append(T1, int(right))
                tau_T1 = np.append(tau_T1, valR)
                tau_matrix[:, right] = -10

        for k in range(self.n_nodes - 1):
            copula = Bivariate.select_copula(self.u_matrix[:, (T1[k], T1[k + 1])])
            name, theta = copula.copula_type, copula.theta

            # Edges always store the smaller node index on the left.
            left, right = sorted([T1[k], T1[k + 1]])
            new_edge = Edge(k, left, right, name, theta)
            new_edge.tau = tau_T1[k]
            self.edges.append(new_edge)

    def _build_kth_tree(self):
        """Build k-th level tree by joining consecutive edges of the previous tree."""
        edges = self.previous_tree.edges
        for k in range(self.n_nodes - 1):
            left_parent, right_parent = Edge.sort_edge([edges[k], edges[k + 1]])
            new_edge = Edge.get_child_edge(k, left_parent, right_parent)
            new_edge.tau = self.tau_matrix[k, k + 1]
            self.edges.append(new_edge)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
class RegularTree(Tree):
    """RegularTree class.

    Edges are chosen greedily with Prim's algorithm, using the negated
    absolute tau as the edge weight.
    """

    tree_type = TreeTypes.REGULAR

    def _build_first_tree(self):
        """Build the first tree with n-1 variable."""
        # Prim's algorithm
        neg_tau = -1.0 * abs(self.tau_matrix)
        X = {0}

        while len(X) != self.n_nodes:
            adj_set = set()
            for x in X:
                for k in range(self.n_nodes):
                    if k not in X and k != x:
                        adj_set.add((x, k))  # noqa: PD005

            # find edge with maximum
            edge = sorted(adj_set, key=lambda e: neg_tau[e[0]][e[1]])[0]
            copula = Bivariate.select_copula(self.u_matrix[:, (edge[0], edge[1])])
            name, theta = copula.copula_type, copula.theta

            left, right = sorted([edge[0], edge[1]])
            new_edge = Edge(len(X) - 1, left, right, name, theta)
            new_edge.tau = self.tau_matrix[edge[0], edge[1]]
            self.edges.append(new_edge)
            X.add(edge[1])  # noqa: PD005

    def _build_kth_tree(self):
        """Build tree for level k.

        Nodes of this tree are the edges of the previous tree. A pair is only
        a candidate when it satisfies the vine constraint. When no candidate
        is left, one unvisited node is marked as visited without an edge so
        that the search can go on from it.
        """
        neg_tau = -1.0 * abs(self.tau_matrix)
        edges = self.previous_tree.edges
        visited = {0}
        # Node 0 is the starting point, so it is never part of ``unvisited``.
        unvisited = set(range(1, self.n_nodes))

        while unvisited:
            adj_set = set()
            for x in visited:
                for k in unvisited:
                    # check if (x,k) is a valid edge in the vine
                    if self._check_constraint(edges[x], edges[k]):
                        adj_set.add((x, k))  # noqa: PD005

            if not adj_set:
                # Nothing reachable: move one pending node over. It has to be
                # removed from ``unvisited`` as well, otherwise the same node
                # is picked again and the loop never terminates.
                orphan = min(unvisited)
                visited.add(orphan)  # noqa: PD005
                unvisited.remove(orphan)
                continue

            # find edge with maximum tau
            pairs = sorted(adj_set, key=lambda e: neg_tau[e[0]][e[1]])[0]
            left_parent, right_parent = Edge.sort_edge([edges[pairs[0]], edges[pairs[1]]])

            new_edge = Edge.get_child_edge(len(visited) - 1, left_parent, right_parent)
            new_edge.tau = self.tau_matrix[pairs[0], pairs[1]]
            self.edges.append(new_edge)

            visited.add(pairs[1])  # noqa: PD005
            unvisited.remove(pairs[1])
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def get_tree(tree_type):
    """Get a Tree instance of the specified type.

    Args:
        tree_type (str or TreeTypes):
            Type of tree of which to get an instance. A string is matched
            against the ``TreeTypes`` member names ignoring case.

    Returns:
        Tree:
            Instance of a Tree of the specified type.

    Raises:
        ValueError:
            If ``tree_type`` is neither a ``TreeTypes`` member nor the name
            of one.
    """
    if not isinstance(tree_type, TreeTypes):
        is_member_name = isinstance(tree_type, str) and tree_type.upper() in TreeTypes.__members__
        if not is_member_name:
            raise ValueError(f'Invalid tree type {tree_type}')

        tree_type = TreeTypes[tree_type.upper()]

    tree_classes = {
        TreeTypes.CENTER: CenterTree,
        TreeTypes.REGULAR: RegularTree,
        TreeTypes.DIRECT: DirectTree,
    }
    tree_class = tree_classes.get(tree_type)
    if tree_class is None:
        return None

    return tree_class()
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
class Edge(object):
    """Represents an edge in the copula."""

    def __init__(self, index, left, right, copula_name, copula_theta):
        """Initialize an Edge object.

        Args:
            index (int):
                index of the edge inside its tree
            left (int):
                left_node index (smaller)
            right (int):
                right_node index (larger)
            copula_name (str):
                name of the fitted copula class
            copula_theta (float):
                parameters of the fitted copula class
        """
        self.index = index
        self.L = left
        self.R = right
        self.D = set()  # dependence_set
        self.parents = None
        self.neighbors = []

        self.name = copula_name
        self.theta = copula_theta
        self.tau = None
        self.U = None
        self.likelihood = None

    @staticmethod
    def _identify_eds_ing(first, second):
        """Find nodes connecting adjacent edges.

        Args:
            first (Edge):
                Edge object representing the first edge.
            second (Edge):
                Edge object representing the second edge.

        Returns:
            tuple[int, int, set[int]]:
                The first two values represent left and right node
                indices of the new edge. The third value is the new dependence set.

        Raises:
            ValueError:
                If the two edges do not differ in exactly two nodes.
        """
        A = {first.L, first.R}
        A.update(first.D)

        B = {second.L, second.R}
        B.update(second.D)

        # Shared nodes become the dependence set; the two nodes that belong
        # to only one of the edges become the ends of the new edge.
        depend_set = A & B
        left, right = sorted(A ^ B)

        return left, right, depend_set

    def is_adjacent(self, another_edge):
        """Check if two edges are adjacent.

        Only the ``L`` and ``R`` nodes are compared; dependence sets are not.

        Args:
            another_edge (Edge):
                edge object of another edge

        Returns:
            bool:
                True if the two edges are adjacent.
        """
        return (
            self.L == another_edge.L
            or self.L == another_edge.R
            or self.R == another_edge.L
            or self.R == another_edge.R
        )

    @staticmethod
    def sort_edge(edges):
        """Sort iterable of edges first by left node indices then right.

        Args:
            edges (list[Edge]):
                List of edges to be sorted.

        Returns:
            list[Edge]:
                Sorted list by left and right node indices.
        """
        return sorted(edges, key=lambda x: (x.L, x.R))

    @classmethod
    def get_conditional_uni(cls, left_parent, right_parent):
        """Identify pair univariate value from parents.

        Args:
            left_parent (Edge):
                left parent
            right_parent (Edge):
                right parent

        Returns:
            tuple[np.ndarray, np.ndarray]:
                left and right parents univariate.
        """
        left, right, _ = cls._identify_eds_ing(left_parent, right_parent)

        # ``U[0]`` is used when the wanted node is the ``L`` node of the
        # parent, ``U[1]`` otherwise.
        left_u = left_parent.U[0] if left_parent.L == left else left_parent.U[1]
        right_u = right_parent.U[0] if right_parent.L == right else right_parent.U[1]

        return left_u, right_u

    @classmethod
    def get_child_edge(cls, index, left_parent, right_parent):
        """Construct a child edge from two parent edges.

        A copula is selected on the conditional univariates of the parents.

        Args:
            index (int):
                Index of the new Edge.
            left_parent (Edge):
                Left parent
            right_parent (Edge):
                Right parent

        Returns:
            Edge:
                The new child edge.
        """
        [ed1, ed2, depend_set] = cls._identify_eds_ing(left_parent, right_parent)
        left_u, right_u = cls.get_conditional_uni(left_parent, right_parent)
        X = np.array([[x, y] for x, y in zip(left_u, right_u)])
        copula = Bivariate.select_copula(X)
        name, theta = copula.copula_type, copula.theta
        new_edge = cls(index, ed1, ed2, name, theta)
        new_edge.D = depend_set
        new_edge.parents = [left_parent, right_parent]
        return new_edge

    def get_likelihood(self, uni_matrix):
        """Compute likelihood given a U matrix.

        Args:
            uni_matrix (numpy.array):
                Matrix to compute the likelihood.

        Return:
            tuple (np.ndarray, np.ndarray, np.array):
                likelihood and conditional values.
        """
        if self.parents is None:
            # First-level edge: read the columns of both nodes.
            left_u = uni_matrix[:, self.L]
            right_u = uni_matrix[:, self.R]

        else:
            # Child edge: pick the node of the dependence set that each
            # parent does not already condition on.
            left_ing = list(self.D - self.parents[0].D)[0]
            right_ing = list(self.D - self.parents[1].D)[0]
            left_u = uni_matrix[self.L, left_ing]
            right_u = uni_matrix[self.R, right_ing]

        copula = Bivariate(copula_type=self.name)
        copula.theta = self.theta

        X_left_right = np.array([[left_u, right_u]])
        X_right_left = np.array([[right_u, left_u]])

        value = np.sum(copula.probability_density(X_left_right))
        left_given_right = copula.partial_derivative(X_left_right)
        right_given_left = copula.partial_derivative(X_right_left)

        return value, left_given_right, right_given_left

    def to_dict(self):
        """Return a `dict` with the parameters to replicate this Edge.

        ``D`` and ``neighbors`` are returned as the same objects held by the
        edge, not as copies.

        Returns:
            dict:
                Parameters of this Edge.
        """
        parents = None
        if self.parents:
            parents = [parent.to_dict() for parent in self.parents]

        U = None
        if self.U is not None:
            U = self.U.tolist()

        return {
            'index': self.index,
            'L': self.L,
            'R': self.R,
            'D': self.D,
            'parents': parents,
            'neighbors': self.neighbors,
            'name': self.name,
            'theta': self.theta,
            'tau': self.tau,
            'U': U,
            'likelihood': self.likelihood,
        }

    @classmethod
    def from_dict(cls, edge_dict):
        """Create a new instance from a parameters dictionary.

        Args:
            edge_dict (dict):
                Parameters of the Edge, in the same format as the one
                returned by the ``to_dict`` method.

        Returns:
            Edge:
                Instance of the edge defined on the parameters.
        """
        instance = cls(
            edge_dict['index'],
            edge_dict['L'],
            edge_dict['R'],
            edge_dict['name'],
            edge_dict['theta'],
        )

        # ``to_dict`` emits ``None`` when no U was computed. Keep it as
        # ``None``: ``np.array(None)`` would create a 0-d object array that
        # no longer compares as ``None``.
        serialized_u = edge_dict['U']
        instance.U = None if serialized_u is None else np.array(serialized_u)

        parents = edge_dict['parents']
        if parents:
            instance.parents = [cls.from_dict(parent) for parent in parents]

        regular_attributes = ['D', 'tau', 'likelihood', 'neighbors']
        for key in regular_attributes:
            setattr(instance, key, edge_dict[key])

        return instance
|