copulas 0.10.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of copulas might be problematic. Click here for more details.
- copulas/__init__.py +332 -0
- copulas/bivariate/__init__.py +175 -0
- copulas/bivariate/base.py +449 -0
- copulas/bivariate/clayton.py +162 -0
- copulas/bivariate/frank.py +169 -0
- copulas/bivariate/gumbel.py +144 -0
- copulas/bivariate/independence.py +81 -0
- copulas/bivariate/utils.py +19 -0
- copulas/datasets.py +221 -0
- copulas/multivariate/__init__.py +14 -0
- copulas/multivariate/base.py +199 -0
- copulas/multivariate/gaussian.py +314 -0
- copulas/multivariate/tree.py +693 -0
- copulas/multivariate/vine.py +356 -0
- copulas/optimize/__init__.py +153 -0
- copulas/univariate/__init__.py +25 -0
- copulas/univariate/base.py +650 -0
- copulas/univariate/beta.py +42 -0
- copulas/univariate/gamma.py +38 -0
- copulas/univariate/gaussian.py +33 -0
- copulas/univariate/gaussian_kde.py +193 -0
- copulas/univariate/log_laplace.py +38 -0
- copulas/univariate/selection.py +36 -0
- copulas/univariate/student_t.py +35 -0
- copulas/univariate/truncated_gaussian.py +74 -0
- copulas/univariate/uniform.py +33 -0
- copulas/visualization.py +350 -0
- copulas-0.10.1.dev0.dist-info/LICENSE +106 -0
- copulas-0.10.1.dev0.dist-info/METADATA +223 -0
- copulas-0.10.1.dev0.dist-info/RECORD +32 -0
- copulas-0.10.1.dev0.dist-info/WHEEL +5 -0
- copulas-0.10.1.dev0.dist-info/top_level.txt +1 -0
copulas/visualization.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
"""Visualization utilities for the Copulas library."""
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import plotly.express as px
|
|
5
|
+
import plotly.figure_factory as ff
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class PlotConfig:
    """Shared colors and font size used by the plotting helpers in this module."""

    # Hex color applied to the single/real data series.
    DATACEBO_DARK = '#000036'
    # Hex color applied to the synthetic data series.
    DATACEBO_GREEN = '#01E0C9'
    # Hex color passed as ``plot_bgcolor`` to every figure layout.
    BACKGROUND_COLOR = '#F5F5F8'
    # Font size passed to every figure layout.
    FONT_SIZE = 18
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _generate_1d_plot(data, title, labels, colors):
    """Build a density (distplot) figure for one or more data series.

    The histogram and rug layers are disabled, so only the density curves
    are drawn. Each curve is filled down to the x axis and given a hover
    template that shows its label.

    Args:
        data (array-like structure):
            The series to plot, one entry per label.
        title (str):
            The title of the plot.
        labels (list[str]):
            The label of each series. The legend is shown only when the
            first label is truthy.
        colors (list[str]):
            The color of each series.

    Returns:
        plotly.graph_objects._figure.Figure
    """
    figure = ff.create_distplot(
        hist_data=data,
        group_labels=labels,
        show_hist=False,
        show_rug=False,
        colors=colors,
    )

    for index, label in enumerate(labels):
        hover_text = f'<b>{label}</b><br>Frequency: %{{y}}<extra></extra>'
        # Only the trace whose name matches this label is updated.
        figure.update_traces(
            x=figure.data[index].x,
            hovertemplate=hover_text,
            selector={'name': label},
            fill='tozeroy',
        )

    layout_options = {
        'title': title,
        'plot_bgcolor': PlotConfig.BACKGROUND_COLOR,
        'font': {'size': PlotConfig.FONT_SIZE},
        'showlegend': bool(labels[0]),
        'xaxis_title': 'value',
        'yaxis_title': 'frequency',
    }
    figure.update_layout(**layout_options)

    return figure
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def dist_1d(data, title=None, label=None):
    """Plot the density of a single 1 dimensional data series.

    When no title is given, a default of ``'Data'`` is used, extended with
    the first column name of a ``pandas.DataFrame`` or the name of a named
    ``pandas.Series``.

    Args:
        data (array_like structure):
            The table data.
        title (str):
            The title of the plot.
        label (str):
            The label of the plot.

    Returns:
        plotly.graph_objects._figure.Figure
    """
    if not title:
        if isinstance(data, pd.DataFrame):
            suffix = f" for column '{data.columns[0]}'"
        elif isinstance(data, pd.Series) and data.name:
            suffix = f" for column '{data.name}'"
        else:
            suffix = ''

        title = 'Data' + suffix

    return _generate_1d_plot(
        data=[data],
        title=title,
        labels=[label],
        colors=[PlotConfig.DATACEBO_DARK],
    )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def compare_1d(real, synth, title=None):
    """Plot the densities of real and synthetic data on the same figure.

    When no title is given, a default of ``'Real vs. Synthetic Data'`` is
    used, extended with the first column name of ``real`` if it is a
    ``pandas.DataFrame`` or with its name if it is a named ``pandas.Series``.

    Args:
        real (array_like):
            The real data.
        synth (array_like):
            The synthetic data.
        title (str):
            The title of the plot.

    Returns:
        plotly.graph_objects._figure.Figure
    """
    if not title:
        if isinstance(real, pd.DataFrame):
            suffix = f" for column '{real.columns[0]}'"
        elif isinstance(real, pd.Series) and real.name:
            suffix = f" for column '{real.name}'"
        else:
            suffix = ''

        title = 'Real vs. Synthetic Data' + suffix

    series = [real, synth]
    palette = [PlotConfig.DATACEBO_DARK, PlotConfig.DATACEBO_GREEN]

    return _generate_1d_plot(
        data=series,
        title=title,
        labels=['Real', 'Synthetic'],
        colors=palette,
    )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _generate_scatter_2d_plot(data, columns, color_discrete_map, title):
|
|
120
|
+
"""Generate a scatter plot for a pair of columns.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
data (pandas.DataFrame):
|
|
124
|
+
The data for the desired column pair containing a
|
|
125
|
+
``Data`` column indicating whether it is real or synthetic.
|
|
126
|
+
columns (list):
|
|
127
|
+
A list of the columns being plotted.
|
|
128
|
+
color_discrete_map (dict):
|
|
129
|
+
A dictionary mapping the values of the ``Data`` column to the colors
|
|
130
|
+
used to plot them.
|
|
131
|
+
title (str):
|
|
132
|
+
The title of the plot.
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
plotly.graph_objects._figure.Figure
|
|
136
|
+
"""
|
|
137
|
+
if columns:
|
|
138
|
+
columns.append('Data')
|
|
139
|
+
else:
|
|
140
|
+
columns = data.columns
|
|
141
|
+
|
|
142
|
+
if len(columns) != 3: # includes the 'Data' column
|
|
143
|
+
raise ValueError('Only 2 columns can be plotted')
|
|
144
|
+
|
|
145
|
+
fig = px.scatter(
|
|
146
|
+
data,
|
|
147
|
+
x=columns[0],
|
|
148
|
+
y=columns[1],
|
|
149
|
+
color='Data',
|
|
150
|
+
color_discrete_map=color_discrete_map,
|
|
151
|
+
symbol='Data'
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
fig.update_layout(
|
|
155
|
+
title=title,
|
|
156
|
+
plot_bgcolor=PlotConfig.BACKGROUND_COLOR,
|
|
157
|
+
font={'size': PlotConfig.FONT_SIZE},
|
|
158
|
+
showlegend=False if len(color_discrete_map) == 1 else True,
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
return fig
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def scatter_2d(data, columns=None, title=None):
    """Plot 2 dimensional data in a scatter plot.

    The input is copied and tagged with a ``Data`` column set to ``'Real'``
    before plotting. When no title is given, a default one is built from the
    requested columns, or from the first two columns of the tagged table.

    Args:
        data (pandas.DataFrame):
            The table data.
        columns (list[string]):
            The names of the two columns to plot.
        title (str):
            The title of the plot.

    Returns:
        plotly.graph_objects._figure.Figure
    """
    data = data.copy()
    data['Data'] = 'Real'

    if not title:
        # Pick where the column names shown in the default title come from.
        name_source = None
        if columns:
            name_source = columns
        elif isinstance(data, pd.DataFrame):
            name_source = data.columns

        title = 'Data'
        if name_source is not None:
            title = title + f" for columns '{name_source[0]}' and '{name_source[1]}'"

    palette = {'Real': PlotConfig.DATACEBO_DARK}

    return _generate_scatter_2d_plot(
        data=data,
        columns=columns,
        color_discrete_map=palette,
        title=title,
    )
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def compare_2d(real, synth, columns=None, title=None):
    """Plot the comparison between real and synthetic data for a given column pair.

    Both inputs are copied, tagged with a ``Data`` column (``'Real'`` or
    ``'Synthetic'``) and concatenated into a single table before plotting.

    Args:
        real (pandas.DataFrame):
            The real table data.
        synth (pandas.DataFrame):
            The synthetic table data.
        columns (list[string]):
            The names of the two columns to plot.
        title (str):
            The title of the plot.

    Returns:
        plotly.graph_objects._figure.Figure
    """
    real = real.copy()
    synth = synth.copy()
    real['Data'] = 'Real'
    synth['Data'] = 'Synthetic'
    data = pd.concat([real, synth], axis=0, ignore_index=True)

    if not title:
        # Pick where the column names shown in the default title come from.
        name_source = None
        if columns:
            name_source = columns
        elif isinstance(data, pd.DataFrame):
            name_source = data.columns

        title = 'Real vs. Synthetic Data'
        if name_source is not None:
            title = title + f" for columns '{name_source[0]}' and '{name_source[1]}'"

    palette = {
        'Real': PlotConfig.DATACEBO_DARK,
        'Synthetic': PlotConfig.DATACEBO_GREEN,
    }

    return _generate_scatter_2d_plot(
        data=data,
        columns=columns,
        color_discrete_map=palette,
        title=title,
    )
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _generate_scatter_3d_plot(data, columns, color_discrete_map, title):
|
|
236
|
+
"""Generate a scatter plot for column pair plot.
|
|
237
|
+
|
|
238
|
+
Args:
|
|
239
|
+
data (pandas.DataFrame):
|
|
240
|
+
The data for the desired three columns containing a
|
|
241
|
+
``Data`` column that indicates whether it is real or synthetic.
|
|
242
|
+
columns (list):
|
|
243
|
+
A list of the columns being plotted.
|
|
244
|
+
color_discrete_map (dict):
|
|
245
|
+
A dictionary mapping the values of the ``Data`` column to the colors
|
|
246
|
+
used to plot them.
|
|
247
|
+
title (str):
|
|
248
|
+
The title of the plot.
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
plotly.graph_objects._figure.Figure
|
|
252
|
+
"""
|
|
253
|
+
if columns:
|
|
254
|
+
columns.append('Data')
|
|
255
|
+
else:
|
|
256
|
+
columns = data.columns
|
|
257
|
+
|
|
258
|
+
if len(columns) != 4: # includes the 'Data' column
|
|
259
|
+
raise ValueError('Only 3 columns can be plotted')
|
|
260
|
+
|
|
261
|
+
fig = px.scatter_3d(
|
|
262
|
+
data,
|
|
263
|
+
x=columns[0],
|
|
264
|
+
y=columns[1],
|
|
265
|
+
z=columns[2],
|
|
266
|
+
color='Data',
|
|
267
|
+
color_discrete_map=color_discrete_map,
|
|
268
|
+
symbol='Data',
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
fig.update_traces(marker={'size': 5})
|
|
272
|
+
|
|
273
|
+
fig.update_layout(
|
|
274
|
+
title=title,
|
|
275
|
+
plot_bgcolor=PlotConfig.BACKGROUND_COLOR,
|
|
276
|
+
font={'size': PlotConfig.FONT_SIZE},
|
|
277
|
+
showlegend=False if len(color_discrete_map) == 1 else True,
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
return fig
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def scatter_3d(data, columns=None, title=None):
    """Plot 3 dimensional data in a scatter plot.

    The input is copied and tagged with a ``Data`` column set to ``'Real'``
    before plotting. When no title is given, a default one is built from the
    requested columns, or from the first three columns of the tagged table.

    Args:
        data (pandas.DataFrame):
            The table data. Must have at least 3 columns.
        columns (list[string]):
            The names of the three columns to plot.
        title (str):
            The title of the plot.

    Returns:
        plotly.graph_objects._figure.Figure
    """
    data = data.copy()
    data['Data'] = 'Real'

    if not title:
        # Pick where the column names shown in the default title come from.
        name_source = None
        if columns:
            name_source = columns
        elif isinstance(data, pd.DataFrame):
            name_source = data.columns

        title = 'Data'
        if name_source is not None:
            first, second, third = name_source[0], name_source[1], name_source[2]
            title = title + f" for columns '{first}', '{second}' and '{third}'"

    palette = {'Real': PlotConfig.DATACEBO_DARK}

    return _generate_scatter_3d_plot(
        data=data,
        columns=columns,
        color_discrete_map=palette,
        title=title,
    )
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def compare_3d(real, synth, columns=None, title=None):
    """Plot the comparison between real and synthetic data for a given column triplet.

    Both inputs are copied, tagged with a ``Data`` column (``'Real'`` or
    ``'Synthetic'``) and concatenated into a single table before plotting.

    Args:
        real (pd.DataFrame):
            The real data.
        synth (pd.DataFrame):
            The synthetic data.
        columns (list):
            The names of the three columns to plot.
        title (str):
            The title of the plot.

    Returns:
        plotly.graph_objects._figure.Figure
    """
    real = real.copy()
    synth = synth.copy()
    real['Data'] = 'Real'
    synth['Data'] = 'Synthetic'
    data = pd.concat([real, synth], axis=0, ignore_index=True)

    if not title:
        # Pick where the column names shown in the default title come from.
        name_source = None
        if columns:
            name_source = columns
        elif isinstance(data, pd.DataFrame):
            name_source = data.columns

        title = 'Real vs. Synthetic Data'
        if name_source is not None:
            first, second, third = name_source[0], name_source[1], name_source[2]
            title = title + f" for columns '{first}', '{second}' and '{third}'"

    palette = {
        'Real': PlotConfig.DATACEBO_DARK,
        'Synthetic': PlotConfig.DATACEBO_GREEN,
    }

    return _generate_scatter_3d_plot(
        data=data,
        columns=columns,
        color_discrete_map=palette,
        title=title,
    )
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
Business Source License 1.1
|
|
2
|
+
|
|
3
|
+
Parameters
|
|
4
|
+
|
|
5
|
+
Licensor: DataCebo, Inc.
|
|
6
|
+
|
|
7
|
+
Licensed Work: Copulas
|
|
8
|
+
The Licensed Work is (c) DataCebo, Inc.
|
|
9
|
+
|
|
10
|
+
Additional Use Grant: You may make use of the Licensed Work, and derivatives of the Licensed
|
|
11
|
+
Work, provided that you do not use the Licensed Work, or derivatives of
|
|
12
|
+
the Licensed Work, for a Synthetic Data Creation Service.
|
|
13
|
+
|
|
14
|
+
A "Synthetic Data Creation Service" is a commercial offering
|
|
15
|
+
that allows third parties (other than your employees and
|
|
16
|
+
contractors) to access the functionality of the Licensed
|
|
17
|
+
Work so that such third parties directly benefit from the
|
|
18
|
+
data processing, machine learning or synthetic data creation
|
|
19
|
+
features of the Licensed Work.
|
|
20
|
+
|
|
21
|
+
Change Date: Change date is four years from release date.
|
|
22
|
+
Please see https://github.com/sdv-dev/Copulas/releases
|
|
23
|
+
for exact dates.
|
|
24
|
+
|
|
25
|
+
Change License: MIT License
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
Notice
|
|
29
|
+
|
|
30
|
+
The Business Source License (this document, or the "License") is not an Open
|
|
31
|
+
Source license. However, the Licensed Work will eventually be made available
|
|
32
|
+
under an Open Source License, as stated in this License.
|
|
33
|
+
|
|
34
|
+
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
|
|
35
|
+
"Business Source License" is a trademark of MariaDB Corporation Ab.
|
|
36
|
+
|
|
37
|
+
-----------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
Business Source License 1.1
|
|
40
|
+
|
|
41
|
+
Terms
|
|
42
|
+
|
|
43
|
+
The Licensor hereby grants you the right to copy, modify, create derivative
|
|
44
|
+
works, redistribute, and make non-production use of the Licensed Work. The
|
|
45
|
+
Licensor may make an Additional Use Grant, above, permitting limited
|
|
46
|
+
production use.
|
|
47
|
+
|
|
48
|
+
Effective on the Change Date, or the fourth anniversary of the first publicly
|
|
49
|
+
available distribution of a specific version of the Licensed Work under this
|
|
50
|
+
License, whichever comes first, the Licensor hereby grants you rights under
|
|
51
|
+
the terms of the Change License, and the rights granted in the paragraph
|
|
52
|
+
above terminate.
|
|
53
|
+
|
|
54
|
+
If your use of the Licensed Work does not comply with the requirements
|
|
55
|
+
currently in effect as described in this License, you must purchase a
|
|
56
|
+
commercial license from the Licensor, its affiliated entities, or authorized
|
|
57
|
+
resellers, or you must refrain from using the Licensed Work.
|
|
58
|
+
|
|
59
|
+
All copies of the original and modified Licensed Work, and derivative works
|
|
60
|
+
of the Licensed Work, are subject to this License. This License applies
|
|
61
|
+
separately for each version of the Licensed Work and the Change Date may vary
|
|
62
|
+
for each version of the Licensed Work released by Licensor.
|
|
63
|
+
|
|
64
|
+
You must conspicuously display this License on each original or modified copy
|
|
65
|
+
of the Licensed Work. If you receive the Licensed Work in original or
|
|
66
|
+
modified form from a third party, the terms and conditions set forth in this
|
|
67
|
+
License apply to your use of that work.
|
|
68
|
+
|
|
69
|
+
Any use of the Licensed Work in violation of this License will automatically
|
|
70
|
+
terminate your rights under this License for the current and all other
|
|
71
|
+
versions of the Licensed Work.
|
|
72
|
+
|
|
73
|
+
This License does not grant you any right in any trademark or logo of
|
|
74
|
+
Licensor or its affiliates (provided that you may use a trademark or logo of
|
|
75
|
+
Licensor as expressly required by this License).
|
|
76
|
+
|
|
77
|
+
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
|
78
|
+
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
|
|
79
|
+
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
|
|
80
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
|
|
81
|
+
TITLE.
|
|
82
|
+
|
|
83
|
+
MariaDB hereby grants you permission to use this License’s text to license
|
|
84
|
+
your works, and to refer to it using the trademark "Business Source License",
|
|
85
|
+
as long as you comply with the Covenants of Licensor below.
|
|
86
|
+
|
|
87
|
+
Covenants of Licensor
|
|
88
|
+
|
|
89
|
+
In consideration of the right to use this License’s text and the "Business
|
|
90
|
+
Source License" name and trademark, Licensor covenants to MariaDB, and to all
|
|
91
|
+
other recipients of the licensed work to be provided by Licensor:
|
|
92
|
+
|
|
93
|
+
1. To specify as the Change License the GPL Version 2.0 or any later version,
|
|
94
|
+
or a license that is compatible with GPL Version 2.0 or a later version,
|
|
95
|
+
where "compatible" means that software provided under the Change License can
|
|
96
|
+
be included in a program with software provided under GPL Version 2.0 or a
|
|
97
|
+
later version. Licensor may specify additional Change Licenses without
|
|
98
|
+
limitation.
|
|
99
|
+
|
|
100
|
+
2. To either: (a) specify an additional grant of rights to use that does not
|
|
101
|
+
impose any additional restriction on the right granted in this License, as
|
|
102
|
+
the Additional Use Grant; or (b) insert the text "None".
|
|
103
|
+
|
|
104
|
+
3. To specify a Change Date.
|
|
105
|
+
|
|
106
|
+
4. Not to modify this License in any other way.
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: copulas
|
|
3
|
+
Version: 0.10.1.dev0
|
|
4
|
+
Summary: Create tabular synthetic data using copulas-based modeling.
|
|
5
|
+
Author-email: "DataCebo, Inc." <info@sdv.dev>
|
|
6
|
+
License: BSL-1.1
|
|
7
|
+
Project-URL: Source Code, https://github.com/sdv-dev/Copulas/
|
|
8
|
+
Project-URL: Issue Tracker, https://github.com/sdv-dev/Copulas/issues
|
|
9
|
+
Project-URL: Changes, https://github.com/sdv-dev/Copulas/blob/main/HISTORY.md
|
|
10
|
+
Project-URL: Twitter, https://twitter.com/sdv_dev
|
|
11
|
+
Project-URL: Chat, https://bit.ly/sdv-slack-invite
|
|
12
|
+
Keywords: copulas
|
|
13
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: Free for non-commercial use
|
|
16
|
+
Classifier: Natural Language :: English
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Requires-Python: <3.12,>=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: plotly <6,>=5.10.0
|
|
27
|
+
Requires-Dist: numpy <2,>=1.20.0 ; python_version < "3.10"
|
|
28
|
+
Requires-Dist: pandas >=1.1.3 ; python_version < "3.10"
|
|
29
|
+
Requires-Dist: scipy <2,>=1.5.4 ; python_version < "3.10"
|
|
30
|
+
Requires-Dist: numpy <2,>=1.23.3 ; python_version >= "3.10"
|
|
31
|
+
Requires-Dist: scipy <2,>=1.9.2 ; python_version >= "3.10"
|
|
32
|
+
Requires-Dist: pandas >=1.3.4 ; python_version >= "3.10" and python_version < "3.11"
|
|
33
|
+
Requires-Dist: pandas >=1.5.0 ; python_version >= "3.11"
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: copulas[test,tutorials] ; extra == 'dev'
|
|
36
|
+
Requires-Dist: pip >=9.0.1 ; extra == 'dev'
|
|
37
|
+
Requires-Dist: build <2,>=1.0.0 ; extra == 'dev'
|
|
38
|
+
Requires-Dist: bump-my-version <1,>=0.18.3 ; extra == 'dev'
|
|
39
|
+
Requires-Dist: watchdog <0.11,>=0.8.3 ; extra == 'dev'
|
|
40
|
+
Requires-Dist: m2r <0.3,>=0.2.0 ; extra == 'dev'
|
|
41
|
+
Requires-Dist: nbsphinx <0.7,>=0.5.0 ; extra == 'dev'
|
|
42
|
+
Requires-Dist: Sphinx <3,>=1.7.1 ; extra == 'dev'
|
|
43
|
+
Requires-Dist: sphinx-rtd-theme <0.5,>=0.2.4 ; extra == 'dev'
|
|
44
|
+
Requires-Dist: Jinja2 <3,>=2 ; extra == 'dev'
|
|
45
|
+
Requires-Dist: flake8 <4,>=3.7.7 ; extra == 'dev'
|
|
46
|
+
Requires-Dist: isort <5,>=4.3.4 ; extra == 'dev'
|
|
47
|
+
Requires-Dist: flake8-debugger <4.1,>=4.0.0 ; extra == 'dev'
|
|
48
|
+
Requires-Dist: flake8-mock <0.4,>=0.3 ; extra == 'dev'
|
|
49
|
+
Requires-Dist: flake8-mutable <1.3,>=1.2.0 ; extra == 'dev'
|
|
50
|
+
Requires-Dist: flake8-fixme <1.2,>=1.1.1 ; extra == 'dev'
|
|
51
|
+
Requires-Dist: pep8-naming <0.13,>=0.12.1 ; extra == 'dev'
|
|
52
|
+
Requires-Dist: dlint <0.12,>=0.11.0 ; extra == 'dev'
|
|
53
|
+
Requires-Dist: flake8-docstrings <2,>=1.5.0 ; extra == 'dev'
|
|
54
|
+
Requires-Dist: pydocstyle <6.2,>=6.1.1 ; extra == 'dev'
|
|
55
|
+
Requires-Dist: flake8-pytest-style <2,>=1.5.0 ; extra == 'dev'
|
|
56
|
+
Requires-Dist: flake8-comprehensions <3.7,>=3.6.1 ; extra == 'dev'
|
|
57
|
+
Requires-Dist: flake8-print <4.1,>=4.0.0 ; extra == 'dev'
|
|
58
|
+
Requires-Dist: flake8-expression-complexity <0.1,>=0.0.9 ; extra == 'dev'
|
|
59
|
+
Requires-Dist: flake8-multiline-containers <0.1,>=0.0.18 ; extra == 'dev'
|
|
60
|
+
Requires-Dist: pandas-vet <0.3,>=0.2.2 ; extra == 'dev'
|
|
61
|
+
Requires-Dist: flake8-builtins <1.6,>=1.5.3 ; extra == 'dev'
|
|
62
|
+
Requires-Dist: flake8-eradicate <1.2,>=1.1.0 ; extra == 'dev'
|
|
63
|
+
Requires-Dist: flake8-quotes <4,>=3.3.0 ; extra == 'dev'
|
|
64
|
+
Requires-Dist: flake8-variables-names <0.1,>=0.0.4 ; extra == 'dev'
|
|
65
|
+
Requires-Dist: flake8-sfs <0.1,>=0.0.3 ; extra == 'dev'
|
|
66
|
+
Requires-Dist: flake8-absolute-import <2,>=1.0 ; extra == 'dev'
|
|
67
|
+
Requires-Dist: autoflake <2,>=1.1 ; extra == 'dev'
|
|
68
|
+
Requires-Dist: autopep8 <1.6,>=1.4.3 ; extra == 'dev'
|
|
69
|
+
Requires-Dist: twine <4,>=1.10.0 ; extra == 'dev'
|
|
70
|
+
Requires-Dist: wheel >=0.30.0 ; extra == 'dev'
|
|
71
|
+
Requires-Dist: coverage <6,>=4.5.1 ; extra == 'dev'
|
|
72
|
+
Requires-Dist: tox <4,>=2.9.1 ; extra == 'dev'
|
|
73
|
+
Requires-Dist: invoke ; extra == 'dev'
|
|
74
|
+
Requires-Dist: doc8 <0.9,>=0.8.0 ; extra == 'dev'
|
|
75
|
+
Requires-Dist: urllib3 <1.26,>=1.20 ; extra == 'dev'
|
|
76
|
+
Requires-Dist: tabulate <0.9,>=0.8.3 ; extra == 'dev'
|
|
77
|
+
Requires-Dist: boto3 <1.10,>=1.7.47 ; extra == 'dev'
|
|
78
|
+
Requires-Dist: docutils <0.15,>=0.10 ; extra == 'dev'
|
|
79
|
+
Provides-Extra: test
|
|
80
|
+
Requires-Dist: copulas[tutorials] ; extra == 'test'
|
|
81
|
+
Requires-Dist: pytest <7,>=6.2.5 ; extra == 'test'
|
|
82
|
+
Requires-Dist: pytest-cov <3,>=2.6.0 ; extra == 'test'
|
|
83
|
+
Requires-Dist: pytest-rerunfailures <10,>=9.0.0 ; extra == 'test'
|
|
84
|
+
Requires-Dist: rundoc <0.5,>=0.4.3 ; extra == 'test'
|
|
85
|
+
Requires-Dist: tomli <3,>=2.0.0 ; extra == 'test'
|
|
86
|
+
Provides-Extra: tutorials
|
|
87
|
+
Requires-Dist: markupsafe <=2.0.1 ; extra == 'tutorials'
|
|
88
|
+
Requires-Dist: scikit-learn <1.2,>=0.24 ; extra == 'tutorials'
|
|
89
|
+
Requires-Dist: jupyter <2,>=1.0.0 ; extra == 'tutorials'
|
|
90
|
+
|
|
91
|
+
<p style="text-align:center">
|
|
92
|
+
<i>This repository is part of <a href="https://sdv.dev">The Synthetic Data Vault Project</a>, a project from <a href="https://datacebo.com">DataCebo</a>.</i>
|
|
93
|
+
</p>
|
|
94
|
+
|
|
95
|
+
[](https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha)
|
|
96
|
+
[](https://pypi.python.org/pypi/copulas)
|
|
97
|
+
[](https://pepy.tech/project/copulas)
|
|
98
|
+
[](https://github.com/sdv-dev/Copulas/actions/workflows/unit.yml)
|
|
99
|
+
[](https://codecov.io/gh/sdv-dev/Copulas)
|
|
100
|
+
[](https://bit.ly/sdv-slack-invite)
|
|
101
|
+
|
|
102
|
+
<br/>
|
|
103
|
+
<p align="center" style="text-align:center">
|
|
104
|
+
<a href="https://github.com/sdv-dev/Copulas">
|
|
105
|
+
<img width=40% src="https://github.com/sdv-dev/SDV/blob/stable/docs/images/Copulas-DataCebo.png?raw=true"></img>
|
|
106
|
+
</a>
|
|
107
|
+
</p>
|
|
108
|
+
|
|
109
|
+
# Overview
|
|
110
|
+
|
|
111
|
+
**Copulas** is a Python library for modeling multivariate distributions and sampling from them
|
|
112
|
+
using copula functions.
|
|
113
|
+
Given a table of numerical data, use Copulas to learn the distribution and
|
|
114
|
+
generate new synthetic data following the same statistical properties.
|
|
115
|
+
|
|
116
|
+
**Key Features:**
|
|
117
|
+
|
|
118
|
+
* **Model multivariate data.** Choose from a variety of univariate
|
|
119
|
+
distributions and copulas – including Archimedean Copulas, Gaussian Copulas and Vine Copulas.
|
|
120
|
+
|
|
121
|
+
* **Compare real and synthetic data visually** after building your model. Visualizations
|
|
122
|
+
are available as 1D histograms, 2D scatterplots and 3D scatterplots.
|
|
123
|
+
|
|
124
|
+
* **Access & manipulate learned parameters.** With complete access to the internals
|
|
125
|
+
of the model, set or tune parameters to your choosing.
|
|
126
|
+
|
|
127
|
+
# Install
|
|
128
|
+
|
|
129
|
+
Install the Copulas library using pip or conda.
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
pip install copulas
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
conda install -c conda-forge copulas
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
# Usage
|
|
140
|
+
|
|
141
|
+
Get started using a demo dataset. This dataset contains 3 numerical columns.
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from copulas.datasets import sample_trivariate_xyz
|
|
145
|
+
|
|
146
|
+
real_data = sample_trivariate_xyz()
|
|
147
|
+
real_data.head()
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
<img src="docs/images/copulas_sample_dataset.png" width="300">
|
|
151
|
+
|
|
152
|
+
Model the data using a copula and use it to create synthetic data.
|
|
153
|
+
The Copulas library offers many options including Gaussian Copula,
|
|
154
|
+
Vine Copulas and Archimedean Copulas.
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from copulas.multivariate import GaussianMultivariate
|
|
158
|
+
|
|
159
|
+
copula = GaussianMultivariate()
|
|
160
|
+
copula.fit(real_data)
|
|
161
|
+
|
|
162
|
+
synthetic_data = copula.sample(len(real_data))
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Visualize the real and synthetic data side-by-side. Let's do this in 3D to see our full dataset.
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from copulas.visualization import compare_3d
|
|
169
|
+
|
|
170
|
+
compare_3d(real_data, synthetic_data)
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+

|
|
174
|
+
|
|
175
|
+
# Tutorials
|
|
176
|
+
Click below to run the code yourself on a Colab Notebook and discover new features.
|
|
177
|
+
|
|
178
|
+
[](https://bit.ly/copulas-demo)
|
|
179
|
+
|
|
180
|
+
# Community & Support
|
|
181
|
+
|
|
182
|
+
Learn more about the Copulas library from our [documentation](https://sdv.dev/Copulas/) site.
|
|
183
|
+
|
|
184
|
+
**Questions or issues?** Join our [Slack channel](https://bit.ly/sdv-slack-invite)
|
|
185
|
+
to discuss more about Copulas and synthetic data.
|
|
186
|
+
If you find a bug or have a feature request, you can also
|
|
187
|
+
[open an issue](https://github.com/sdv-dev/Copulas/issues/new/choose) on our GitHub.
|
|
188
|
+
|
|
189
|
+
**Interested in contributing to Copulas?** Read our
|
|
190
|
+
[Contribution Guide](https://sdv.dev/Copulas/contributing.html) to get started.
|
|
191
|
+
|
|
192
|
+
# Credits
|
|
193
|
+
|
|
194
|
+
The Copulas open source project first started at the Data to AI Lab at MIT in 2018.
|
|
195
|
+
Thank you to our team of contributors who have built and maintained the library over the years!
|
|
196
|
+
|
|
197
|
+
[View Contributors](https://github.com/sdv-dev/Copulas/graphs/contributors)
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
<div align="center">
|
|
203
|
+
<a href="https://datacebo.com"><img align="center" width=40% src="https://github.com/sdv-dev/SDV/blob/stable/docs/images/DataCebo.png"></img></a>
|
|
204
|
+
</div>
|
|
205
|
+
<br/>
|
|
206
|
+
<br/>
|
|
207
|
+
|
|
208
|
+
[The Synthetic Data Vault Project](https://sdv.dev) was first created at MIT's [Data to AI Lab](
|
|
209
|
+
https://dai.lids.mit.edu/) in 2016. After 4 years of research and traction with enterprise, we
|
|
210
|
+
created [DataCebo](https://datacebo.com) in 2020 with the goal of growing the project.
|
|
211
|
+
Today, DataCebo is the proud developer of SDV, the largest ecosystem for
|
|
212
|
+
synthetic data generation & evaluation. It is home to multiple libraries that support synthetic
|
|
213
|
+
data, including:
|
|
214
|
+
|
|
215
|
+
* 🔄 Data discovery & transformation. Reverse the transforms to reproduce realistic data.
|
|
216
|
+
* 🧠 Multiple machine learning models -- ranging from Copulas to Deep Learning -- to create tabular,
|
|
217
|
+
multi table and time series data.
|
|
218
|
+
* 📊 Measuring quality and privacy of synthetic data, and comparing different synthetic data
|
|
219
|
+
generation models.
|
|
220
|
+
|
|
221
|
+
[Get started using the SDV package](https://sdv.dev/SDV/getting_started/install.html) -- a fully
|
|
222
|
+
integrated solution and your one-stop shop for synthetic data. Or, use the standalone libraries
|
|
223
|
+
for specific needs.
|