pycodamath 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycodamath-1.1.1/LICENSE +19 -0
- pycodamath-1.1.1/PKG-INFO +293 -0
- pycodamath-1.1.1/README.md +262 -0
- pycodamath-1.1.1/pyproject.toml +43 -0
- pycodamath-1.1.1/setup.cfg +4 -0
- pycodamath-1.1.1/src/pycodamath/__init__.py +14 -0
- pycodamath-1.1.1/src/pycodamath/entropy.py +87 -0
- pycodamath-1.1.1/src/pycodamath/extra.py +123 -0
- pycodamath-1.1.1/src/pycodamath/pca.py +456 -0
- pycodamath-1.1.1/src/pycodamath/plot.py +70 -0
- pycodamath-1.1.1/src/pycodamath/pycoda.py +188 -0
- pycodamath-1.1.1/src/pycodamath.egg-info/PKG-INFO +293 -0
- pycodamath-1.1.1/src/pycodamath.egg-info/SOURCES.txt +14 -0
- pycodamath-1.1.1/src/pycodamath.egg-info/dependency_links.txt +1 -0
- pycodamath-1.1.1/src/pycodamath.egg-info/requires.txt +7 -0
- pycodamath-1.1.1/src/pycodamath.egg-info/top_level.txt +1 -0
pycodamath-1.1.1/LICENSE
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright (c) 2018 The Python Packaging Authority
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pycodamath
|
|
3
|
+
Version: 1.1.1
|
|
4
|
+
Summary: Compositional data (CoDa) analysis tools for Python
|
|
5
|
+
Author-email: Christian Brinch <cbri@food.dtu.dk>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://bitbucket.org/genomicepidemiology/pycodamath
|
|
8
|
+
Project-URL: Bug Tracker, https://bitbucket.org/genomicepidemiology/pycodamath/issues
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering
|
|
20
|
+
Requires-Python: >=3.8
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: adjustText>=0.7.3
|
|
24
|
+
Requires-Dist: matplotlib>=3.1.1
|
|
25
|
+
Requires-Dist: numpy>=1.17.2
|
|
26
|
+
Requires-Dist: pandas>=0.25.1
|
|
27
|
+
Requires-Dist: python-ternary>=1.0.6
|
|
28
|
+
Requires-Dist: scipy>=1.3.1
|
|
29
|
+
Requires-Dist: webcolors>=1.13
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# pyCoDaMath
|
|
33
|
+
|
|
34
|
+
[Python](https://www.python.org/)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
pyCoDaMath provides compositional data (CoDa) analysis tools for Python
|
|
38
|
+
|
|
39
|
+
- **Source code:** https://bitbucket.org/genomicepidemiology/pycodamath
|
|
40
|
+
|
|
41
|
+
## Getting Started
|
|
42
|
+
|
|
43
|
+
This package extends the pandas `DataFrame` object with various CoDa tools, available through the `df.coda` accessor. It also provides a set of plotting functions for CoDa figures.
|
|
44
|
+
|
|
45
|
+
### Installation
|
|
46
|
+
|
|
47
|
+
Clone the git repo to your local hard drive:
|
|
48
|
+
|
|
49
|
+
git clone https://bitbucket.org/genomicepidemiology/pycodamath.git
|
|
50
|
+
|
|
51
|
+
Enter the directory and install:
|
|
52
|
+
|
|
53
|
+
pip install .
|
|
54
|
+
|
|
55
|
+
### Usage
|
|
56
|
+
|
|
57
|
+
The pyCoDaMath module is loaded as
|
|
58
|
+
|
|
59
|
+
import pycodamath
|
|
60
|
+
|
|
61
|
+
At this point, in order to get CLR values from a Pandas DataFrame `df`, do
|
|
62
|
+
|
|
63
|
+
df.coda.clr()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
## Documentation
|
|
67
|
+
|
|
68
|
+
### CLR transformation - point estimate
|
|
69
|
+
df.coda.clr()
|
|
70
|
+
|
|
71
|
+
Returns centered logratio coefficients. If the dataframe contains zeros, values
|
|
72
|
+
will be replaced by the Aitchison mean point estimate.
|
|
73
|
+
|
|
74
|
+
### CLR transformation - standard deviation
|
|
75
|
+
df.coda.clr_std(n_samples=5000)
|
|
76
|
+
|
|
77
|
+
Returns the standard deviation of `n_samples` random draws in CLR space.
|
|
78
|
+
|
|
79
|
+
**Parameters**
|
|
80
|
+
|
|
81
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
### ALR transformation - point estimate
|
|
85
|
+
df.coda.alr(part=None)
|
|
86
|
+
|
|
87
|
+
Returns additive logratio values. If `part` is None, the last part of the composition is used as the denominator.
|
|
88
|
+
|
|
89
|
+
**Parameters**
|
|
90
|
+
|
|
91
|
+
- part (str) - Name of the part to use as denominator.
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
### ALR transformation - standard deviation
|
|
95
|
+
df.coda.alr_std(part=None, n_samples=5000)
|
|
96
|
+
|
|
97
|
+
Returns the standard deviation of `n_samples` random draws in ALR space.
|
|
98
|
+
|
|
99
|
+
**Parameters**
|
|
100
|
+
|
|
101
|
+
- part (str) - Name of the part to use as denominator.
|
|
102
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
### ILR transformation - point estimate
|
|
106
|
+
df.coda.ilr(psi=None)
|
|
107
|
+
|
|
108
|
+
Returns isometric logratio values. If no basis is given, a default sequential binary partition basis is used.
|
|
109
|
+
|
|
110
|
+
**Parameters**
|
|
111
|
+
|
|
112
|
+
- psi (array_like) - Orthonormal basis. If None, the default SBP basis is used.
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
### ILR inverse transformation
|
|
116
|
+
df.coda.ilr_inv(psi=None)
|
|
117
|
+
|
|
118
|
+
Returns the composition corresponding to a set of ILR coordinates. The same basis used for the forward transform must be supplied.
|
|
119
|
+
|
|
120
|
+
**Parameters**
|
|
121
|
+
|
|
122
|
+
- psi (array_like) - Orthonormal basis. If None, the default SBP basis is used.
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
### Aitchison point estimate
|
|
126
|
+
df.coda.aitchison_mean(alpha=1.0)
|
|
127
|
+
|
|
128
|
+
Returns the Bayesian point estimate based on the Dirichlet concentration parameter alpha.
|
|
129
|
+
Use values between 0.5 (sparse prior) and 1.0 (flat prior).
|
|
130
|
+
|
|
131
|
+
**Parameters**
|
|
132
|
+
|
|
133
|
+
- alpha (float) - Dirichlet concentration parameter. Defaults to 1.0.
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
### Bayesian zero replacement
|
|
137
|
+
df.coda.zero_replacement(n_samples=5000)
|
|
138
|
+
|
|
139
|
+
Returns a count table with zero values replaced by finite values using Bayesian inference.
|
|
140
|
+
|
|
141
|
+
**Parameters**
|
|
142
|
+
|
|
143
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
### Closure
|
|
147
|
+
df.coda.closure(N)
|
|
148
|
+
|
|
149
|
+
Closes the composition to the constant N, i.e. rescales the parts so that they sum to N.
|
|
150
|
+
|
|
151
|
+
**Parameters**
|
|
152
|
+
|
|
153
|
+
- N (float) - Closure constant.
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
### Variance matrix
|
|
157
|
+
df.coda.varmatrix(nmp=False)
|
|
158
|
+
|
|
159
|
+
Returns the total variation matrix of a composition. For large datasets, variance is
|
|
160
|
+
estimated from at most 500 rows.
|
|
161
|
+
|
|
162
|
+
**Parameters**
|
|
163
|
+
|
|
164
|
+
- nmp (bool) - If True, return a numpy array instead of a DataFrame. Defaults to False.
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
### Total variance
|
|
168
|
+
df.coda.totvar()
|
|
169
|
+
|
|
170
|
+
Returns the total variance of a set of compositions, computed as the sum of the
|
|
171
|
+
variance matrix divided by twice the number of parts.
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
### Geometric mean
|
|
175
|
+
df.coda.gmean()
|
|
176
|
+
|
|
177
|
+
Returns the geometric mean of a set of compositions as percentages.
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
### Power transformation
|
|
181
|
+
df.coda.power(alpha)
|
|
182
|
+
|
|
183
|
+
Applies compositional scalar multiplication (power transformation).
|
|
184
|
+
|
|
185
|
+
**Parameters**
|
|
186
|
+
|
|
187
|
+
- alpha (float) - Scalar multiplier.
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
### Perturbation
|
|
191
|
+
df.coda.perturbation(comp)
|
|
192
|
+
|
|
193
|
+
Applies a compositional perturbation (Aitchison addition) with another composition.
|
|
194
|
+
|
|
195
|
+
**Parameters**
|
|
196
|
+
|
|
197
|
+
- comp (array_like) - Composition to perturb with.
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
### Scaling
|
|
201
|
+
df.coda.scale()
|
|
202
|
+
|
|
203
|
+
Scales the composition by the reciprocal of the square root of the total variance.
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
### Centering
|
|
207
|
+
df.coda.center()
|
|
208
|
+
|
|
209
|
+
Centers the composition by perturbing with the reciprocal of the geometric mean.
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
## Plotting functions
|
|
215
|
+
|
|
216
|
+
### Ternary diagram
|
|
217
|
+
pycodamath.plot.ternary(data, descr=None, center=False, conf=False)
|
|
218
|
+
|
|
219
|
+
Plots a ternary diagram from a three-part composition closed to 100.
|
|
220
|
+
|
|
221
|
+
**Parameters**
|
|
222
|
+
|
|
223
|
+
- data (DataFrame) - Three-part compositional data, closed to 100.
|
|
224
|
+
- descr (Series) - Optional grouping variable; if provided, points are coloured by group.
|
|
225
|
+
- center (bool) - If True, the composition is centred before plotting. Defaults to False.
|
|
226
|
+
- conf (bool) - If True, a 95% confidence ellipse is overlaid. Defaults to False.
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
### Scree plot
|
|
230
|
+
pycodamath.pca.scree_plot(axis, eig_val)
|
|
231
|
+
|
|
232
|
+
Plots a scree plot of explained variance from singular values.
|
|
233
|
+
|
|
234
|
+
**Parameters**
|
|
235
|
+
|
|
236
|
+
- axis - A Matplotlib axes object.
|
|
237
|
+
- eig_val (array_like) - Singular values from SVD.
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
### PCA biplot
|
|
241
|
+
class pycodamath.pca.Biplot(data, axis=None, default=True)
|
|
242
|
+
|
|
243
|
+
Creates a PCA biplot based on a centered log-ratio transformation of the data.
|
|
244
|
+
|
|
245
|
+
**Parameters**
|
|
246
|
+
|
|
247
|
+
- data (DataFrame) - Compositional count data to analyse.
|
|
248
|
+
- axis - A Matplotlib axes object. If None, a new figure is created.
|
|
249
|
+
- default (bool) - If True, loadings and scores are plotted immediately. Defaults to True.
|
|
250
|
+
|
|
251
|
+
The following methods are available for customising the biplot:
|
|
252
|
+
|
|
253
|
+
- `plotloadings(cutoff=0, scale=None, labels=None, cluster=False)` — plot loading arrows.
|
|
254
|
+
Set `cutoff` (as a fraction of the maximum loading length) to suppress short loadings.
|
|
255
|
+
Set `cluster=True` to reduce the number of loadings by hierarchical clustering; the
|
|
256
|
+
resulting cluster legend is accessible as `biplot.clusterlegend`.
|
|
257
|
+
- `plotloadinglabels(labels=None, loadings=None, cutoff=0)` — add text labels to loadings.
|
|
258
|
+
- `adjustloadinglabels()` — shift loading labels to reduce overlap.
|
|
259
|
+
- `plotscores(group=None, palette=None, legend=True, labels=None)` — plot sample scores
|
|
260
|
+
as points, optionally coloured by group.
|
|
261
|
+
- `plotscorelabels(labels=None)` — add text labels to the scores.
|
|
262
|
+
- `plotellipses(group, palette=None, legend=False)` — plot 90% confidence ellipses for
|
|
263
|
+
each group (requires at least 3 samples per group).
|
|
264
|
+
- `plotcentroids(group, palette=None, legend=False)` — plot the centroid of each group.
|
|
265
|
+
- `plothulls(group, palette=None, legend=True)` — plot convex hulls around each group
|
|
266
|
+
(requires at least 3 samples per group).
|
|
267
|
+
- `plotcontours(group, palette=None, legend=True, plot_outliers=True, percent_outliers=0.1, linewidth=2.2)` — plot kernel density contours for each group. Samples outside the outermost contour are optionally shown as individual points.
|
|
268
|
+
- `labeloutliers(group, conf=3.0)` — label samples more than `conf` standard deviations
|
|
269
|
+
from their group centroid.
|
|
270
|
+
- `displaylegend(loc=2)` — display the group legend at Matplotlib legend location `loc`.
|
|
271
|
+
- `removepatches()` — remove loading arrows and hull polygons from the plot.
|
|
272
|
+
- `removescores()` — remove score points from the plot.
|
|
273
|
+
- `removelabels()` — remove text labels from the plot.
|
|
274
|
+
- `removecontours()` — remove contour fills from the plot.
|
|
275
|
+
|
|
276
|
+
The keyword `labels` is a list of label names. If `labels` is None, all labels are plotted.
|
|
277
|
+
|
|
278
|
+
The keyword `group` is a Pandas Series with an index matching the data index.
|
|
279
|
+
|
|
280
|
+
The keyword `palette` is a dict mapping each unique group value to a colour.
|
|
281
|
+
|
|
282
|
+
**Example**
|
|
283
|
+
|
|
284
|
+
import pycodamath as coda
|
|
285
|
+
import pandas as pd
|
|
286
|
+
data = pd.read_csv('example/kilauea_iki_chem.csv')
|
|
287
|
+
mypca = coda.pca.Biplot(data)
|
|
288
|
+
mypca.removelabels()
|
|
289
|
+
mypca.plotloadings(cluster=True)
|
|
290
|
+
print(mypca.clusterlegend)
|
|
291
|
+
mypca.removelabels()
|
|
292
|
+
mypca.plotloadings(labels=['FeO', 'Al2O3', 'CaO'], cluster=False)
|
|
293
|
+
mypca.adjustloadinglabels()
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# pyCoDaMath
|
|
2
|
+
|
|
3
|
+
[Python](https://www.python.org/)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
pyCoDaMath provides compositional data (CoDa) analysis tools for Python
|
|
7
|
+
|
|
8
|
+
- **Source code:** https://bitbucket.org/genomicepidemiology/pycodamath
|
|
9
|
+
|
|
10
|
+
## Getting Started
|
|
11
|
+
|
|
12
|
+
This package extends the pandas `DataFrame` object with various CoDa tools, available through the `df.coda` accessor. It also provides a set of plotting functions for CoDa figures.
|
|
13
|
+
|
|
14
|
+
### Installation
|
|
15
|
+
|
|
16
|
+
Clone the git repo to your local hard drive:
|
|
17
|
+
|
|
18
|
+
git clone https://bitbucket.org/genomicepidemiology/pycodamath.git
|
|
19
|
+
|
|
20
|
+
Enter the directory and install:
|
|
21
|
+
|
|
22
|
+
pip install .
|
|
23
|
+
|
|
24
|
+
### Usage
|
|
25
|
+
|
|
26
|
+
The pyCoDaMath module is loaded as
|
|
27
|
+
|
|
28
|
+
import pycodamath
|
|
29
|
+
|
|
30
|
+
At this point, in order to get CLR values from a Pandas DataFrame `df`, do
|
|
31
|
+
|
|
32
|
+
df.coda.clr()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
## Documentation
|
|
36
|
+
|
|
37
|
+
### CLR transformation - point estimate
|
|
38
|
+
df.coda.clr()
|
|
39
|
+
|
|
40
|
+
Returns centered logratio coefficients. If the dataframe contains zeros, values
|
|
41
|
+
will be replaced by the Aitchison mean point estimate.
|
|
42
|
+
|
|
43
|
+
### CLR transformation - standard deviation
|
|
44
|
+
df.coda.clr_std(n_samples=5000)
|
|
45
|
+
|
|
46
|
+
Returns the standard deviation of `n_samples` random draws in CLR space.
|
|
47
|
+
|
|
48
|
+
**Parameters**
|
|
49
|
+
|
|
50
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
### ALR transformation - point estimate
|
|
54
|
+
df.coda.alr(part=None)
|
|
55
|
+
|
|
56
|
+
Returns additive logratio values. If `part` is None, the last part of the composition is used as the denominator.
|
|
57
|
+
|
|
58
|
+
**Parameters**
|
|
59
|
+
|
|
60
|
+
- part (str) - Name of the part to use as denominator.
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
### ALR transformation - standard deviation
|
|
64
|
+
df.coda.alr_std(part=None, n_samples=5000)
|
|
65
|
+
|
|
66
|
+
Returns the standard deviation of `n_samples` random draws in ALR space.
|
|
67
|
+
|
|
68
|
+
**Parameters**
|
|
69
|
+
|
|
70
|
+
- part (str) - Name of the part to use as denominator.
|
|
71
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
### ILR transformation - point estimate
|
|
75
|
+
df.coda.ilr(psi=None)
|
|
76
|
+
|
|
77
|
+
Returns isometric logratio values. If no basis is given, a default sequential binary partition basis is used.
|
|
78
|
+
|
|
79
|
+
**Parameters**
|
|
80
|
+
|
|
81
|
+
- psi (array_like) - Orthonormal basis. If None, the default SBP basis is used.
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
### ILR inverse transformation
|
|
85
|
+
df.coda.ilr_inv(psi=None)
|
|
86
|
+
|
|
87
|
+
Returns the composition corresponding to a set of ILR coordinates. The same basis used for the forward transform must be supplied.
|
|
88
|
+
|
|
89
|
+
**Parameters**
|
|
90
|
+
|
|
91
|
+
- psi (array_like) - Orthonormal basis. If None, the default SBP basis is used.
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
### Aitchison point estimate
|
|
95
|
+
df.coda.aitchison_mean(alpha=1.0)
|
|
96
|
+
|
|
97
|
+
Returns the Bayesian point estimate based on the Dirichlet concentration parameter alpha.
|
|
98
|
+
Use values between 0.5 (sparse prior) and 1.0 (flat prior).
|
|
99
|
+
|
|
100
|
+
**Parameters**
|
|
101
|
+
|
|
102
|
+
- alpha (float) - Dirichlet concentration parameter. Defaults to 1.0.
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
### Bayesian zero replacement
|
|
106
|
+
df.coda.zero_replacement(n_samples=5000)
|
|
107
|
+
|
|
108
|
+
Returns a count table with zero values replaced by finite values using Bayesian inference.
|
|
109
|
+
|
|
110
|
+
**Parameters**
|
|
111
|
+
|
|
112
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
### Closure
|
|
116
|
+
df.coda.closure(N)
|
|
117
|
+
|
|
118
|
+
Closes the composition to the constant N, i.e. rescales the parts so that they sum to N.
|
|
119
|
+
|
|
120
|
+
**Parameters**
|
|
121
|
+
|
|
122
|
+
- N (float) - Closure constant.
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
### Variance matrix
|
|
126
|
+
df.coda.varmatrix(nmp=False)
|
|
127
|
+
|
|
128
|
+
Returns the total variation matrix of a composition. For large datasets, variance is
|
|
129
|
+
estimated from at most 500 rows.
|
|
130
|
+
|
|
131
|
+
**Parameters**
|
|
132
|
+
|
|
133
|
+
- nmp (bool) - If True, return a numpy array instead of a DataFrame. Defaults to False.
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
### Total variance
|
|
137
|
+
df.coda.totvar()
|
|
138
|
+
|
|
139
|
+
Returns the total variance of a set of compositions, computed as the sum of the
|
|
140
|
+
variance matrix divided by twice the number of parts.
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
### Geometric mean
|
|
144
|
+
df.coda.gmean()
|
|
145
|
+
|
|
146
|
+
Returns the geometric mean of a set of compositions as percentages.
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
### Power transformation
|
|
150
|
+
df.coda.power(alpha)
|
|
151
|
+
|
|
152
|
+
Applies compositional scalar multiplication (power transformation).
|
|
153
|
+
|
|
154
|
+
**Parameters**
|
|
155
|
+
|
|
156
|
+
- alpha (float) - Scalar multiplier.
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
### Perturbation
|
|
160
|
+
df.coda.perturbation(comp)
|
|
161
|
+
|
|
162
|
+
Applies a compositional perturbation (Aitchison addition) with another composition.
|
|
163
|
+
|
|
164
|
+
**Parameters**
|
|
165
|
+
|
|
166
|
+
- comp (array_like) - Composition to perturb with.
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
### Scaling
|
|
170
|
+
df.coda.scale()
|
|
171
|
+
|
|
172
|
+
Scales the composition by the reciprocal of the square root of the total variance.
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
### Centering
|
|
176
|
+
df.coda.center()
|
|
177
|
+
|
|
178
|
+
Centers the composition by perturbing with the reciprocal of the geometric mean.
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Plotting functions
|
|
184
|
+
|
|
185
|
+
### Ternary diagram
|
|
186
|
+
pycodamath.plot.ternary(data, descr=None, center=False, conf=False)
|
|
187
|
+
|
|
188
|
+
Plots a ternary diagram from a three-part composition closed to 100.
|
|
189
|
+
|
|
190
|
+
**Parameters**
|
|
191
|
+
|
|
192
|
+
- data (DataFrame) - Three-part compositional data, closed to 100.
|
|
193
|
+
- descr (Series) - Optional grouping variable; if provided, points are coloured by group.
|
|
194
|
+
- center (bool) - If True, the composition is centred before plotting. Defaults to False.
|
|
195
|
+
- conf (bool) - If True, a 95% confidence ellipse is overlaid. Defaults to False.
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
### Scree plot
|
|
199
|
+
pycodamath.pca.scree_plot(axis, eig_val)
|
|
200
|
+
|
|
201
|
+
Plots a scree plot of explained variance from singular values.
|
|
202
|
+
|
|
203
|
+
**Parameters**
|
|
204
|
+
|
|
205
|
+
- axis - A Matplotlib axes object.
|
|
206
|
+
- eig_val (array_like) - Singular values from SVD.
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
### PCA biplot
|
|
210
|
+
class pycodamath.pca.Biplot(data, axis=None, default=True)
|
|
211
|
+
|
|
212
|
+
Creates a PCA biplot based on a centered log-ratio transformation of the data.
|
|
213
|
+
|
|
214
|
+
**Parameters**
|
|
215
|
+
|
|
216
|
+
- data (DataFrame) - Compositional count data to analyse.
|
|
217
|
+
- axis - A Matplotlib axes object. If None, a new figure is created.
|
|
218
|
+
- default (bool) - If True, loadings and scores are plotted immediately. Defaults to True.
|
|
219
|
+
|
|
220
|
+
The following methods are available for customising the biplot:
|
|
221
|
+
|
|
222
|
+
- `plotloadings(cutoff=0, scale=None, labels=None, cluster=False)` — plot loading arrows.
|
|
223
|
+
Set `cutoff` (as a fraction of the maximum loading length) to suppress short loadings.
|
|
224
|
+
Set `cluster=True` to reduce the number of loadings by hierarchical clustering; the
|
|
225
|
+
resulting cluster legend is accessible as `biplot.clusterlegend`.
|
|
226
|
+
- `plotloadinglabels(labels=None, loadings=None, cutoff=0)` — add text labels to loadings.
|
|
227
|
+
- `adjustloadinglabels()` — shift loading labels to reduce overlap.
|
|
228
|
+
- `plotscores(group=None, palette=None, legend=True, labels=None)` — plot sample scores
|
|
229
|
+
as points, optionally coloured by group.
|
|
230
|
+
- `plotscorelabels(labels=None)` — add text labels to the scores.
|
|
231
|
+
- `plotellipses(group, palette=None, legend=False)` — plot 90% confidence ellipses for
|
|
232
|
+
each group (requires at least 3 samples per group).
|
|
233
|
+
- `plotcentroids(group, palette=None, legend=False)` — plot the centroid of each group.
|
|
234
|
+
- `plothulls(group, palette=None, legend=True)` — plot convex hulls around each group
|
|
235
|
+
(requires at least 3 samples per group).
|
|
236
|
+
- `plotcontours(group, palette=None, legend=True, plot_outliers=True, percent_outliers=0.1, linewidth=2.2)` — plot kernel density contours for each group. Samples outside the outermost contour are optionally shown as individual points.
|
|
237
|
+
- `labeloutliers(group, conf=3.0)` — label samples more than `conf` standard deviations
|
|
238
|
+
from their group centroid.
|
|
239
|
+
- `displaylegend(loc=2)` — display the group legend at Matplotlib legend location `loc`.
|
|
240
|
+
- `removepatches()` — remove loading arrows and hull polygons from the plot.
|
|
241
|
+
- `removescores()` — remove score points from the plot.
|
|
242
|
+
- `removelabels()` — remove text labels from the plot.
|
|
243
|
+
- `removecontours()` — remove contour fills from the plot.
|
|
244
|
+
|
|
245
|
+
The keyword `labels` is a list of label names. If `labels` is None, all labels are plotted.
|
|
246
|
+
|
|
247
|
+
The keyword `group` is a Pandas Series with an index matching the data index.
|
|
248
|
+
|
|
249
|
+
The keyword `palette` is a dict mapping each unique group value to a colour.
|
|
250
|
+
|
|
251
|
+
**Example**
|
|
252
|
+
|
|
253
|
+
import pycodamath as coda
|
|
254
|
+
import pandas as pd
|
|
255
|
+
data = pd.read_csv('example/kilauea_iki_chem.csv')
|
|
256
|
+
mypca = coda.pca.Biplot(data)
|
|
257
|
+
mypca.removelabels()
|
|
258
|
+
mypca.plotloadings(cluster=True)
|
|
259
|
+
print(mypca.clusterlegend)
|
|
260
|
+
mypca.removelabels()
|
|
261
|
+
mypca.plotloadings(labels=['FeO', 'Al2O3', 'CaO'], cluster=False)
|
|
262
|
+
mypca.adjustloadinglabels()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pycodamath"
|
|
7
|
+
version = "1.1.1"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="Christian Brinch", email="cbri@food.dtu.dk" },
|
|
10
|
+
]
|
|
11
|
+
description = "Compositional data (CoDa) analysis tools for Python"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
license = { text = "MIT" }
|
|
14
|
+
requires-python = ">=3.8"
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Intended Audience :: Science/Research",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.8",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Topic :: Scientific/Engineering",
|
|
27
|
+
]
|
|
28
|
+
dependencies = [
|
|
29
|
+
"adjustText>=0.7.3",
|
|
30
|
+
"matplotlib>=3.1.1",
|
|
31
|
+
"numpy>=1.17.2",
|
|
32
|
+
"pandas>=0.25.1",
|
|
33
|
+
"python-ternary>=1.0.6",
|
|
34
|
+
"scipy>=1.3.1",
|
|
35
|
+
"webcolors>=1.13",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.urls]
|
|
39
|
+
"Homepage" = "https://bitbucket.org/genomicepidemiology/pycodamath"
|
|
40
|
+
"Bug Tracker" = "https://bitbucket.org/genomicepidemiology/pycodamath/issues"
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
where = ["src"]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
'''
|
|
2
|
+
pyCoDa init script
|
|
3
|
+
'''
|
|
4
|
+
|
|
5
|
+
__title__ = "pyCoDaMath"
|
|
6
|
+
__author__ = "Christian Brinch"
|
|
7
|
+
__email__ = "cbri@food.dtu.dk"
|
|
8
|
+
__copyright__ = "Copyright 2019 C. Brinch"
|
|
9
|
+
# Keep in sync with `version` in pyproject.toml. Stored as a string because a
# float cannot represent a three-component release number such as 1.1.1.
__version__ = "1.1.1"
|
|
10
|
+
__all__ = ['pycoda', 'extra', 'plot', 'pca']
|
|
11
|
+
|
|
12
|
+
from . import pycoda, pca, entropy
|
|
13
|
+
pycoda.init()
|
|
14
|
+
entropy.init()
|