pyCoDaMath 1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyCoDaMath-1.0/LICENSE +19 -0
- pyCoDaMath-1.0/PKG-INFO +180 -0
- pyCoDaMath-1.0/README.md +166 -0
- pyCoDaMath-1.0/pyproject.toml +30 -0
- pyCoDaMath-1.0/setup.cfg +4 -0
- pyCoDaMath-1.0/src/pyCoDaMath.egg-info/PKG-INFO +180 -0
- pyCoDaMath-1.0/src/pyCoDaMath.egg-info/SOURCES.txt +13 -0
- pyCoDaMath-1.0/src/pyCoDaMath.egg-info/dependency_links.txt +1 -0
- pyCoDaMath-1.0/src/pyCoDaMath.egg-info/requires.txt +7 -0
- pyCoDaMath-1.0/src/pyCoDaMath.egg-info/top_level.txt +1 -0
- pyCoDaMath-1.0/src/pycodamath/__init__.py +13 -0
- pyCoDaMath-1.0/src/pycodamath/extra.py +128 -0
- pyCoDaMath-1.0/src/pycodamath/pca.py +341 -0
- pyCoDaMath-1.0/src/pycodamath/plot.py +71 -0
- pyCoDaMath-1.0/src/pycodamath/pycoda.py +188 -0
pyCoDaMath-1.0/LICENSE
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright (c) 2018 The Python Packaging Authority
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|
pyCoDaMath-1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pyCoDaMath
|
|
3
|
+
Version: 1.0
|
|
4
|
+
Summary: Compositional data (CoDa) analysis tools for Python
|
|
5
|
+
Author-email: Christian Brinch <cbri@food.dtu.dk>
|
|
6
|
+
Project-URL: Homepage, https://bitbucket.org/genomicepidemiology/pycodamath
|
|
7
|
+
Project-URL: Bug Tracker, https://bitbucket.org/genomicepidemiology/pycodamath/issues?status=new&status=open&is_spam=!spam
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
|
|
15
|
+
# pyCoDaMath
|
|
16
|
+
|
|
17
|
+
[](https://www.python.org/)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
pyCoDaMath provides compositional data (CoDa) analysis tools for Python
|
|
21
|
+
|
|
22
|
+
- **Source code:** https://bitbucket.org/genomicepidemiology/pycoda
|
|
23
|
+
|
|
24
|
+
## Getting Started
|
|
25
|
+
|
|
26
|
+
This package extends the Pandas dataframe object with various CoDa tools. It also provides a set of plotting functions for CoDa figures.
|
|
27
|
+
|
|
28
|
+
### Installation
|
|
29
|
+
|
|
30
|
+
Clone the git repo to your local hard drive:
|
|
31
|
+
|
|
32
|
+
git clone https://brinch@bitbucket.org/genomicepidemiology/pycoda.git
|
|
33
|
+
|
|
34
|
+
Enter pycoda directory and type
|
|
35
|
+
|
|
36
|
+
pip install ./
|
|
37
|
+
|
|
38
|
+
### Usage
|
|
39
|
+
|
|
40
|
+
The pyCoDaMath module is loaded as
|
|
41
|
+
|
|
42
|
+
import pycodamath
|
|
43
|
+
|
|
44
|
+
At this point, in order to get CLR values from a Pandas DataFrame df, do
|
|
45
|
+
|
|
46
|
+
df.coda.clr()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## Documentation
|
|
50
|
+
|
|
51
|
+
### CLR transformation - point estimate
|
|
52
|
+
df.coda.clr()
|
|
53
|
+
|
|
54
|
+
Returns centered logratio coefficients. If the data frame contains zeros, values
|
|
55
|
+
will be replaced by the Aitchison mean point estimate.
|
|
56
|
+
|
|
57
|
+
### CLR transformation - standard deviation
|
|
58
|
+
df.coda.clr_std(n_samples=5000)
|
|
59
|
+
|
|
60
|
+
Returns the standard deviation of n_samples random draws in CLR space.
|
|
61
|
+
|
|
62
|
+
**Parameters**
|
|
63
|
+
|
|
64
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
### ALR transformation - point estimate
|
|
68
|
+
df.coda.alr(part=None)
|
|
69
|
+
|
|
70
|
+
Same as clr() but returning additive logratio values. If part is None, then the last part of the composition is used, otherwise part is used as denominator.
|
|
71
|
+
|
|
72
|
+
**Parameters**
|
|
73
|
+
|
|
74
|
+
- part (str) - Name of the part to be used as denominator.
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
### ALR transformation - standard deviation
|
|
78
|
+
df.coda.alr_std(part=None, n_samples=5000)
|
|
79
|
+
|
|
80
|
+
Same as clr_std, but in ALR space.
|
|
81
|
+
|
|
82
|
+
**Parameters**
|
|
83
|
+
|
|
84
|
+
- part (str) - Name of the part to be used as denominator.
|
|
85
|
+
|
|
86
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
### ILR transformation - point estimate
|
|
90
|
+
df.coda.ilr(psi=None)
|
|
91
|
+
|
|
92
|
+
Same as clr() but for isometric logratio transform. An orthonormal basis can be
|
|
93
|
+
provided as psi. If no basis is given, a default sequential binary partition basis will be used.
|
|
94
|
+
|
|
95
|
+
**Parameters**
|
|
96
|
+
|
|
97
|
+
- psi (array_like) - Orthonormal basis.
|
|
98
|
+
|
|
99
|
+
### ILR transformation - standard deviation
|
|
100
|
+
df.coda.ilr_std(psi=None, n_samples=5000)
|
|
101
|
+
|
|
102
|
+
This method does not exist (yet).
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
### Bayesian zero replacement
|
|
106
|
+
df.coda.zero_replacement(n_samples=5000)
|
|
107
|
+
|
|
108
|
+
Returns a count table with zero values replaced by finite values using Bayesian inference.
|
|
109
|
+
|
|
110
|
+
**Parameters**
|
|
111
|
+
|
|
112
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
### Closure
|
|
116
|
+
df.coda.closure(N)
|
|
117
|
+
|
|
118
|
+
Apply closure to constant N to the composition.
|
|
119
|
+
|
|
120
|
+
**Parameters**
|
|
121
|
+
|
|
122
|
+
- N (int) - Closure constant.
|
|
123
|
+
|
|
124
|
+
### Total variance
|
|
125
|
+
df.coda.totvar()
|
|
126
|
+
|
|
127
|
+
Calculates the total variance of a set of compositions.
|
|
128
|
+
|
|
129
|
+
### Geometric mean
|
|
130
|
+
df.coda.gmean()
|
|
131
|
+
|
|
132
|
+
Calculates the geometric mean of a set of compositions.
|
|
133
|
+
|
|
134
|
+
### Centering
|
|
135
|
+
df.coda.center()
|
|
136
|
+
|
|
137
|
+
Centers (and scales) the composition by dividing by the geometric mean and powering by the reciprocal variance.
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
## Plotting functions
|
|
142
|
+
|
|
143
|
+
### PCA biplot
|
|
144
|
+
class pycoda.pca.Biplot(data, default=True)
|
|
145
|
+
|
|
146
|
+
Plots a PCA biplot. Set default to False for an empty plot.
|
|
147
|
+
The parameter data (DataFrame) is the data to be analyzed. Use counts, not CLR values.
|
|
148
|
+
|
|
149
|
+
A number of methods are available for customizing the biplot:
|
|
150
|
+
|
|
151
|
+
- plotloadings(cutoff=0, scale=None, labels=None)
|
|
152
|
+
- plotloadinglabels(labels=None)
|
|
153
|
+
- plotscores(group=None, palette=None, legend=True, labels=None)
|
|
154
|
+
- plotscorelables(labels=None)
|
|
155
|
+
- plotellipses(group=None, palette=None)
|
|
156
|
+
- plotcentroids(group=None, palette=None)
|
|
157
|
+
- plothulls(group=None, palette=None)
|
|
158
|
+
- plotcontours(group=None, palette=None, size=None, levels=None)
|
|
159
|
+
- removepatches()
|
|
160
|
+
- removescores()
|
|
161
|
+
- removelabels()
|
|
162
|
+
|
|
163
|
+
The keyword labels is a list of label names. If labels is None, all labels are plotted. Use labels=[] for no labels.
|
|
164
|
+
|
|
165
|
+
The keyword group is a Pandas dataframe with index equal to the index of data.
|
|
166
|
+
|
|
167
|
+
The keyword palette is a dict with colors to use to each unique member of group.
|
|
168
|
+
|
|
169
|
+
Example
|
|
170
|
+
import pycoda as coda
|
|
171
|
+
import pandas as pd
|
|
172
|
+
|
|
173
|
+
data = pd.read_csv('example/kilauea_iki_chem.csv')
|
|
174
|
+
mypca = coda.pca.Biplot(data)
|
|
175
|
+
mypca.plothulls()
|
|
176
|
+
mypca.removelabels()
|
|
177
|
+
mypca.plotloadinglabels(['FeO'])
|
|
178
|
+
|
|
179
|
+
### Ternary diagram
|
|
180
|
+
pycoda.plot.ternary()
|
pyCoDaMath-1.0/README.md
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# pyCoDaMath
|
|
2
|
+
|
|
3
|
+
[](https://www.python.org/)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
pyCoDaMath provides compositional data (CoDa) analysis tools for Python
|
|
7
|
+
|
|
8
|
+
- **Source code:** https://bitbucket.org/genomicepidemiology/pycoda
|
|
9
|
+
|
|
10
|
+
## Getting Started
|
|
11
|
+
|
|
12
|
+
This package extends the Pandas dataframe object with various CoDa tools. It also provides a set of plotting functions for CoDa figures.
|
|
13
|
+
|
|
14
|
+
### Installation
|
|
15
|
+
|
|
16
|
+
Clone the git repo to your local hard drive:
|
|
17
|
+
|
|
18
|
+
git clone https://brinch@bitbucket.org/genomicepidemiology/pycoda.git
|
|
19
|
+
|
|
20
|
+
Enter pycoda directory and type
|
|
21
|
+
|
|
22
|
+
pip install ./
|
|
23
|
+
|
|
24
|
+
### Usage
|
|
25
|
+
|
|
26
|
+
The pyCoDaMath module is loaded as
|
|
27
|
+
|
|
28
|
+
import pycodamath
|
|
29
|
+
|
|
30
|
+
At this point, in order to get CLR values from a Pandas DataFrame df, do
|
|
31
|
+
|
|
32
|
+
df.coda.clr()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
## Documentation
|
|
36
|
+
|
|
37
|
+
### CLR transformation - point estimate
|
|
38
|
+
df.coda.clr()
|
|
39
|
+
|
|
40
|
+
Returns centered logratio coefficients. If the data frame contains zeros, values
|
|
41
|
+
will be replaced by the Aitchison mean point estimate.
|
|
42
|
+
|
|
43
|
+
### CLR transformation - standard deviation
|
|
44
|
+
df.coda.clr_std(n_samples=5000)
|
|
45
|
+
|
|
46
|
+
Returns the standard deviation of n_samples random draws in CLR space.
|
|
47
|
+
|
|
48
|
+
**Parameters**
|
|
49
|
+
|
|
50
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
### ALR transformation - point estimate
|
|
54
|
+
df.coda.alr(part=None)
|
|
55
|
+
|
|
56
|
+
Same as clr() but returning additive logratio values. If part is None, then the last part of the composition is used, otherwise part is used as denominator.
|
|
57
|
+
|
|
58
|
+
**Parameters**
|
|
59
|
+
|
|
60
|
+
- part (str) - Name of the part to be used as denominator.
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
### ALR transformation - standard deviation
|
|
64
|
+
df.coda.alr_std(part=None, n_samples=5000)
|
|
65
|
+
|
|
66
|
+
Same as clr_std, but in ALR space.
|
|
67
|
+
|
|
68
|
+
**Parameters**
|
|
69
|
+
|
|
70
|
+
- part (str) - Name of the part to be used as denominator.
|
|
71
|
+
|
|
72
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
### ILR transformation - point estimate
|
|
76
|
+
df.coda.ilr(psi=None)
|
|
77
|
+
|
|
78
|
+
Same as clr() but for isometric logratio transform. An orthonormal basis can be
|
|
79
|
+
provided as psi. If no basis is given, a default sequential binary partition basis will be used.
|
|
80
|
+
|
|
81
|
+
**Parameters**
|
|
82
|
+
|
|
83
|
+
- psi (array_like) - Orthonormal basis.
|
|
84
|
+
|
|
85
|
+
### ILR transformation - standard deviation
|
|
86
|
+
df.coda.ilr_std(psi=None, n_samples=5000)
|
|
87
|
+
|
|
88
|
+
This method does not exist (yet).
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
### Bayesian zero replacement
|
|
92
|
+
df.coda.zero_replacement(n_samples=5000)
|
|
93
|
+
|
|
94
|
+
Returns a count table with zero values replaced by finite values using Bayesian inference.
|
|
95
|
+
|
|
96
|
+
**Parameters**
|
|
97
|
+
|
|
98
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
### Closure
|
|
102
|
+
df.coda.closure(N)
|
|
103
|
+
|
|
104
|
+
Apply closure to constant N to the composition.
|
|
105
|
+
|
|
106
|
+
**Parameters**
|
|
107
|
+
|
|
108
|
+
- N (int) - Closure constant.
|
|
109
|
+
|
|
110
|
+
### Total variance
|
|
111
|
+
df.coda.totvar()
|
|
112
|
+
|
|
113
|
+
Calculates the total variance of a set of compositions.
|
|
114
|
+
|
|
115
|
+
### Geometric mean
|
|
116
|
+
df.coda.gmean()
|
|
117
|
+
|
|
118
|
+
Calculates the geometric mean of a set of compositions.
|
|
119
|
+
|
|
120
|
+
### Centering
|
|
121
|
+
df.coda.center()
|
|
122
|
+
|
|
123
|
+
Centers (and scales) the composition by dividing by the geometric mean and powering by the reciprocal variance.
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
## Plotting functions
|
|
128
|
+
|
|
129
|
+
### PCA biplot
|
|
130
|
+
class pycoda.pca.Biplot(data, default=True)
|
|
131
|
+
|
|
132
|
+
Plots a PCA biplot. Set default to False for an empty plot.
|
|
133
|
+
The parameter data (DataFrame) is the data to be analyzed. Use counts, not CLR values.
|
|
134
|
+
|
|
135
|
+
A number of methods are available for customizing the biplot:
|
|
136
|
+
|
|
137
|
+
- plotloadings(cutoff=0, scale=None, labels=None)
|
|
138
|
+
- plotloadinglabels(labels=None)
|
|
139
|
+
- plotscores(group=None, palette=None, legend=True, labels=None)
|
|
140
|
+
- plotscorelables(labels=None)
|
|
141
|
+
- plotellipses(group=None, palette=None)
|
|
142
|
+
- plotcentroids(group=None, palette=None)
|
|
143
|
+
- plothulls(group=None, palette=None)
|
|
144
|
+
- plotcontours(group=None, palette=None, size=None, levels=None)
|
|
145
|
+
- removepatches()
|
|
146
|
+
- removescores()
|
|
147
|
+
- removelabels()
|
|
148
|
+
|
|
149
|
+
The keyword labels is a list of label names. If labels is None, all labels are plotted. Use labels=[] for no labels.
|
|
150
|
+
|
|
151
|
+
The keyword group is a Pandas dataframe with index equal to the index of data.
|
|
152
|
+
|
|
153
|
+
The keyword palette is a dict with colors to use to each unique member of group.
|
|
154
|
+
|
|
155
|
+
Example
|
|
156
|
+
import pycoda as coda
|
|
157
|
+
import pandas as pd
|
|
158
|
+
|
|
159
|
+
data = pd.read_csv('example/kilauea_iki_chem.csv')
|
|
160
|
+
mypca = coda.pca.Biplot(data)
|
|
161
|
+
mypca.plothulls()
|
|
162
|
+
mypca.removelabels()
|
|
163
|
+
mypca.plotloadinglabels(['FeO'])
|
|
164
|
+
|
|
165
|
+
### Ternary diagram
|
|
166
|
+
pycoda.plot.ternary()
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pyCoDaMath"
|
|
7
|
+
version = "1.0"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="Christian Brinch", email="cbri@food.dtu.dk" },
|
|
10
|
+
]
|
|
11
|
+
description = "Compositional data (CoDa) analysis tools for Python"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.7"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
dependencies = ['matplotlib>=3.1.1',
|
|
20
|
+
'numpy>=1.17.2',
|
|
21
|
+
'pandas>=0.25.1',
|
|
22
|
+
'python-ternary>=1.0.6',
|
|
23
|
+
'scipy>=1.3.1',
|
|
24
|
+
'webcolors>=1.13',
|
|
25
|
+
'adjustText==0.7.3',
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
"Homepage" = "https://bitbucket.org/genomicepidemiology/pycodamath"
|
|
30
|
+
"Bug Tracker" = "https://bitbucket.org/genomicepidemiology/pycodamath/issues?status=new&status=open&is_spam=!spam"
|
pyCoDaMath-1.0/setup.cfg
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pyCoDaMath
|
|
3
|
+
Version: 1.0
|
|
4
|
+
Summary: Compositional data (CoDa) analysis tools for Python
|
|
5
|
+
Author-email: Christian Brinch <cbri@food.dtu.dk>
|
|
6
|
+
Project-URL: Homepage, https://bitbucket.org/genomicepidemiology/pycodamath
|
|
7
|
+
Project-URL: Bug Tracker, https://bitbucket.org/genomicepidemiology/pycodamath/issues?status=new&status=open&is_spam=!spam
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
|
|
15
|
+
# pyCoDaMath
|
|
16
|
+
|
|
17
|
+
[](https://www.python.org/)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
pyCoDaMath provides compositional data (CoDa) analysis tools for Python
|
|
21
|
+
|
|
22
|
+
- **Source code:** https://bitbucket.org/genomicepidemiology/pycoda
|
|
23
|
+
|
|
24
|
+
## Getting Started
|
|
25
|
+
|
|
26
|
+
This package extends the Pandas dataframe object with various CoDa tools. It also provides a set of plotting functions for CoDa figures.
|
|
27
|
+
|
|
28
|
+
### Installation
|
|
29
|
+
|
|
30
|
+
Clone the git repo to your local hard drive:
|
|
31
|
+
|
|
32
|
+
git clone https://brinch@bitbucket.org/genomicepidemiology/pycoda.git
|
|
33
|
+
|
|
34
|
+
Enter pycoda directory and type
|
|
35
|
+
|
|
36
|
+
pip install ./
|
|
37
|
+
|
|
38
|
+
### Usage
|
|
39
|
+
|
|
40
|
+
The pyCoDaMath module is loaded as
|
|
41
|
+
|
|
42
|
+
import pycodamath
|
|
43
|
+
|
|
44
|
+
At this point, in order to get CLR values from a Pandas DataFrame df, do
|
|
45
|
+
|
|
46
|
+
df.coda.clr()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## Documentation
|
|
50
|
+
|
|
51
|
+
### CLR transformation - point estimate
|
|
52
|
+
df.coda.clr()
|
|
53
|
+
|
|
54
|
+
Returns centered logratio coefficients. If the data frame contains zeros, values
|
|
55
|
+
will be replaced by the Aitchison mean point estimate.
|
|
56
|
+
|
|
57
|
+
### CLR transformation - standard deviation
|
|
58
|
+
df.coda.clr_std(n_samples=5000)
|
|
59
|
+
|
|
60
|
+
Returns the standard deviation of n_samples random draws in CLR space.
|
|
61
|
+
|
|
62
|
+
**Parameters**
|
|
63
|
+
|
|
64
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
### ALR transformation - point estimate
|
|
68
|
+
df.coda.alr(part=None)
|
|
69
|
+
|
|
70
|
+
Same as clr() but returning additive logratio values. If part is None, then the last part of the composition is used, otherwise part is used as denominator.
|
|
71
|
+
|
|
72
|
+
**Parameters**
|
|
73
|
+
|
|
74
|
+
- part (str) - Name of the part to be used as denominator.
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
### ALR transformation - standard deviation
|
|
78
|
+
df.coda.alr_std(part=None, n_samples=5000)
|
|
79
|
+
|
|
80
|
+
Same as clr_std, but in ALR space.
|
|
81
|
+
|
|
82
|
+
**Parameters**
|
|
83
|
+
|
|
84
|
+
- part (str) - Name of the part to be used as denominator.
|
|
85
|
+
|
|
86
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
### ILR transformation - point estimate
|
|
90
|
+
df.coda.ilr(psi=None)
|
|
91
|
+
|
|
92
|
+
Same as clr() but for isometric logratio transform. An orthonormal basis can be
|
|
93
|
+
provided as psi. If no basis is given, a default sequential binary partition basis will be used.
|
|
94
|
+
|
|
95
|
+
**Parameters**
|
|
96
|
+
|
|
97
|
+
- psi (array_like) - Orthonormal basis.
|
|
98
|
+
|
|
99
|
+
### ILR transformation - standard deviation
|
|
100
|
+
df.coda.ilr_std(psi=None, n_samples=5000)
|
|
101
|
+
|
|
102
|
+
This method does not exist (yet).
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
### Bayesian zero replacement
|
|
106
|
+
df.coda.zero_replacement(n_samples=5000)
|
|
107
|
+
|
|
108
|
+
Returns a count table with zero values replaced by finite values using Bayesian inference.
|
|
109
|
+
|
|
110
|
+
**Parameters**
|
|
111
|
+
|
|
112
|
+
- n_samples (int) - Number of random draws from a Dirichlet distribution.
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
### Closure
|
|
116
|
+
df.coda.closure(N)
|
|
117
|
+
|
|
118
|
+
Apply closure to constant N to the composition.
|
|
119
|
+
|
|
120
|
+
**Parameters**
|
|
121
|
+
|
|
122
|
+
- N (int) - Closure constant.
|
|
123
|
+
|
|
124
|
+
### Total variance
|
|
125
|
+
df.coda.totvar()
|
|
126
|
+
|
|
127
|
+
Calculates the total variance of a set of compositions.
|
|
128
|
+
|
|
129
|
+
### Geometric mean
|
|
130
|
+
df.coda.gmean()
|
|
131
|
+
|
|
132
|
+
Calculates the geometric mean of a set of compositions.
|
|
133
|
+
|
|
134
|
+
### Centering
|
|
135
|
+
df.coda.center()
|
|
136
|
+
|
|
137
|
+
Centers (and scales) the composition by dividing by the geometric mean and powering by the reciprocal variance.
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
## Plotting functions
|
|
142
|
+
|
|
143
|
+
### PCA biplot
|
|
144
|
+
class pycoda.pca.Biplot(data, default=True)
|
|
145
|
+
|
|
146
|
+
Plots a PCA biplot. Set default to False for an empty plot.
|
|
147
|
+
The parameter data (DataFrame) is the data to be analyzed. Use counts, not CLR values.
|
|
148
|
+
|
|
149
|
+
A number of methods are available for customizing the biplot:
|
|
150
|
+
|
|
151
|
+
- plotloadings(cutoff=0, scale=None, labels=None)
|
|
152
|
+
- plotloadinglabels(labels=None)
|
|
153
|
+
- plotscores(group=None, palette=None, legend=True, labels=None)
|
|
154
|
+
- plotscorelables(labels=None)
|
|
155
|
+
- plotellipses(group=None, palette=None)
|
|
156
|
+
- plotcentroids(group=None, palette=None)
|
|
157
|
+
- plothulls(group=None, palette=None)
|
|
158
|
+
- plotcontours(group=None, palette=None, size=None, levels=None)
|
|
159
|
+
- removepatches()
|
|
160
|
+
- removescores()
|
|
161
|
+
- removelabels()
|
|
162
|
+
|
|
163
|
+
The keyword labels is a list of label names. If labels is None, all labels are plotted. Use labels=[] for no labels.
|
|
164
|
+
|
|
165
|
+
The keyword group is a Pandas dataframe with index equal to the index of data.
|
|
166
|
+
|
|
167
|
+
The keyword palette is a dict with colors to use to each unique member of group.
|
|
168
|
+
|
|
169
|
+
Example
|
|
170
|
+
import pycoda as coda
|
|
171
|
+
import pandas as pd
|
|
172
|
+
|
|
173
|
+
data = pd.read_csv('example/kilauea_iki_chem.csv')
|
|
174
|
+
mypca = coda.pca.Biplot(data)
|
|
175
|
+
mypca.plothulls()
|
|
176
|
+
mypca.removelabels()
|
|
177
|
+
mypca.plotloadinglabels(['FeO'])
|
|
178
|
+
|
|
179
|
+
### Ternary diagram
|
|
180
|
+
pycoda.plot.ternary()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/pyCoDaMath.egg-info/PKG-INFO
|
|
5
|
+
src/pyCoDaMath.egg-info/SOURCES.txt
|
|
6
|
+
src/pyCoDaMath.egg-info/dependency_links.txt
|
|
7
|
+
src/pyCoDaMath.egg-info/requires.txt
|
|
8
|
+
src/pyCoDaMath.egg-info/top_level.txt
|
|
9
|
+
src/pycodamath/__init__.py
|
|
10
|
+
src/pycodamath/extra.py
|
|
11
|
+
src/pycodamath/pca.py
|
|
12
|
+
src/pycodamath/plot.py
|
|
13
|
+
src/pycodamath/pycoda.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pycodamath
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
'''
pyCoDa init script
'''

__title__ = "pyCoDaMath"
__author__ = "Christian Brinch"
__email__ = "cbri@food.dtu.dk"
__copyright__ = "Copyright 2019 C. Brinch"
# Version is a string to match pyproject.toml and the usual __version__
# convention; a float breaks consumers that expect e.g. __version__.split('.').
__version__ = "1.0"
__all__ = ['pycoda', 'extra', 'plot', 'pca']

from . import pycoda, pca
# Register the .coda DataFrame accessor on import.
pycoda.init()
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
''' Auxilliary functions for pyCoDaMath
|
|
3
|
+
'''
|
|
4
|
+
|
|
5
|
+
__author__ = "Christian Brinch"
|
|
6
|
+
__copyright__ = "Copyright 2019"
|
|
7
|
+
__credits__ = ["Christian Brinch"]
|
|
8
|
+
__license__ = "AFL 3.0"
|
|
9
|
+
__version__ = "1.0"
|
|
10
|
+
__maintainer__ = "Christian Brinch"
|
|
11
|
+
__email__ = "cbri@food.dtu.dk"
|
|
12
|
+
|
|
13
|
+
import math
|
|
14
|
+
import numpy as np
|
|
15
|
+
from matplotlib.patches import Ellipse
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def sbp_basis(obj):
    ''' Build the default sequential binary partition basis for the ILR
    transform.

    Parameters:
        obj (array_like): Compositional data; only its number of columns
            (parts) is used.

    Returns:
        numpy.ndarray: A (D-1, D) orthonormal basis, validated by
        check_basis before being returned.
    '''
    n_parts = np.shape(obj)[1]
    psi = np.zeros([n_parts - 1, n_parts])
    for row in range(n_parts - 1):
        # Columns left of the pivot share one positive weight, the pivot
        # column carries the balancing negative weight, the rest stay zero.
        pivot = n_parts - row - 1
        for col in range(n_parts):
            if col < pivot:
                psi[row, col] = np.sqrt(1. / (pivot * (n_parts - row)))
            elif col == pivot:
                psi[row, col] = -np.sqrt(pivot / (n_parts - row))

    check_basis(psi)
    return psi
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def norm(balances):
    ''' Normalize a matrix of balances into an orthonormal basis.

    Parameters:
        balances (iterable of rows): Each row holds positive, negative and
            zero entries marking the two groups of a balance.

    Returns:
        numpy.ndarray: The normalized basis, validated by check_basis.
    '''
    rows = []
    for balance in balances:
        n_neg = sum(1 for entry in balance if entry < 0)
        n_pos = sum(1 for entry in balance if entry > 0)
        weight = np.sqrt(n_pos * n_neg / (n_pos + n_neg))
        row = []
        for entry in balance:
            if entry > 0:
                row.append(1 / n_pos * weight)
            elif entry < 0:
                row.append(-1 / n_neg * weight)
            else:
                row.append(0)
        rows.append(row)

    psi = np.array(rows)
    check_basis(psi)
    return psi
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def check_basis(psi):
    ''' Check that psi is an orthonormal basis (rows of unit norm and
    mutually orthogonal).

    Parameters:
        psi (numpy.ndarray): Candidate basis, one balance per row.

    Raises:
        AttributeError: If the rows are not normalized or not orthogonal.
    '''
    gram = np.matmul(psi, psi.T)
    # Use a tolerance instead of exact equality: the trace of the Gram
    # matrix of a perfectly valid basis differs from the row count by
    # floating-point rounding, so `!=` would reject correct bases.
    if np.abs(np.trace(gram) - np.shape(gram)[0]) > 1e-6:
        raise AttributeError("Error: Basis is not normalized.")
    if np.abs(np.sum(gram - np.diag(np.diagonal(gram)))) > 1e-6:
        raise AttributeError("Error: Basis is not orthogonal.")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def points_in_ellipse(ellipse, npoints):
    ''' Sample npoints coordinates along the edge of an ellipse.

    Parameters:
        ellipse (dict): Keys 'shape' (semi-axes), 'angle' (radians) and
            'center'.
        npoints (int): Number of evenly spaced samples over one full turn
            (first and last sample coincide).

    Returns:
        list of tuple: (x, y) pairs tracing the ellipse rim.
    '''
    cos_rot = math.cos(-ellipse['angle'])
    sin_rot = math.sin(-ellipse['angle'])
    center_x, center_y = ellipse['center']

    edge = []
    for theta in np.linspace(0, 2 * np.pi, npoints):
        # Point on the axis-aligned ellipse, rotated by -angle and shifted
        # to the center.
        along_a = ellipse['shape'][0] * math.cos(theta)
        along_b = ellipse['shape'][1] * math.sin(theta)
        edge.append((along_a * cos_rot - along_b * sin_rot + center_x,
                     along_a * sin_rot + along_b * cos_rot + center_y))
    return edge
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def check_point_in_ellipse(scores, ellipse):
    ''' Check whether a point lies outside a slightly inflated ellipse.

    The point is translated to the ellipse center and rotated by -angle
    into the ellipse frame, then tested against the canonical ellipse
    equation with a 1.25 slack factor.

    Parameters:
        scores (sequence): The (x, y) point to test.
        ellipse (dict): Keys 'shape' (semi-axes), 'angle' (radians) and
            'center'.

    Returns:
        bool: True when the point falls outside, False otherwise.
    '''
    delta_x = scores[0] - ellipse['center'][0]
    delta_y = scores[1] - ellipse['center'][1]

    cos_rot = np.cos(-ellipse['angle'])
    sin_rot = np.sin(-ellipse['angle'])
    frame_x = delta_x * cos_rot - delta_y * sin_rot
    frame_y = delta_x * sin_rot + delta_y * cos_rot

    semi_a, semi_b = ellipse['shape']
    return bool((frame_x**2 / semi_a**2) + (frame_y**2 / semi_b**2) > 1.25)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_covariance_ellipse(data, conf=95):
    ''' Fit a confidence ellipse to two columns of coordinates.

    Parameters:
        data (pandas.DataFrame): Exactly two columns (labelled 0 and 1).
        conf (int): Confidence level; one of 90, 95 (default) or 99.

    Returns:
        dict: Keys 'shape' (semi-axes), 'angle' (radians) and 'center'.

    Raises:
        AttributeError: If more than two columns are given or conf is not
            one of the supported levels.
    '''
    if len(data.columns) > 2:
        raise AttributeError(
            ("Error: get_covariance_ellipse expects only two columns. " +
             "Got {0:d}.").format(len(data.columns)))

    # Chi-square quantiles for two degrees of freedom at each level.
    scale_by_conf = {90: 4.605, 95: 5.991, 99: 9.210}
    if conf not in scale_by_conf:
        raise AttributeError(
            "Error: get_covariance_ellipse parameter conf can only accept values {90, 95, 99}.")
    scale = scale_by_conf[conf]

    eig_val, eig_vec = np.linalg.eig(np.cov(data.loc[:, 0], data.loc[:, 1]))
    semi_axes = np.sqrt(eig_val)

    return {'shape': (semi_axes[0]*np.sqrt(scale), semi_axes[1]*np.sqrt(scale)),
            'angle': np.arctan(eig_vec[1, 0]/eig_vec[0, 0]),
            'center': (np.mean(data.loc[:, 0]), np.mean(data.loc[:, 1]))}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def plot_covariance_ellipse(axis, ellipse, color=0):
    ''' Draw a covariance ellipse as an outline plus a translucent fill.

    Parameters:
        axis (matplotlib axis): Target axis; two Ellipse artists are added.
        ellipse (dict): Keys 'shape' (semi-axes), 'angle' (radians) and
            'center'.
        color: Edge/fill color; None falls back to 'black'.
    '''
    if color is None:
        color = 'black'

    # Ellipse takes full width/height, while the dict stores semi-axes.
    width = 2 * ellipse['shape'][0]
    height = 2 * ellipse['shape'][1]
    angle_deg = np.rad2deg(ellipse['angle'])

    outline = Ellipse(xy=ellipse['center'],
                      width=width,
                      height=height,
                      angle=angle_deg,
                      alpha=0.5,
                      edgecolor=color,
                      fill=False,
                      lw=1.5,
                      ls='-')
    axis.add_artist(outline)

    filled = Ellipse(xy=ellipse['center'],
                     width=width,
                     height=height,
                     angle=angle_deg,
                     alpha=0.15,
                     edgecolor=None,
                     fill=True,
                     color=color)
    axis.add_artist(filled)
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
''' Class and methods for making compositional biplots based on PCA '''
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import matplotlib.pyplot as plt
|
|
5
|
+
import webcolors as wc
|
|
6
|
+
from matplotlib.colors import ListedColormap
|
|
7
|
+
from matplotlib import cm
|
|
8
|
+
import matplotlib.patches as mpatches
|
|
9
|
+
import pandas as pd
|
|
10
|
+
import scipy.stats as st
|
|
11
|
+
from pycodamath import extra
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class GeomObj():
    ''' A generic container of geometric objects.

    Every keyword argument becomes an instance attribute. A 'vertices'
    attribute (a pair of x- and y-coordinate sequences) is required,
    since the polygon area is computed at construction time and stored
    as self.area.
    '''

    def __init__(self, **kwargs):
        # Expose all keyword arguments directly as attributes.
        vars(self).update(kwargs)
        self.area = self.polyarea()

    def polyarea(self):
        ''' Shoelace-formula area of the polygon given by self.vertices. '''
        xs, ys = self.vertices
        cross_a = np.dot(xs, np.roll(ys, 1))
        cross_b = np.dot(ys, np.roll(xs, 1))
        return 0.5 * np.abs(cross_a - cross_b)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def scree_plot(axis, eig_val):
    ''' Make a scree plot from eigenvalues.

    Parameters:
        axis (matplotlib axis): Axis to draw the bar chart on.
        eig_val (array_like): Singular values of the decomposition.
    '''
    axis.set_xlabel('Component')
    # Fixed typo in the user-facing label ('varaince' -> 'variance').
    axis.set_ylabel('Explained variance')
    axis.set_xlim(0, min(len(eig_val)+1, 20))
    # Explained-variance fraction of each component. The bars previously
    # used (eig_val/sum(eig_val))**2, which is inconsistent with the
    # cumulative annotation below (and with the Biplot axis labels);
    # both now use eig_val**2 / sum(eig_val**2).
    frac = eig_val**2 / np.sum(eig_val**2)
    axis.bar(np.arange(len(eig_val))+1, frac)
    # Annotate the first (up to) five bars with the cumulative percentage.
    csum = np.cumsum(frac)
    for i in range(min(5, len(eig_val))):
        axis.annotate(str(np.round(csum[i]*100))+'%', (i+1.2, frac[i]))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _get_palette(group):
    ''' Map each unique member of group to a distinct color sampled evenly
    from the jet colormap.

    Parameters:
        group (iterable): Group labels, possibly with repeats.

    Returns:
        dict: label -> RGBA color.
    '''
    members = set(group)
    color_space = cm.jet(np.linspace(0, 1, len(members)))
    return {member: color_space[pos] for pos, member in enumerate(members)}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _svd(clr):
|
|
48
|
+
''' Internal SVD function '''
|
|
49
|
+
scores, eig_val, loadings = np.linalg.svd(clr)
|
|
50
|
+
scores = pd.DataFrame(scores.T[0:2, :], columns=clr.index, index=['pc1', 'pc2'])
|
|
51
|
+
loadings = pd.DataFrame(np.inner(eig_val*np.identity(len(eig_val)),
|
|
52
|
+
loadings.T[0:len(eig_val), 0:len(eig_val)])[0:2],
|
|
53
|
+
columns=clr.columns[0:len(eig_val)], index=['pc1', 'pc2'])
|
|
54
|
+
return scores, eig_val, loadings
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class Biplot():
    ''' Create and draw a PCA biplot.

    The composition is centered, scaled and clr-transformed (via the
    .coda accessor), decomposed with an SVD, and scores/loadings for the
    first two principal components are drawn on a square axis.
    '''

    def __init__(self, data, axis=None, default=True):
        # Parameters:
        #   data: pandas DataFrame of compositional data providing the
        #         .coda accessor.
        #   axis: optional matplotlib axis; a new square figure is
        #         created when omitted.
        #   default: when True, immediately draw loadings and scores.
        if axis is None:
            _, self.axis = plt.subplots(figsize=(7.8, 7.8))
        else:
            self.axis = axis
        self.axis.set(adjustable='box', aspect='equal')
        # PCA via SVD of the centered, scaled clr coordinates.
        self.scores, eig_val, self.loadings = _svd(data.coda.center().coda.scale().coda.clr())
        # scales[0]: largest absolute loading (sets the axis range);
        # scales[1]: largest absolute score per component (used to map
        # scores onto the loading scale below).
        scales = [np.max(np.abs(self.loadings.values)),
                  [np.max(np.abs(self.scores.loc[idx].values)) for idx in ['pc1', 'pc2']]]

        # Percent variance explained per component: eig**2 / sum(eig**2).
        self.axis.set_xlabel(f'P.C. 1 ({np.round(eig_val[0]**2 / np.sum(eig_val**2)*100):.1f}% explained variation)')
        self.axis.set_ylabel(f'P.C. 2 ({np.round(eig_val[1]**2 / np.sum(eig_val**2)*100):.1f}% explained variation)')
        self.axis.set_xlim(-scales[0]*1.1, scales[0]*1.1)
        self.axis.set_ylim(-scales[0]*1.1, scales[0]*1.1)
        # Dashed cross hairs through the origin.
        self.axis.plot([self.axis.get_xlim()[0], self.axis.get_xlim()[1]],
                       [0.0, 0.0], '--', color='black', alpha=0.4)
        self.axis.plot([0.0, 0.0], [self.axis.get_ylim()[0], self.axis.get_ylim()[1]],
                       '--', color='black', alpha=0.4)

        # Rescale the scores per component so they share the loadings'
        # axis range.
        self.scores = (scales[0]*(self.scores.T/scales[1])).T

        self.patches = []   # legend patches accumulated by the plot methods
        self.geomobj = {}   # per-group geometry (hulls / contours)
        plt.tight_layout()

        if default:
            self.plotloadings()
            self.plotscores()
|
|
89
|
+
|
|
90
|
+
def plotloadings(self, cutoff=0, scale=None, labels=None):
|
|
91
|
+
''' Plot loadings '''
|
|
92
|
+
if scale is None:
|
|
93
|
+
scale = np.max(np.abs(self.loadings.values))
|
|
94
|
+
|
|
95
|
+
for column in self.loadings:
|
|
96
|
+
if np.sqrt(pow(self.loadings.loc['pc1', column], 2) +
|
|
97
|
+
pow(self.loadings.loc['pc2', column], 2)) > cutoff:
|
|
98
|
+
self.axis.arrow(0, 0,
|
|
99
|
+
self.loadings.loc['pc1', column],
|
|
100
|
+
self.loadings.loc['pc2', column],
|
|
101
|
+
facecolor='black',
|
|
102
|
+
alpha=0.5,
|
|
103
|
+
linewidth=0.,
|
|
104
|
+
width=scale*0.01,
|
|
105
|
+
zorder=2000)
|
|
106
|
+
self.plotloadinglabels(labels, cutoff)
|
|
107
|
+
|
|
108
|
+
def plotloadinglabels(self, labels=None, cutoff=0):
|
|
109
|
+
''' Add labels to the loadings '''
|
|
110
|
+
if labels is None:
|
|
111
|
+
labels = self.loadings.columns
|
|
112
|
+
|
|
113
|
+
for column in labels:
|
|
114
|
+
if np.sqrt(pow(self.loadings.loc['pc1', column], 2) +
|
|
115
|
+
pow(self.loadings.loc['pc2', column], 2)) > cutoff:
|
|
116
|
+
yoff = 0.
|
|
117
|
+
if self.loadings.loc['pc1', column] > 0.9*self.axis.get_xlim()[1]:
|
|
118
|
+
xoff = -1.2
|
|
119
|
+
else:
|
|
120
|
+
xoff = 0
|
|
121
|
+
self.axis.annotate(column, (self.loadings.loc['pc1', column]+xoff,
|
|
122
|
+
self.loadings.loc['pc2', column]+yoff),
|
|
123
|
+
ha='left',
|
|
124
|
+
va='bottom',
|
|
125
|
+
alpha=1.0,
|
|
126
|
+
zorder=2001
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
def plotscores(self, group=None, palette=None, legend=True, labels=None):
|
|
130
|
+
''' Plot scores as points '''
|
|
131
|
+
if labels is None:
|
|
132
|
+
labels = self.scores.columns
|
|
133
|
+
|
|
134
|
+
if palette is None:
|
|
135
|
+
if group is not None:
|
|
136
|
+
palette = _get_palette(group)
|
|
137
|
+
else:
|
|
138
|
+
palette = 'steelblue'
|
|
139
|
+
|
|
140
|
+
if group is None:
|
|
141
|
+
self.axis.plot(*self.scores[labels].values, 'o', alpha=0.5,
|
|
142
|
+
color=palette, zorder=7, markeredgewidth=0)
|
|
143
|
+
else:
|
|
144
|
+
for item in set(group):
|
|
145
|
+
idx = group.loc[group == item].index
|
|
146
|
+
self.axis.plot(*self.scores[idx].values, 'o', alpha=0.5, zorder=7,
|
|
147
|
+
label=item, color=palette[item], markeredgewidth=0)
|
|
148
|
+
if legend:
|
|
149
|
+
self.patches.append(mpatches.Patch(color=palette[item], label=item))
|
|
150
|
+
|
|
151
|
+
def plotscorelabels(self, labels=None):
|
|
152
|
+
''' Add labels to the scores '''
|
|
153
|
+
if labels is None:
|
|
154
|
+
labels = self.scores.columns
|
|
155
|
+
|
|
156
|
+
for label in labels:
|
|
157
|
+
self.axis.annotate(label, (self.scores.loc['pc1', label],
|
|
158
|
+
self.scores.loc['pc2', label]),
|
|
159
|
+
ha='left',
|
|
160
|
+
va='bottom',
|
|
161
|
+
alpha=0.8,
|
|
162
|
+
zorder=201,
|
|
163
|
+
size=8
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
def plotellipses(self, group, palette=None, legend=False):
|
|
167
|
+
''' Plot confidence ellipses '''
|
|
168
|
+
if palette is None:
|
|
169
|
+
palette = _get_palette(group)
|
|
170
|
+
|
|
171
|
+
for item in set(group):
|
|
172
|
+
idx = group.loc[group == item].index
|
|
173
|
+
if len(idx) > 3:
|
|
174
|
+
ellipse = extra.get_covariance_ellipse(pd.DataFrame(self.scores[idx].values.T),
|
|
175
|
+
conf=90)
|
|
176
|
+
extra.plot_covariance_ellipse(self.axis, ellipse, color=palette[item])
|
|
177
|
+
if legend:
|
|
178
|
+
self.patches.append(mpatches.Patch(color=palette[item], label=item))
|
|
179
|
+
|
|
180
|
+
def plotcentroids(self, group, palette=None, legend=False):
|
|
181
|
+
''' Plot score group centroids '''
|
|
182
|
+
if palette is None:
|
|
183
|
+
palette = _get_palette(group)
|
|
184
|
+
|
|
185
|
+
for item in set(group):
|
|
186
|
+
idx = group.loc[group == item].index
|
|
187
|
+
length = len(self.scores[idx].T)
|
|
188
|
+
sum_x = np.sum(self.scores.loc['pc1', idx])
|
|
189
|
+
sum_y = np.sum(self.scores.loc['pc2', idx])
|
|
190
|
+
self.axis.plot([sum_x/length], [sum_y/length], 'x', alpha=0.7,
|
|
191
|
+
label=item, color=palette[item], markersize=24)
|
|
192
|
+
if legend:
|
|
193
|
+
self.patches.append(mpatches.Patch(color=palette[item], label=item))
|
|
194
|
+
|
|
195
|
+
    def plothulls(self, group, palette=None, legend=True):
        ''' Plot score group hulls.

        For every group with more than three members, a convex hull of
        the group's scores is computed and drawn as a filled polygon
        with a black outline. Larger hulls are drawn first so smaller
        ones remain visible on top.
        '''
        if palette is None:
            palette = _get_palette(group)

        self.geomobj = {}
        for item in set(group):
            idx = group.loc[group == item].index
            if len(idx) > 3:
                # Gift-wrapping style hull: start from the point with the
                # smallest pc1 coordinate and repeatedly walk to the next
                # extreme point until the start is reached again.
                idxmin = self.scores.loc['pc1', idx].idxmin()
                j = self.scores[idx].columns.get_loc(idxmin)
                hull = [list(self.scores[idxmin])]
                while (j != self.scores[idx].columns.get_loc(idxmin) or len(hull) == 1):
                    k = (j + 1) % len(idx)
                    for i in range(len(idx)):
                        # Orientation (cross-product) test: if point i lies
                        # on the negative side of the edge j->k, it becomes
                        # the new candidate for the next hull vertex.
                        if (self.scores[idx].iloc[1, k]-self.scores[idx].iloc[1, j]) * \
                                (self.scores[idx].iloc[0, i]-self.scores[idx].iloc[0, k]) - \
                                (self.scores[idx].iloc[0, k]-self.scores[idx].iloc[0, j]) * \
                                (self.scores[idx].iloc[1, i]-self.scores[idx].iloc[1, k]) < 0:
                            k = i
                    j = k
                    hull.append(list(self.scores[self.scores[idx].columns[k]]))
                # Store as (xs, ys) vertex lists; GeomObj computes the area.
                self.geomobj[item] = GeomObj(vertices=tuple(map(list, zip(*hull))))

        # Draw hulls from largest to smallest area so overlapping hulls
        # stay distinguishable; zorder increases with draw order.
        for idx, item in enumerate(sorted(self.geomobj,
                                          key=lambda x: self.geomobj[x].area, reverse=True)):
            self.axis.fill(*self.geomobj[item].vertices,
                           color=palette[item], alpha=0.7, zorder=10+(2*idx))
            self.axis.fill(*self.geomobj[item].vertices, facecolor='none',
                           edgecolor='black', alpha=0.9, linewidth=2.2, zorder=11+(2*idx))

            if legend:
                self.patches.append(mpatches.Patch(color=palette[item], label=item))
|
|
229
|
+
|
|
230
|
+
    def plotcontours(self, group, palette=None, legend=True,
                     plot_outliers=True, percent_outliers=0.1, linewidth=2.2):
        ''' Plot scores as kernel-density contours, one set per group.

        The lowest contour level is adjusted iteratively so that roughly
        percent_outliers of each group's points fall outside the
        outermost contour; those points can optionally be drawn
        individually.

        Parameters:
            group: pandas Series mapping sample names to group labels.
            palette: optional dict label -> color (hex string, named
                     color, or RGBA tuple).
            legend: when True, accumulate legend patches.
            plot_outliers: draw the points left outside the contours.
            percent_outliers: target fraction (0..1) of points outside.
            linewidth: width of the black contour outline.
        '''
        if palette is None and group is not None:
            palette = _get_palette(group)
        if percent_outliers > 1 or percent_outliers < 0:
            raise Exception('Percent_outliers has to be between 0 and 1')

        # Build color maps: per group, a 4-step colormap fading from near
        # white to the group color, with decreasing alpha.
        cmap = {}
        for item in set(group):
            colorvalues = np.ones((4, 4))
            if '#' in str(palette[item]):
                color = wc.hex_to_rgb(palette[item])
            elif palette[item][-1] != 1:
                # Presumably a named color when the last element is not a
                # full-alpha channel -- TODO confirm for all palette inputs.
                color = wc.name_to_rgb(palette[item])
            else:
                color = palette[item]

            for i in range(3):
                colorvalues[:, i] = np.linspace(1, color[i]/256., 5)[1:]
            colorvalues[:, 3] = np.linspace(.95, .25, 4)
            cmap[item] = ListedColormap(colorvalues)

        self.geomobj = {}
        for item in set(group):
            minlevel = 0.2
            diff = 100
            k = 0
            # Iterate (max 25 times) adjusting the lowest contour level
            # until the number of points outside the outer contour matches
            # the requested outlier fraction.
            while abs(diff) > 0 and k < 25:
                levels = np.arange(5)*(1.-minlevel)/4.+minlevel
                idx = group.loc[group == item].index
                # 300x300 evaluation grid spanning the current axis limits.
                xgrid, ygrid = np.mgrid[self.axis.get_xlim()[0]: self.axis.get_xlim()[1]: 300j,
                                        self.axis.get_ylim()[0]: self.axis.get_ylim()[1]: 300j]
                positions = np.vstack([xgrid.ravel(), ygrid.ravel()])
                values = np.vstack([self.scores.loc['pc1', idx], self.scores.loc['pc2', idx]])
                kernel = st.gaussian_kde(values)
                density = np.reshape(kernel(positions).T, xgrid.shape)
                vals = np.max(density)*levels
                # Draw a throw-away contour set just to obtain its paths;
                # it is removed again below. NOTE(review): indexing
                # axis.collections[-4]/[-5] relies on matplotlib creating
                # one collection per contour level -- verify against the
                # matplotlib version in use.
                self.axis.contour(xgrid, ygrid, density, vals)
                vertices = self.axis.collections[-4].get_paths()[0].vertices.T
                contained = [False for _ in range(len(idx))]
                for j in range(len(self.axis.collections[-5].get_paths())):
                    contained = np.logical_or(contained,
                                              self.axis.collections[-5].get_paths()[j].contains_points(
                                                  [[self.scores.loc['pc1', i],
                                                    self.scores.loc['pc2', i]] for i in idx]))
                # Remove the temporary contour collections again.
                _ = [self.axis.collections[-1].remove() for _ in np.arange(5)]
                outside = [a for a, b in zip(list(idx), contained) if not b]

                diff = round(percent_outliers*len(idx))-len(outside)
                minlevel = minlevel+diff/1000.
                k += 1

            self.geomobj[item] = GeomObj(vertices=vertices, grid=(
                xgrid, ygrid), density=density, values=vals, outside=outside)

        # Draw groups from largest to smallest contour area so smaller
        # groups stay visible on top.
        for idx, item in enumerate(sorted(self.geomobj,
                                          key=lambda x: self.geomobj[x].area, reverse=True)):
            self.axis.contourf(*self.geomobj[item].grid, self.geomobj[item].density,
                               self.geomobj[item].values, antialiased=True,
                               cmap=cmap[item], alpha=0.9, zorder=10+(2*idx))
            self.axis.contour(*self.geomobj[item].grid, self.geomobj[item].density,
                              self.geomobj[item].values, antialiased=True,
                              colors='black', alpha=0.9, linewidths=linewidth, zorder=11+(2*idx))
            self.axis.collections[-1].remove()

            if plot_outliers:
                self.plotscores(None, palette[item], False, self.geomobj[item].outside)

            if legend:
                self.patches.append(mpatches.Patch(color=palette[item], label=item))
|
|
302
|
+
|
|
303
|
+
def labeloutliers(self, group, conf=3.):
|
|
304
|
+
''' Print labels on scores that are more than conf away from centroid '''
|
|
305
|
+
for item in set(group):
|
|
306
|
+
idx = group.loc[group == item].index
|
|
307
|
+
length = len(self.scores[idx].T)
|
|
308
|
+
sum_x = np.sum(self.scores.loc['pc1', idx])
|
|
309
|
+
sum_y = np.sum(self.scores.loc['pc2', idx])
|
|
310
|
+
|
|
311
|
+
pdist = {i: np.sqrt(pow(self.scores.loc['pc1', i]-sum_x/length, 2) +
|
|
312
|
+
pow(self.scores.loc['pc2', i]-sum_y/length, 2)) for i in idx}
|
|
313
|
+
std = np.std(pdist.values())
|
|
314
|
+
|
|
315
|
+
outliers = [i for i in pdist.keys() if pdist[i] > conf*std]
|
|
316
|
+
self.plotscorelabels(outliers)
|
|
317
|
+
|
|
318
|
+
def displaylegend(self, loc=2):
|
|
319
|
+
''' Display the item legend at location loc '''
|
|
320
|
+
patches = sorted(self.patches, key=lambda x: x._label)
|
|
321
|
+
self.axis.legend(handles=patches, fontsize=9, frameon=False, loc=loc)
|
|
322
|
+
|
|
323
|
+
def removepatches(self):
|
|
324
|
+
''' remove arrows and polygons from plot '''
|
|
325
|
+
for _ in range(len(self.axis.patches)):
|
|
326
|
+
self.axis.patches[-1].remove()
|
|
327
|
+
|
|
328
|
+
def removelabels(self):
|
|
329
|
+
''' remove labels from plot '''
|
|
330
|
+
for _ in range(len(self.axis.texts)):
|
|
331
|
+
self.axis.texts[-1].remove()
|
|
332
|
+
|
|
333
|
+
def removescores(self):
|
|
334
|
+
''' remove points from plot '''
|
|
335
|
+
for _ in range(len(self.axis.lines)):
|
|
336
|
+
self.axis.lines[-1].remove()
|
|
337
|
+
|
|
338
|
+
def removecontours(self):
|
|
339
|
+
''' remove points from plot '''
|
|
340
|
+
for _ in range(len(self.axis.collections)):
|
|
341
|
+
self.axis.collections[-1].remove()
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
''' Compositional plot
|
|
3
|
+
'''
|
|
4
|
+
|
|
5
|
+
__author__ = "Christian Brinch"
|
|
6
|
+
__copyright__ = "Copyright 2019"
|
|
7
|
+
__credits__ = ["Christian Brinch"]
|
|
8
|
+
__license__ = "AFL 3.0"
|
|
9
|
+
__version__ = "1.0"
|
|
10
|
+
__maintainer__ = "Christian Brinch"
|
|
11
|
+
__email__ = "cbri@food.dtu.dk"
|
|
12
|
+
|
|
13
|
+
import ternary as td
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from pycodamath import extra
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def ternary(data, descr=None, center=False, conf=False):
    ''' Plot a ternary diagram of a three-part composition.

    Parameters:
        data: pandas DataFrame with exactly three parts (columns), each
              row closed to 100.
        descr: optional pandas Series mapping sample names to group
               labels; points are scattered per group when given.
        center: when True, center the composition on its geometric mean
                before plotting.
        conf: when True, draw a confidence ellipse computed in ilr
              coordinates and back-transformed onto the simplex.

    Returns:
        The ternary axis object.

    Raises:
        AttributeError: if data has more than three parts or any row is
        not closed to 100.
    '''
    if np.shape(data)[1] > 3:
        raise AttributeError("Error: Too many parts in composition (max. 3).")
    for column in data.T:
        if np.abs(data.T[column].sum()-100.) > 1e-6:
            raise AttributeError("Error: Composition is not closed to 100.")

    _, tax = td.figure(scale=100)
    tax.boundary(linewidth=1.5)
    tax.gridlines(color="blue", multiple=10, linewidth=0.5, alpha=0.5)
    tax.left_axis_label(f"% {data.columns[0]:s}", fontsize=16, offset=0.14)
    tax.right_axis_label(f"% {data.columns[1]:s}", fontsize=16, offset=0.14)
    tax.bottom_axis_label(f"% {data.columns[2]:s}", fontsize=16, offset=0.12)
    tax.ticks(axis='lbr', linewidth=1, multiple=10, offset=0.03)
    tax.clear_matplotlib_ticks()
    tax.get_axes().axis('off')

    if center:
        # Perturb by the inverse geometric mean and re-close to 100.
        sdata = (data/data.coda.gmean()).coda.closure(100)
    else:
        sdata = data

    if descr is not None:
        # One scatter call per group (each gets its own default color).
        for group in set(descr):
            idx = descr.loc[descr == group].index
            # Columns are reordered to match the ternary library's
            # (bottom, right, left) point convention.
            tax.scatter(sdata.loc[idx, [sdata.columns[2], sdata.columns[1],
                                        sdata.columns[0]]].values, alpha=0.7)
    else:
        tax.scatter(sdata.loc[:, [sdata.columns[2], sdata.columns[1],
                                  sdata.columns[0]]].values, alpha=0.7,
                    color='steelblue')

    if conf:
        # Covariance ellipse is estimated in ilr coordinates...
        ilr = sdata.coda.ilr().loc[:, [0, 1]]
        par = extra.get_covariance_ellipse(ilr)

        # ...sampled as a parametric curve (rotated/scaled circle)...
        points = [[par['center'][0] +
                   par['shape'][0]*np.cos(par['angle'])*np.cos(a) -
                   par['shape'][1]*np.sin(par['angle'])*np.sin(a),
                   par['center'][1] +
                   par['shape'][0]*np.cos(par['angle'])*np.sin(a) +
                   par['shape'][1]*np.sin(par['angle'])*np.cos(a)]
                  for a in np.linspace(0, 2*np.pi, 100)]

        psi = extra.sbp_basis(sdata)

        # ...and mapped back onto the simplex via the inverse ilr.
        ellipse = pd.DataFrame(np.exp(np.matmul(points, psi))).coda.closure(100)
        ellipse = ellipse.loc[:, [ellipse.columns[1], ellipse.columns[0],
                                  ellipse.columns[2]]]
        tax.plot(ellipse.values, color='black', lw=0.5, ls='-')

    return tax
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
''' CoDa related extensions to pandas dataframes
|
|
3
|
+
'''
|
|
4
|
+
|
|
5
|
+
__author__ = "Christian Brinch"
|
|
6
|
+
__copyright__ = "Copyright 2019"
|
|
7
|
+
__credits__ = ["Christian Brinch"]
|
|
8
|
+
__license__ = "AFL 3.0"
|
|
9
|
+
__version__ = "1.0"
|
|
10
|
+
__maintainer__ = "Christian Brinch"
|
|
11
|
+
__email__ = "cbri@food.dtu.dk"
|
|
12
|
+
|
|
13
|
+
import pandas as pd
|
|
14
|
+
import numpy as np
|
|
15
|
+
import scipy.stats as ss
|
|
16
|
+
import scipy.special as sp
|
|
17
|
+
from pycodamath import extra
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _clr_internal(obj):
|
|
21
|
+
return (np.log(obj.T) - np.mean(np.log(obj.T), axis=0)).T
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _alr_internal(obj):
|
|
25
|
+
return pd.DataFrame(np.log(obj.T/obj.T.loc[obj.columns[-1]])).T.iloc[:, :-1]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _ilr_internal(obj, psi):
    ''' Isometric log-ratio transform: project the clr coordinates onto
    the orthonormal basis psi. '''
    coords = np.dot(_clr_internal(obj), psi.T)
    return pd.DataFrame(coords, index=obj.index)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _ilr_inv_internal(obj, psi):
|
|
33
|
+
return pd.DataFrame(np.exp(np.matmul(obj.values, psi)))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def init():
    ''' Initialize the CoDa extension.

    Registers a "coda" accessor on pandas DataFrames that provides
    log-ratio transformations (clr/alr/ilr), Bayesian zero replacement,
    and related compositional operations.
    '''
    @ pd.api.extensions.register_dataframe_accessor("coda")
    class _:
        ''' A CoDa extension to pandas objects containing counts '''

        def __init__(self, pandas_obj):
            self._obj = pandas_obj

        def _check_for_zeros(self):
            ''' Return True (with a warning) if the frame contains zeros. '''
            if not self._obj.values.all():
                print("Dataframe contains zeros. Using Bayesian inference to replace zeros.")
                return True
            return False

        def clr(self):
            ''' Wrapper for CLR (centered log-ratio transform). '''
            if self._check_for_zeros():
                return _clr_internal(self.aitchison_mean())

            return _clr_internal(self._obj)

        def clr_std(self, n_samples=5000):
            ''' Wrapper for CLR bayesian error estimate.

            Standard deviation of the clr coordinates, estimated from
            n_samples draws of a Dirichlet posterior per sample.
            '''
            logratio = pd.DataFrame(index=self._obj.columns)
            for column in self._obj.T:
                p_matrix = ss.dirichlet.rvs(self._obj.T[column]+0.5, n_samples)
                c_matrix = _clr_internal(p_matrix)
                logratio[column] = [np.std(i) for i in zip(*c_matrix)]
            return logratio.T

        def alr(self, part=None):
            ''' Wrapper for ALR (additive log-ratio transform).

            Parameters:
                part: optional column to use as the denominator; it is
                      moved to the last position. Defaults to the
                      current last column.
            '''
            if part:
                parts = self._obj.T.index.tolist()
                parts.remove(part)
                self._obj = self._obj.T.reindex(parts+[part]).T

            print("Using "+self._obj.columns[-1] + " as denominator.")
            if self._check_for_zeros():
                return _alr_internal(self.aitchison_mean())

            return _alr_internal(self._obj)

        def alr_std(self, part=None, n_samples=5000):
            ''' Wrapper for ALR error estimate. '''
            if part:
                # Move 'part' to the last *column* so it becomes the
                # denominator, mirroring alr(). (The original reindexed
                # the rows and discarded the result, so the part
                # argument was silently ignored.)
                parts = self._obj.T.index.tolist()
                parts.remove(part)
                self._obj = self._obj.T.reindex(parts+[part]).T

            logratio = pd.DataFrame(index=self._obj.columns)
            for column in self._obj.T:
                p_matrix = ss.dirichlet.rvs(self._obj.T[column]+0.5, n_samples)
                c_matrix = [np.log(i/i[-1]) for i in p_matrix]
                logratio[column] = [np.std(i) for i in zip(*c_matrix)]
            return logratio.T.iloc[:, :-1]

        def ilr(self, psi=None):
            ''' Wrapper for ILR (isometric log-ratio transform). '''
            if psi is None:
                psi = extra.sbp_basis(self._obj)
            else:
                extra.check_basis(psi)

            if self._check_for_zeros():
                return _ilr_internal(self.aitchison_mean(), psi)

            return _ilr_internal(self._obj, psi)

        def ilr_inv(self, psi=None):
            ''' Wrapper for inverse ILR transformation. '''
            if psi is None:
                psi = extra.sbp_basis(self._obj)
            else:
                extra.check_basis(psi)

            return _ilr_inv_internal(self._obj, psi)

        def zero_replacement(self, n_samples=5000):
            ''' Replace zero values using Dirichlet-multinomial Bayesian inference. '''
            counts = pd.DataFrame(index=self._obj.columns)
            for column in self._obj.T:
                p_matrix = ss.dirichlet.rvs(self._obj.T[column]+0.5, n_samples)
                counts[column] = [np.mean(i) for i in zip(*p_matrix)]
            return counts.T

        def aitchison_mean(self):
            ''' Return the Aitchison mean point estimate. '''
            return np.exp(sp.digamma(self._obj+1.0)).coda.closure(1.0)

        def closure(self, cls_const):
            ''' Apply closure: rescale each row to sum to cls_const. '''
            return cls_const*self._obj.divide(self._obj.sum(axis=1), axis=0)

        def varmatrix(self, nmp=False):
            '''
            Calculate the variation matrix var(log(x_i/x_j)) of a
            composition. Returns a numpy array when nmp is True, a
            labeled DataFrame otherwise.
            TODO: for large datasets, this function blows up the memory.
            This could be overcome by using a clever running variance
            algorithm, alas I am lazy, so we estimate the variance by only
            using a maximum of 500 entries. This can still be a problem if
            dim[0] is large, so something needs to be done here. -- C.B.
            '''
            if self._check_for_zeros():
                comp = self.aitchison_mean()
            else:
                comp = self._obj

            # Quick fix: Estimate variance from at most 500 entries.
            reduc = np.array(comp)[:min(500, np.shape(comp)[0]), :]

            # New vectorized version. Faster than ketchup!
            vrmtrx = np.var(np.log(reduc[:, :, None]*1./reduc[:, None]), axis=0)
            if nmp:
                return vrmtrx
            return pd.DataFrame(vrmtrx, columns=self._obj.columns, index=self._obj.columns)

        def totvar(self):
            '''
            Calculate the total variance from the variation matrix.
            '''
            var_matrix = self.varmatrix(True)
            return 1./(2*np.shape(var_matrix)[0]) * np.sum(var_matrix)

        def gmean(self):
            ''' Calculate the geometric mean, closed to 100. '''
            if self._check_for_zeros():
                gmean = ss.mstats.gmean(self.aitchison_mean())
            else:
                gmean = ss.mstats.gmean(self._obj)
            return np.array([100 * i / np.sum(gmean) for i in gmean])

        def power(self, alpha):
            ''' Compositional scalar multiplication (powering). '''
            if self._check_for_zeros():
                return pow(self.aitchison_mean(), alpha)
            return pow(self._obj, alpha)

        def perturbation(self, comp):
            ''' Compositional addition (perturbation) with comp. '''
            if self._check_for_zeros():
                return self.aitchison_mean()*np.array(comp)
            return self._obj*np.array(comp)

        def scale(self):
            ''' Scale composition with total variance. '''
            return self.power(1./np.sqrt(self.totvar()))

        def center(self):
            ''' Center the composition on its geometric mean. '''
            return self.perturbation(1./self.gmean())
|
|
188
|
+
|