rock-physics-open 0.0 (rock_physics_open-0.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rock_physics_open/__init__.py +0 -0
- rock_physics_open/equinor_utilities/__init__.py +0 -0
- rock_physics_open/equinor_utilities/anisotropy.py +162 -0
- rock_physics_open/equinor_utilities/classification_functions/__init__.py +17 -0
- rock_physics_open/equinor_utilities/classification_functions/class_stats.py +58 -0
- rock_physics_open/equinor_utilities/classification_functions/lin_class.py +47 -0
- rock_physics_open/equinor_utilities/classification_functions/mahal_class.py +56 -0
- rock_physics_open/equinor_utilities/classification_functions/norm_class.py +65 -0
- rock_physics_open/equinor_utilities/classification_functions/poly_class.py +40 -0
- rock_physics_open/equinor_utilities/classification_functions/post_prob.py +26 -0
- rock_physics_open/equinor_utilities/classification_functions/two_step_classification.py +46 -0
- rock_physics_open/equinor_utilities/conversions.py +10 -0
- rock_physics_open/equinor_utilities/gen_utilities/__init__.py +11 -0
- rock_physics_open/equinor_utilities/gen_utilities/dict_to_float.py +33 -0
- rock_physics_open/equinor_utilities/gen_utilities/dim_check_vector.py +83 -0
- rock_physics_open/equinor_utilities/gen_utilities/filter_input.py +126 -0
- rock_physics_open/equinor_utilities/gen_utilities/filter_output.py +78 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/__init__.py +14 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/dummy_vars.py +42 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/exponential_model.py +119 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/import_ml_models.py +61 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/run_regression.py +151 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/sigmoidal_model.py +188 -0
- rock_physics_open/equinor_utilities/snapshot_test_utilities/__init__.py +10 -0
- rock_physics_open/equinor_utilities/snapshot_test_utilities/compare_snapshots.py +145 -0
- rock_physics_open/equinor_utilities/snapshot_test_utilities/snapshots.py +54 -0
- rock_physics_open/equinor_utilities/std_functions/__init__.py +43 -0
- rock_physics_open/equinor_utilities/std_functions/backus_ave.py +53 -0
- rock_physics_open/equinor_utilities/std_functions/dvorkin_nur.py +69 -0
- rock_physics_open/equinor_utilities/std_functions/gassmann.py +140 -0
- rock_physics_open/equinor_utilities/std_functions/hashin_shtrikman.py +195 -0
- rock_physics_open/equinor_utilities/std_functions/hertz_mindlin.py +43 -0
- rock_physics_open/equinor_utilities/std_functions/moduli_velocity.py +51 -0
- rock_physics_open/equinor_utilities/std_functions/reflection_eq.py +98 -0
- rock_physics_open/equinor_utilities/std_functions/rho.py +59 -0
- rock_physics_open/equinor_utilities/std_functions/voigt_reuss_hill.py +128 -0
- rock_physics_open/equinor_utilities/std_functions/walton.py +38 -0
- rock_physics_open/equinor_utilities/std_functions/wood_brie.py +77 -0
- rock_physics_open/equinor_utilities/various_utilities/Equinor_logo.gif +0 -0
- rock_physics_open/equinor_utilities/various_utilities/Equinor_logo.ico +0 -0
- rock_physics_open/equinor_utilities/various_utilities/__init__.py +24 -0
- rock_physics_open/equinor_utilities/various_utilities/display_result_statistics.py +83 -0
- rock_physics_open/equinor_utilities/various_utilities/gassmann_dry_mod.py +37 -0
- rock_physics_open/equinor_utilities/various_utilities/gassmann_mod.py +37 -0
- rock_physics_open/equinor_utilities/various_utilities/gassmann_sub_mod.py +53 -0
- rock_physics_open/equinor_utilities/various_utilities/hs_average.py +40 -0
- rock_physics_open/equinor_utilities/various_utilities/pressure.py +88 -0
- rock_physics_open/equinor_utilities/various_utilities/reflectivity.py +85 -0
- rock_physics_open/equinor_utilities/various_utilities/timeshift.py +91 -0
- rock_physics_open/equinor_utilities/various_utilities/vp_vs_rho_set_statistics.py +154 -0
- rock_physics_open/equinor_utilities/various_utilities/vrh_3_min.py +61 -0
- rock_physics_open/fluid_models/__init__.py +9 -0
- rock_physics_open/fluid_models/brine_model/__init__.py +5 -0
- rock_physics_open/fluid_models/brine_model/brine_properties.py +143 -0
- rock_physics_open/fluid_models/gas_model/__init__.py +5 -0
- rock_physics_open/fluid_models/gas_model/gas_properties.py +277 -0
- rock_physics_open/fluid_models/oil_model/__init__.py +5 -0
- rock_physics_open/fluid_models/oil_model/dead_oil_density.py +60 -0
- rock_physics_open/fluid_models/oil_model/dead_oil_velocity.py +28 -0
- rock_physics_open/fluid_models/oil_model/live_oil_density.py +79 -0
- rock_physics_open/fluid_models/oil_model/live_oil_velocity.py +24 -0
- rock_physics_open/fluid_models/oil_model/oil_bubble_point.py +69 -0
- rock_physics_open/fluid_models/oil_model/oil_properties.py +114 -0
- rock_physics_open/sandstone_models/__init__.py +57 -0
- rock_physics_open/sandstone_models/cemented_shalysand_sandyshale_models.py +304 -0
- rock_physics_open/sandstone_models/constant_cement_models.py +204 -0
- rock_physics_open/sandstone_models/constant_cement_optimisation.py +122 -0
- rock_physics_open/sandstone_models/contact_cement_model.py +138 -0
- rock_physics_open/sandstone_models/curvefit_sandstone_models.py +143 -0
- rock_physics_open/sandstone_models/friable_models.py +178 -0
- rock_physics_open/sandstone_models/friable_optimisation.py +112 -0
- rock_physics_open/sandstone_models/friable_shalysand_sandyshale_models.py +235 -0
- rock_physics_open/sandstone_models/patchy_cement_fluid_substitution_model.py +477 -0
- rock_physics_open/sandstone_models/patchy_cement_model.py +286 -0
- rock_physics_open/sandstone_models/patchy_cement_optimisation.py +251 -0
- rock_physics_open/sandstone_models/unresolved_cemented_sandshale_models.py +134 -0
- rock_physics_open/sandstone_models/unresolved_friable_sandshale_models.py +126 -0
- rock_physics_open/shale_models/__init__.py +19 -0
- rock_physics_open/shale_models/dem.py +174 -0
- rock_physics_open/shale_models/dem_dual_por.py +61 -0
- rock_physics_open/shale_models/kus_tok.py +59 -0
- rock_physics_open/shale_models/multi_sca.py +133 -0
- rock_physics_open/shale_models/pq.py +102 -0
- rock_physics_open/shale_models/sca.py +90 -0
- rock_physics_open/shale_models/shale4_mineral.py +147 -0
- rock_physics_open/shale_models/shale4_mineral_dem_overlay.py +92 -0
- rock_physics_open/span_wagner/__init__.py +5 -0
- rock_physics_open/span_wagner/co2_properties.py +438 -0
- rock_physics_open/span_wagner/coefficients.py +165 -0
- rock_physics_open/span_wagner/equations.py +104 -0
- rock_physics_open/span_wagner/tables/__init__.py +0 -0
- rock_physics_open/span_wagner/tables/carbon_dioxide_density.npz +0 -0
- rock_physics_open/span_wagner/tables/lookup_table.py +33 -0
- rock_physics_open/t_matrix_models/Equinor_logo.ico +0 -0
- rock_physics_open/t_matrix_models/__init__.py +45 -0
- rock_physics_open/t_matrix_models/carbonate_pressure_substitution.py +124 -0
- rock_physics_open/t_matrix_models/curvefit_t_matrix_exp.py +124 -0
- rock_physics_open/t_matrix_models/curvefit_t_matrix_min.py +86 -0
- rock_physics_open/t_matrix_models/opt_subst_utilities.py +415 -0
- rock_physics_open/t_matrix_models/parse_t_matrix_inputs.py +297 -0
- rock_physics_open/t_matrix_models/run_t_matrix.py +243 -0
- rock_physics_open/t_matrix_models/t_matrix_C.py +210 -0
- rock_physics_open/t_matrix_models/t_matrix_opt_fluid_sub_exp.py +137 -0
- rock_physics_open/t_matrix_models/t_matrix_opt_fluid_sub_petec.py +163 -0
- rock_physics_open/t_matrix_models/t_matrix_opt_forward_model_exp.py +72 -0
- rock_physics_open/t_matrix_models/t_matrix_opt_forward_model_min.py +86 -0
- rock_physics_open/t_matrix_models/t_matrix_parameter_optimisation_exp.py +172 -0
- rock_physics_open/t_matrix_models/t_matrix_parameter_optimisation_min.py +159 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/__init__.py +12 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/array_functions.py +75 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_c_eff.py +163 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_isolated.py +95 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_kd.py +40 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_kd_eff.py +116 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_kd_uuv.py +18 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_pressure.py +140 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_t.py +71 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_td.py +42 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_theta.py +43 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_x.py +33 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_z.py +50 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/check_and_tile.py +43 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/g_tensor.py +140 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/iso_av.py +60 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/iso_ave_all.py +55 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/pressure_input.py +44 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/t_matrix_vec.py +278 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/velocity_vti_angles.py +81 -0
- rock_physics_open/t_matrix_models/tmatrix_python.dll +0 -0
- rock_physics_open/t_matrix_models/tmatrix_python.so +0 -0
- rock_physics_open/ternary_plots/__init__.py +3 -0
- rock_physics_open/ternary_plots/gen_ternary_plot.py +73 -0
- rock_physics_open/ternary_plots/shale_prop_ternary.py +337 -0
- rock_physics_open/ternary_plots/ternary_patches.py +277 -0
- rock_physics_open/ternary_plots/ternary_plot_utilities.py +197 -0
- rock_physics_open/ternary_plots/unconventionals_ternary.py +75 -0
- rock_physics_open/version.py +21 -0
- rock_physics_open-0.0.dist-info/METADATA +92 -0
- rock_physics_open-0.0.dist-info/RECORD +142 -0
- rock_physics_open-0.0.dist-info/WHEEL +5 -0
- rock_physics_open-0.0.dist-info/licenses/LICENSE +165 -0
- rock_physics_open-0.0.dist-info/top_level.txt +1 -0
rock_physics_open/__init__.py
File without changes

rock_physics_open/equinor_utilities/__init__.py
File without changes

rock_physics_open/equinor_utilities/anisotropy.py
@@ -0,0 +1,162 @@
+import numpy as np
+
+from rock_physics_open.equinor_utilities import gen_utilities
+
+# These routines are not finalised or used in any plugins yet
+"""
+c11, c12, c13, c33, c44, c66 = c_ij_2_c_factors(cij)
+
+Transform a single stiffness tensor into components. VTI medium is assumed
+"""
+
+
+def c_ij_2_c_factors(cij):
+    """Transform a single stiffness tensor into components. VTI medium is assumed.
+
+    Parameters
+    ----------
+    cij : np.ndarray
+        A 6x6 matrix.
+
+    Returns
+    -------
+    tuple
+        (c11, c12, c13, c33, c44, c66).
+    """
+    if not isinstance(cij, np.ndarray):
+        try:
+            cij = np.array(cij, dtype=float)
+        except ValueError:
+            print("Input data can't be transformed into a NumPy array")
+    try:
+        num_samp = int(cij.size / 36)
+        cij = cij.reshape((6, 6, num_samp))
+        c11 = cij[0, 0, :].reshape(num_samp, 1)
+        c12 = cij[0, 1, :].reshape(num_samp, 1)
+        c13 = cij[0, 2, :].reshape(num_samp, 1)
+        c33 = cij[2, 2, :].reshape(num_samp, 1)
+        c44 = cij[3, 3, :].reshape(num_samp, 1)
+        c66 = cij[5, 5, :].reshape(num_samp, 1)
+        return c11, c12, c13, c33, c44, c66
+
+    except ValueError:
+        print("Input data is not a 6x6xN array")
+
+
+def cfactors2cij(c11, c12, c13, c33, c44, c66):
+    """Transform individual stiffness factors to stiffness tensor 6x6x(number of samples).
+
+    Parameters
+    ----------
+    c11, c12, c13, c33, c44, c66 : np.ndarray
+        All 1-dimensional of same length.
+
+    Returns
+    -------
+    np.ndarray
+        A 6x6x(number of samples) stiffness tensor.
+    """
+    c11, c12, c13, c33, c44, c66 = gen_utilities.dim_check_vector(
+        (c11, c12, c13, c33, c44, c66)
+    )
+
+    num_samp = c11.shape[1]
+
+    cij = np.zeros((6, 6, num_samp))
+    cij[0, 0, :] = c11
+    cij[0, 1, :] = c12
+    cij[0, 2, :] = c13
+    cij[1, 0, :] = c12
+    cij[1, 1, :] = c11
+    cij[1, 2, :] = c13
+    cij[2, 0, :] = c13
+    cij[2, 1, :] = c13
+    cij[2, 2, :] = c33
+    cij[3, 3, :] = c44
+    cij[4, 4, :] = c44
+    cij[5, 5, :] = c66
+
+    return cij
+
+
+def c_ij_2_thomsen(c, rho):
+    """Thomsen parameters for weak anisotropy.
+
+    Parameters
+    ----------
+    c : np.ndarray
+        A (log of or single instance of) 6x6 elastic tensor.
+    rho : np.ndarray
+        Density - log of same length as c.
+
+    Returns
+    -------
+    tuple
+        alpha, beta, epsilon, gamma, delta.
+    """
+    # C matrix should be 6x6
+    if not isinstance(c, np.ndarray):
+        try:
+            c = np.array(c, dtype=float)
+        except ValueError:
+            print("Input data can't be transformed into a NumPy array")
+    try:
+        num_samp = int(c.size / 36)
+        c = c.reshape((6, 6, num_samp))
+        rho = rho.reshape(num_samp, 1)
+        alpha = np.sqrt(c[2, 2, :].reshape(num_samp, 1) / rho)
+        beta = np.sqrt(c[3, 3, :].reshape(num_samp, 1) / rho)
+        gamma = ((c[5, 5, :] - c[3, 3, :]) / (2 * c[3, 3, :])).reshape(num_samp, 1)
+        epsilon = ((c[0, 0, :] - c[2, 2, :]) / (2 * c[2, 2, :])).reshape(num_samp, 1)
+        delta = (
+            ((c[0, 2, :] + c[3, 3, :]) ** 2 - (c[2, 2, :] - c[3, 3, :]) ** 2)
+            / (2 * c[2, 2, :] * (c[2, 2, :] - c[3, 3, :]))
+        ).reshape(num_samp, 1)
+
+        return alpha, beta, epsilon, gamma, delta
+    except ValueError:
+        print("Input data is not a 6x6xN array")
+
+
+def thomsen_2_c_ij(alpha, beta, gamma, delta, epsilon, rho):
+    """Elastic stiffness. Assumptions:
+    Thomsen's parameters apply for weak anisotropy in a transversely isotropic medium:
+
+    c11 c12 c13   0   0   0
+    c12 c11 c13   0   0   0
+    c13 c13 c33   0   0   0
+      0   0   0 c44   0   0
+      0   0   0   0 c44   0
+      0   0   0   0   0 c66
+
+    where c66 = 1/2 (c11 - c12)
+
+    Parameters
+    ----------
+    alpha, beta, gamma, delta, epsilon :
+        Thomsen's parameters.
+    rho :
+        Bulk density.
+
+    Returns
+    -------
+    tuple
+        Elastic stiffness c11, c12, c13, c33, c44, c66.
+    """
+    alpha, beta, gamma, delta, epsilon = gen_utilities.dim_check_vector(
+        (alpha, beta, gamma, delta, epsilon)
+    )
+
+    c33 = rho * alpha**2
+    c44 = rho * beta**2
+    c11 = c33 * (1 + 2 * epsilon)
+    c66 = c44 * (1 + 2 * gamma)
+    c12 = c11 - 2 * c66
+    c13 = np.sqrt(2 * c33 * (c33 - c44) * delta + (c33 - c44) ** 2) - c44
+
+    return c11, c12, c13, c33, c44, c66

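To illustrate the round trip between Thomsen parameters and stiffness components, a minimal sketch follows. The numeric values are hypothetical, and the import path is simply inferred from the file location above; this is not an example shipped with the package.

import numpy as np

from rock_physics_open.equinor_utilities.anisotropy import (
    c_ij_2_thomsen,
    cfactors2cij,
    thomsen_2_c_ij,
)

# Hypothetical single-sample inputs as 1x1 row vectors (SI units)
rho = np.array([[2500.0]])      # bulk density [kg/m^3]
alpha = np.array([[3500.0]])    # vertical P-wave velocity [m/s]
beta = np.array([[2000.0]])     # vertical S-wave velocity [m/s]
gamma = np.array([[0.05]])
delta = np.array([[0.03]])
epsilon = np.array([[0.08]])

# Forward: Thomsen parameters -> stiffness factors -> 6x6x1 tensor
c11, c12, c13, c33, c44, c66 = thomsen_2_c_ij(alpha, beta, gamma, delta, epsilon, rho)
cij = cfactors2cij(c11, c12, c13, c33, c44, c66)

# Back: recover the parameters (note the return order of c_ij_2_thomsen)
alpha2, beta2, epsilon2, gamma2, delta2 = c_ij_2_thomsen(cij, rho)
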
rock_physics_open/equinor_utilities/classification_functions/__init__.py
@@ -0,0 +1,17 @@
+from .class_stats import gen_class_stats
+from .lin_class import lin_class
+from .mahal_class import mahal_class
+from .norm_class import norm_class
+from .poly_class import poly_class
+from .post_prob import posterior_probability
+from .two_step_classification import gen_two_step_class_stats
+
+__all__ = [
+    "gen_class_stats",
+    "lin_class",
+    "mahal_class",
+    "norm_class",
+    "poly_class",
+    "posterior_probability",
+    "gen_two_step_class_stats",
+]

rock_physics_open/equinor_utilities/classification_functions/class_stats.py
@@ -0,0 +1,58 @@
+import numpy as np
+
+NULL_CLASS = 0
+
+
+def gen_class_stats(obs, class_val):
+    """
+    Generate statistics - mean, covariance and prior probability - for each
+    class in the training data. The observations are an n x m array, where n
+    is the number of observations and m is the number of variables. With p
+    classes the returned mean value will be an array of dimension p x m, the
+    covariance m x m x p, and class_id and prior probability p length vectors.
+    class_mean, class_cov, prior_prob, class_counts, class_id = gen_class_stats(obs, class_val).
+
+    Parameters
+    ----------
+    obs : np.ndarray
+        An nxm array of data samples (observations).
+    class_val : np.ndarray
+        n length vector with class ID of the observations. Assumed to
+        be integer.
+
+    Returns
+    -------
+    tuple
+        class_mean, class_cov, prior_prob, class_counts, class_id :
+        (np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray).
+    """
+
+    n, m = obs.shape
+    # Find number of classes. If class_val input is not integer, raise an exception
+    if not (
+        isinstance(class_val, np.ndarray)
+        and issubclass(class_val.dtype.type, np.integer)
+    ):
+        raise ValueError(f"{__file__}: class values are not discrete numbers")
+
+    class_id, class_counts = np.unique(class_val, return_counts=True)
+    # Remove Null class
+    idx_null = np.where(class_id == NULL_CLASS)
+    class_id = np.delete(class_id, idx_null)
+    class_counts = np.delete(class_counts, idx_null)
+    p = class_id.shape[0]
+
+    # Very simple prior probability - number of observations in each class
+    # divided by total number of observations
+    prior_prob = class_counts / n
+
+    # Assign output arrays
+    class_mean = np.zeros((p, m))
+    class_cov = np.zeros((m, m, p))
+
+    for i in range(len(class_id)):
+        idx = class_val == class_id[i]
+        class_mean[i, :] = np.mean(obs[idx, :], axis=0)
+        class_cov[:, :, i] = np.cov(obs[idx, :], rowvar=False)
+
+    return class_mean, class_cov, prior_prob, class_counts, class_id

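A minimal sketch of driving gen_class_stats with synthetic training data (the data and seed are made up for illustration):

import numpy as np

from rock_physics_open.equinor_utilities.classification_functions import gen_class_stats

rng = np.random.default_rng(0)
# Two features, two classes of 100 training observations each (class 0 is
# reserved as the null class and is ignored by gen_class_stats)
obs = np.vstack([
    rng.normal(loc=(0.0, 0.0), scale=0.5, size=(100, 2)),
    rng.normal(loc=(3.0, 3.0), scale=0.5, size=(100, 2)),
])
class_val = np.repeat(np.array([1, 2]), 100)

class_mean, class_cov, prior_prob, class_counts, class_id = gen_class_stats(obs, class_val)
print(class_mean.shape, class_cov.shape, prior_prob)  # (2, 2) (2, 2, 2) [0.5 0.5]
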
rock_physics_open/equinor_utilities/classification_functions/lin_class.py
@@ -0,0 +1,47 @@
+import numpy as np
+
+NULL_CLASS = 0
+
+
+def lin_class(obs, class_mean, class_id, thresh=np.inf):
+    """
+    Linear classification routine. All data points are assigned a class, unless a threshold is set.
+
+    Parameters
+    ----------
+    obs : np.ndarray
+        An nxm array, where n is the number of samples and m is the number of features.
+    class_mean : np.ndarray
+        A pxm array, where p is the number of classes and m is the number of features.
+    class_id : np.ndarray
+        A p length vector, where p is the number of classes, containing class_id (integer numbers).
+    thresh : float
+        Unclassified threshold.
+
+    Returns
+    -------
+    tuple
+        lin_class_arr, lin_dist : (np.ndarray, np.ndarray).
+        lin_class_arr: nx1 vector. The classes are numbered according to class_id,
+        and unclassified samples (with distance greater than thresh) are set to 0.
+        lin_dist: nx1 vector with linear distance from the closest class centre to each sample.
+    """
+
+    # Find dimensions
+    n = obs.shape[0]
+    p = class_mean.shape[0]
+
+    # Assign matrices
+    dist = np.zeros((n, p))
+
+    # Calculate distance for each class
+    for i in range(p):
+        dist[:, i] = np.sqrt(np.sum((obs - class_mean[i, :]) ** 2, axis=1))
+
+    # Find the shortest distance, assign class, filter out observations with distance
+    # greater than the threshold
+    lin_class_arr = np.choose(np.argmin(dist, axis=1), class_id)
+    lin_dist = np.amin(dist, axis=1)
+    lin_class_arr[lin_dist > thresh] = NULL_CLASS
+
+    return lin_class_arr, lin_dist

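Continuing with class means like those produced above, nearest-mean classification of new points could look like this (hypothetical values):

import numpy as np

from rock_physics_open.equinor_utilities.classification_functions import lin_class

class_mean = np.array([[0.0, 0.0], [3.0, 3.0]])
class_id = np.array([1, 2])
new_obs = np.array([[0.2, -0.1], [2.8, 3.1], [10.0, 10.0]])

# With a threshold, the far-away third point falls back to the null class (0)
labels, distances = lin_class(new_obs, class_mean, class_id, thresh=2.0)
print(labels)  # [1 2 0]
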
rock_physics_open/equinor_utilities/classification_functions/mahal_class.py
@@ -0,0 +1,56 @@
+import numpy as np
+
+from .post_prob import posterior_probability
+
+NULL_CLASS = 0
+
+
+def mahal_class(obs, class_mean, class_cov, class_id, thresh=np.inf):
+    """
+    Mahalanobis classification routine. All data points are assigned a class, unless a threshold is set.
+
+    Parameters
+    ----------
+    obs : np.ndarray
+        An nxm array, where n is the number of samples and m is the number of variables.
+    class_mean : np.ndarray
+        A pxm array, where p is the number of classes and m is the number of variables.
+    class_cov : np.ndarray
+        An mxmxp array, where m is the number of variables and p is the number of classes.
+    class_id : np.ndarray
+        A p length vector, where p is the number of classes, containing class_id (integer numbers).
+    thresh : float
+        Unclassified threshold.
+
+    Returns
+    -------
+    tuple
+        mahal_class_arr, mahal_dist, mahal_pp : (np.ndarray, np.ndarray, np.ndarray).
+        mahal_class_arr: nx1 vector. The classes are numbered according to class_id, and unclassified
+        samples (with distance greater than thresh) are set to 0.
+        mahal_dist: nx1 vector with Mahalanobis distance from the closest class centre to each sample.
+        mahal_pp: nx1 vector with posterior probability based on the distance to each class.
+    """
+
+    # Find dimensions
+    n = obs.shape[0]
+    p = class_mean.shape[0]
+
+    # Assign matrices
+    dist = np.zeros((n, p))
+
+    # Calculate distance for each class
+    for i in range(p):
+        cov_inv = np.linalg.inv(class_cov[:, :, i])
+        delta = obs - class_mean[i, :]
+        dist[:, i] = np.sqrt(np.einsum("nj,jk,nk->n", delta, cov_inv, delta))
+
+    # Find the shortest distance, assign class, calculate posterior probability and
+    # filter out observations with distance greater than the threshold
+    mahal_class_arr = np.choose(np.argmin(dist, axis=1), class_id)
+    mahal_dist = np.amin(dist, axis=1)
+    mahal_pp = posterior_probability(mahal_dist, dist)
+    d_idx = mahal_dist > thresh
+    mahal_class_arr[d_idx] = NULL_CLASS
+
+    return mahal_class_arr, mahal_dist, mahal_pp

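A corresponding sketch for the Mahalanobis variant, which additionally needs one covariance matrix per class stacked along the last axis (values again hypothetical):

import numpy as np

from rock_physics_open.equinor_utilities.classification_functions import mahal_class

class_mean = np.array([[0.0, 0.0], [3.0, 3.0]])
class_id = np.array([1, 2])
# One 2x2 covariance matrix per class, stacked along the last axis
class_cov = np.stack([np.eye(2) * 0.25, np.eye(2) * 0.25], axis=-1)

new_obs = np.array([[0.1, 0.2], [3.2, 2.9]])
labels, dist, post_prob = mahal_class(new_obs, class_mean, class_cov, class_id, thresh=3.0)
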
rock_physics_open/equinor_utilities/classification_functions/norm_class.py
@@ -0,0 +1,65 @@
+import numpy as np
+
+NULL_CLASS = 0
+
+
+def norm_class(obs, class_mean, class_cov, prior_prob, class_id, thresh=np.inf):
+    """
+    Normal distribution classification routine. All data points are assigned a
+    class, unless a threshold is set. The "dist" calculated here is the quadratic
+    discriminant function according to a Bayes classification. This is a negative
+    number, and the closest class has the smallest absolute value.
+
+    Parameters
+    ----------
+    obs : np.ndarray
+        An nxm array, where n is the number of samples and m is the number of variables.
+    class_mean : np.ndarray
+        A pxm array, where p is the number of classes and m is the number of variables.
+    class_cov : np.ndarray
+        An mxmxp array, where m is the number of variables and p is the number of classes.
+    prior_prob : np.ndarray
+        A p length vector, where p is the number of classes, containing prior probabilities for each class.
+    class_id : np.ndarray
+        A p length vector, where p is the number of classes, containing class_id (integer numbers).
+    thresh : float
+        Unclassified threshold.
+
+    Returns
+    -------
+    tuple
+        norm_class_id, norm_dist, norm_pp : (np.ndarray, np.ndarray, np.ndarray).
+        norm_class_id: nx1 vector. The classes are numbered according to class_id, and unclassified
+        samples (with absolute distance greater than thresh) are set to 0.
+        norm_dist: nx1 vector with quadratic discriminant distance from the closest class centre to each sample.
+        norm_pp: nx1 vector with posterior probability based on the distance to each class.
+    """
+
+    # Find dimensions
+    n = obs.shape[0]
+    p = class_mean.shape[0]
+
+    # Assign matrices
+    dist = np.zeros((n, p))
+
+    # Calculate distance for each class
+    for i in range(p):
+        cov_inv = np.linalg.inv(class_cov[:, :, i])
+        delta = obs - class_mean[i, :]
+        dist[:, i] = (
+            -0.5 * np.log(np.linalg.det(class_cov[:, :, i]))
+            - 0.5 * np.sqrt(np.einsum("nj,jk,nk->n", delta, cov_inv, delta))
+            + np.log(prior_prob[i])
+        )
+
+    # The discriminant function values ("dist") are negative numbers. Choose the
+    # one with the largest value (smallest absolute value) as the closest class
+    norm_class_id = np.choose(np.argmax(dist, axis=1), class_id)
+    norm_dist = np.amax(dist, axis=1)
+    norm_pp = np.exp(norm_dist) / np.sum(np.exp(dist), axis=1)
+
+    # Compare the absolute value of the discriminator with the threshold
+    norm_class_id[np.abs(norm_dist) > thresh] = NULL_CLASS
+
+    return norm_class_id, norm_dist, norm_pp

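The Bayes variant adds prior probabilities. A minimal sketch under the same made-up statistics as above:

import numpy as np

from rock_physics_open.equinor_utilities.classification_functions import norm_class

class_mean = np.array([[0.0, 0.0], [3.0, 3.0]])
class_cov = np.stack([np.eye(2) * 0.25] * 2, axis=-1)
prior_prob = np.array([0.5, 0.5])
class_id = np.array([1, 2])

labels, disc, post_prob = norm_class(
    np.array([[0.3, -0.2], [2.7, 3.4]]), class_mean, class_cov, prior_prob, class_id
)
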
rock_physics_open/equinor_utilities/classification_functions/poly_class.py
@@ -0,0 +1,40 @@
+import matplotlib.path as mplpath
+import numpy as np
+
+
+def poly_class(train_data, polygons, labels):
+    """
+    Points within the polygons are assigned to class labels. Points that do not
+    fall within any polygon are set to 0 (the null class).
+
+    Parameters
+    ----------
+    train_data : np.ndarray
+        Data points of two variables.
+    polygons : list of np.ndarray
+        Vertices of polygons in two-dimensional space, one array per polygon.
+    labels : np.ndarray
+        Class label for each polygon.
+
+    Returns
+    -------
+    np.ndarray
+        Class id.
+    """
+    if len(labels) != len(polygons):
+        raise ValueError("Number of labels does not match number of polygons")
+
+    # Create output variables
+    idx_filtered = np.zeros(train_data.shape[0]).astype("bool")
+    poly_class_id = np.zeros(train_data.shape[0]).astype("int")
+
+    for i in range(len(polygons)):
+        class_polygon = polygons[i]
+        path = mplpath.Path(class_polygon)
+        # Only points within the given polygon are used
+        idx_poly = path.contains_points(train_data)
+        poly_class_id[idx_poly] = labels[i]
+        idx_filtered = np.logical_or(idx_filtered, idx_poly)
+    # idx_filtered is no longer returned
+    return poly_class_id

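A small sketch of polygon-based classification on a two-variable crossplot (the triangle vertices are made up):

import numpy as np

from rock_physics_open.equinor_utilities.classification_functions import poly_class

# Two triangular regions in a two-variable crossplot
polygons = [
    np.array([[0.0, 0.0], [1.0, 0.0], [0.5, 1.0]]),
    np.array([[2.0, 2.0], [3.0, 2.0], [2.5, 3.0]]),
]
labels = np.array([1, 2])

points = np.array([[0.5, 0.3], [2.5, 2.2], [5.0, 5.0]])
print(poly_class(points, polygons, labels))  # [1 2 0] - outside points stay 0
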
rock_physics_open/equinor_utilities/classification_functions/post_prob.py
@@ -0,0 +1,26 @@
+import numpy as np
+
+NULL_CLASS = 0
+
+
+def posterior_probability(min_dist, dist):
+    """
+    Posterior probability, defined as the exponential of the negative minimum distance divided by
+    the sum of the exponentials of the negative distances to all classes.
+
+    Parameters
+    ----------
+    min_dist : np.ndarray
+        Minimum class distance according to some metric.
+    dist : np.ndarray
+        All class distances, each class in a column in a two-dimensional array.
+
+    Returns
+    -------
+    np.ndarray
+        Posterior probability array.
+    """
+    # Negate the distances without modifying the caller's array in place
+    n_exp = np.exp(-dist)
+    d_sum = n_exp.sum(axis=1)
+    return np.exp(-min_dist) / d_sum

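A tiny numeric check of the definition, i.e. a softmax over negated distances (illustrative numbers only):

import numpy as np

from rock_physics_open.equinor_utilities.classification_functions import (
    posterior_probability,
)

dist = np.array([[1.0, 3.0], [2.0, 2.0]])  # distances from two samples to two classes
min_dist = dist.min(axis=1)

print(posterior_probability(min_dist, dist))  # approx. [0.88 0.5]
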
rock_physics_open/equinor_utilities/classification_functions/two_step_classification.py
@@ -0,0 +1,46 @@
+from .class_stats import gen_class_stats
+from .mahal_class import mahal_class
+
+NULL_CLASS = 0
+
+
+def gen_two_step_class_stats(obs, class_val, thresh):
+    """
+    Generate statistics - mean, covariance and prior probability - for each
+    class in the training data. Run a Mahalanobis classification, and exclude
+    values that have distance above the threshold. Generate class statistics
+    again and return them.
+
+    The observations are an n x m array, where n is the number of observations
+    and m is the number of variables. With p classes the returned mean value
+    will be an array of dimension p x m, the covariance m x m x p, and class_id
+    and prior probability p length vectors.
+
+    Parameters
+    ----------
+    obs : np.ndarray
+        An nxm array, where n is the number of samples and m is the number of variables.
+    class_val : np.ndarray
+        n length vector with class ID of the observations. Assumed to be integer.
+    thresh : float
+        Unclassified threshold.
+
+    Returns
+    -------
+    tuple
+        (np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray).
+        class_mean, class_cov: statistics for each class after rejects;
+        prior_prob, class_counts: based on number of observations in each class,
+        find the prior probability of each class;
+        class_id: label for each class;
+        mahal_class_id: class id from Mahalanobis classification.
+    """
+    mean_class_id, class_cov, _, _, class_id = gen_class_stats(obs, class_val)
+    mahal_class_id = mahal_class(obs, mean_class_id, class_cov, class_id, thresh)[0]
+
+    idx = mahal_class_id != NULL_CLASS
+    mean_class_id, class_cov, prior_prob, class_counts, class_id = gen_class_stats(
+        obs[idx], class_val[idx]
+    )
+
+    return mean_class_id, class_cov, prior_prob, class_counts, class_id, mahal_class_id

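Chaining the pieces, the two-step variant can be sketched as follows (synthetic data as in the earlier sketches):

import numpy as np

from rock_physics_open.equinor_utilities.classification_functions import (
    gen_two_step_class_stats,
)

rng = np.random.default_rng(1)
obs = np.vstack([
    rng.normal((0.0, 0.0), 0.5, (100, 2)),
    rng.normal((3.0, 3.0), 0.5, (100, 2)),
])
class_val = np.repeat(np.array([1, 2]), 100)

# Statistics recomputed after rejecting points more than 3 Mahalanobis
# units from their nearest class centre
stats = gen_two_step_class_stats(obs, class_val, thresh=3.0)
class_mean, class_cov, prior_prob, class_counts, class_id, mahal_id = stats
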
rock_physics_open/equinor_utilities/gen_utilities/__init__.py
@@ -0,0 +1,11 @@
+from .dict_to_float import dict_value_to_float
+from .dim_check_vector import dim_check_vector
+from .filter_input import filter_input_log
+from .filter_output import filter_output
+
+__all__ = [
+    "dict_value_to_float",
+    "dim_check_vector",
+    "filter_input_log",
+    "filter_output",
+]

rock_physics_open/equinor_utilities/gen_utilities/dict_to_float.py
@@ -0,0 +1,33 @@
+def dict_value_to_float(input_dict):
+    """
+    Convert dictionary string values to floating point numbers. Each value can hold multiple floats.
+
+    Parameters
+    ----------
+    input_dict : dict
+        Input dictionary.
+
+    Returns
+    -------
+    dict
+        Output dictionary.
+    """
+
+    for item in input_dict:
+        if isinstance(input_dict[item], float):
+            pass
+        else:
+            try:
+                ff = float(input_dict[item])
+                input_dict[item] = ff
+            except ValueError:  # if a list or tuple is hidden in the string
+                try:
+                    ll = eval(input_dict[item])
+                    ff = [float(i) for i in ll]
+                    input_dict[item] = ff
+                except (ValueError, NameError, SyntaxError, TypeError):
+                    raise ValueError(
+                        "dict_value_to_float: not possible to convert value to float"
+                    )
+
+    return input_dict

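A short usage sketch; the dictionary keys and values are invented:

from rock_physics_open.equinor_utilities.gen_utilities import dict_value_to_float

params = {"k_min": "36.8", "rho_fl": "1005", "weights": "(0.3, 0.7)"}
print(dict_value_to_float(params))
# {'k_min': 36.8, 'rho_fl': 1005.0, 'weights': [0.3, 0.7]}
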
rock_physics_open/equinor_utilities/gen_utilities/dim_check_vector.py
@@ -0,0 +1,83 @@
+import numpy as np
+import pandas as pd
+
+
+def dim_check_vector(args, force_type=None):
+    """
+    Check that all inputs are of the same (one-dimensional) size. Raise ValueError in case there are
+    several lengths present in the inputs. All inputs will be checked and possibly expanded to common
+    length. Only the first dimension is harmonised.
+
+    Parameters
+    ----------
+    args : list or tuple
+        Input list or tuple of scalars, numpy arrays or pandas data frames of numerical or boolean type.
+    force_type : np.dtype
+        Force all outputs to be of a specific dtype.
+
+    Returns
+    -------
+    output_args : list
+        List of inputs where all are of the same length.
+    """
+    single_types = (np.ndarray, pd.DataFrame)
+    iterable_types = (list, tuple)
+    allowed_types = single_types + iterable_types
+    if not isinstance(args, allowed_types):
+        raise ValueError("dim_check_vector: unknown input type: {}".format(type(args)))
+
+    # Single array or dataframe is just returned
+    if isinstance(args, single_types):
+        if force_type is not None:
+            try:
+                args = args.astype(force_type)
+            except ValueError:
+                raise ValueError(
+                    "dim_check_vector: not possible to force dtype to {}".format(
+                        force_type
+                    )
+                )
+        return args
+
+    # If any input is a scalar, make it into an array
+    if force_type is not None:
+        try:
+            args = [
+                np.array(item, ndmin=1, dtype=force_type)
+                if np.isscalar(item)
+                else item.astype(force_type)
+                for item in args
+            ]
+        except ValueError:
+            raise ValueError(
+                "dim_check_vector: not possible to force dtype to {}".format(force_type)
+            )
+    else:
+        args = [np.array(item, ndmin=1) if np.isscalar(item) else item for item in args]
+
+    # Can now test for length - must either be a scalar or have the same length
+    max_length = np.max([item.shape[0] for item in args])
+    if not np.all([item.shape[0] == max_length or item.shape[0] == 1 for item in args]):
+        raise ValueError("dim_check_vector: unequal array lengths in input")
+
+    output_arg = []
+    for item in args:
+        if item.shape[0] == max_length:
+            output_arg.append(item)
+        else:
+            item_dim = item.ndim
+            repeat_tuple = tuple([max_length] + [1] * (item_dim - 1))
+            if isinstance(item, pd.DataFrame):
+                output_arg.append(
+                    pd.DataFrame(
+                        np.tile(np.array(item), repeat_tuple),
+                        columns=item.columns,
+                        index=np.arange(max_length),
+                    )
+                )
+            else:
+                output_arg.append(np.tile(item, repeat_tuple))
+
+    return output_arg

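A short sketch of the broadcasting behaviour (invented well-log-style values):

import numpy as np

from rock_physics_open.equinor_utilities.gen_utilities import dim_check_vector

vp = np.array([3000.0, 3100.0, 3200.0])
rho = 2650.0  # scalar - expanded to the common length

vp_out, rho_out = dim_check_vector((vp, rho))
print(rho_out)  # [2650. 2650. 2650.]

# Mixing incompatible lengths raises ValueError:
# dim_check_vector((np.zeros(3), np.zeros(4)))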