DASPy-toolbox 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DASPy_toolbox-1.0.0.dist-info/LICENSE.txt +1 -0
- DASPy_toolbox-1.0.0.dist-info/METADATA +85 -0
- DASPy_toolbox-1.0.0.dist-info/RECORD +49 -0
- DASPy_toolbox-1.0.0.dist-info/WHEEL +5 -0
- DASPy_toolbox-1.0.0.dist-info/entry_points.txt +2 -0
- DASPy_toolbox-1.0.0.dist-info/top_level.txt +1 -0
- daspy/__init__.py +4 -0
- daspy/advanced_tools/__init__.py +0 -0
- daspy/advanced_tools/channel.py +354 -0
- daspy/advanced_tools/decomposition.py +165 -0
- daspy/advanced_tools/denoising.py +276 -0
- daspy/advanced_tools/fdct.py +789 -0
- daspy/advanced_tools/strain2vel.py +245 -0
- daspy/basic_tools/__init__.py +0 -0
- daspy/basic_tools/filter.py +257 -0
- daspy/basic_tools/freqattributes.py +117 -0
- daspy/basic_tools/preprocessing.py +238 -0
- daspy/basic_tools/visualization.py +186 -0
- daspy/core/__init__.py +4 -0
- daspy/core/collection.py +279 -0
- daspy/core/dasdatetime.py +72 -0
- daspy/core/example.pkl +0 -0
- daspy/core/make_example.py +32 -0
- daspy/core/read.py +544 -0
- daspy/core/section.py +1319 -0
- daspy/core/write.py +282 -0
- daspy/seismic_detection/__init__.py +1 -0
- daspy/seismic_detection/calc_travel_time.py +23 -0
- daspy/seismic_detection/core.py +119 -0
- daspy/seismic_detection/detection.py +12 -0
- daspy/seismic_detection/gamma/__init__.py +13 -0
- daspy/seismic_detection/gamma/_base.py +549 -0
- daspy/seismic_detection/gamma/_bayesian_mixture.py +875 -0
- daspy/seismic_detection/gamma/_gaussian_mixture.py +866 -0
- daspy/seismic_detection/gamma/app.py +192 -0
- daspy/seismic_detection/gamma/seismic_ops.py +478 -0
- daspy/seismic_detection/gamma/utils.py +512 -0
- daspy/seismic_detection/location.py +266 -0
- daspy/seismic_detection/magnitude.py +43 -0
- daspy/seismic_detection/phase_picking.py +67 -0
- daspy/structure_imaging/__init__.py +0 -0
- daspy/structure_imaging/ambient_noise.py +4 -0
- daspy/structure_imaging/dispersion.py +27 -0
- daspy/structure_imaging/fault_zone.py +59 -0
- daspy/structure_imaging/inversion.py +6 -0
- daspy/traffic_monitoring/JamDetection.py +6 -0
- daspy/traffic_monitoring/SpeedMeasurement.py +6 -0
- daspy/traffic_monitoring/VehicleDetection.py +6 -0
- daspy/traffic_monitoring/__init__.py +0 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
# Purpose: Remove noise from data
|
|
2
|
+
# Author: Minzhe Hu, Zefeng Li
|
|
3
|
+
# Date: 2024.5.13
|
|
4
|
+
# Email: hmz2018@mail.ustc.edu.cn
|
|
5
|
+
import numpy as np
|
|
6
|
+
from copy import deepcopy
|
|
7
|
+
from scipy.ndimage import median_filter
|
|
8
|
+
from scipy.interpolate import interp1d
|
|
9
|
+
from daspy.basic_tools.preprocessing import padding
|
|
10
|
+
from daspy.advanced_tools.fdct import fdct_wrapping, ifdct_wrapping
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def spike_removal(data, nch=50, nsp=5, thresh=10):
    """
    Use a median filter to remove high-strain spikes in the data. Modified from
    https://github.com/atterholt/curvelet-denoising/blob/main/MedianFilter.m

    :param data: numpy.ndarray. 2-D data to remove spikes from. The median
        windows are applied with sizes (nch, 1) and (1, nsp), i.e. the first
        axis is treated as channels and the second as sampling points.
    :param nch: int. Number of channels over which to compute the median.
    :param nsp: int. Number of sampling points over which to compute the median.
    :param thresh: Ratio threshold over the median over which a number is
        considered to be an outlier.
    :return: numpy.ndarray. Copy of the data with spikes replaced by a linear
        interpolation along the channel axis. Spikes located before the first
        or after the last good channel take the value of the nearest good
        channel. The input array is not modified.
    """
    absdata = np.abs(data)

    # Running median of the amplitude: first along channels, then along time.
    medians1 = median_filter(absdata, (nch, 1))
    medians = median_filter(medians1, (1, nsp))
    # A zero median yields inf (flagged as a spike) or nan (never flagged);
    # both are intended outcomes, so silence the floating point warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = absdata / medians  # comparisons matrix

    # find the bad values and interpolate with their neighbors
    data_dn = data.copy()
    out_i, out_j = np.where(ratio > thresh)
    all_ch = np.arange(len(data))
    for j in np.unique(out_j):
        bch = out_i[out_j == j]
        # setdiff1d returns the good channels sorted, which np.interp needs and
        # which makes the edge values really belong to the first/last channel.
        gch = np.setdiff1d(all_ch, bch)
        if gch.size == 0:
            # Nothing trustworthy to interpolate from; leave the column as is.
            continue
        # np.interp clamps to the end values outside [gch[0], gch[-1]].
        data_dn[bch, j] = np.interp(bch, gch, data[gch, j])

    return data_dn
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def common_mode_noise_removal(data, method='median'):
    """
    Remove common mode noise (sometimes called horizontal noise) from data.

    The common trace is the median or mean over the first axis. For every row
    of the data, its least-squares projection onto that common trace is
    subtracted.

    :param data: numpy.ndarray. 2-D data to remove common mode noise from.
    :param method: str. Method for extracting common mode noise. 'median' or
        'mean'.
    :return: numpy.ndarray. Denoised data as a new float array.
    :raises ValueError: if data is not 2-D or method is not recognised.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError('data must be a 2-D array.')

    if method == 'median':
        common = np.median(data, 0)
    elif method == 'mean':
        common = np.mean(data, 0)
    else:
        raise ValueError("method must be 'median' or 'mean'.")

    xx = np.sum(common ** 2)
    if xx == 0:
        # The common trace is identically zero: there is nothing to remove and
        # dividing by xx would turn the whole output into NaN.
        return data.astype(float)

    # Projection coefficient of every row onto the common trace.
    coef = data @ common / xx
    return (data - np.outer(coef, common)).astype(float)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _noise_level(data, finest=2, nbscales=None, nbangles=16, percentile=95):
    """
    Derive curvelet-denoising thresholds from a noise record.

    The noise record is curvelet transformed and, for every wedge of every
    scale, the requested percentile of the coefficient magnitudes is taken.

    :param data: numpy.ndarray. Noise data.
    :param finest: int. Objects at the finest scale, forwarded to
        fdct_wrapping.
    :param nbscales: int. Number of scales including the coarsest wavelet level.
        Default set to ceil(log2(min(M,N)) - 3).
    :param nbangles: int. Number of angles at the 2nd coarsest level,
        minimum 8, must be a multiple of 4.
    :param percentile: number. The threshold is taken as this percentile of the
        curvelet coefficient of the noise record
    :return: 2-D list. Threshold for curvelet coefficients, indexed as
        [scale][wedge].
    """
    coeffs = fdct_wrapping(data, is_real=True, finest=finest,
                           nbscales=nbscales, nbangles_coarse=nbangles)

    return [[np.percentile(abs(wedge), percentile) for wedge in scale]
            for scale in coeffs]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _knee_points(C, factor=0.2):
    """
    Derive curvelet-denoising thresholds when no noise record is available.

    For each wedge, the cumulative distribution of the coefficient magnitudes
    is built from a histogram. The knee is the bin centre that lies furthest
    below the straight line joining the first and last points of that curve.

    :param C: 2-D list of np.ndarray. Array of curvelet coefficients.
    :param factor: float. Multiplication factor from 0 to 1. Small factor
        corresponds to conservative strategy.
    :return: 2-D list. Threshold for curvelet coefficients, indexed as
        [scale][wedge].
    """
    thresholds = []
    for scale in C:
        per_wedge = []
        for wedge in scale:
            density, edges = np.histogram(abs(wedge), density=True)
            centers = (edges[1:] + edges[:-1]) / 2
            cdf = np.cumsum(density) / np.sum(density)
            # Subtract the chord through the end points; the minimum of the
            # tilted curve marks the knee.
            slope = (centers[-1] - centers[0]) / (cdf[-1] - cdf[0])
            knee = np.argmin(centers - (slope * cdf))
            per_wedge.append(centers[knee] * factor)
        thresholds.append(per_wedge)

    return thresholds
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _velocity_bin(nbangles, fs, dx):
    """
    Build the [low, high] velocity bounds associated with each angular wedge.

    The bin edges are fractions and multiples of fs * dx; the outermost edge is
    fs * dx / 0, i.e. infinity. Wedges in the first and last eighth of each
    half of the list get negated (and reversed) bounds.

    NOTE(review): the index arithmetic appears to assume nbangles is a multiple
    of 8; other values raise IndexError, as before.

    :param nbangles: int. Number of wedges at the scale being processed.
    :param fs: Sampling rate in Hz.
    :param dx: Channel interval in m.
    :return: numpy.ndarray of shape (nbangles, 2). Row w holds [v_low, v_high]
        for wedge w.
    """
    half = nbangles // 8
    v_bounds = np.zeros(nbangles // 4 + 1)
    v_bounds[half] = fs * dx
    # The last bound is a deliberate division by zero (-> inf). errstate
    # silences it locally and restores the caller's NumPy error settings,
    # instead of overwriting the global state with np.seterr.
    with np.errstate(divide='ignore'):
        for i in range(half):
            v_bounds[i] = i / half * fs * dx
            v_bounds[half + i + 1] = np.divide(fs * dx, 1 - (i + 1) / half)

    # Index of the lower bound for each wedge in the first half of the wedges.
    v_lows = (list(range(half - 1, -1, -1)) + list(range(half * 2))
              + list(range(2 * half - 1, half - 1, -1)))
    velocity = [[v_bounds[v_lows[i]], v_bounds[v_lows[i] + 1]]
                for i in range(nbangles // 2)]
    # The second half of the wedges repeats the first half.
    velocity = np.array(velocity * 2)

    for i in range(half):
        for k in (i, 3 * half + i, 4 * half + i, 7 * half + i):
            velocity[k] = -1 * velocity[k][::-1]

    return velocity
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _mask_factor(velocity, vmin, vmax, flag=0):
    """
    Compute, for every velocity bin, the fraction that overlaps [vmin, vmax].

    :param velocity: array-like of shape (n, 2). [v_low, v_high] per wedge, as
        produced by _velocity_bin. It is copied, not modified.
    :param vmin, vmax: float. Velocity range to select.
    :param flag: -1 selects the mirrored range [-vmax, -vmin]; 0 ignores the
        sign of the bins (the negated bins are flipped back to positive before
        comparison); any other value compares the signed bins with
        [vmin, vmax] unchanged.
    :return: numpy.ndarray of length n. Overlap fraction in [0, 1] per wedge.
        A bin that extends to +/- infinity gets 1 as soon as it overlaps.
    """
    # Work on a copy so the caller's array is never altered.
    velocity = np.array(velocity, dtype=float)

    if flag == -1:
        # Swap simultaneously: assigning vmin first would make vmax = -(-vmax).
        vmin, vmax = -vmax, -vmin
    elif not flag:
        half = len(velocity) // 8
        for i in range(half):
            for k in (i, 3 * half + i, 4 * half + i, 7 * half + i):
                velocity[k] = -1 * velocity[k][::-1]

    factors = np.zeros(len(velocity))
    for i, (v_low, v_high) in enumerate(velocity):
        v1 = max(v_low, vmin)
        v2 = min(v_high, vmax)
        if v1 < v2:
            if v_high == np.inf or v_low == -np.inf:
                # Width is infinite, so a ratio is meaningless; keep the wedge.
                factors[i] = 1
            else:
                factors[i] = np.divide(v2 - v1, v_high - v_low)

    return factors
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def curvelet_denoising(data, choice=0, pad=0.3, noise=None, noise_perc=95,
                       knee_fac=0.2, soft_thresh=True, vmin=0, vmax=np.inf,
                       flag=0, dx=None, fs=None, mode='remove',
                       scale_begin=3, nbscales=None, nbangles=16, finest=2):
    """
    Use curvelet transform to filter stochastic or/and coherent noise.
    Modified from
    https://github.com/atterholt/curvelet-denoising/blob/main/CurveletDenoising.m
    {Atterholt et al., 2022 , Geophys. J. Int.}

    :param data: numpy.ndarray. Data to denoise.
    :param choice: int. 0 for Gaussian denoising using soft thresholding, 1 for
        velocity filtering using the standard FK methodology and 2 for both.
    :param pad: float or sequence of floats. Each float means padding percentage
        before FFT for corresponding dimension. If set to 0.1 will pad 5% before
        the beginning and after the end.
    :param noise: numpy.ndarray or daspy.Section. Noise record as reference.
    :param noise_perc: number. The threshold is taken as this percentile of the
        curvelet coefficient of the noise record. (only used when noise is
        specified)
    :param knee_fac: float. Multiplication factor from 0 to 1. Small factor
        corresponds to conservative strategy. (only used when noise is not
        specified)
    :param soft_thresh: bool. True for soft thresholding and False for hard
        thresholding.
    :param vmin, vmax: float. Velocity range in m/s.
    :param flag: -1 choose only negative apparent velocities, 0 choose both
        positive and negative apparent velocities, 1 choose only positive
        apparent velocities.
    :param dx: Channel interval in m.
    :param fs: Sampling rate in Hz.
    :param mode: str. Only available when choice in (1,2). 'remove' for
        denoising, 'retain' for extraction, and 'decompose' for decomposition.
        It is ignored when choice is 0.
    :param scale_begin: int. The beginning scale to do coherent denoising.
    :param nbscales: int. Number of scales including the coarsest wavelet level.
        Default set to ceil(log2(min(M,N)) - 3).
    :param nbangles: int. Number of angles at the 2nd coarsest level,
        minimum 8, must be a multiple of 4.
    :param finest: int. Objects at the finest scale. 1 for curvelets, 2 for
        wavelets. Curvelets are more precise while wavelets are more efficient.
    :return: numpy.ndarray. Denoised data. When velocity filtering runs with
        mode='decompose', a tuple (data_dn, data_n) is returned instead:
        data_dn is rebuilt from the coefficients weighted by the in-range
        factors and data_n from the coefficients weighted by their complement.
    :raises ValueError: if choice is 1 or 2 and dx or fs is missing.
    """
    velocity_filtering = choice in (1, 2)
    # Fail before the (expensive) transform when the inputs are incomplete.
    if velocity_filtering and (dx is None or fs is None):
        raise ValueError('Please set both dx and fs.')
    # mode only applies to velocity filtering; without this guard
    # choice=0 with mode='decompose' used C_rt before it was assigned.
    decompose = velocity_filtering and mode == 'decompose'

    if pad is None or pad is False:
        pad = 0
    dn = np.round(np.array(pad) * data.shape).astype(int)
    data_pd = padding(data, dn)

    C = fdct_wrapping(data_pd, is_real=True, finest=finest, nbscales=nbscales,
                      nbangles_coarse=nbangles)

    # apply Gaussian denoising
    if choice in (0, 2):
        # define threshold
        if noise is None:
            E = _knee_points(C, factor=knee_fac)
        else:
            if not isinstance(noise, np.ndarray):
                noise = noise.data
            # Pad the noise record to the padded data size so both transforms
            # share the same coefficient layout.
            noise_pd = padding(noise,
                               np.array(data_pd.shape) - np.array(noise.shape))
            E = _noise_level(noise_pd, finest=finest, nbscales=nbscales,
                             nbangles=nbangles, percentile=noise_perc)
        # The coarsest scale (index 0) is left untouched.
        for s in range(1, len(C)):
            for w in range(len(C[s])):
                # first do a hard threshold
                C[s][w] = C[s][w] * (abs(C[s][w]) > abs(E[s][w]))
                if soft_thresh:
                    # soften the existing coefficients
                    C[s][w] = np.sign(C[s][w]) * (abs(C[s][w]) - abs(E[s][w]))

    # apply velocity filtering
    if velocity_filtering:
        if decompose:
            # Scales that are not velocity filtered are split evenly between
            # the two outputs so that the outputs still sum to the input.
            lst = list(range(scale_begin - 1))
            if finest == 2:
                lst.append(len(C) - 1)
            for s in lst:
                for w in range(len(C[s])):
                    C[s][w] /= 2
            C_rt = deepcopy(C)

        for s in range(scale_begin - 1, len(C) - finest + 1):
            n_wedges = len(C[s])
            velocity = _velocity_bin(n_wedges, fs, dx)
            factors = _mask_factor(velocity, vmin, vmax, flag=flag)
            for w in range(n_wedges):
                if mode == 'retain':
                    C[s][w] *= factors[w]
                elif mode == 'remove':
                    C[s][w] *= 1 - factors[w]
                elif mode == 'decompose':
                    C[s][w] *= factors[w]
                    C_rt[s][w] *= 1 - factors[w]

    # perform the inverse curvelet transform
    data_dn = padding(ifdct_wrapping(C, is_real=True, size=data_pd.shape), dn,
                      reverse=True)

    if decompose:
        data_n = padding(ifdct_wrapping(C_rt, is_real=True, size=data_pd.shape),
                         dn, reverse=True)
        return data_dn, data_n
    else:
        return data_dn
|