sdatip 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdatip/__init__.py ADDED
@@ -0,0 +1,46 @@
1
+ """SDATIP: Fast Stochastic Determination of Arrival Time and Initial Polarity.
2
+
3
+ A high-performance Python package for seismic waveform analysis that determines
4
+ arrival time and initial polarity using Markov chain-based stochastic methods.
5
+
6
+ Example usage:
7
+ >>> import sdatip
8
+ >>>
9
+ >>> # Quick processing
10
+ >>> result = sdatip.process_waveform(
11
+ ... name="station_A",
12
+ ... data=waveform_array,
13
+ ... output_dir="./output/"
14
+ ... )
15
+ >>> print(f"Arrival time: {result['arrival_time']:.3f}s")
16
+ >>> print(f"Polarity: {result['polarity_up']:.3f}")
17
+ >>>
18
+ >>> # Step-by-step processing
19
+ >>> wf = sdatip.Waveform("station_A")
20
+ >>> wf.importdata(data, delta=0.01)
21
+ >>> wf.analyzedata()
22
+ >>> # ... continue with other steps
23
+ """
24
+
25
+ from sdatip.waveform import Waveform, findnoise
26
+ from sdatip.state import State
27
+ from sdatip.pmi import entropy, pmi, maxpmi, calculate_general
28
+ from sdatip.processor import process_waveform, process_batch
29
+
30
# Package metadata exposed as ``sdatip.__version__`` / ``sdatip.__author__``.
__version__ = "1.0.0"
__author__ = "Chuan1937"

# Names exported by ``from sdatip import *``. Every entry is one of the names
# imported at the top of this module from the waveform, state, pmi and
# processor submodules.
__all__ = [
    # Main classes
    "Waveform",
    "State",
    # High-level API
    "process_waveform",
    "process_batch",
    # Utility functions
    "findnoise",
    "entropy",
    "pmi",
    "maxpmi",
    "calculate_general",
]
sdatip/plotting.py ADDED
@@ -0,0 +1,404 @@
1
+ """Plotting utilities for seismic waveform analysis results."""
2
+
3
+ import numpy as np
4
+ import matplotlib
5
+
6
+ matplotlib.use("Agg")
7
+ import matplotlib.pyplot as plt
8
+
9
+
10
def plot_result(wf, state, qualifiedid, name, outputdir):
    """Generate the detailed probability plot for one solution and save it as PDF.

    The figure is written to ``outputdir + "<name>_<qualifiedid>.pdf"``.
    ``outputdir`` is used as a raw string prefix (no path separator is
    inserted), so it must already end with one, e.g. ``"./output/"``.

    The matplotlib rc overrides used for this figure are applied through
    ``plt.rc_context`` so the caller's global ``rcParams`` are left untouched,
    and the figure is closed even when drawing or saving fails.

    Args:
        wf: Waveform object with processed data. Reads ``cut``,
            ``longtimestamp``, ``denselongdata``, ``densetimestamp`` and
            ``timestamp``.
        state: State object with probability estimates. Reads ``timeprob``,
            ``upthreshold``, ``downthreshold``, ``Apeak``, ``num``,
            ``arrivalestimate``, ``Apeakestimate``, ``sigmaestimate``,
            ``polarityestimation``, ``polarityup``, ``polarityunknown``,
            ``polaritydown`` and ``bigeig``.
        qualifiedid: Index of the solution to plot (row of ``state.timeprob``).
        name: Station name, used in the figure title and the file name.
        outputdir: Directory prefix to save the plot under.
    """
    rc_overrides = {
        "font.weight": "bold",
        "axes.labelweight": "bold",
        "font.family": "Times New Roman",
        "font.size": 45,
    }
    # Scope the style overrides to this figure instead of mutating the
    # process-wide rcParams for every later plot.
    with plt.rc_context(rc_overrides):
        fig = plt.figure(figsize=(25, 9))
        try:
            _draw_result(fig, wf, state, qualifiedid, name)
            fig.savefig("%s" % (outputdir) + "%s_%d.pdf" % (name, qualifiedid))
        finally:
            # Always release the figure, even if drawing/saving raised.
            plt.close(fig)


def _draw_result(fig, wf, state, qualifiedid, name):
    """Populate *fig* with the four panels and text labels of the detailed plot."""
    timeprob = state.timeprob[qualifiedid]

    b_upthreshold = np.array(state.upthreshold)
    b_downthreshold = np.array(state.downthreshold)
    b_Apeak = np.array([item[0] for item in state.Apeak])

    a_cut = np.array(wf.cut).flatten().astype(int)

    # Replace the last upper threshold by 1.5x the previous one so the top
    # band has a finite height that can be drawn.
    b_upthreshold[-1] = b_upthreshold[-2] * 1.5

    # Probability per unit threshold width (density over the threshold axis).
    prob1 = timeprob / (b_upthreshold - b_downthreshold)
    max_prob1 = np.max(prob1)
    # Map densities onto alpha values in [0.03, 0.75]; avoid 0-division when
    # every density is zero.
    alphacoefficient = (0.75 - 0.03) / max_prob1 if max_prob1 > 0 else 0.0
    alphas = 0.03 + prob1 * alphacoefficient

    # Red for positive peak amplitude, green for negative, black otherwise.
    colori = np.zeros((state.num, 3))
    colori[b_Apeak > 0] = [1, 0, 0]
    colori[b_Apeak < 0] = [0, 1, 0]

    mask_nonzero = np.abs(b_Apeak) > 0

    downt = np.zeros(state.num)
    upt = np.zeros(state.num)

    # Linear interpolation between sample ``cut`` and ``cut + 1`` to find the
    # times associated with the lower/upper threshold of every band.
    cut_indices = a_cut[mask_nonzero]
    tchange = wf.longtimestamp[cut_indices + 1] - wf.longtimestamp[cut_indices]
    achange = wf.denselongdata[cut_indices + 1] - wf.denselongdata[cut_indices]
    achange[achange == 0] = 1e-9  # avoid division by zero on flat segments

    base_amplitude = np.abs(wf.denselongdata[cut_indices])
    base_time = wf.longtimestamp[cut_indices]
    downt[mask_nonzero] = (
        tchange / achange * (b_downthreshold[mask_nonzero] - base_amplitude)
        + base_time
    )
    upt[mask_nonzero] = (
        tchange / achange * (b_upthreshold[mask_nonzero] - base_amplitude)
        + base_time
    )

    # Bands with zero peak amplitude are parked just past the end of the trace.
    downt[~mask_nonzero] = wf.longtimestamp[-1]
    upt[~mask_nonzero] = wf.longtimestamp[-1] + 0.1 * wf.timestamp[-1]

    # --- Panel 1: waveform, |waveform| and shaded threshold/time bands -----
    ax1 = fig.add_axes([0.4, 0.33, 0.55, 0.6])
    ax1.plot(wf.longtimestamp, wf.denselongdata, linewidth=2.5, color="k", linestyle="-")
    ax1.plot(wf.longtimestamp, abs(wf.denselongdata), linewidth=2.5, color="k", linestyle=":", alpha=0.9)

    guide_style = dict(color="k", linestyle="--", alpha=0.3, linewidth=0.5)
    ylim_min = -1 * b_upthreshold[-1]
    ax1.vlines(downt[mask_nonzero], ylim_min, b_downthreshold[mask_nonzero], **guide_style)
    ax1.vlines(upt[mask_nonzero], ylim_min, b_upthreshold[mask_nonzero], **guide_style)
    ax1.hlines(b_downthreshold[mask_nonzero], 0, downt[mask_nonzero], **guide_style)
    ax1.hlines(b_upthreshold[mask_nonzero], 0, upt[mask_nonzero], **guide_style)

    y_floor = -1 * b_upthreshold[-2]
    for i in range(state.num):
        ax1.fill_between(
            [0, downt[i], upt[i]],
            [b_downthreshold[i], b_downthreshold[i], b_upthreshold[i]],
            [b_upthreshold[i], b_upthreshold[i], b_upthreshold[i]],
            color=colori[i], alpha=alphas[i]
        )
        ax1.fill_betweenx(
            [y_floor, b_downthreshold[i], b_upthreshold[i]],
            [downt[i], downt[i], upt[i]],
            [upt[i], upt[i], upt[i]],
            color=colori[i], alpha=alphas[i]
        )

    ax1.set(
        xlim=(0, np.max(wf.densetimestamp) + 0.1 * wf.timestamp[-1]),
        ylim=(y_floor, b_upthreshold[-1])
    )
    ax1.tick_params(direction="out", size=20)
    amplitude_ticks = np.array([y_floor, 0, b_upthreshold[-1]])
    ax1.set_yticks(amplitude_ticks)
    ax1.set_yticklabels(["%.2f" % v for v in amplitude_ticks], fontweight="bold")
    time_ticks = np.linspace(0, wf.timestamp[-1], 5)
    time_labels = ["%.2f" % v for v in time_ticks]
    ax1.set_xticks(time_ticks)
    ax1.set_xticklabels(time_labels, fontweight="bold")

    for spine in ax1.spines.values():
        spine.set_linewidth(2)

    # --- Panel 2: density over the threshold axis (horizontal bars) --------
    # Height is chosen so the positive part of panel 1's y-axis lines up.
    ax2_height = 0.6 / (b_upthreshold[-1] + b_upthreshold[-2]) * b_upthreshold[-1]
    ax2 = fig.add_axes([0.1, 0.93 - ax2_height, 0.25, ax2_height])
    ax2.invert_xaxis()

    ax2.barh(
        y=b_downthreshold, width=prob1, height=b_upthreshold - b_downthreshold,
        left=0, align="edge", color=colori, alpha=1
    )

    ax2.set(ylim=(0, b_upthreshold[-1]))
    density_ticks = np.linspace(0, max_prob1, 5)
    ax2.set_xticks(density_ticks)
    ax2.set_xticklabels(["%.2f" % v for v in density_ticks], fontweight="bold")
    threshold_ticks = np.linspace(0, b_upthreshold[-1], 5)
    ax2.set_yticks(threshold_ticks)
    ax2.set_yticklabels(["%.2f" % v for v in threshold_ticks], fontweight="bold")
    ax2.tick_params(direction="out", size=20)
    ax2.set_ylabel(r"$\mathbf{\varepsilon_{threshold}}$", weight="bold")
    ax2.set_xlabel(r"PDF of $\mathbf{\varepsilon_{threshold}}$", weight="bold")
    for spine in ax2.spines.values():
        spine.set_linewidth(2)

    # --- Panel 3: density over time, with the arrival estimate marked ------
    ax3 = fig.add_axes([0.4, 0.1, 0.55, 0.15], sharex=ax1)

    max_prob1_1 = max_prob1 * 1.1
    thin_line = dict(color="k", linestyle="-", alpha=0.3, linewidth=0.5)
    ax3.vlines(downt, 0, max_prob1_1, **thin_line)
    ax3.vlines(upt, 0, max_prob1_1, **thin_line)
    ax3.hlines(prob1, downt, upt, **thin_line)
    ax3.bar(x=downt, height=prob1, width=upt - downt, align="edge", color=colori, alpha=1)
    ax3.plot(
        [state.arrivalestimate, state.arrivalestimate], [0, max_prob1_1],
        linewidth=2.3, color="k", linestyle=":", alpha=0.9
    )

    ax3.set(ylim=(0, max_prob1_1))
    pdf_ticks = np.linspace(0, max_prob1, 3)
    ax3.set_yticks(pdf_ticks)
    ax3.set_yticklabels(["%.2f" % v for v in pdf_ticks], fontweight="bold")
    ax3.set_xticks(time_ticks)
    ax3.set_xticklabels(time_labels, fontweight="bold")
    ax3.tick_params(direction="out", size=20)
    ax3.set_ylabel("PDF of Time", weight="bold")
    ax3.set_xlabel("Time/s", weight="bold")
    for spine in ax3.spines.values():
        spine.set_linewidth(2)

    # --- Panel 4: stacked bar of up / unknown / down polarity --------------
    ax4 = fig.add_axes([0.1, 0.13, 0.23, 0.05])

    width = [float(state.polarityup), float(state.polarityunknown), float(state.polaritydown)]
    left = [0, width[0], width[0] + width[1]]
    colors = [[1, 0, 0], [0.7, 0.7, 0.7], [0, 1, 0]]
    labels = ["Up", "Unknown", "Down"]
    ax4.barh(y=[1, 1, 1], width=width, height=1, left=left, color=colors)

    ax4.set_xticks([0.5])
    ax4.set_xticklabels(["0.5"], fontweight="bold")
    ax4.text(0, 2, "Up:%.1f%%" % (width[0] * 100))
    ax4.text(0.85, 2, "Down:%.1f%%" % (width[2] * 100))
    ax4.set_yticks([])
    ax4.set(xlim=(0, 1), ylim=(0.5, 1.5))
    ax4.plot([0.5, 0.5], [0.5, 1.5], linewidth=2.5, color="k", linestyle=":", alpha=1)
    for spine in ax4.spines.values():
        spine.set_linewidth(2)
    ax4.legend(
        handles=[plt.Rectangle((0, 0), 1, 1, color=c) for c in colors],
        labels=labels, ncol=3, loc="lower center", bbox_to_anchor=(0.5, 1.5)
    )

    # --- Figure-level text annotations -------------------------------------
    small_bold = {"fontweight": "bold", "fontsize": 15}
    fig.text(0.21, 0.38, "%s" % (name), {"fontweight": "bold", "fontsize": 25}, horizontalalignment="center")
    fig.text(0.24, 0.33, r"$\mathbf{A_{peak}}$" + ": %.3f" % (state.Apeakestimate), small_bold)
    fig.text(0.24, 0.28, r"$\mathbf{\sigma}$" + ": %.3f" % (state.sigmaestimate), small_bold)
    fig.text(0.1, 0.33, "Arrivaltime" + ": %.3f" % (state.arrivalestimate), small_bold)
    fig.text(0.1, 0.28, "Polarity Up" + ": %.3f" % (state.polarityestimation), small_bold)
    fig.text(0.1, 0.23, "Eig value:" + " %s" % (state.bigeig))
216
+
217
+
218
def plot_result_graduate(wf, state, qualifiedid, name, outputdir):
    """Generate the graduate-style (publication) plot and save it as EPS and PDF.

    Two files are written: ``outputdir + "<name>.eps"`` and
    ``outputdir + "<name>_<qualifiedid>.pdf"``. ``outputdir`` is used as a
    raw string prefix (no path separator is inserted), so it must already end
    with one, e.g. ``"./output/"``.

    The matplotlib rc overrides used for this figure are applied through
    ``plt.rc_context`` so the caller's global ``rcParams`` are left untouched,
    and the figure is closed even when drawing or saving fails.

    Args:
        wf: Waveform object with processed data. Reads ``cut``, ``length``,
            ``delta``, ``longtimestamp``, ``denselongdata``,
            ``densetimestamp`` and ``timestamp``.
        state: State object with probability estimates. Reads ``timeprob``,
            ``upthreshold``, ``downthreshold``, ``Apeak``, ``num``,
            ``arrivalestimate``, ``polarityup``, ``polarityunknown``,
            ``polaritydown`` and ``bigeig``.
        qualifiedid: Index of the solution to plot (row of ``state.timeprob``).
        name: Station name for the file names.
        outputdir: Directory prefix to save the plots under.
    """
    rc_overrides = {
        "font.weight": "normal",
        "axes.labelweight": "normal",
        "font.family": "Times New Roman",
        "font.size": 45,
    }
    # Scope the style overrides to this figure instead of mutating the
    # process-wide rcParams for every later plot.
    with plt.rc_context(rc_overrides):
        fig = plt.figure(figsize=(20, 12))
        try:
            _draw_result_graduate(fig, wf, state, qualifiedid)
            fig.savefig("%s" % (outputdir) + "%s.eps" % (name))
            fig.savefig("%s" % (outputdir) + "%s_%d.pdf" % (name, qualifiedid))
        finally:
            # Always release the figure, even if drawing/saving raised.
            plt.close(fig)


def _draw_result_graduate(fig, wf, state, qualifiedid):
    """Populate *fig* with the four panels and text of the graduate-style plot."""
    timeprob = state.timeprob[qualifiedid]

    b_upthreshold = np.array(state.upthreshold)
    b_downthreshold = np.array(state.downthreshold)
    b_Apeak = np.array([item[0] for item in state.Apeak])

    a_cut = np.array(wf.cut).flatten().astype(int)

    # Replace the last upper threshold by 1.5x the previous one so the top
    # band has a finite height that can be drawn.
    b_upthreshold[-1] = b_upthreshold[-2] * 1.5

    # Probability per unit threshold width (density over the threshold axis).
    prob1 = timeprob / (b_upthreshold - b_downthreshold)
    max_prob1 = np.max(prob1)
    # Scale densities to alpha values in [0, 0.75]; avoid 0-division when
    # every density is zero.
    alphacoefficient = 0.75 / max_prob1 if max_prob1 > 0 else 0.0
    alphas = prob1 * alphacoefficient
    alphas_fill = 0.03 + alphas

    # Red for positive peak amplitude, blue for negative, black otherwise.
    colori = np.zeros((state.num, 3))
    colori[b_Apeak > 0] = [1, 0, 0]
    colori[b_Apeak < 0] = [0, 0, 1]

    # Accumulate the per-band probabilities onto the original sample grid.
    oritprob = np.zeros(wf.length)
    tprobid = np.floor(wf.longtimestamp[a_cut] / wf.delta).astype(int)
    np.add.at(oritprob, tprobid, timeprob)

    mask_nonzero = np.abs(b_Apeak) > 0

    # Sample-aligned time interval of every band (used by the time PDF bars).
    downt1 = np.zeros(state.num)
    upt1 = np.zeros(state.num)
    downt1[mask_nonzero] = wf.timestamp[tprobid[mask_nonzero]]
    upt1[mask_nonzero] = wf.timestamp[tprobid[mask_nonzero] + 1]
    downt1[~mask_nonzero] = wf.timestamp[-1]
    upt1[~mask_nonzero] = wf.timestamp[-1] + 0.1 * wf.timestamp[-1]

    downt = np.zeros(state.num)
    upt = np.zeros(state.num)

    # Linear interpolation between sample ``cut`` and ``cut + 1`` to find the
    # times associated with the lower/upper threshold of every band.
    cut_indices = a_cut[mask_nonzero]
    tchange = wf.longtimestamp[cut_indices + 1] - wf.longtimestamp[cut_indices]
    achange = wf.denselongdata[cut_indices + 1] - wf.denselongdata[cut_indices]
    achange[achange == 0] = 1e-9  # avoid division by zero on flat segments

    base_amplitude = np.abs(wf.denselongdata[cut_indices])
    base_time = wf.longtimestamp[cut_indices]
    downt[mask_nonzero] = (
        tchange / achange * (b_downthreshold[mask_nonzero] - base_amplitude)
        + base_time
    )
    upt[mask_nonzero] = (
        tchange / achange * (b_upthreshold[mask_nonzero] - base_amplitude)
        + base_time
    )

    # Bands with zero peak amplitude are parked just past the end of the trace.
    downt[~mask_nonzero] = wf.longtimestamp[-1]
    upt[~mask_nonzero] = wf.longtimestamp[-1] + 0.1 * wf.timestamp[-1]

    # --- Panel 1: waveform, |waveform| and shaded threshold/time bands -----
    ax1 = fig.add_axes([0.47, 0.33, 0.45, 0.6])
    ax1.plot(wf.longtimestamp, wf.denselongdata, linewidth=5, color="k", linestyle="-")
    ax1.plot(wf.longtimestamp, abs(wf.denselongdata), linewidth=5, color="k", linestyle=":", alpha=0.9)

    y_floor = -1 * b_upthreshold[-2]
    for i in range(state.num):
        ax1.fill_between(
            [0, downt[i], upt[i]],
            [b_downthreshold[i], b_downthreshold[i], b_upthreshold[i]],
            [b_upthreshold[i], b_upthreshold[i], b_upthreshold[i]],
            color=colori[i], alpha=alphas_fill[i]
        )
        ax1.fill_betweenx(
            [y_floor, b_downthreshold[i], b_upthreshold[i]],
            [downt[i], downt[i], upt[i]],
            [upt[i], upt[i], upt[i]],
            color=colori[i], alpha=alphas[i]
        )

    x_limits = (0, np.max(wf.densetimestamp) + 0.1 * wf.timestamp[-1])
    ax1.set(xlim=x_limits, ylim=(y_floor, b_upthreshold[-1]))
    ax1.yaxis.tick_right()
    ax1.tick_params(direction="out", size=20, length=5, width=2)
    ax1.set_yticks(np.array([y_floor, 0, b_upthreshold[-1]]))
    # Amplitude labels are shown divided by 10000.
    ax1.set_yticklabels([
        "%.1f" % (y_floor / 10000),
        "%.1f" % (0),
        "%.1f" % (b_upthreshold[-1] / 10000)
    ])
    time_ticks = np.linspace(0, wf.timestamp[-1], 5)
    time_labels = ["%.2f" % v for v in time_ticks]
    ax1.set_xticks(time_ticks)
    ax1.set_xticklabels(time_labels, fontsize=25)
    # Hide panel 1's x tick labels (white on white); panel 3 shows them.
    for tick_label in ax1.xaxis.get_ticklabels():
        tick_label.set_color("white")
    for spine in ax1.spines.values():
        spine.set_linewidth(5)

    # --- Panel 2: density over the threshold axis (horizontal bars) --------
    ax2_height = 0.6 / (b_upthreshold[-1] + b_upthreshold[-2]) * b_upthreshold[-1]
    ax2 = fig.add_axes([0.11, 0.93 - ax2_height, 0.25, ax2_height])
    ax2.invert_xaxis()
    ax2.barh(
        y=b_downthreshold, width=prob1, height=b_upthreshold - b_downthreshold,
        left=0, align="edge", color=colori, alpha=1
    )

    ax2.set(ylim=(-4000, b_upthreshold[-1]))
    ax2.set_xticks(np.linspace(0, max_prob1, 3))
    # NOTE(review): the x70 and /10000 label scalings are hard-coded in the
    # original; their meaning is not visible here -- confirm with the author.
    ax2.set_xticklabels(["%d" % (0), "%.2f" % (max_prob1 / 2 * 70), "%.2f" % (max_prob1 * 70)])
    ax2.set_yticks(np.linspace(0, b_upthreshold[-1], 3))
    ax2.set_yticklabels([
        "%.1f" % (0),
        "%.1f" % (b_upthreshold[-1] / 2 / 10000),
        "%.1f" % (b_upthreshold[-1] / 10000)
    ])
    ax2.tick_params(direction="out", size=20, length=5, width=2)
    ax2.set_ylabel(r"$\epsilon$")
    ax2.set_xlabel("PDF")
    for spine in ax2.spines.values():
        spine.set_linewidth(5)

    # --- Panel 3: probability over time, with the arrival estimate marked --
    ax3 = fig.add_axes([0.47, 0.13, 0.45, 0.12])
    ax3.bar(x=downt1, height=oritprob[tprobid], width=upt1 - downt1, align="edge", color=colori, alpha=1)

    max_oritprob = np.max(oritprob)
    ax3.plot(
        [state.arrivalestimate, state.arrivalestimate], [0, max_oritprob * 1.1],
        linewidth=3, color="k", linestyle=":", alpha=0.9
    )
    ax3.yaxis.tick_right()
    ax3.set(xlim=x_limits, ylim=(0, max_oritprob * 1.1))
    ax3.set_yticks(np.linspace(0, max_oritprob, 3))
    # Scale to percent *before* converting to an integer; truncating first
    # would turn every value below 1 into a "0" label.
    ax3.set_yticklabels([
        "%d" % (0),
        "%d" % round(100 * max_oritprob / 2),
        "%d" % round(100 * max_oritprob)
    ])
    ax3.set_xticks(time_ticks)
    ax3.set_xticklabels(time_labels, fontsize=45)
    ax3.tick_params(direction="out", size=20, length=5, width=1)
    ax3.set_ylabel("PDF")
    ax3.set_xlabel("Time (s)")
    for spine in ax3.spines.values():
        spine.set_linewidth(5)

    # --- Panel 4: stacked bar of up / unknown / down polarity --------------
    ax4 = fig.add_axes([0.125, 0.12, 0.225, 0.05])
    width = [float(state.polarityup), float(state.polarityunknown), float(state.polaritydown)]
    left = [0, width[0], width[0] + width[1]]
    colors = [[1, 0, 0], [0.7, 0.7, 0.7], [0, 0, 1]]
    ax4.barh(y=[1, 1, 1], width=width, height=1, left=left, color=colors)

    ax4.set_xticks([0.5])
    ax4.set_xticklabels(["0.5"])
    ax4.text(-0.07, 2, "U: %.1f%%" % (abs(width[0]) * 100), fontsize=45)
    ax4.text(0.7, 2, "D: %.1f%%" % (abs(width[2]) * 100), fontsize=45)
    ax4.text(-0.37, 2, "Pol:", fontsize=45)
    ax4.set_yticks([])
    ax4.set(xlim=(0, 1), ylim=(0.5, 1.5))
    ax4.plot([0.5, 0.5], [0.5, 1.5], linewidth=5, color="k", linestyle=":", alpha=1)
    for spine in ax4.spines.values():
        spine.set_linewidth(5)

    fig.text(0.04, 0.305, f"E.V.: {state.bigeig[0]:.0f}, {state.bigeig[1]:.0f}, {state.bigeig[2]:.2f}, ...", fontsize=45)
sdatip/pmi.py ADDED
@@ -0,0 +1,155 @@
1
+ """Pointwise Mutual Information (PMI) calculations for seismic waveform analysis.
2
+
3
+ This module provides JIT-compiled functions for computing PMI and entropy
4
+ to find optimal cut points in binary sequences.
5
+ """
6
+
7
+ import numba
8
+ import numpy as np
9
+
10
+
11
+ @numba.jit(nopython=True, cache=True)
12
def entropy(ampen2):
    """Return the Shannon entropy (in bits) of a sequence of 0s and 1s.

    Computes ``H = -(p0 * log2(p0) + p1 * log2(p1))`` where ``p0`` and ``p1``
    are the counts of 0 and 1 divided by the sequence length.

    Returns 0.0 for an empty sequence and whenever either symbol is absent.
    """
    if ampen2.size == 0:
        return 0.0

    histogram = np.bincount(ampen2)
    # A histogram shorter than two bins means the sequence holds only zeros.
    if len(histogram) < 2:
        return 0.0

    n_zero = histogram[0]
    n_one = histogram[1]
    if n_zero == 0 or n_one == 0:
        return 0.0

    total = len(ampen2)
    frac_zero = n_zero / total
    frac_one = n_one / total
    weighted_logs = np.log2(frac_zero) * frac_zero + np.log2(frac_one) * frac_one
    return -weighted_logs
30
+
31
+
32
+ @numba.jit(nopython=True, cache=True)
33
def pmi(ampbi, t):
    """Calculate mutual information and a normalized PMI score at split ``t``.

    The sequence is split into ``ampbi[:t]`` (part 1) and ``ampbi[t:]``
    (part 2) and a 2x2 table is built over (part, symbol). For every
    non-empty cell with count ``c``:

    * ``ratio = c * N / (part_length * symbol_total)``
    * PMI contribution ``(c / N) * log2(ratio)``
    * normalized PMI ``-log2(ratio) / log2(c / N)``

    Empty cells contribute 0 to the mutual information and use -1.0 as their
    normalized PMI. The normalized score is the count-weighted sum of the
    cell values with a ``+`` sign on the (part 1, 0) and (part 2, 1) cells
    and a ``-`` sign on the other two, divided by ``N``.

    Degenerate inputs are handled explicitly instead of dividing by zero:
    an empty sequence returns ``(0.0, 0.0)``, and a cell that holds the whole
    sequence (``log2(c / N) == 0``) gets a normalized PMI of 0.0.

    Args:
        ampbi: Binary sequence (0s and 1s).
        t: Split index.

    Returns:
        (mi_value, normalized_pmi_value).
    """
    length_all = len(ampbi)
    if length_all == 0:
        # Nothing to split; the final division by the length would be 0 / 0.
        return 0.0, 0.0

    part1 = ampbi[:t]
    part2 = ampbi[t:]

    # counts[part, symbol]
    counts = np.zeros((2, 2), dtype=np.int64)
    counts[0, 0] = (part1 == 0).sum()
    counts[0, 1] = (part1 == 1).sum()
    counts[1, 0] = (part2 == 0).sum()
    counts[1, 1] = (part2 == 1).sum()

    part_lengths = np.zeros(2, dtype=np.int64)
    part_lengths[0] = counts[0, 0] + counts[0, 1]
    part_lengths[1] = counts[1, 0] + counts[1, 1]

    symbol_totals = np.zeros(2, dtype=np.int64)
    symbol_totals[0] = counts[0, 0] + counts[1, 0]
    symbol_totals[1] = counts[0, 1] + counts[1, 1]

    pmi_matrix = np.zeros((2, 2))
    weighted_npmi = 0.0
    for part in range(2):
        for symbol in range(2):
            joint = counts[part, symbol]
            npmi_cell = -1.0  # value used for empty cells
            if joint > 0:
                # joint > 0 implies both marginals are > 0 as well.
                ratio = joint * length_all / (part_lengths[part] * symbol_totals[symbol])
                log_ratio = np.log2(ratio)
                p_joint = joint / length_all
                pmi_matrix[part, symbol] = p_joint * log_ratio
                log_joint = np.log2(p_joint)
                if log_joint == 0.0:
                    # The cell holds the entire sequence: ratio is 1 too,
                    # so the quotient would be 0 / 0.
                    npmi_cell = 0.0
                else:
                    npmi_cell = -log_ratio / log_joint
            # Diagonal cells add, off-diagonal cells subtract.
            if part == symbol:
                weighted_npmi += npmi_cell * joint
            else:
                weighted_npmi -= npmi_cell * joint

    mi_value = pmi_matrix.sum()
    normalized_pmi = weighted_npmi / length_all
    return mi_value, normalized_pmi
96
+
97
+
98
+ @numba.jit(nopython=True, cache=True)
99
def maxpmi(ampbi, n):
    """Find the split point(s) with the largest mutual information.

    Args:
        ampbi: Binary sequence (0s and 1s).
        n: If -1, evaluate every position immediately after a 0 -> 1
            transition; otherwise evaluate only the split index ``n``.

    Returns:
        (max_mi, normalized_pmi, split_indices). ``split_indices`` holds every
        candidate that reaches the maximum; ``normalized_pmi`` belongs to the
        first of them. When the sequence lacks zeros or ones, or has no
        0 -> 1 transition, the result is ``(-1.0, -1.0, [-1])``.
    """
    has_no_zero = (ampbi == 0).sum() == 0
    has_no_one = (ampbi == 1).sum() == 0
    if has_no_zero or has_no_one:
        return -1.0, -1.0, np.array([-1], dtype=np.int64)

    # Caller pinned the split position: evaluate it directly.
    if n != -1:
        forced_mi, forced_npmi = pmi(ampbi, n)
        return forced_mi, forced_npmi, np.array([n], dtype=np.int64)

    # Indices i where ampbi[i] == 0 and ampbi[i + 1] == 1.
    rising_edges = np.where(ampbi[1:] - ampbi[:-1] == 1)[0]
    n_candidates = len(rising_edges)
    if n_candidates == 0:
        return -1.0, -1.0, np.array([-1], dtype=np.int64)

    mi_scores = np.zeros(n_candidates)
    npmi_scores = np.zeros(n_candidates)
    for k in range(n_candidates):
        mi_scores[k], npmi_scores[k] = pmi(ampbi, rising_edges[k] + 1)

    best_mi = np.max(mi_scores)
    winners = np.where(mi_scores == best_mi)[0]

    split_points = rising_edges[winners] + 1
    return best_mi, npmi_scores[winners[0]], split_points
132
+
133
+
134
+ @numba.jit(nopython=True, cache=True)
135
def calculate_general(xsquare, n):
    """Evaluate the closed-form Gaussian-integral term of order ``n``.

    With ``a = xsquare``:

    * ``n == 3`` returns ``1 / a`` and ``n == 2`` returns ``sqrt(pi / (2a))``.
    * odd ``n``: ``(2 * 4 * ... * (n - 3)) * (1 / a) ** ((n - 1) / 2)``
    * even ``n``: ``(1 * 3 * ... * (n - 3)) * sqrt(pi / (2a)) * (1 / a) ** ((n - 2) / 2)``

    An empty product counts as 1.
    """
    if n == 3:
        return 1.0 / xsquare
    if n == 2:
        return np.sqrt(np.pi / 2.0 / xsquare)

    factor = 1.0
    if n % 2 == 1:
        # Product of the even numbers 2, 4, ..., n - 3.
        exponent = (n - 3) / 2.0
        term = 2
        while term < n - 1:
            factor *= term
            term += 2
        return factor * (1.0 / xsquare) ** (exponent + 1.0)

    # Product of the odd numbers 1, 3, ..., n - 3.
    exponent = (n - 2) / 2.0
    term = 1
    while term < n - 2:
        factor *= term
        term += 2
    return factor * np.sqrt(np.pi / 2.0 / xsquare) * (1.0 / xsquare) ** exponent