AeroViz-0.1.14-py3-none-any.whl → AeroViz-0.1.15-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of AeroViz might be problematic.
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/_absorption.py +2 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__init__.py +1 -1
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/templates/corr_matrix.py +168 -2
- AeroViz/plot/templates/metal_heatmap.py +15 -6
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/timeseries.py +96 -52
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/__init__.py +35 -5
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__init__.py +131 -41
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +9 -9
- AeroViz/rawDataReader/script/SMPS.py +9 -0
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/XRF.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- {AeroViz-0.1.14.dist-info → AeroViz-0.1.15.dist-info}/METADATA +9 -11
- {AeroViz-0.1.14.dist-info → AeroViz-0.1.15.dist-info}/RECORD +81 -81
- {AeroViz-0.1.14.dist-info → AeroViz-0.1.15.dist-info}/WHEEL +1 -1
- {AeroViz-0.1.14.dist-info → AeroViz-0.1.15.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.14.dist-info → AeroViz-0.1.15.dist-info}/top_level.txt +0 -0
AeroViz/dataProcess/Optical/_absorption.py

@@ -5,9 +5,11 @@ def _absCoe(df, instru, specified_band: list):
 
     band_AE33 = np.array([370, 470, 520, 590, 660, 880, 950])
     band_BC1054 = np.array([370, 430, 470, 525, 565, 590, 660, 700, 880, 950])
+    band_MA350 = np.array([375, 470, 528, 625, 880])
 
     MAE_AE33 = np.array([18.47, 14.54, 13.14, 11.58, 10.35, 7.77, 7.19]) * 1e-3
     MAE_BC1054 = np.array([18.48, 15.90, 14.55, 13.02, 12.10, 11.59, 10.36, 9.77, 7.77, 7.20]) * 1e-3
+    MAE_MA350 = np.array([24.069, 19.070, 17.028, 14.091, 10.120]) * 1e-3
 
     band = band_AE33 if instru == 'AE33' else band_BC1054
     MAE = MAE_AE33 if instru == 'AE33' else MAE_BC1054
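The hunk above adds MA350 wavelength bands and mass absorption efficiencies, but the two-way selection at the end of the hunk still only distinguishes AE33 from BC1054. A minimal sketch of a dictionary-based lookup that would also cover MA350 — a hypothetical helper built from the constants above, not code from this release:

```python
import numpy as np

# Hypothetical lookup covering all three aethalometers; built from the
# constants added in the hunk above. The released _absCoe still falls back
# to the BC1054 arrays for any instrument other than AE33.
BANDS = {
    'AE33': np.array([370, 470, 520, 590, 660, 880, 950]),
    'BC1054': np.array([370, 430, 470, 525, 565, 590, 660, 700, 880, 950]),
    'MA350': np.array([375, 470, 528, 625, 880]),
}
MAE = {
    'AE33': np.array([18.47, 14.54, 13.14, 11.58, 10.35, 7.77, 7.19]) * 1e-3,
    'BC1054': np.array([18.48, 15.90, 14.55, 13.02, 12.10, 11.59, 10.36, 9.77, 7.77, 7.20]) * 1e-3,
    'MA350': np.array([24.069, 19.070, 17.028, 14.091, 10.120]) * 1e-3,
}


def select_band_and_mae(instru: str):
    """Return the (band, MAE) arrays for a supported instrument."""
    if instru not in BANDS:
        raise ValueError(f"Unsupported instrument: {instru!r}")
    return BANDS[instru], MAE[instru]
```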
AeroViz/plot/templates/__init__.py

@@ -1,6 +1,6 @@
 from .ammonium_rich import ammonium_rich
 from .contour import *
-from .corr_matrix import corr_matrix
+from .corr_matrix import corr_matrix, cross_corr_matrix
 from .diurnal_pattern import *
 from .koschmieder import *
 from .metal_heatmap import metal_heatmaps, process_data_with_two_df
AeroViz/plot/templates/corr_matrix.py

@@ -9,18 +9,20 @@ from scipy.stats import pearsonr
 
 from AeroViz.plot.utils import *
 
-__all__ = ['corr_matrix']
+__all__ = ['corr_matrix', 'cross_corr_matrix']
 
 
 @set_figure
 def corr_matrix(data: pd.DataFrame,
                 cmap: str = "RdBu",
                 ax: Axes | None = None,
+                items_order: list = None,  # new parameter for specifying the display order
                 **kwargs
                 ) -> tuple[Figure, Axes]:
     fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax)
 
     _corr = data.corr()
+    breakpoint()
     corr = pd.melt(_corr.reset_index(), id_vars='index')
     corr.columns = ['x', 'y', 'value']
 
@@ -94,8 +96,172 @@ def corr_matrix(data: pd.DataFrame,
         label='p < 0.05'
     )
 
-    ax.legend(handles=[point2], labels=['p < 0.05'], bbox_to_anchor=(0.
+    ax.legend(handles=[point2], labels=['p < 0.05'], bbox_to_anchor=(0.02, 1, 0.05, 0.05))
 
     plt.show()
 
     return fig, ax
+
+
+@set_figure(figsize=(6, 6))
+def cross_corr_matrix(data1: pd.DataFrame,
+                      data2: pd.DataFrame,
+                      cmap: str = "RdBu",
+                      ax: Axes | None = None,
+                      items_order: list = None,  # new parameter for specifying the display order
+                      **kwargs
+                      ) -> tuple[Figure, Axes]:
+    """
+    Create a correlation matrix between two different DataFrames.
+
+    Parameters:
+    -----------
+    data1 : pd.DataFrame
+        First DataFrame
+    data2 : pd.DataFrame
+        Second DataFrame
+    cmap : str, optional
+        Color map for the correlation matrix
+    ax : Axes, optional
+        Matplotlib axes to plot on
+    items_order : list, optional
+        List specifying the order of items to display
+    **kwargs : dict
+        Additional keyword arguments
+    """
+    if ax is None:
+        fig_kws = kwargs.get('fig_kws', {})
+        default_figsize = fig_kws.get('figsize', (8, 8))
+        fig = plt.figure(figsize=default_figsize)
+        ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
+    else:
+        fig = ax.get_figure()
+
+    # If no order is given, use the original column order
+    if items_order is None:
+        x_labels = list(data1.columns)
+        y_labels = list(data2.columns)
+    else:
+        # Use the given order, keeping only columns that actually exist in the data
+        x_labels = [item for item in items_order if item in data1.columns]
+        y_labels = [item for item in items_order if item in data2.columns]
+
+    # Calculate cross-correlation between the two DataFrames
+    correlations = []
+    p_values_list = []
+
+    for col1 in x_labels:  # iterate columns in the given order
+        for col2 in y_labels:
+            try:
+                mask = ~(np.isnan(data1[col1]) | np.isnan(data2[col2]))
+                if mask.sum() > 2:
+                    corr, p_val = pearsonr(data1[col1][mask], data2[col2][mask])
+                else:
+                    corr, p_val = np.nan, np.nan
+            except Exception as e:
+                print(f"Error calculating correlation for {col1} and {col2}: {str(e)}")
+                corr, p_val = np.nan, np.nan
+
+            correlations.append({
+                'x': col1,
+                'y': col2,
+                'value': corr
+            })
+            if p_val is not None and p_val < 0.05:
+                p_values_list.append({
+                    'x': col1,
+                    'y': col2,
+                    'value': p_val
+                })
+
+    corr = pd.DataFrame(correlations)
+    p_values = pd.DataFrame(p_values_list)
+
+    # Create mapping using the specified order
+    x_to_num = {label: i for i, label in enumerate(x_labels)}
+    y_to_num = {label: i for i, label in enumerate(y_labels)}
+
+    # Adjust tick labels
+    ax.set_xticks([x_to_num[v] for v in x_labels])
+    ax.set_xticklabels(x_labels, rotation=45, ha='right')
+    ax.set_yticks([y_to_num[v] for v in y_labels])
+    ax.set_yticklabels(y_labels)
+
+    ax.grid(False, 'major')
+    ax.grid(True, 'minor')
+    ax.set_xticks([t + 0.5 for t in ax.get_xticks()], minor=True)
+    ax.set_yticks([t + 0.5 for t in ax.get_yticks()], minor=True)
+
+    ax.set_xlim([-0.5, max([v for v in x_to_num.values()]) + 0.5])
+    ax.set_ylim([-0.5, max([v for v in y_to_num.values()]) + 0.5])
+
+    # Color mapping
+    n_colors = 256
+    palette = sns.color_palette(cmap, n_colors=n_colors)
+    color_min, color_max = [-1, 1]
+
+    def value_to_color(val):
+        if pd.isna(val):
+            return (1, 1, 1)
+        val_position = float((val - color_min)) / (color_max - color_min)
+        val_position = np.clip(val_position, 0, 1)
+        ind = int(val_position * (n_colors - 1))
+        return palette[ind]
+
+    # Plot correlation squares
+    x_coords = corr['x'].map(x_to_num)
+    y_coords = corr['y'].map(y_to_num)
+    sizes = corr['value'].abs().fillna(0) * 70
+    colors = [value_to_color(val) for val in corr['value']]
+
+    point = ax.scatter(
+        x=x_coords,
+        y=y_coords,
+        s=sizes,
+        c=colors,
+        marker='s',
+        label='$R^{2}$'
+    )
+
+    # Adjust the position and size of the colorbar axis
+    cax = fig.add_axes([0.91, 0.1, 0.02, 0.8])
+    axes_image = plt.cm.ScalarMappable(cmap=colormaps[cmap])
+    cbar = plt.colorbar(mappable=axes_image, cax=cax, label=r'$R^{2}$')
+    cbar.set_ticks([0, 0.25, 0.5, 0.75, 1])
+    cbar.set_ticklabels(np.linspace(-1, 1, 5))
+
+    # Plot significance markers
+    if not p_values.empty:
+        point2 = ax.scatter(
+            x=p_values['x'].map(x_to_num),
+            y=p_values['y'].map(y_to_num),
+            s=10,
+            marker='*',
+            color='k',
+            label='p < 0.05'
+        )
+        ax.legend(handles=[point2], labels=['p < 0.05'],
+                  bbox_to_anchor=(0.005, 1.04), loc='upper left')
+
+    # Add labels
+    ax.set_xlabel('NZ', labelpad=10)
+    ax.set_ylabel('FS', labelpad=10)
+
+    plt.show()
+
+    return fig, ax
+
+
+if __name__ == '__main__':
+    import pandas as pd
+    from pandas import to_numeric
+
+    df_NZ = pd.read_csv('/Users/chanchihyu/Desktop/NZ_minion_202402-202411.csv', parse_dates=True, index_col=0)
+    df_FS = pd.read_csv('/Users/chanchihyu/Desktop/FS_minion_202402-202411.csv', parse_dates=True, index_col=0)
+
+    items = ['Ext', 'Sca', 'Abs', 'PNC', 'PSC', 'PVC', 'SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC', 'CH4',
+             'PM10', 'PM2.5', 'WS', 'AT', 'RH',
+             'OC', 'EC', 'Na+', 'NH4+', 'NO3-', 'SO42-', 'Al', 'Si', 'Ca', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Cu', 'Zn']
+    df_NZ = df_NZ.apply(to_numeric, errors='coerce')

+    corr_matrix(df_NZ[items], items_order=items)
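The new cross_corr_matrix correlates each column of one DataFrame against each column of a second DataFrame (Pearson r, with stars marking p < 0.05). A minimal usage sketch, assuming two aligned hourly DataFrames; the column names and synthetic data are illustrative, not taken from the release. Note that the function hard-codes 'NZ' and 'FS' as the axis labels, and that the hunk above also leaves a breakpoint() call inside corr_matrix itself.

```python
import numpy as np
import pandas as pd
from AeroViz.plot.templates import cross_corr_matrix

# Illustrative data: two sites measured on the same hourly index (names assumed).
idx = pd.date_range('2024-02-01', periods=500, freq='h')
rng = np.random.default_rng(0)
site_a = pd.DataFrame(rng.normal(size=(500, 3)), index=idx, columns=['PM2.5', 'NO2', 'O3'])
site_b = pd.DataFrame(rng.normal(size=(500, 3)), index=idx, columns=['PM2.5', 'NO2', 'O3'])

# Pearson r is computed pairwise between site_a columns (x-axis) and
# site_b columns (y-axis); items_order restricts and orders both axes.
fig, ax = cross_corr_matrix(site_a, site_b, items_order=['PM2.5', 'NO2', 'O3'])
```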
AeroViz/plot/templates/metal_heatmap.py

@@ -117,11 +117,11 @@ def normalize_and_split(df, df2):
     return df, df2
 
 
-@set_figure(figsize=(
+@set_figure(figsize=(6, 3), fs=8, fw='normal')
 def metal_heatmaps(df,
                    process=True,
-                   major_freq='
-                   minor_freq='
+                   major_freq='10d',
+                   minor_freq='1d',
                    cmap='jet',
                    ax: Axes | None = None,
                    **kwargs
@@ -131,7 +131,7 @@ def metal_heatmaps(df,
 
     fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax)
 
-    sns.heatmap(df.T, vmin=None, vmax=3, cmap=cmap, xticklabels=
+    sns.heatmap(df.T, vmin=None, vmax=3, cmap=cmap, xticklabels=True, yticklabels=True,
                 cbar_kws={'label': 'Z score', "pad": 0.02})
     ax.grid(color='gray', linestyle='-', linewidth=0.3)
 
@@ -142,14 +142,23 @@ def metal_heatmaps(df,
     # Set the major and minor ticks
     ax.set_xticks(ticks=[df.index.get_loc(t) for t in major_tick])
     ax.set_xticks(ticks=[df.index.get_loc(t) for t in minor_tick], minor=True)
-    ax.set_xticklabels(major_tick.strftime('%F'))
+    ax.set_xticklabels(major_tick.strftime('%F'), rotation=0)
     ax.tick_params(axis='y', rotation=0)
 
     ax.set(xlabel='',
-           ylabel='',
+           ylabel='Trace metals',
           title=kwargs.get('title', None)
           )
 
+    if kwargs.get('savefig'):
+        plt.savefig(kwargs.get('savefig'), dpi=600)
+
     plt.show()
 
     return fig, ax
+
+
+if __name__ == '__main__':
+    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
+    plt.title('text', font={'weight': 'bold'})
+    plt.show()
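metal_heatmaps now defaults to 10-day major and 1-day minor x-ticks, labels the y-axis 'Trace metals', and writes the figure at 600 dpi when a savefig path is passed. A minimal call sketch with synthetic Z-scored data; the metal names and output path are illustrative, and whether the default process=True preprocessing suits already-scored data is not shown in these hunks.

```python
import numpy as np
import pandas as pd
from AeroViz.plot.templates import metal_heatmaps

# Illustrative data: hourly Z scores for a few trace metals (names assumed).
idx = pd.date_range('2024-03-01', periods=24 * 30, freq='h')
rng = np.random.default_rng(1)
metals = pd.DataFrame(rng.normal(size=(len(idx), 4)), index=idx,
                      columns=['Fe', 'Zn', 'Cu', 'Mn'])

# New in 0.1.15: the 'savefig' kwarg saves the figure before plt.show().
fig, ax = metal_heatmaps(metals, major_freq='10d', minor_freq='1d',
                         title='Trace metal Z scores', savefig='metal_heatmap.png')
```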
AeroViz/plot/timeseries/timeseries.py

@@ -2,6 +2,7 @@ from typing import Literal
 
 import matplotlib.pyplot as plt
 import numpy as np
+import pandas as pd
 from matplotlib.cm import ScalarMappable
 from matplotlib.pyplot import Figure, Axes
 from mpl_toolkits.axes_grid1 import make_axes_locatable
@@ -12,7 +13,6 @@ from AeroViz.plot.utils import *
 
 __all__ = ['timeseries', 'timeseries_stacked']
 
-
 default_bar_kws = dict(
     width=0.0417,
     edgecolor=None,
@@ -105,12 +105,20 @@ def _wind_arrow(ax, df, y, c, scatter_kws, cbar_kws, inset_kws):
     # ax.set_xlim(df.index.min() - datetime.timedelta(days=1), df.index.max())
 
 
-def process_timeseries_data(df, rolling=None, interpolate_limit=None):
-    #
-
+def process_timeseries_data(df, rolling=None, interpolate_limit=None, full_time_index=None):
+    # 1. Build the complete time index first
+    if full_time_index is None:
+        full_time_index = pd.date_range(start=df.index.min(), end=df.index.max(), freq='h')  # or another suitable frequency
+
+    # 2. Reindex: this produces missing values instead of dropping time points
+    df = df.reindex(full_time_index)
 
     # apply interpolation if specified
     df = df.interpolate(method='time', limit=interpolate_limit) if interpolate_limit is not None else df
+
+    # apply rolling window if specified
+    df = df.rolling(window=rolling, min_periods=1).mean(numeric_only=True) if rolling is not None else df
+
     return df
 
 
@@ -313,17 +321,18 @@ def timeseries(df: DataFrame,
     return fig, ax
 
 
-@set_figure(autolayout=False)
+@set_figure(figsize=(6, 3), fs=6, autolayout=False)
 def timeseries_stacked(df,
                        y: list[str] | str,
                        yi: list[str] | str,
                        label: list[str] | str,
-
-
-
-
+                       plot_type: Literal["absolute", "percentage", "both"] | str = 'both',
+                       rolling: int | str | None = 4,
+                       interpolate_limit: int | None = 4,
+                       major_freq: str = '10d',
+                       minor_freq: str = '1d',
+                       support_df: DataFrame | None = None,
                        ax: Axes | None = None,
-                       legend_ncol: int = 1,
                        **kwargs
                        ) -> tuple[Figure, Axes]:
     try:
@@ -331,17 +340,30 @@ def timeseries_stacked(df,
     except IndexError:
         raise IndexError("The DataFrame is empty. Please provide a valid DataFrame.")
 
+    if plot_type not in ['absolute', 'percentage', 'both']:
+        raise ValueError("plot_type must be one of 'absolute', 'percentage', or 'both'")
+
     # calculate the percentage of each component
+    df = df.dropna()
     df_pct = df[yi].div(df[yi].sum(axis=1), axis=0) * 100
+
     mean = [f"{_label} : {df[comp].mean():.2f}" for _label, comp in zip(label, yi)]
     pct = [f"{_label} : {df_pct[comp].mean():.2f}%" for _label, comp in zip(label, yi)]
 
+    full_time_index = pd.date_range(start=st_tm, end=fn_tm, freq='h')
+
     # process data
-    df = process_timeseries_data(df, rolling, interpolate_limit)
-    df_pct = process_timeseries_data(df_pct, rolling, interpolate_limit)
+    df = process_timeseries_data(df, rolling, interpolate_limit, full_time_index)
+    df_pct = process_timeseries_data(df_pct, rolling, interpolate_limit, full_time_index)
 
-
+    # Set figure size based on plot_type
+    figsize = (7, 6) if plot_type == 'both' else (7, 3)
+    if plot_type == 'both':
+        fig, (ax1, ax2) = plt.subplots(2, 1, **{**{'figsize': figsize, 'dpi': 600}, **kwargs.get('fig_kws', {})})
+    else:
+        fig, ax1 = plt.subplots(1, 1, **{**{'figsize': figsize, 'dpi': 600}, **kwargs.get('fig_kws', {})})
 
+    plt.subplots_adjust(right=0.95)
     width = 0.0417
     color = Color.colors1
 
@@ -349,54 +371,76 @@ def timeseries_stacked(df,
         if len(lst) != len(yi):
             raise ValueError(f"The length of {name} must match the combined length of y and y2")
 
-
-
-
-
-
-
+    def plot_stacked_bars(ax, data, labels, is_percentage=False):
+        bottom = None
+        for i, (_column, _color, _label) in enumerate(zip(yi, color, labels)):
+            if i == 0:
+                bottom = data[_column] * 0
+            ax.bar(data.index, data[_column], color=_color, width=width, bottom=bottom, label=_label)
+            bottom += data[_column]
 
-
+        # Set axis properties
+        if kwargs.get('legend', True):
+            ax.legend(loc='upper left', ncol=2, prop={'weight': 'bold'}, bbox_to_anchor=(0.75, 0, 0.2, 1))
 
-
+        ylim = (0, 100) if is_percentage else kwargs.get('ylim', (None, None))
+        ylabel = 'Percentage (%)' if is_percentage else (
+            kwargs.get('ylabel', Unit(y) if isinstance(y, str) else Unit(y[0])))
 
-
-
-
-
-            title=kwargs.get('title', ''),
-            )
+        ax.set(xlabel=kwargs.get('xlabel', ''),
+               xlim=kwargs.get('xlim', (st_tm, fn_tm)),
+               ylim=ylim,
+               title=kwargs.get('title', ''))
 
-
-    yticks = kwargs.get('yticks', np.linspace(*ax1.get_ylim(), num=6))
-    minor_xticks = kwargs.get('minor_xticks', date_range(start=st_tm, end=fn_tm, freq=minor_freq))
+        ax.set_ylabel(ylabel, fontsize=12)
 
-
-
-
+        # Set ticks
+        xticks = kwargs.get('xticks', date_range(start=st_tm, end=fn_tm, freq=major_freq))
+        yticks = kwargs.get('yticks', np.linspace(*ax.get_ylim(), num=6))
+        minor_xticks = kwargs.get('minor_xticks', date_range(start=st_tm, end=fn_tm, freq=minor_freq))
 
-
-
-
-        if i == 0:
-            bottom = df_pct[_column] * 0  # the first stack starts at zero
-        ax2.bar(df_pct.index, df_pct[_column], color=_color, width=width, bottom=bottom, label=_label)
-        bottom += df_pct[_column]  # update the bottom of the stack
+        ax.set_xticks(ticks=xticks, labels=xticks.strftime("%F"))
+        ax.set_yticks(ticks=yticks, labels=[f'{tick:.0f}' for tick in yticks])
+        ax.set_xticks(minor_xticks, minor=True)
 
-
+    # Plot based on plot_type
+    if plot_type in ['absolute', 'both']:
+        plot_stacked_bars(ax1, df, mean, is_percentage=False)
+        if plot_type == 'absolute':
+            ax1.axes.xaxis.set_visible(True)
 
-
-
-
-            ylim=(0, 100),
-            )
+        if support_df is not None:  # make sure support_df exists
+            # create a secondary y-axis
+            ax_right = ax1.twinx()
 
-
-
-
+            support_df = process_timeseries_data(support_df, rolling, interpolate_limit, full_time_index)
+
+            # draw the line on the secondary y-axis
+            ax_right.plot(support_df.index, support_df['PM2.5'],
+                          color='black', linewidth=1.5,
+                          label=f'Measured $PM_{{2.5}}$')
 
-
-
-
+            # ax_right.plot(support_df.index, support_df['PM10'],
+            #               color='gray', linewidth=1.5,
+            #               label=f'Measured $PM_{{10}}$')
 
+            # set the label and format of the secondary y-axis
+            # ax_right.set_ylabel(Unit('PM2.5'), fontsize=12)
+            ax_right.set_ylim(0, 120)
+            ax_right.axes.yaxis.set_visible(False)
+
+            # ax_right.tick_params(axis='y', colors='black')
+            # ax_right.legend(loc='upper right', prop={'size': 12})
+
+    if plot_type in ['percentage', 'both']:
+        ax_pct = ax2 if plot_type == 'both' else ax1
+        plot_stacked_bars(ax_pct, df_pct, pct, is_percentage=True)
+
+    if plot_type == 'both':
+        pass
+        # ax1.axes.xaxis.set_visible(False)
+
+    plt.savefig('/Users/chanchihyu/Desktop/times_stacked.png', transparent=True)
+
+    plt.show()
     return fig, ax1
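timeseries_stacked gains a plot_type switch ('absolute', 'percentage', or 'both'), rolling/interpolation options, and an optional support_df whose 'PM2.5' column is drawn as a line over the absolute panel; process_timeseries_data now reindexes onto a full hourly index so data gaps appear as missing bars instead of being dropped. A minimal call sketch with synthetic composition data; the import path, column names, and labels are assumptions for illustration. As released, the function unconditionally calls plt.savefig with a hard-coded path under /Users/chanchihyu/Desktop, so the call below may raise on other machines.

```python
import numpy as np
import pandas as pd
from AeroViz.plot.timeseries import timeseries_stacked  # import path assumed

# Illustrative chemical-composition data on an hourly index (names assumed).
idx = pd.date_range('2024-02-01', periods=24 * 60, freq='h')
rng = np.random.default_rng(2)
comp = pd.DataFrame(rng.uniform(1, 10, size=(len(idx), 3)), index=idx,
                    columns=['SO4', 'NO3', 'OM'])
pm25 = pd.DataFrame({'PM2.5': comp.sum(axis=1) * 1.2}, index=idx)

# 'both' stacks absolute concentrations on the top panel and percentages below;
# support_df['PM2.5'] is overlaid as a black line on the absolute panel.
fig, ax = timeseries_stacked(comp, y='PM2.5', yi=['SO4', 'NO3', 'OM'],
                             label=['Sulfate', 'Nitrate', 'Organics'],
                             plot_type='both', rolling=6, interpolate_limit=6,
                             support_df=pm25)
```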
AeroViz/rawDataReader/__init__.py

@@ -19,10 +19,10 @@ SIZE_RANGE_INSTRUMENTS = ['SMPS', 'APS', 'GRIMM']
 
 def RawDataReader(instrument: str,
                   path: Path | str,
-                  reset: bool = False,
+                  reset: bool | str = False,
                   qc: bool | str = True,
-                  start: datetime = None,
-                  end: datetime = None,
+                  start: datetime | str = None,
+                  end: datetime | str = None,
                   mean_freq: str = '1h',
                   size_range: tuple[float, float] | None = None,
                   suppress_warnings: bool = False,
@@ -94,9 +94,22 @@ def RawDataReader(instrument: str,
 
     Examples
     --------
+    >>> from AeroViz import RawDataReader
+    >>>
+    >>> # Using string inputs
+    >>> df_ae33 = RawDataReader(
+    ...     instrument='AE33',
+    ...     path='/path/to/your/data/folder',
+    ...     reset=True,
+    ...     qc='1MS',
+    ...     start='2024-01-01',
+    ...     end='2024-06-30',
+    ...     mean_freq='1h',
+    ... )
+
+    >>> # Using Path and datetime objects
     >>> from pathlib import Path
     >>> from datetime import datetime
-    >>> from AeroViz import RawDataReader
     >>>
     >>> df_ae33 = RawDataReader(
     ...     instrument='AE33',
@@ -130,9 +143,26 @@ def RawDataReader(instrument: str,
         raise ValueError(f"Invalid frequency: {qc}. Must be one of: "
                          f"W (week), MS (month start), QS (quarter start), YS (year start)")
 
-    #
+    # Convert and verify input times
     if not (start and end):
         raise ValueError("Both start and end times must be provided.")
+
+    # Convert start time if it's a string
+    if isinstance(start, str):
+        try:
+            start = datetime.fromisoformat(start.replace('Z', '+00:00'))
+        except ValueError as e:
+            raise ValueError(
+                f"Invalid start time format. Please use ISO format (YYYY-MM-DD or YYYY-MM-DD HH:MM:SS): {e}")
+
+    # Convert end time if it's a string
+    if isinstance(end, str):
+        try:
+            end = datetime.fromisoformat(end.replace('Z', '+00:00'))
+        except ValueError as e:
+            raise ValueError(
+                f"Invalid end time format. Please use ISO format (YYYY-MM-DD or YYYY-MM-DD HH:MM:SS): {e}")
+
     if end <= start:
         raise ValueError(f"Invalid time range: start {start} is after end {end}")
 
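RawDataReader's start and end arguments may now be ISO-format strings; internally they are parsed with datetime.fromisoformat after any 'Z' suffix is rewritten as '+00:00'. A standalone sketch of that conversion — a hypothetical helper mirroring the added logic, not an AeroViz API:

```python
from datetime import datetime


def parse_time(value: datetime | str) -> datetime:
    """Accept a datetime or an ISO-format string, mirroring RawDataReader's new handling."""
    if isinstance(value, str):
        # Rewrite the 'Z' UTC designator so datetime.fromisoformat accepts it on Python < 3.11.
        return datetime.fromisoformat(value.replace('Z', '+00:00'))
    return value


print(parse_time('2024-01-01'))             # 2024-01-01 00:00:00
print(parse_time('2024-06-30 12:00:00'))    # 2024-06-30 12:00:00
print(parse_time('2024-06-30T12:00:00Z'))   # 2024-06-30 12:00:00+00:00
```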