AeroViz 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AeroViz might be problematic. Click here for more details.
- AeroViz/data/240228_00.txt +101 -0
- AeroViz/dataProcess/Chemistry/_ocec.py +20 -7
- AeroViz/plot/__init__.py +2 -0
- AeroViz/plot/hysplit/__init__.py +1 -0
- AeroViz/plot/hysplit/hysplit.py +79 -0
- AeroViz/plot/meteorology/meteorology.py +2 -0
- AeroViz/plot/optical/optical.py +60 -59
- AeroViz/plot/pie.py +14 -2
- AeroViz/plot/radar.py +184 -0
- AeroViz/plot/scatter.py +16 -7
- AeroViz/plot/templates/diurnal_pattern.py +24 -7
- AeroViz/plot/templates/koschmieder.py +11 -8
- AeroViz/plot/timeseries/template.py +2 -2
- AeroViz/plot/timeseries/timeseries.py +47 -7
- AeroViz/rawDataReader/__init__.py +75 -68
- AeroViz/rawDataReader/config/supported_instruments.py +52 -19
- AeroViz/rawDataReader/core/__init__.py +194 -106
- AeroViz/rawDataReader/script/AE33.py +11 -6
- AeroViz/rawDataReader/script/AE43.py +10 -5
- AeroViz/rawDataReader/script/Aurora.py +14 -10
- AeroViz/rawDataReader/script/BC1054.py +10 -6
- AeroViz/rawDataReader/script/EPA.py +39 -0
- AeroViz/rawDataReader/script/GRIMM.py +1 -2
- AeroViz/rawDataReader/script/IGAC.py +6 -23
- AeroViz/rawDataReader/script/MA350.py +12 -5
- AeroViz/rawDataReader/script/Minion.py +107 -30
- AeroViz/rawDataReader/script/NEPH.py +15 -5
- AeroViz/rawDataReader/script/OCEC.py +39 -15
- AeroViz/rawDataReader/script/SMPS.py +1 -0
- AeroViz/rawDataReader/script/TEOM.py +15 -11
- AeroViz/rawDataReader/script/VOC.py +1 -1
- AeroViz/rawDataReader/script/XRF.py +11 -0
- AeroViz/rawDataReader/script/__init__.py +2 -2
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/METADATA +54 -30
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/RECORD +40 -51
- AeroViz/process/__init__.py +0 -31
- AeroViz/process/core/DataProc.py +0 -19
- AeroViz/process/core/SizeDist.py +0 -90
- AeroViz/process/core/__init__.py +0 -4
- AeroViz/process/method/__init__.py +0 -2
- AeroViz/process/method/prop.py +0 -62
- AeroViz/process/script/AbstractDistCalc.py +0 -143
- AeroViz/process/script/Chemical.py +0 -177
- AeroViz/process/script/IMPACT.py +0 -49
- AeroViz/process/script/IMPROVE.py +0 -161
- AeroViz/process/script/Others.py +0 -65
- AeroViz/process/script/PSD.py +0 -103
- AeroViz/process/script/PSD_dry.py +0 -93
- AeroViz/process/script/__init__.py +0 -5
- AeroViz/process/script/retrieve_RI.py +0 -69
- AeroViz/rawDataReader/script/EPA_vertical.py +0 -46
- AeroViz/rawDataReader/script/Table.py +0 -27
- /AeroViz/{process/method → plot/optical}/PyMieScatt_update.py +0 -0
- /AeroViz/{process/method → plot/optical}/mie_theory.py +0 -0
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/top_level.txt +0 -0
AeroViz/plot/scatter.py
CHANGED
|
@@ -22,9 +22,11 @@ def scatter(df: pd.DataFrame,
|
|
|
22
22
|
x: str,
|
|
23
23
|
y: str,
|
|
24
24
|
c: str | None = None,
|
|
25
|
+
color: str | None = '#7a97c9',
|
|
25
26
|
s: str | None = None,
|
|
26
27
|
cmap='jet',
|
|
27
28
|
regression=False,
|
|
29
|
+
regression_line_color: str | None = sns.xkcd_rgb["denim blue"],
|
|
28
30
|
diagonal=False,
|
|
29
31
|
ax: Axes | None = None,
|
|
30
32
|
**kwargs
|
|
@@ -41,6 +43,8 @@ def scatter(df: pd.DataFrame,
|
|
|
41
43
|
y : str
|
|
42
44
|
The column name for the y-axis values.
|
|
43
45
|
c : str, optional
|
|
46
|
+
The column name for c encoding. Default is None.
|
|
47
|
+
color : str, optional
|
|
44
48
|
The column name for color encoding. Default is None.
|
|
45
49
|
s : str, optional
|
|
46
50
|
The column name for size encoding. Default is None.
|
|
@@ -48,6 +52,8 @@ def scatter(df: pd.DataFrame,
|
|
|
48
52
|
The colormap to use for the color encoding. Default is 'jet'.
|
|
49
53
|
regression : bool, optional
|
|
50
54
|
If True, fits and plots a linear regression line. Default is False.
|
|
55
|
+
regression_line_color : str, optional
|
|
56
|
+
The color of the regression line. Default is 'sns.xkcd_rgb["denim blue"]'.
|
|
51
57
|
diagonal : bool, optional
|
|
52
58
|
If True, plots a 1:1 diagonal line. Default is False.
|
|
53
59
|
ax : Axes, optional
|
|
@@ -118,7 +124,7 @@ def scatter(df: pd.DataFrame,
|
|
|
118
124
|
x_data, y_data, s_data = df_[x].to_numpy(), df_[y].to_numpy(), df_[s].to_numpy()
|
|
119
125
|
check_empty(x_data, y_data, s_data)
|
|
120
126
|
|
|
121
|
-
scatter = ax.scatter(x_data, y_data, s=50 * (s_data / s_data.max()) ** 1.5, color=
|
|
127
|
+
scatter = ax.scatter(x_data, y_data, s=50 * (s_data / s_data.max()) ** 1.5, color=color, alpha=0.5,
|
|
122
128
|
edgecolors='white')
|
|
123
129
|
colorbar = False
|
|
124
130
|
|
|
@@ -135,7 +141,7 @@ def scatter(df: pd.DataFrame,
|
|
|
135
141
|
x_data, y_data = df_[x].to_numpy(), df_[y].to_numpy()
|
|
136
142
|
check_empty(x_data, y_data)
|
|
137
143
|
|
|
138
|
-
scatter = ax.scatter(x_data, y_data, s=30, color=
|
|
144
|
+
scatter = ax.scatter(x_data, y_data, s=30, color=color, alpha=0.5, edgecolors='white')
|
|
139
145
|
colorbar = False
|
|
140
146
|
|
|
141
147
|
ax.set(xlim=kwargs.get('xlim', (x_data.min(), x_data.max())),
|
|
@@ -144,21 +150,24 @@ def scatter(df: pd.DataFrame,
|
|
|
144
150
|
ylabel=kwargs.get('ylabel', Unit(y)),
|
|
145
151
|
title=kwargs.get('title', ''))
|
|
146
152
|
|
|
153
|
+
ax.xaxis.set_major_formatter(ScalarFormatter())
|
|
154
|
+
ax.yaxis.set_major_formatter(ScalarFormatter())
|
|
155
|
+
|
|
147
156
|
if colorbar:
|
|
148
157
|
plt.colorbar(scatter, extend='both', label=Unit(c))
|
|
149
158
|
|
|
150
159
|
if regression:
|
|
151
160
|
text, y_predict, slope = linear_regression_base(x_data, y_data)
|
|
152
|
-
ax.plot(x_data, y_predict, linewidth=3, color=
|
|
153
|
-
plt.text(0.05, 0.95, text, fontdict={'weight': 'bold'}, color=
|
|
161
|
+
ax.plot(x_data, y_predict, linewidth=3, color=regression_line_color, alpha=1, zorder=3)
|
|
162
|
+
plt.text(0.05, 0.95, text, fontdict={'weight': 'bold'}, color=regression_line_color,
|
|
154
163
|
ha='left', va='top', transform=ax.transAxes)
|
|
155
164
|
|
|
156
165
|
if diagonal:
|
|
157
166
|
ax.axline((0, 0), slope=1., color='k', lw=2, ls='--', alpha=0.5, label='1:1')
|
|
158
|
-
plt.text(0.91, 0.97, r'$\bf 1:1\ Line$', color='k', ha='right', va='top', transform=ax.transAxes)
|
|
159
167
|
|
|
160
|
-
|
|
161
|
-
|
|
168
|
+
data_range = min(ax.get_xlim()[1] - ax.get_xlim()[0], ax.get_ylim()[1] - ax.get_ylim()[0])
|
|
169
|
+
plt.text(0.9 * data_range, 0.9 * data_range, r'$\bf 1:1\ Line$', color='k', ha='left', va='bottom',
|
|
170
|
+
bbox=dict(facecolor='white', edgecolor='none', alpha=0.1, pad=3))
|
|
162
171
|
|
|
163
172
|
plt.show()
|
|
164
173
|
|
|
@@ -15,30 +15,47 @@ def diurnal_pattern(df: DataFrame,
|
|
|
15
15
|
ax: Axes | None = None,
|
|
16
16
|
**kwargs
|
|
17
17
|
) -> tuple[Figure, Axes]:
|
|
18
|
-
if 'hour'
|
|
18
|
+
if 'hour' not in df.columns and 'Hour' not in df.columns:
|
|
19
19
|
df['Hour'] = df.index.hour
|
|
20
20
|
|
|
21
21
|
Hour = range(0, 24)
|
|
22
22
|
mean = df.groupby('Hour')[y].mean()
|
|
23
23
|
std = df.groupby('Hour')[y].std() * std_area
|
|
24
24
|
|
|
25
|
-
fig, ax = plt.subplots(
|
|
25
|
+
fig, ax = plt.subplots() if ax is None else (ax.get_figure(), ax)
|
|
26
26
|
|
|
27
27
|
# Plot Diurnal pattern
|
|
28
|
-
ax.plot(Hour, mean, 'blue')
|
|
29
|
-
ax.fill_between(Hour, y1=mean + std, y2=mean - std, alpha=0.2, color='blue', edgecolor=None)
|
|
28
|
+
ax.plot(Hour, mean, 'blue', zorder=3)
|
|
29
|
+
ax.fill_between(Hour, y1=mean + std, y2=mean - std, alpha=0.2, color='blue', edgecolor=None, zorder=2)
|
|
30
|
+
|
|
31
|
+
# Plot Boxplot for each hour
|
|
32
|
+
bp = ax.boxplot([df[df['Hour'] == h][y].dropna() for h in Hour],
|
|
33
|
+
positions=Hour,
|
|
34
|
+
widths=0.5,
|
|
35
|
+
patch_artist=True,
|
|
36
|
+
showfliers=False,
|
|
37
|
+
zorder=1)
|
|
38
|
+
|
|
39
|
+
# Customize boxplot colors
|
|
40
|
+
for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']:
|
|
41
|
+
plt.setp(bp[element], color='gray')
|
|
42
|
+
|
|
43
|
+
for patch in bp['boxes']:
|
|
44
|
+
patch.set(facecolor='lightgray', alpha=0.5)
|
|
30
45
|
|
|
31
46
|
ax.set(xlabel=kwargs.get('xlabel', 'Hours'),
|
|
32
47
|
ylabel=kwargs.get('ylabel', Unit(y)),
|
|
33
|
-
xlim=kwargs.get('xlim', (0, 23)),
|
|
48
|
+
xlim=kwargs.get('xlim', (-0.5, 23.5)),
|
|
34
49
|
ylim=kwargs.get('ylim', (None, None)),
|
|
35
|
-
xticks=kwargs.get('xticks',
|
|
50
|
+
xticks=kwargs.get('xticks', range(0, 24, 4)),
|
|
51
|
+
xticklabels=kwargs.get('xticklabels', range(0, 24, 4)))
|
|
36
52
|
|
|
37
53
|
ax.tick_params(axis='both', which='major')
|
|
38
54
|
ax.tick_params(axis='x', which='minor')
|
|
39
55
|
ax.xaxis.set_minor_locator(AutoMinorLocator())
|
|
40
56
|
ax.ticklabel_format(axis='y', style='sci', scilimits=(-2, 3), useMathText=True)
|
|
41
57
|
|
|
58
|
+
plt.tight_layout()
|
|
42
59
|
plt.show()
|
|
43
60
|
|
|
44
|
-
return fig, ax
|
|
61
|
+
return fig, ax
|
|
@@ -10,7 +10,7 @@ from AeroViz.plot.utils import *
|
|
|
10
10
|
__all__ = ['koschmieder']
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
@set_figure
|
|
13
|
+
@set_figure(figsize=(2.4, 3))
|
|
14
14
|
def koschmieder(df: pd.DataFrame,
|
|
15
15
|
vis: str,
|
|
16
16
|
ext: list[str],
|
|
@@ -30,8 +30,8 @@ def koschmieder(df: pd.DataFrame,
|
|
|
30
30
|
|
|
31
31
|
fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax)
|
|
32
32
|
|
|
33
|
-
boxcolors = ['#
|
|
34
|
-
scattercolor = ['
|
|
33
|
+
boxcolors = ['#a5bf6b', '#3f83bf']
|
|
34
|
+
scattercolor = ['green', 'blue']
|
|
35
35
|
arts = []
|
|
36
36
|
labels = []
|
|
37
37
|
|
|
@@ -74,15 +74,18 @@ def koschmieder(df: pd.DataFrame,
|
|
|
74
74
|
label=f'Vis (km) = {round(coeff)} / Ext')
|
|
75
75
|
|
|
76
76
|
arts.append(line)
|
|
77
|
-
|
|
77
|
+
if 'dry' in ext_col:
|
|
78
|
+
labels.append(f'Vis (km) = {round(coeff)} / Ext (dry)')
|
|
79
|
+
else:
|
|
80
|
+
labels.append(f'Vis (km) = {round(coeff)} / Ext (amb)')
|
|
78
81
|
|
|
79
82
|
ax.legend(handles=arts, labels=labels, loc='upper right', prop=dict(weight='bold'), bbox_to_anchor=(0.99, 0.99))
|
|
80
83
|
|
|
81
|
-
ax.set(xlabel=kwargs.get('
|
|
82
|
-
ylabel=kwargs.get('
|
|
83
|
-
title=kwargs.get('
|
|
84
|
+
ax.set(xlabel=kwargs.get('xlabel', 'Visibility (km)'),
|
|
85
|
+
ylabel=kwargs.get('ylabel', 'Extinction (1/Mm)'),
|
|
86
|
+
title=kwargs.get('title', 'Koschmieder relationship'),
|
|
84
87
|
xlim=kwargs.get('xlim', (0, 30)),
|
|
85
|
-
ylim=kwargs.get('ylim', (0,
|
|
88
|
+
ylim=kwargs.get('ylim', (0, 800))
|
|
86
89
|
)
|
|
87
90
|
|
|
88
91
|
plt.xticks(ticks=np.array(range(0, 31, 5)), labels=np.array(range(0, 31, 5)))
|
|
@@ -2,7 +2,7 @@ import matplotlib.pyplot as plt
|
|
|
2
2
|
from matplotlib.pyplot import Figure, Axes
|
|
3
3
|
from pandas import DataFrame
|
|
4
4
|
|
|
5
|
-
from AeroViz.plot.timeseries import timeseries
|
|
5
|
+
from AeroViz.plot.timeseries.timeseries import timeseries
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def timeseries_template(df: DataFrame) -> tuple[Figure, Axes]:
|
|
@@ -40,7 +40,7 @@ def timeseries_template(df: DataFrame) -> tuple[Figure, Axes]:
|
|
|
40
40
|
timeseries(df, y='VC', color='PBLH', style='bar', ax=ax4, bar_kws=dict(cmap='Blues'), set_xaxis_visible=False,
|
|
41
41
|
ylim=[0, 5000])
|
|
42
42
|
|
|
43
|
-
timeseries(df, y='
|
|
43
|
+
timeseries(df, y='PM2.5', color='PM1/PM25', style='scatter', ax=ax5, ylim=[0, None])
|
|
44
44
|
|
|
45
45
|
plt.show()
|
|
46
46
|
|
|
@@ -4,8 +4,9 @@ import matplotlib.pyplot as plt
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
from matplotlib.cm import ScalarMappable
|
|
6
6
|
from matplotlib.pyplot import Figure, Axes
|
|
7
|
+
from mpl_toolkits.axes_grid1 import make_axes_locatable
|
|
7
8
|
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
|
|
8
|
-
from pandas import DataFrame, date_range
|
|
9
|
+
from pandas import DataFrame, date_range, Timedelta
|
|
9
10
|
|
|
10
11
|
from AeroViz.plot.utils import *
|
|
11
12
|
|
|
@@ -70,6 +71,40 @@ def _plot(ax, df, _y, _color, plot_kws):
|
|
|
70
71
|
ax.plot(df.index, df[_y], color=_color, **plot_kws)
|
|
71
72
|
|
|
72
73
|
|
|
74
|
+
def _wind_arrow(ax, df, y, c, scatter_kws, cbar_kws, inset_kws):
|
|
75
|
+
"""
|
|
76
|
+
Plot wind arrows on a scatter plot.
|
|
77
|
+
|
|
78
|
+
:param ax: matplotlib axes
|
|
79
|
+
:param df: pandas DataFrame
|
|
80
|
+
:param y: column name for wind speed
|
|
81
|
+
:param c: column name for wind direction
|
|
82
|
+
:param scatter_kws: keyword arguments for scatter plot
|
|
83
|
+
:param cbar_kws: keyword arguments for colorbar
|
|
84
|
+
:param inset_kws: keyword arguments for inset axes
|
|
85
|
+
"""
|
|
86
|
+
# First, create a scatter plot
|
|
87
|
+
sc = ax.scatter(df.index, df[y], c=df[c], **scatter_kws)
|
|
88
|
+
|
|
89
|
+
# Add colorbar
|
|
90
|
+
divider = make_axes_locatable(ax)
|
|
91
|
+
cax = divider.append_axes("right", size="2%", pad=0.05)
|
|
92
|
+
plt.colorbar(sc, cax=cax, **cbar_kws)
|
|
93
|
+
|
|
94
|
+
# Add wind arrows
|
|
95
|
+
for idx, row in df.iterrows():
|
|
96
|
+
wind_speed = row[y]
|
|
97
|
+
wind_dir = np.radians(row[c])
|
|
98
|
+
dx = np.sin(wind_dir) * wind_speed / 20 # Scale factor can be adjusted
|
|
99
|
+
dy = np.cos(wind_dir) * wind_speed / 20
|
|
100
|
+
ax.annotate('', xy=(idx + 10 * dx * Timedelta(hours=5), wind_speed + 4 * dy),
|
|
101
|
+
xytext=(idx - 10 * dx * Timedelta(hours=5), wind_speed - 4 * dy),
|
|
102
|
+
arrowprops=dict(arrowstyle='->', color='k', linewidth=0.5))
|
|
103
|
+
|
|
104
|
+
# Set the x-axis limit to show all data points
|
|
105
|
+
# ax.set_xlim(df.index.min() - datetime.timedelta(days=1), df.index.max())
|
|
106
|
+
|
|
107
|
+
|
|
73
108
|
def process_timeseries_data(df, rolling=None, interpolate_limit=None):
|
|
74
109
|
# apply rolling window if specified
|
|
75
110
|
df = df.rolling(window=rolling, min_periods=1).mean(numeric_only=True) if rolling is not None else df
|
|
@@ -90,7 +125,7 @@ def timeseries(df: DataFrame,
|
|
|
90
125
|
interpolate_limit: int | None = 6,
|
|
91
126
|
major_freq: str = '1MS',
|
|
92
127
|
minor_freq: str = '10d',
|
|
93
|
-
style: list[Literal['scatter', 'bar', 'line']] | str | None = None,
|
|
128
|
+
style: list[Literal['scatter', 'bar', 'line', 'arrow']] | str | None = None,
|
|
94
129
|
ax: Axes | None = None,
|
|
95
130
|
set_xaxis_visible: bool | None = None,
|
|
96
131
|
legend_loc: Literal['best', 'upper right', 'upper left', 'lower left', 'lower right'] = 'best',
|
|
@@ -199,16 +234,16 @@ def timeseries(df: DataFrame,
|
|
|
199
234
|
if y2 and ('scatter' or 'bar') in style:
|
|
200
235
|
fig.subplots_adjust(right=0.8)
|
|
201
236
|
|
|
202
|
-
for i, _c in enumerate(color):
|
|
203
|
-
|
|
204
|
-
|
|
237
|
+
# for i, _c in enumerate(color):
|
|
238
|
+
# if _c is not None and _c in df.columns:
|
|
239
|
+
# style[i] = 'scatter'
|
|
205
240
|
|
|
206
241
|
for i, (_y, _c, _label, _style) in enumerate(zip(y, color, label, style)):
|
|
207
242
|
scatter_kws = {**default_scatter_kws, **{'label': Unit(_y)}, **kwargs.get('scatter_kws', {})}
|
|
208
243
|
bar_kws = {**default_bar_kws, **{'label': Unit(_y)}, **kwargs.get('bar_kws', {})}
|
|
209
244
|
plot_kws = {**default_plot_kws, **{'label': Unit(_y)}, **kwargs.get('plot_kws', {})}
|
|
210
245
|
|
|
211
|
-
if _style in ['scatter', 'bar']:
|
|
246
|
+
if _style in ['scatter', 'bar', 'arrow']:
|
|
212
247
|
cbar_kws = {**default_cbar_kws, **{'label': Unit(_c), 'ticks': None}, **kwargs.get('cbar_kws', {})}
|
|
213
248
|
inset_kws = {**default_insert_kws, **{'bbox_transform': ax.transAxes}, **kwargs.get('inset_kws', {})}
|
|
214
249
|
|
|
@@ -218,6 +253,9 @@ def timeseries(df: DataFrame,
|
|
|
218
253
|
elif _style == 'bar':
|
|
219
254
|
_bar(ax, df, _y, _c, bar_kws, cbar_kws, inset_kws)
|
|
220
255
|
|
|
256
|
+
elif _style == 'arrow':
|
|
257
|
+
_wind_arrow(ax, df, _y, _c, scatter_kws, cbar_kws, inset_kws)
|
|
258
|
+
|
|
221
259
|
else:
|
|
222
260
|
_plot(ax, df, _y, _c, plot_kws)
|
|
223
261
|
|
|
@@ -237,6 +275,9 @@ def timeseries(df: DataFrame,
|
|
|
237
275
|
elif _style == 'bar':
|
|
238
276
|
_bar(ax2, df, _y, _c, bar_kws, cbar_kws, inset_kws)
|
|
239
277
|
|
|
278
|
+
elif _style == 'arrow':
|
|
279
|
+
pass
|
|
280
|
+
|
|
240
281
|
else: # line plot
|
|
241
282
|
_plot(ax2, df, _y, _c, plot_kws)
|
|
242
283
|
|
|
@@ -348,7 +389,6 @@ def timeseries_stacked(df,
|
|
|
348
389
|
ylabel=kwargs.get('ylabel', 'Percentage (%)'),
|
|
349
390
|
xlim=kwargs.get('xlim', (st_tm, fn_tm)),
|
|
350
391
|
ylim=(0, 100),
|
|
351
|
-
title=kwargs.get('title', '')
|
|
352
392
|
)
|
|
353
393
|
|
|
354
394
|
xticks = kwargs.get('xticks', date_range(start=st_tm, end=fn_tm, freq=major_freq))
|
|
@@ -1,104 +1,111 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
+
from pandas import Grouper, Timedelta
|
|
5
|
+
|
|
4
6
|
from AeroViz.rawDataReader.config.supported_instruments import meta
|
|
5
7
|
from AeroViz.rawDataReader.script import *
|
|
6
8
|
|
|
7
9
|
__all__ = ['RawDataReader']
|
|
8
10
|
|
|
11
|
+
SUPPORTED_INSTRUMENTS = [
|
|
12
|
+
NEPH, Aurora, SMPS, GRIMM, APS_3321, AE33, AE43, BC1054,
|
|
13
|
+
MA350, TEOM, OCEC, IGAC, VOC, EPA, Minion
|
|
14
|
+
]
|
|
15
|
+
|
|
9
16
|
|
|
10
17
|
def RawDataReader(instrument_name: str,
|
|
11
|
-
path: Path,
|
|
12
|
-
qc: bool = True,
|
|
13
|
-
csv_raw: bool = True,
|
|
18
|
+
path: Path | str,
|
|
14
19
|
reset: bool = False,
|
|
20
|
+
qc: bool | str = True,
|
|
21
|
+
qc_freq: str | None = None,
|
|
15
22
|
rate: bool = True,
|
|
16
23
|
append_data: bool = False,
|
|
17
|
-
start: datetime
|
|
18
|
-
end: datetime
|
|
19
|
-
mean_freq='1h',
|
|
20
|
-
csv_out=True,
|
|
24
|
+
start: datetime = None,
|
|
25
|
+
end: datetime = None,
|
|
26
|
+
mean_freq: str = '1h',
|
|
27
|
+
csv_out: bool = True,
|
|
21
28
|
):
|
|
22
29
|
"""
|
|
23
30
|
Factory function to instantiate the appropriate reader module for a given instrument and
|
|
24
31
|
return the processed data over the specified time range.
|
|
25
32
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
csv_out : bool, optional (default=True)
|
|
49
|
-
If True, output the processed data as a CSV file.
|
|
50
|
-
|
|
51
|
-
Return
|
|
52
|
-
------
|
|
53
|
-
reader_module : Reader
|
|
54
|
-
An instance of the reader module corresponding to the specified instrument, which processes
|
|
55
|
-
the data and returns it in a usable format.
|
|
56
|
-
|
|
57
|
-
Raises
|
|
58
|
-
------
|
|
59
|
-
ValueError
|
|
60
|
-
If the `instrument_name` provided is not a valid key in the `meta` dictionary.
|
|
61
|
-
|
|
62
|
-
Example
|
|
63
|
-
-------
|
|
33
|
+
:param instrument_name: The name of the instrument for which to read data. Must be a valid key in the `meta` dictionary.
|
|
34
|
+
:param path: The directory where raw data files for the instrument are stored.
|
|
35
|
+
:param reset: If True, reset the state and reprocess the data from scratch.
|
|
36
|
+
:param qc: If True, apply quality control (QC) to the raw data.
|
|
37
|
+
:param qc_freq: Frequency at which to perform QC. Must be one of 'W', 'M', 'Q', 'Y' for weekly, monthly, quarterly, or yearly.
|
|
38
|
+
:param rate: If True, calculate rates from the data.
|
|
39
|
+
:param append_data: If True, append new data to the existing dataset instead of overwriting it.
|
|
40
|
+
:param start: Start time for filtering the data. If None, no start time filtering will be applied.
|
|
41
|
+
:param end: End time for filtering the data. If None, no end time filtering will be applied.
|
|
42
|
+
:param mean_freq: Resampling frequency for averaging the data. Example: '1h' for hourly mean.
|
|
43
|
+
:param csv_out: If True, output the processed data as a CSV file.
|
|
44
|
+
|
|
45
|
+
:return: An instance of the reader module corresponding to the specified instrument, which processes the data and returns it in a usable format.
|
|
46
|
+
|
|
47
|
+
:raises ValueError: If the `instrument_name` provided is not a valid key in the `meta` dictionary.
|
|
48
|
+
:raises ValueError: If the specified path does not exist or is not a directory.
|
|
49
|
+
:raises ValueError: If the QC frequency is invalid.
|
|
50
|
+
:raises ValueError: If start and end times are not both provided or are invalid.
|
|
51
|
+
:raises ValueError: If the mean_freq is not a valid frequency string.
|
|
52
|
+
|
|
53
|
+
:Example:
|
|
54
|
+
|
|
64
55
|
To read and process data for the BC1054 instrument:
|
|
65
56
|
|
|
66
57
|
>>> from pathlib import Path
|
|
67
58
|
>>> from datetime import datetime
|
|
68
|
-
>>>
|
|
69
|
-
>>>
|
|
59
|
+
>>>
|
|
60
|
+
>>> data = RawDataReader(
|
|
61
|
+
... instrument_name='BC1054',
|
|
62
|
+
... path=Path('/path/to/data'),
|
|
63
|
+
... start=datetime(2024, 2, 1),
|
|
64
|
+
... end=datetime(2024, 7, 31, 23))
|
|
70
65
|
"""
|
|
71
66
|
# Mapping of instrument names to their respective classes
|
|
72
|
-
instrument_class_map = {
|
|
73
|
-
'NEPH': NEPH,
|
|
74
|
-
'Aurora': Aurora,
|
|
75
|
-
'SMPS': SMPS,
|
|
76
|
-
'GRIMM': GRIMM,
|
|
77
|
-
'APS_3321': APS_3321,
|
|
78
|
-
'AE33': AE33,
|
|
79
|
-
'AE43': AE43,
|
|
80
|
-
'BC1054': BC1054,
|
|
81
|
-
'MA350': MA350,
|
|
82
|
-
'TEOM': TEOM,
|
|
83
|
-
'OCEC': OCEC,
|
|
84
|
-
'IGAC': IGAC,
|
|
85
|
-
'VOC': VOC,
|
|
86
|
-
'Table': Table,
|
|
87
|
-
'EPA_vertical': EPA_vertical,
|
|
88
|
-
'Minion': Minion
|
|
89
|
-
# Add other instruments and their corresponding classes here
|
|
90
|
-
}
|
|
67
|
+
instrument_class_map = {cls.__name__.split('.')[-1]: cls for cls in SUPPORTED_INSTRUMENTS}
|
|
91
68
|
|
|
92
69
|
# Check if the instrument name is in the map
|
|
93
70
|
if instrument_name not in meta.keys():
|
|
94
71
|
raise ValueError(f"Instrument name '{instrument_name}' is not valid. \nMust be one of: {list(meta.keys())}")
|
|
95
72
|
|
|
73
|
+
# 檢查 path 是否存在且是一個目錄
|
|
74
|
+
if not isinstance(path, Path):
|
|
75
|
+
path = Path(path)
|
|
76
|
+
if not path.exists() or not path.is_dir():
|
|
77
|
+
raise ValueError(f"The specified path '{path}' does not exist or is not a directory.")
|
|
78
|
+
|
|
79
|
+
# Validate the QC frequency
|
|
80
|
+
if qc_freq is not None:
|
|
81
|
+
try:
|
|
82
|
+
Grouper(freq=qc_freq)
|
|
83
|
+
except ValueError as e:
|
|
84
|
+
raise ValueError(f"Invalid frequency: {qc_freq}. Error: {str(e)}")
|
|
85
|
+
except TypeError as e:
|
|
86
|
+
raise ValueError(f"Invalid frequency type: {qc_freq}. Frequency should be a string.")
|
|
87
|
+
|
|
88
|
+
if start and end:
|
|
89
|
+
if end.hour == 0 and end.minute == 0 and end.second == 0:
|
|
90
|
+
end = end.replace(hour=23, minute=59, second=59)
|
|
91
|
+
else:
|
|
92
|
+
raise ValueError("Both start and end times must be provided.")
|
|
93
|
+
if end <= start:
|
|
94
|
+
raise ValueError(f"Invalid time range: start {start} is after end {end}")
|
|
95
|
+
|
|
96
|
+
# 驗證 mean_freq 的格式是否正確
|
|
97
|
+
try:
|
|
98
|
+
Timedelta(mean_freq)
|
|
99
|
+
except ValueError:
|
|
100
|
+
raise ValueError(
|
|
101
|
+
f"Invalid mean_freq: '{mean_freq}'. It should be a valid frequency string (e.g., '1h', '30min', '1D').")
|
|
102
|
+
|
|
96
103
|
# Instantiate the class and return the instance
|
|
97
104
|
reader_module = instrument_class_map[instrument_name].Reader(
|
|
98
105
|
path=path,
|
|
99
|
-
qc=qc,
|
|
100
|
-
csv_raw=csv_raw,
|
|
101
106
|
reset=reset,
|
|
107
|
+
qc=qc,
|
|
108
|
+
qc_freq=qc_freq,
|
|
102
109
|
rate=rate,
|
|
103
110
|
append_data=append_data
|
|
104
111
|
)
|
|
@@ -95,6 +95,53 @@ meta = {
|
|
|
95
95
|
},
|
|
96
96
|
},
|
|
97
97
|
|
|
98
|
+
"XRF": {
|
|
99
|
+
"pattern": ["*.csv"],
|
|
100
|
+
"freq": "1h",
|
|
101
|
+
"deter_key": {
|
|
102
|
+
"Al": ["Al"],
|
|
103
|
+
"Si": ["Si"],
|
|
104
|
+
"P": ["P"],
|
|
105
|
+
"S": ["S"],
|
|
106
|
+
"Cl": ["Cl"],
|
|
107
|
+
"K": ["K"],
|
|
108
|
+
"Ca": ["Ca"],
|
|
109
|
+
"Ti": ["Ti"],
|
|
110
|
+
"V": ["V"],
|
|
111
|
+
"Cr": ["Cr"],
|
|
112
|
+
"Mn": ["Mn"],
|
|
113
|
+
"Fe": ["Fe"],
|
|
114
|
+
"Ni": ["Ni"],
|
|
115
|
+
"Cu": ["Cu"],
|
|
116
|
+
"Zn": ["Zn"],
|
|
117
|
+
"As": ["As"],
|
|
118
|
+
"Se": ["Se"],
|
|
119
|
+
"Br": ["Br"],
|
|
120
|
+
"Rb": ["Rb"],
|
|
121
|
+
"Sr": ["Sr"],
|
|
122
|
+
"Y": ["Y"],
|
|
123
|
+
"Zr": ["Zr"],
|
|
124
|
+
"Mo": ["Mo"],
|
|
125
|
+
"Ag": ["Ag"],
|
|
126
|
+
"Cd": ["Cd"],
|
|
127
|
+
"In": ["In"],
|
|
128
|
+
"Sn": ["Sn"],
|
|
129
|
+
"Sb": ["Sb"],
|
|
130
|
+
"Te": ["Te"],
|
|
131
|
+
"Cs": ["Cs"],
|
|
132
|
+
"Ba": ["Ba"],
|
|
133
|
+
"La": ["La"],
|
|
134
|
+
"Ce": ["Ce"],
|
|
135
|
+
"W": ["W"],
|
|
136
|
+
"Pt": ["Pt"],
|
|
137
|
+
"Au": ["Au"],
|
|
138
|
+
"Hg": ["Hg"],
|
|
139
|
+
"Tl": ["Tl"],
|
|
140
|
+
"Pb": ["Pb"],
|
|
141
|
+
"Bi": ["Bi"],
|
|
142
|
+
},
|
|
143
|
+
},
|
|
144
|
+
|
|
98
145
|
"VOC": {
|
|
99
146
|
"pattern": ["*.csv"],
|
|
100
147
|
"freq": "1h",
|
|
@@ -116,32 +163,18 @@ meta = {
|
|
|
116
163
|
"deter_key": None,
|
|
117
164
|
},
|
|
118
165
|
|
|
119
|
-
"
|
|
166
|
+
"EPA": {
|
|
120
167
|
"pattern": ["*.csv"],
|
|
121
168
|
"freq": "1h",
|
|
122
|
-
"deter_key":
|
|
123
|
-
},
|
|
124
|
-
|
|
125
|
-
"EPA_vertical": {
|
|
126
|
-
"pattern": ["*.csv"],
|
|
127
|
-
"freq": "1h",
|
|
128
|
-
"deter_key": None,
|
|
169
|
+
"deter_key": {"Items": ["all"]},
|
|
129
170
|
},
|
|
130
171
|
|
|
131
172
|
"Minion": {
|
|
132
|
-
"pattern": ["*.csv"],
|
|
173
|
+
"pattern": ["*.csv", "*.xlsx"],
|
|
133
174
|
"freq": "1h",
|
|
134
175
|
"deter_key": {
|
|
135
|
-
"Na
|
|
136
|
-
"
|
|
137
|
-
"K+": ["K+"],
|
|
138
|
-
"Mg2+": ["Mg2+"],
|
|
139
|
-
"Ca2+": ["Ca2+"],
|
|
140
|
-
"Cl-": ["Cl-"],
|
|
141
|
-
"NO2-": ["NO2-"],
|
|
142
|
-
"NO3-": ["NO3-"],
|
|
143
|
-
"SO42-": ["SO42-"],
|
|
144
|
-
"Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"],
|
|
176
|
+
"Main Salt (Na+, NH4+, Cl-, NO3-, SO42-)": ["Na+", "NH4+", "Cl-", "NO3-", "SO42-"],
|
|
177
|
+
"XRF (Al, Ti, V, Cr, Mn, Fe)": ["Al", "Ti", "V", "Cr", "Mn", "Fe"],
|
|
145
178
|
},
|
|
146
179
|
},
|
|
147
180
|
}
|