AeroViz 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AeroViz might be problematic. Click here for more details.

Files changed (57)
  1. AeroViz/data/240228_00.txt +101 -0
  2. AeroViz/dataProcess/Chemistry/_ocec.py +20 -7
  3. AeroViz/plot/__init__.py +2 -0
  4. AeroViz/plot/hysplit/__init__.py +1 -0
  5. AeroViz/plot/hysplit/hysplit.py +79 -0
  6. AeroViz/plot/meteorology/meteorology.py +2 -0
  7. AeroViz/plot/optical/optical.py +60 -59
  8. AeroViz/plot/pie.py +14 -2
  9. AeroViz/plot/radar.py +184 -0
  10. AeroViz/plot/scatter.py +16 -7
  11. AeroViz/plot/templates/diurnal_pattern.py +24 -7
  12. AeroViz/plot/templates/koschmieder.py +11 -8
  13. AeroViz/plot/timeseries/template.py +2 -2
  14. AeroViz/plot/timeseries/timeseries.py +47 -7
  15. AeroViz/rawDataReader/__init__.py +75 -68
  16. AeroViz/rawDataReader/config/supported_instruments.py +52 -19
  17. AeroViz/rawDataReader/core/__init__.py +194 -106
  18. AeroViz/rawDataReader/script/AE33.py +11 -6
  19. AeroViz/rawDataReader/script/AE43.py +10 -5
  20. AeroViz/rawDataReader/script/Aurora.py +14 -10
  21. AeroViz/rawDataReader/script/BC1054.py +10 -6
  22. AeroViz/rawDataReader/script/EPA.py +39 -0
  23. AeroViz/rawDataReader/script/GRIMM.py +1 -2
  24. AeroViz/rawDataReader/script/IGAC.py +6 -23
  25. AeroViz/rawDataReader/script/MA350.py +12 -5
  26. AeroViz/rawDataReader/script/Minion.py +107 -30
  27. AeroViz/rawDataReader/script/NEPH.py +15 -5
  28. AeroViz/rawDataReader/script/OCEC.py +39 -15
  29. AeroViz/rawDataReader/script/SMPS.py +1 -0
  30. AeroViz/rawDataReader/script/TEOM.py +15 -11
  31. AeroViz/rawDataReader/script/VOC.py +1 -1
  32. AeroViz/rawDataReader/script/XRF.py +11 -0
  33. AeroViz/rawDataReader/script/__init__.py +2 -2
  34. {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/METADATA +54 -30
  35. {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/RECORD +40 -51
  36. AeroViz/process/__init__.py +0 -31
  37. AeroViz/process/core/DataProc.py +0 -19
  38. AeroViz/process/core/SizeDist.py +0 -90
  39. AeroViz/process/core/__init__.py +0 -4
  40. AeroViz/process/method/__init__.py +0 -2
  41. AeroViz/process/method/prop.py +0 -62
  42. AeroViz/process/script/AbstractDistCalc.py +0 -143
  43. AeroViz/process/script/Chemical.py +0 -177
  44. AeroViz/process/script/IMPACT.py +0 -49
  45. AeroViz/process/script/IMPROVE.py +0 -161
  46. AeroViz/process/script/Others.py +0 -65
  47. AeroViz/process/script/PSD.py +0 -103
  48. AeroViz/process/script/PSD_dry.py +0 -93
  49. AeroViz/process/script/__init__.py +0 -5
  50. AeroViz/process/script/retrieve_RI.py +0 -69
  51. AeroViz/rawDataReader/script/EPA_vertical.py +0 -46
  52. AeroViz/rawDataReader/script/Table.py +0 -27
  53. /AeroViz/{process/method → plot/optical}/PyMieScatt_update.py +0 -0
  54. /AeroViz/{process/method → plot/optical}/mie_theory.py +0 -0
  55. {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/LICENSE +0 -0
  56. {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/WHEEL +0 -0
  57. {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/top_level.txt +0 -0
AeroViz/plot/scatter.py CHANGED
@@ -22,9 +22,11 @@ def scatter(df: pd.DataFrame,
22
22
  x: str,
23
23
  y: str,
24
24
  c: str | None = None,
25
+ color: str | None = '#7a97c9',
25
26
  s: str | None = None,
26
27
  cmap='jet',
27
28
  regression=False,
29
+ regression_line_color: str | None = sns.xkcd_rgb["denim blue"],
28
30
  diagonal=False,
29
31
  ax: Axes | None = None,
30
32
  **kwargs
@@ -41,6 +43,8 @@ def scatter(df: pd.DataFrame,
41
43
  y : str
42
44
  The column name for the y-axis values.
43
45
  c : str, optional
46
+ The column name for c encoding. Default is None.
47
+ color : str, optional
44
48
  The column name for color encoding. Default is None.
45
49
  s : str, optional
46
50
  The column name for size encoding. Default is None.
@@ -48,6 +52,8 @@ def scatter(df: pd.DataFrame,
48
52
  The colormap to use for the color encoding. Default is 'jet'.
49
53
  regression : bool, optional
50
54
  If True, fits and plots a linear regression line. Default is False.
55
+ regression_line_color : str, optional
56
+ The color of the regression line. Default is 'sns.xkcd_rgb["denim blue"]'.
51
57
  diagonal : bool, optional
52
58
  If True, plots a 1:1 diagonal line. Default is False.
53
59
  ax : Axes, optional
@@ -118,7 +124,7 @@ def scatter(df: pd.DataFrame,
118
124
  x_data, y_data, s_data = df_[x].to_numpy(), df_[y].to_numpy(), df_[s].to_numpy()
119
125
  check_empty(x_data, y_data, s_data)
120
126
 
121
- scatter = ax.scatter(x_data, y_data, s=50 * (s_data / s_data.max()) ** 1.5, color='#7a97c9', alpha=0.7,
127
+ scatter = ax.scatter(x_data, y_data, s=50 * (s_data / s_data.max()) ** 1.5, color=color, alpha=0.5,
122
128
  edgecolors='white')
123
129
  colorbar = False
124
130
 
@@ -135,7 +141,7 @@ def scatter(df: pd.DataFrame,
135
141
  x_data, y_data = df_[x].to_numpy(), df_[y].to_numpy()
136
142
  check_empty(x_data, y_data)
137
143
 
138
- scatter = ax.scatter(x_data, y_data, s=30, color='#7a97c9', alpha=0.7, edgecolors='white')
144
+ scatter = ax.scatter(x_data, y_data, s=30, color=color, alpha=0.5, edgecolors='white')
139
145
  colorbar = False
140
146
 
141
147
  ax.set(xlim=kwargs.get('xlim', (x_data.min(), x_data.max())),
@@ -144,21 +150,24 @@ def scatter(df: pd.DataFrame,
144
150
  ylabel=kwargs.get('ylabel', Unit(y)),
145
151
  title=kwargs.get('title', ''))
146
152
 
153
+ ax.xaxis.set_major_formatter(ScalarFormatter())
154
+ ax.yaxis.set_major_formatter(ScalarFormatter())
155
+
147
156
  if colorbar:
148
157
  plt.colorbar(scatter, extend='both', label=Unit(c))
149
158
 
150
159
  if regression:
151
160
  text, y_predict, slope = linear_regression_base(x_data, y_data)
152
- ax.plot(x_data, y_predict, linewidth=3, color=sns.xkcd_rgb["denim blue"], alpha=1, zorder=3)
153
- plt.text(0.05, 0.95, text, fontdict={'weight': 'bold'}, color=sns.xkcd_rgb["denim blue"],
161
+ ax.plot(x_data, y_predict, linewidth=3, color=regression_line_color, alpha=1, zorder=3)
162
+ plt.text(0.05, 0.95, text, fontdict={'weight': 'bold'}, color=regression_line_color,
154
163
  ha='left', va='top', transform=ax.transAxes)
155
164
 
156
165
  if diagonal:
157
166
  ax.axline((0, 0), slope=1., color='k', lw=2, ls='--', alpha=0.5, label='1:1')
158
- plt.text(0.91, 0.97, r'$\bf 1:1\ Line$', color='k', ha='right', va='top', transform=ax.transAxes)
159
167
 
160
- ax.xaxis.set_major_formatter(ScalarFormatter())
161
- ax.yaxis.set_major_formatter(ScalarFormatter())
168
+ data_range = min(ax.get_xlim()[1] - ax.get_xlim()[0], ax.get_ylim()[1] - ax.get_ylim()[0])
169
+ plt.text(0.9 * data_range, 0.9 * data_range, r'$\bf 1:1\ Line$', color='k', ha='left', va='bottom',
170
+ bbox=dict(facecolor='white', edgecolor='none', alpha=0.1, pad=3))
162
171
 
163
172
  plt.show()
164
173
 
AeroViz/plot/templates/diurnal_pattern.py CHANGED
@@ -15,30 +15,47 @@ def diurnal_pattern(df: DataFrame,
15
15
  ax: Axes | None = None,
16
16
  **kwargs
17
17
  ) -> tuple[Figure, Axes]:
18
- if 'hour' or 'Hour' not in df.columns:
18
+ if 'hour' not in df.columns and 'Hour' not in df.columns:
19
19
  df['Hour'] = df.index.hour
20
20
 
21
21
  Hour = range(0, 24)
22
22
  mean = df.groupby('Hour')[y].mean()
23
23
  std = df.groupby('Hour')[y].std() * std_area
24
24
 
25
- fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax)
25
+ fig, ax = plt.subplots() if ax is None else (ax.get_figure(), ax)
26
26
 
27
27
  # Plot Diurnal pattern
28
- ax.plot(Hour, mean, 'blue')
29
- ax.fill_between(Hour, y1=mean + std, y2=mean - std, alpha=0.2, color='blue', edgecolor=None)
28
+ ax.plot(Hour, mean, 'blue', zorder=3)
29
+ ax.fill_between(Hour, y1=mean + std, y2=mean - std, alpha=0.2, color='blue', edgecolor=None, zorder=2)
30
+
31
+ # Plot Boxplot for each hour
32
+ bp = ax.boxplot([df[df['Hour'] == h][y].dropna() for h in Hour],
33
+ positions=Hour,
34
+ widths=0.5,
35
+ patch_artist=True,
36
+ showfliers=False,
37
+ zorder=1)
38
+
39
+ # Customize boxplot colors
40
+ for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']:
41
+ plt.setp(bp[element], color='gray')
42
+
43
+ for patch in bp['boxes']:
44
+ patch.set(facecolor='lightgray', alpha=0.5)
30
45
 
31
46
  ax.set(xlabel=kwargs.get('xlabel', 'Hours'),
32
47
  ylabel=kwargs.get('ylabel', Unit(y)),
33
- xlim=kwargs.get('xlim', (0, 23)),
48
+ xlim=kwargs.get('xlim', (-0.5, 23.5)),
34
49
  ylim=kwargs.get('ylim', (None, None)),
35
- xticks=kwargs.get('xticks', [0, 4, 8, 12, 16, 20]))
50
+ xticks=kwargs.get('xticks', range(0, 24, 4)),
51
+ xticklabels=kwargs.get('xticklabels', range(0, 24, 4)))
36
52
 
37
53
  ax.tick_params(axis='both', which='major')
38
54
  ax.tick_params(axis='x', which='minor')
39
55
  ax.xaxis.set_minor_locator(AutoMinorLocator())
40
56
  ax.ticklabel_format(axis='y', style='sci', scilimits=(-2, 3), useMathText=True)
41
57
 
58
+ plt.tight_layout()
42
59
  plt.show()
43
60
 
44
- return fig, ax
61
+ return fig, ax
AeroViz/plot/templates/koschmieder.py CHANGED
@@ -10,7 +10,7 @@ from AeroViz.plot.utils import *
10
10
  __all__ = ['koschmieder']
11
11
 
12
12
 
13
- @set_figure
13
+ @set_figure(figsize=(2.4, 3))
14
14
  def koschmieder(df: pd.DataFrame,
15
15
  vis: str,
16
16
  ext: list[str],
@@ -30,8 +30,8 @@ def koschmieder(df: pd.DataFrame,
30
30
 
31
31
  fig, ax = plt.subplots(**kwargs.get('fig_kws', {})) if ax is None else (ax.get_figure(), ax)
32
32
 
33
- boxcolors = ['#3f83bf', '#a5bf6b']
34
- scattercolor = ['blue', 'green']
33
+ boxcolors = ['#a5bf6b', '#3f83bf']
34
+ scattercolor = ['green', 'blue']
35
35
  arts = []
36
36
  labels = []
37
37
 
@@ -74,15 +74,18 @@ def koschmieder(df: pd.DataFrame,
74
74
  label=f'Vis (km) = {round(coeff)} / Ext')
75
75
 
76
76
  arts.append(line)
77
- labels.append(f'Vis (km) = {round(coeff)} / Ext')
77
+ if 'dry' in ext_col:
78
+ labels.append(f'Vis (km) = {round(coeff)} / Ext (dry)')
79
+ else:
80
+ labels.append(f'Vis (km) = {round(coeff)} / Ext (amb)')
78
81
 
79
82
  ax.legend(handles=arts, labels=labels, loc='upper right', prop=dict(weight='bold'), bbox_to_anchor=(0.99, 0.99))
80
83
 
81
- ax.set(xlabel=kwargs.get('xlim', 'Visibility (km)'),
82
- ylabel=kwargs.get('xlim', 'Extinction (1/Mm)'),
83
- title=kwargs.get('ylim', 'Koschmieder relationship'),
84
+ ax.set(xlabel=kwargs.get('xlabel', 'Visibility (km)'),
85
+ ylabel=kwargs.get('ylabel', 'Extinction (1/Mm)'),
86
+ title=kwargs.get('title', 'Koschmieder relationship'),
84
87
  xlim=kwargs.get('xlim', (0, 30)),
85
- ylim=kwargs.get('ylim', (0, 500))
88
+ ylim=kwargs.get('ylim', (0, 800))
86
89
  )
87
90
 
88
91
  plt.xticks(ticks=np.array(range(0, 31, 5)), labels=np.array(range(0, 31, 5)))
AeroViz/plot/timeseries/template.py CHANGED
@@ -2,7 +2,7 @@ import matplotlib.pyplot as plt
2
2
  from matplotlib.pyplot import Figure, Axes
3
3
  from pandas import DataFrame
4
4
 
5
- from AeroViz.plot.timeseries import timeseries
5
+ from AeroViz.plot.timeseries.timeseries import timeseries
6
6
 
7
7
 
8
8
  def timeseries_template(df: DataFrame) -> tuple[Figure, Axes]:
@@ -40,7 +40,7 @@ def timeseries_template(df: DataFrame) -> tuple[Figure, Axes]:
40
40
  timeseries(df, y='VC', color='PBLH', style='bar', ax=ax4, bar_kws=dict(cmap='Blues'), set_xaxis_visible=False,
41
41
  ylim=[0, 5000])
42
42
 
43
- timeseries(df, y='PM25', color='PM1/PM25', style='scatter', ax=ax5, ylim=[0, None])
43
+ timeseries(df, y='PM2.5', color='PM1/PM25', style='scatter', ax=ax5, ylim=[0, None])
44
44
 
45
45
  plt.show()
46
46
 
AeroViz/plot/timeseries/timeseries.py CHANGED
@@ -4,8 +4,9 @@ import matplotlib.pyplot as plt
4
4
  import numpy as np
5
5
  from matplotlib.cm import ScalarMappable
6
6
  from matplotlib.pyplot import Figure, Axes
7
+ from mpl_toolkits.axes_grid1 import make_axes_locatable
7
8
  from mpl_toolkits.axes_grid1.inset_locator import inset_axes
8
- from pandas import DataFrame, date_range
9
+ from pandas import DataFrame, date_range, Timedelta
9
10
 
10
11
  from AeroViz.plot.utils import *
11
12
 
@@ -70,6 +71,40 @@ def _plot(ax, df, _y, _color, plot_kws):
70
71
  ax.plot(df.index, df[_y], color=_color, **plot_kws)
71
72
 
72
73
 
74
+ def _wind_arrow(ax, df, y, c, scatter_kws, cbar_kws, inset_kws):
75
+ """
76
+ Plot wind arrows on a scatter plot.
77
+
78
+ :param ax: matplotlib axes
79
+ :param df: pandas DataFrame
80
+ :param y: column name for wind speed
81
+ :param c: column name for wind direction
82
+ :param scatter_kws: keyword arguments for scatter plot
83
+ :param cbar_kws: keyword arguments for colorbar
84
+ :param inset_kws: keyword arguments for inset axes
85
+ """
86
+ # First, create a scatter plot
87
+ sc = ax.scatter(df.index, df[y], c=df[c], **scatter_kws)
88
+
89
+ # Add colorbar
90
+ divider = make_axes_locatable(ax)
91
+ cax = divider.append_axes("right", size="2%", pad=0.05)
92
+ plt.colorbar(sc, cax=cax, **cbar_kws)
93
+
94
+ # Add wind arrows
95
+ for idx, row in df.iterrows():
96
+ wind_speed = row[y]
97
+ wind_dir = np.radians(row[c])
98
+ dx = np.sin(wind_dir) * wind_speed / 20 # Scale factor can be adjusted
99
+ dy = np.cos(wind_dir) * wind_speed / 20
100
+ ax.annotate('', xy=(idx + 10 * dx * Timedelta(hours=5), wind_speed + 4 * dy),
101
+ xytext=(idx - 10 * dx * Timedelta(hours=5), wind_speed - 4 * dy),
102
+ arrowprops=dict(arrowstyle='->', color='k', linewidth=0.5))
103
+
104
+ # Set the x-axis limit to show all data points
105
+ # ax.set_xlim(df.index.min() - datetime.timedelta(days=1), df.index.max())
106
+
107
+
73
108
  def process_timeseries_data(df, rolling=None, interpolate_limit=None):
74
109
  # apply rolling window if specified
75
110
  df = df.rolling(window=rolling, min_periods=1).mean(numeric_only=True) if rolling is not None else df
@@ -90,7 +125,7 @@ def timeseries(df: DataFrame,
90
125
  interpolate_limit: int | None = 6,
91
126
  major_freq: str = '1MS',
92
127
  minor_freq: str = '10d',
93
- style: list[Literal['scatter', 'bar', 'line']] | str | None = None,
128
+ style: list[Literal['scatter', 'bar', 'line', 'arrow']] | str | None = None,
94
129
  ax: Axes | None = None,
95
130
  set_xaxis_visible: bool | None = None,
96
131
  legend_loc: Literal['best', 'upper right', 'upper left', 'lower left', 'lower right'] = 'best',
@@ -199,16 +234,16 @@ def timeseries(df: DataFrame,
199
234
  if y2 and ('scatter' or 'bar') in style:
200
235
  fig.subplots_adjust(right=0.8)
201
236
 
202
- for i, _c in enumerate(color):
203
- if _c is not None and _c in df.columns:
204
- style[i] = 'scatter'
237
+ # for i, _c in enumerate(color):
238
+ # if _c is not None and _c in df.columns:
239
+ # style[i] = 'scatter'
205
240
 
206
241
  for i, (_y, _c, _label, _style) in enumerate(zip(y, color, label, style)):
207
242
  scatter_kws = {**default_scatter_kws, **{'label': Unit(_y)}, **kwargs.get('scatter_kws', {})}
208
243
  bar_kws = {**default_bar_kws, **{'label': Unit(_y)}, **kwargs.get('bar_kws', {})}
209
244
  plot_kws = {**default_plot_kws, **{'label': Unit(_y)}, **kwargs.get('plot_kws', {})}
210
245
 
211
- if _style in ['scatter', 'bar']:
246
+ if _style in ['scatter', 'bar', 'arrow']:
212
247
  cbar_kws = {**default_cbar_kws, **{'label': Unit(_c), 'ticks': None}, **kwargs.get('cbar_kws', {})}
213
248
  inset_kws = {**default_insert_kws, **{'bbox_transform': ax.transAxes}, **kwargs.get('inset_kws', {})}
214
249
 
@@ -218,6 +253,9 @@ def timeseries(df: DataFrame,
218
253
  elif _style == 'bar':
219
254
  _bar(ax, df, _y, _c, bar_kws, cbar_kws, inset_kws)
220
255
 
256
+ elif _style == 'arrow':
257
+ _wind_arrow(ax, df, _y, _c, scatter_kws, cbar_kws, inset_kws)
258
+
221
259
  else:
222
260
  _plot(ax, df, _y, _c, plot_kws)
223
261
 
@@ -237,6 +275,9 @@ def timeseries(df: DataFrame,
237
275
  elif _style == 'bar':
238
276
  _bar(ax2, df, _y, _c, bar_kws, cbar_kws, inset_kws)
239
277
 
278
+ elif _style == 'arrow':
279
+ pass
280
+
240
281
  else: # line plot
241
282
  _plot(ax2, df, _y, _c, plot_kws)
242
283
 
@@ -348,7 +389,6 @@ def timeseries_stacked(df,
348
389
  ylabel=kwargs.get('ylabel', 'Percentage (%)'),
349
390
  xlim=kwargs.get('xlim', (st_tm, fn_tm)),
350
391
  ylim=(0, 100),
351
- title=kwargs.get('title', '')
352
392
  )
353
393
 
354
394
  xticks = kwargs.get('xticks', date_range(start=st_tm, end=fn_tm, freq=major_freq))
AeroViz/rawDataReader/__init__.py CHANGED
@@ -1,104 +1,111 @@
1
1
  from datetime import datetime
2
2
  from pathlib import Path
3
3
 
4
+ from pandas import Grouper, Timedelta
5
+
4
6
  from AeroViz.rawDataReader.config.supported_instruments import meta
5
7
  from AeroViz.rawDataReader.script import *
6
8
 
7
9
  __all__ = ['RawDataReader']
8
10
 
11
+ SUPPORTED_INSTRUMENTS = [
12
+ NEPH, Aurora, SMPS, GRIMM, APS_3321, AE33, AE43, BC1054,
13
+ MA350, TEOM, OCEC, IGAC, VOC, EPA, Minion
14
+ ]
15
+
9
16
 
10
17
  def RawDataReader(instrument_name: str,
11
- path: Path,
12
- qc: bool = True,
13
- csv_raw: bool = True,
18
+ path: Path | str,
14
19
  reset: bool = False,
20
+ qc: bool | str = True,
21
+ qc_freq: str | None = None,
15
22
  rate: bool = True,
16
23
  append_data: bool = False,
17
- start: datetime | None = None,
18
- end: datetime | None = None,
19
- mean_freq='1h',
20
- csv_out=True,
24
+ start: datetime = None,
25
+ end: datetime = None,
26
+ mean_freq: str = '1h',
27
+ csv_out: bool = True,
21
28
  ):
22
29
  """
23
30
  Factory function to instantiate the appropriate reader module for a given instrument and
24
31
  return the processed data over the specified time range.
25
32
 
26
- Parameters
27
- ----------
28
- instrument_name : str
29
- The name of the instrument for which to read data. Must be a valid key in the `meta` dictionary.
30
- path : Path
31
- The directory where raw data files for the instrument are stored.
32
- qc : bool, optional (default=True)
33
- If True, apply quality control (QC) to the raw data.
34
- csv_raw : bool, optional (default=True)
35
- If True, read raw data from CSV files.
36
- reset : bool, optional (default=False)
37
- If True, reset the state and reprocess the data from scratch.
38
- rate : bool, optional (default=False)
39
- If True, calculate rates from the data.
40
- append_data : bool, optional (default=False)
41
- If True, append new data to the existing dataset instead of overwriting it.
42
- start : datetime, optional
43
- Start time for filtering the data. If None, no start time filtering will be applied.
44
- end : datetime, optional
45
- End time for filtering the data. If None, no end time filtering will be applied.
46
- mean_freq : str, optional (default='1h')
47
- Resampling frequency for averaging the data. Example: '1h' for hourly mean.
48
- csv_out : bool, optional (default=True)
49
- If True, output the processed data as a CSV file.
50
-
51
- Return
52
- ------
53
- reader_module : Reader
54
- An instance of the reader module corresponding to the specified instrument, which processes
55
- the data and returns it in a usable format.
56
-
57
- Raises
58
- ------
59
- ValueError
60
- If the `instrument_name` provided is not a valid key in the `meta` dictionary.
61
-
62
- Example
63
- -------
33
+ :param instrument_name: The name of the instrument for which to read data. Must be a valid key in the `meta` dictionary.
34
+ :param path: The directory where raw data files for the instrument are stored.
35
+ :param reset: If True, reset the state and reprocess the data from scratch.
36
+ :param qc: If True, apply quality control (QC) to the raw data.
37
+ :param qc_freq: Frequency at which to perform QC. Must be one of 'W', 'M', 'Q', 'Y' for weekly, monthly, quarterly, or yearly.
38
+ :param rate: If True, calculate rates from the data.
39
+ :param append_data: If True, append new data to the existing dataset instead of overwriting it.
40
+ :param start: Start time for filtering the data. If None, no start time filtering will be applied.
41
+ :param end: End time for filtering the data. If None, no end time filtering will be applied.
42
+ :param mean_freq: Resampling frequency for averaging the data. Example: '1h' for hourly mean.
43
+ :param csv_out: If True, output the processed data as a CSV file.
44
+
45
+ :return: An instance of the reader module corresponding to the specified instrument, which processes the data and returns it in a usable format.
46
+
47
+ :raises ValueError: If the `instrument_name` provided is not a valid key in the `meta` dictionary.
48
+ :raises ValueError: If the specified path does not exist or is not a directory.
49
+ :raises ValueError: If the QC frequency is invalid.
50
+ :raises ValueError: If start and end times are not both provided or are invalid.
51
+ :raises ValueError: If the mean_freq is not a valid frequency string.
52
+
53
+ :Example:
54
+
64
55
  To read and process data for the BC1054 instrument:
65
56
 
66
57
  >>> from pathlib import Path
67
58
  >>> from datetime import datetime
68
- >>> data = RawDataReader(instrument_name='BC1054', path=Path('/path/to/data'),
69
- >>> start=datetime(2024, 1, 1), end=datetime(2024, 2, 1))
59
+ >>>
60
+ >>> data = RawDataReader(
61
+ ... instrument_name='BC1054',
62
+ ... path=Path('/path/to/data'),
63
+ ... start=datetime(2024, 2, 1),
64
+ ... end=datetime(2024, 7, 31, 23))
70
65
  """
71
66
  # Mapping of instrument names to their respective classes
72
- instrument_class_map = {
73
- 'NEPH': NEPH,
74
- 'Aurora': Aurora,
75
- 'SMPS': SMPS,
76
- 'GRIMM': GRIMM,
77
- 'APS_3321': APS_3321,
78
- 'AE33': AE33,
79
- 'AE43': AE43,
80
- 'BC1054': BC1054,
81
- 'MA350': MA350,
82
- 'TEOM': TEOM,
83
- 'OCEC': OCEC,
84
- 'IGAC': IGAC,
85
- 'VOC': VOC,
86
- 'Table': Table,
87
- 'EPA_vertical': EPA_vertical,
88
- 'Minion': Minion
89
- # Add other instruments and their corresponding classes here
90
- }
67
+ instrument_class_map = {cls.__name__.split('.')[-1]: cls for cls in SUPPORTED_INSTRUMENTS}
91
68
 
92
69
  # Check if the instrument name is in the map
93
70
  if instrument_name not in meta.keys():
94
71
  raise ValueError(f"Instrument name '{instrument_name}' is not valid. \nMust be one of: {list(meta.keys())}")
95
72
 
73
+ # 檢查 path 是否存在且是一個目錄
74
+ if not isinstance(path, Path):
75
+ path = Path(path)
76
+ if not path.exists() or not path.is_dir():
77
+ raise ValueError(f"The specified path '{path}' does not exist or is not a directory.")
78
+
79
+ # Validate the QC frequency
80
+ if qc_freq is not None:
81
+ try:
82
+ Grouper(freq=qc_freq)
83
+ except ValueError as e:
84
+ raise ValueError(f"Invalid frequency: {qc_freq}. Error: {str(e)}")
85
+ except TypeError as e:
86
+ raise ValueError(f"Invalid frequency type: {qc_freq}. Frequency should be a string.")
87
+
88
+ if start and end:
89
+ if end.hour == 0 and end.minute == 0 and end.second == 0:
90
+ end = end.replace(hour=23, minute=59, second=59)
91
+ else:
92
+ raise ValueError("Both start and end times must be provided.")
93
+ if end <= start:
94
+ raise ValueError(f"Invalid time range: start {start} is after end {end}")
95
+
96
+ # 驗證 mean_freq 的格式是否正確
97
+ try:
98
+ Timedelta(mean_freq)
99
+ except ValueError:
100
+ raise ValueError(
101
+ f"Invalid mean_freq: '{mean_freq}'. It should be a valid frequency string (e.g., '1h', '30min', '1D').")
102
+
96
103
  # Instantiate the class and return the instance
97
104
  reader_module = instrument_class_map[instrument_name].Reader(
98
105
  path=path,
99
- qc=qc,
100
- csv_raw=csv_raw,
101
106
  reset=reset,
107
+ qc=qc,
108
+ qc_freq=qc_freq,
102
109
  rate=rate,
103
110
  append_data=append_data
104
111
  )
AeroViz/rawDataReader/config/supported_instruments.py CHANGED
@@ -95,6 +95,53 @@ meta = {
95
95
  },
96
96
  },
97
97
 
98
+ "XRF": {
99
+ "pattern": ["*.csv"],
100
+ "freq": "1h",
101
+ "deter_key": {
102
+ "Al": ["Al"],
103
+ "Si": ["Si"],
104
+ "P": ["P"],
105
+ "S": ["S"],
106
+ "Cl": ["Cl"],
107
+ "K": ["K"],
108
+ "Ca": ["Ca"],
109
+ "Ti": ["Ti"],
110
+ "V": ["V"],
111
+ "Cr": ["Cr"],
112
+ "Mn": ["Mn"],
113
+ "Fe": ["Fe"],
114
+ "Ni": ["Ni"],
115
+ "Cu": ["Cu"],
116
+ "Zn": ["Zn"],
117
+ "As": ["As"],
118
+ "Se": ["Se"],
119
+ "Br": ["Br"],
120
+ "Rb": ["Rb"],
121
+ "Sr": ["Sr"],
122
+ "Y": ["Y"],
123
+ "Zr": ["Zr"],
124
+ "Mo": ["Mo"],
125
+ "Ag": ["Ag"],
126
+ "Cd": ["Cd"],
127
+ "In": ["In"],
128
+ "Sn": ["Sn"],
129
+ "Sb": ["Sb"],
130
+ "Te": ["Te"],
131
+ "Cs": ["Cs"],
132
+ "Ba": ["Ba"],
133
+ "La": ["La"],
134
+ "Ce": ["Ce"],
135
+ "W": ["W"],
136
+ "Pt": ["Pt"],
137
+ "Au": ["Au"],
138
+ "Hg": ["Hg"],
139
+ "Tl": ["Tl"],
140
+ "Pb": ["Pb"],
141
+ "Bi": ["Bi"],
142
+ },
143
+ },
144
+
98
145
  "VOC": {
99
146
  "pattern": ["*.csv"],
100
147
  "freq": "1h",
@@ -116,32 +163,18 @@ meta = {
116
163
  "deter_key": None,
117
164
  },
118
165
 
119
- "Table": {
166
+ "EPA": {
120
167
  "pattern": ["*.csv"],
121
168
  "freq": "1h",
122
- "deter_key": None,
123
- },
124
-
125
- "EPA_vertical": {
126
- "pattern": ["*.csv"],
127
- "freq": "1h",
128
- "deter_key": None,
169
+ "deter_key": {"Items": ["all"]},
129
170
  },
130
171
 
131
172
  "Minion": {
132
- "pattern": ["*.csv"],
173
+ "pattern": ["*.csv", "*.xlsx"],
133
174
  "freq": "1h",
134
175
  "deter_key": {
135
- "Na+": ["Na+"],
136
- "NH4+": ["NH4+"],
137
- "K+": ["K+"],
138
- "Mg2+": ["Mg2+"],
139
- "Ca2+": ["Ca2+"],
140
- "Cl-": ["Cl-"],
141
- "NO2-": ["NO2-"],
142
- "NO3-": ["NO3-"],
143
- "SO42-": ["SO42-"],
144
- "Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"],
176
+ "Main Salt (Na+, NH4+, Cl-, NO3-, SO42-)": ["Na+", "NH4+", "Cl-", "NO3-", "SO42-"],
177
+ "XRF (Al, Ti, V, Cr, Mn, Fe)": ["Al", "Ti", "V", "Cr", "Mn", "Fe"],
145
178
  },
146
179
  },
147
180
  }