AeroViz-0.1.6-py3-none-any.whl → AeroViz-0.1.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (51)
  1. AeroViz/dataProcess/Chemistry/_ocec.py +20 -7
  2. AeroViz/plot/__init__.py +1 -0
  3. AeroViz/plot/meteorology/meteorology.py +2 -0
  4. AeroViz/plot/optical/optical.py +1 -1
  5. AeroViz/plot/pie.py +14 -2
  6. AeroViz/plot/radar.py +184 -0
  7. AeroViz/plot/scatter.py +16 -7
  8. AeroViz/plot/templates/koschmieder.py +11 -8
  9. AeroViz/plot/timeseries/timeseries.py +0 -1
  10. AeroViz/rawDataReader/__init__.py +74 -67
  11. AeroViz/rawDataReader/config/supported_instruments.py +52 -19
  12. AeroViz/rawDataReader/core/__init__.py +129 -104
  13. AeroViz/rawDataReader/script/AE33.py +1 -1
  14. AeroViz/rawDataReader/script/AE43.py +1 -1
  15. AeroViz/rawDataReader/script/Aurora.py +1 -1
  16. AeroViz/rawDataReader/script/BC1054.py +1 -1
  17. AeroViz/rawDataReader/script/EPA.py +39 -0
  18. AeroViz/rawDataReader/script/GRIMM.py +1 -1
  19. AeroViz/rawDataReader/script/IGAC.py +6 -23
  20. AeroViz/rawDataReader/script/MA350.py +1 -1
  21. AeroViz/rawDataReader/script/Minion.py +102 -30
  22. AeroViz/rawDataReader/script/NEPH.py +1 -1
  23. AeroViz/rawDataReader/script/OCEC.py +1 -1
  24. AeroViz/rawDataReader/script/SMPS.py +1 -0
  25. AeroViz/rawDataReader/script/TEOM.py +2 -2
  26. AeroViz/rawDataReader/script/XRF.py +11 -0
  27. AeroViz/rawDataReader/script/__init__.py +2 -2
  28. {AeroViz-0.1.6.dist-info → AeroViz-0.1.7.dist-info}/METADATA +46 -24
  29. {AeroViz-0.1.6.dist-info → AeroViz-0.1.7.dist-info}/RECORD +32 -48
  30. AeroViz/process/__init__.py +0 -31
  31. AeroViz/process/core/DataProc.py +0 -19
  32. AeroViz/process/core/SizeDist.py +0 -90
  33. AeroViz/process/core/__init__.py +0 -4
  34. AeroViz/process/method/PyMieScatt_update.py +0 -567
  35. AeroViz/process/method/__init__.py +0 -2
  36. AeroViz/process/method/mie_theory.py +0 -260
  37. AeroViz/process/method/prop.py +0 -62
  38. AeroViz/process/script/AbstractDistCalc.py +0 -143
  39. AeroViz/process/script/Chemical.py +0 -177
  40. AeroViz/process/script/IMPACT.py +0 -49
  41. AeroViz/process/script/IMPROVE.py +0 -161
  42. AeroViz/process/script/Others.py +0 -65
  43. AeroViz/process/script/PSD.py +0 -103
  44. AeroViz/process/script/PSD_dry.py +0 -93
  45. AeroViz/process/script/__init__.py +0 -5
  46. AeroViz/process/script/retrieve_RI.py +0 -69
  47. AeroViz/rawDataReader/script/EPA_vertical.py +0 -46
  48. AeroViz/rawDataReader/script/Table.py +0 -27
  49. {AeroViz-0.1.6.dist-info → AeroViz-0.1.7.dist-info}/LICENSE +0 -0
  50. {AeroViz-0.1.6.dist-info → AeroViz-0.1.7.dist-info}/WHEEL +0 -0
  51. {AeroViz-0.1.6.dist-info → AeroViz-0.1.7.dist-info}/top_level.txt +0 -0
@@ -95,6 +95,53 @@ meta = {
         },
     },
 
+    "XRF": {
+        "pattern": ["*.csv"],
+        "freq": "1h",
+        "deter_key": {
+            "Al": ["Al"],
+            "Si": ["Si"],
+            "P": ["P"],
+            "S": ["S"],
+            "Cl": ["Cl"],
+            "K": ["K"],
+            "Ca": ["Ca"],
+            "Ti": ["Ti"],
+            "V": ["V"],
+            "Cr": ["Cr"],
+            "Mn": ["Mn"],
+            "Fe": ["Fe"],
+            "Ni": ["Ni"],
+            "Cu": ["Cu"],
+            "Zn": ["Zn"],
+            "As": ["As"],
+            "Se": ["Se"],
+            "Br": ["Br"],
+            "Rb": ["Rb"],
+            "Sr": ["Sr"],
+            "Y": ["Y"],
+            "Zr": ["Zr"],
+            "Mo": ["Mo"],
+            "Ag": ["Ag"],
+            "Cd": ["Cd"],
+            "In": ["In"],
+            "Sn": ["Sn"],
+            "Sb": ["Sb"],
+            "Te": ["Te"],
+            "Cs": ["Cs"],
+            "Ba": ["Ba"],
+            "La": ["La"],
+            "Ce": ["Ce"],
+            "W": ["W"],
+            "Pt": ["Pt"],
+            "Au": ["Au"],
+            "Hg": ["Hg"],
+            "Tl": ["Tl"],
+            "Pb": ["Pb"],
+            "Bi": ["Bi"],
+        },
+    },
+
     "VOC": {
         "pattern": ["*.csv"],
         "freq": "1h",
@@ -116,32 +163,18 @@ meta = {
         "deter_key": None,
     },
 
-    "Table": {
+    "EPA": {
         "pattern": ["*.csv"],
         "freq": "1h",
-        "deter_key": None,
-    },
-
-    "EPA_vertical": {
-        "pattern": ["*.csv"],
-        "freq": "1h",
-        "deter_key": None,
+        "deter_key": {"Items": ["all"]},
     },
 
     "Minion": {
-        "pattern": ["*.csv"],
+        "pattern": ["*.csv", "*.xlsx"],
         "freq": "1h",
         "deter_key": {
-            "Na+": ["Na+"],
-            "NH4+": ["NH4+"],
-            "K+": ["K+"],
-            "Mg2+": ["Mg2+"],
-            "Ca2+": ["Ca2+"],
-            "Cl-": ["Cl-"],
-            "NO2-": ["NO2-"],
-            "NO3-": ["NO3-"],
-            "SO42-": ["SO42-"],
-            "Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"],
+            "Main Salt (Na+, NH4+, Cl-, NO3-, SO42-)": ["Na+", "NH4+", "Cl-", "NO3-", "SO42-"],
+            "XRF (Al, Ti, V, Cr, Mn, Fe)": ["Al", "Ti", "V", "Cr", "Mn", "Fe"],
        },
    },
 }
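
The `deter_key` entries drive the acquisition/yield-rate bookkeeping in `AbstractReader._rate_calculate` (diffed below): each label maps to the columns that must survive QC for an hour to count, and the special value `["all"]` expands to every column with `dropna(how='all')`. A minimal sketch of that consumption on a hypothetical two-column frame (the sketch uses the equality test `cols == ["all"]`, which is the intended comparison):

```python
import numpy as np
import pandas as pd

# hypothetical hourly QC output standing in for a real instrument frame
idx = pd.date_range("2024-01-01", periods=48, freq="1h")
df = pd.DataFrame({"Al": np.ones(48), "Fe": np.ones(48)}, index=idx)
df.iloc[5:9] = np.nan  # pretend four hours failed QC

deter_key = {"XRF (Al, Fe)": ["Al", "Fe"], "Items": ["all"]}  # trimmed meta entries

for name, cols in deter_key.items():
    cols, how = (list(df.columns), "all") if cols == ["all"] else (cols, "any")
    valid = df[cols].dropna(how=how)
    print(f"{name}: {len(valid) / len(df) * 100:.1f}% of hours usable")
```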
@@ -1,25 +1,21 @@
 import json
 import logging
-import pickle as pkl
 from abc import ABC, abstractmethod
-from datetime import datetime as dtm
+from datetime import datetime
 from pathlib import Path
-from typing import Any
+from typing import Optional
 
 import numpy as np
 import pandas as pd
-from pandas import DataFrame, date_range, concat, to_numeric, to_datetime
+from pandas import DataFrame, concat, read_pickle
 from rich.console import Console
 from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn
 
-from ..config.supported_instruments import meta
+from AeroViz.rawDataReader.config.supported_instruments import meta
 
 __all__ = ['AbstractReader']
 
 
-console = Console(force_terminal=True, color_system="auto")
-
-
 class AbstractReader(ABC):
     """
     Abstract class for reading raw data from different instruments. Each instrument should have a separate class that
@@ -34,9 +30,9 @@ class AbstractReader(ABC):
 
     def __init__(self,
                  path: Path | str,
-                 qc: bool = True,
-                 csv_raw: bool = True,
                  reset: bool = False,
+                 qc: bool = True,
+                 qc_freq: Optional[str] = None,
                  rate: bool = True,
                  append_data: bool = False):
 
@@ -45,9 +41,9 @@ class AbstractReader(ABC):
         self.logger = self._setup_logger()
 
         self.reset = reset
-        self.rate = rate
         self.qc = qc
-        self.csv = csv_raw
+        self.qc_freq = qc_freq
+        self.rate = rate
         self.append = append_data and reset
 
         self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
@@ -57,15 +53,12 @@ class AbstractReader(ABC):
         self.csv_out = self.path / f'output_{self.nam.lower()}.csv'
 
     def __call__(self,
-                 start: dtm | None = None,
-                 end: dtm | None = None,
+                 start: datetime,
+                 end: datetime,
                  mean_freq: str = '1h',
                  csv_out: bool = True,
                  ) -> DataFrame:
 
-        if start and end and end <= start:
-            raise ValueError(f"Invalid time range: start {start} is after end {end}")
-
         data = self._run(start, end)
 
         if data is not None:
@@ -81,15 +74,8 @@ class AbstractReader(ABC):
         pass
 
     @abstractmethod
-    def _QC(self, df: DataFrame):
-        return df
-
-    @staticmethod
-    def basic_QC(df: DataFrame):
-        df_ave, df_std = df.mean(), df.std()
-        df_lowb, df_highb = df < (df_ave - df_std * 1.5), df > (df_ave + df_std * 1.5)
-
-        return df.mask(df_lowb | df_highb).copy()
+    def _QC(self, df: DataFrame) -> DataFrame:
+        return self.n_sigma_QC(df)
 
     def _setup_logger(self) -> logging.Logger:
         logger = logging.getLogger(self.nam)
@@ -99,29 +85,26 @@
             logger.removeHandler(handler)
 
         handler = logging.FileHandler(self.path / f'{self.nam}.log')
-        handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
+        handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
         logger.addHandler(handler)
         return logger
 
-    def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw) -> None:
-        if self.meta['deter_key'] is not None:
-            _start, _end = _fout_qc.index[[0, -1]]
-
-            _drop_how = 'any'
-            _the_size = len(_fout_raw.resample('1h').mean().index)
+    def _rate_calculate(self, raw_data, qc_data) -> None:
+        def __base_rate(raw_data, qc_data):
+            period_size = len(raw_data.resample('1h').mean().index)
 
             for _nam, _key in self.meta['deter_key'].items():
-                if _key == ['all']:
-                    _key, _drop_how = _fout_qc.keys(), 'all'
+                _key, _drop_how = (qc_data.keys(), 'all') if _key is ['all'] else (_key, 'any')
 
-                _real_size = len(_fout_raw[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
-                _QC_size = len(_fout_qc[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+                sample_size = len(raw_data[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+                qc_size = len(qc_data[_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
 
-                try:
-                    _acq_rate = round((_real_size / _the_size) * 100, 1)
-                    _yid_rate = round((_QC_size / _real_size) * 100, 1)
-                except ZeroDivisionError:
-                    _acq_rate, _yid_rate = 0, 0
+                # validate rate calculation
+                if period_size < sample_size or sample_size < qc_size or period_size == 0 or sample_size == 0:
+                    raise ValueError(f"Invalid sample sizes: period={period_size}, sample={sample_size}, QC={qc_size}")
+
+                _acq_rate = round((sample_size / period_size) * 100, 1)
+                _yid_rate = round((qc_size / sample_size) * 100, 1)
 
                 self.logger.info(f'{_nam}:')
                 self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
@@ -132,39 +115,55 @@
                 print(f'\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
                 print(f'\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
 
-    # set each to true datetime(18:30:01 -> 18:30:00) and rindex data
-    def _raw_process(self, _df):
-        # get time from df and set time to whole time to create time index
-        _st, _ed = _df.index.sort_values()[[0, -1]]
-        _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
-                               freq=self.meta['freq'])
-        _tm_index.name = 'time'
+        if self.meta['deter_key'] is not None:
+            # use qc_freq to calculate each period rate
+            if self.qc_freq is not None:
+                raw_data_grouped = raw_data.groupby(pd.Grouper(freq=self.qc_freq))
+                qc_data_grouped = qc_data.groupby(pd.Grouper(freq=self.qc_freq))
 
-        return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)
+                for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped):
+                    self.logger.info(
+                        f"\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}")
+                    print(
+                        f"\n\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}")
 
-    # process time index
-    @staticmethod
-    def _tmidx_process(_start, _end, _df):
-        _st, _ed = _df.index.sort_values()[[0, -1]]
-        _start, _end = to_datetime(_start) or _st, to_datetime(_end) or _ed
-        _idx = date_range(_start, _end, freq=_df.index.freq.copy())
-        _idx.name = 'time'
+                    __base_rate(_sub_raw_data, _sub_qc_data)
 
-        return _df.reindex(_idx), _st, _ed
+            else:
+                __base_rate(raw_data, qc_data)
 
-    # append new data to exist pkl
-    @staticmethod
-    def _append_process(_df_done, _df_apnd):
+    def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None):
+        """
+        Process time index, resample data, extract specified time range, and optionally append new data.
+
+        :param _df: Input DataFrame with time index
+        :param user_start: Start of user-specified time range (optional)
+        :param user_end: End of user-specified time range (optional)
+        :param append_df: DataFrame to append (optional)
+        :return: Processed DataFrame
+        """
+        # Round timestamps and remove duplicates
+        _df = _df.groupby(_df.index.round('1min')).first()
+
+        # Determine frequency
+        freq = _df.index.inferred_freq or self.meta['freq']
 
-        if _df_apnd is not None:
-            _df = concat([_df_apnd.dropna(how='all').copy(), _df_done.dropna(how='all').copy()])
+        # Append new data if provided
+        if append_df is not None:
+            append_df.index = append_df.index.round('1min')
+            _df = pd.concat([append_df.dropna(how='all'), _df.dropna(how='all')])
+            _df = _df.loc[~_df.index.duplicated()]
 
-            _idx = date_range(*_df.index.sort_values()[[0, -1]], freq=_df_done.index.freq.copy())
-            _idx.name = 'time'
+        # Determine time range
+        df_start, df_end = _df.index.sort_values()[[0, -1]]
 
-            return _df.loc[~_df.index.duplicated()].copy().reindex(_idx)
+        # Create new time index
+        new_index = pd.date_range(user_start or df_start, user_end or df_end, freq=freq, name='time')
 
-        return _df_done
+        # Process data: convert to numeric, resample, and reindex
+        return (_df.apply(pd.to_numeric, errors='coerce')
+                .resample(freq).mean()
+                .reindex(new_index))
 
     def _outlier_process(self, _df):
         outlier_file = self.path / 'outlier.json'
@@ -180,31 +179,17 @@
 
         return _df
 
-    # save pickle file
     def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
-        self._safe_pickle_dump(self.pkl_nam, qc_data)
-        if self.csv:
-            qc_data.to_csv(self.csv_nam)
+        try:
+            raw_data.to_pickle(self.pkl_nam_raw)
+            raw_data.to_csv(self.csv_nam_raw)
 
-        if self.meta['deter_key'] is not None:
-            self._safe_pickle_dump(self.pkl_nam_raw, raw_data)
-            if self.csv:
-                raw_data.to_csv(self.csv_nam_raw)
+            if self.meta['deter_key'] is not None:
+                qc_data.to_pickle(self.pkl_nam)
+                qc_data.to_csv(self.csv_nam)
 
-    @staticmethod
-    def _safe_pickle_dump(file_path: Path, data: Any) -> None:
-        try:
-            with file_path.open('wb') as f:
-                pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
-        except PermissionError as e:
-            raise IOError(f"Unable to write to {file_path}. The file may be in use or you may not have permission: {e}")
         except Exception as e:
-            raise IOError(f"Error writing to {file_path}: {e}")
-
-    # read pickle file
-    def _read_pkl(self):
-        with self.pkl_nam.open('rb') as qc_data, self.pkl_nam_raw.open('rb') as raw_data:
-            return pkl.load(raw_data), pkl.load(qc_data)
+            raise IOError(f"Error saving data. {e}")
 
     def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
         files = [f
@@ -223,7 +208,7 @@
             TaskProgressColumn(),
             TimeRemainingColumn(),
             TextColumn("{task.fields[filename]}", style="yellow"),
-            console=console,
+            console=Console(force_terminal=True, color_system="auto"),
             expand=False
         ) as progress:
             task = progress.add_task(f"Reading {self.nam} files", total=len(files), filename="")
@@ -246,47 +231,87 @@
         if not df_list:
             raise ValueError("All files were either empty or failed to read.")
 
-        raw_data = self._raw_process(concat(df_list))
+        raw_data = concat(df_list, axis=0).groupby(level=0).first()
+
+        raw_data = self._timeIndex_process(raw_data)
         qc_data = self._QC(raw_data)
 
         return raw_data, qc_data
 
-    def _run(self, _start, _end):
+    def _run(self, user_start, user_end):
        # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
        if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
-            print(f"\n{dtm.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mPICKLE\033[0m "
-                  f"from {_start} to {_end}\n")
+            print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mPICKLE\033[0m "
+                  f"from {user_start} to {user_end}\n")
 
-            _f_raw_done, _f_qc_done = self._read_pkl()
+            _f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam)
 
            if self.append:
-                print(f"Appending new data from {_start} to {_end}")
+                print(f"Appending new data from {user_start} to {user_end}")
                _f_raw_new, _f_qc_new = self._read_raw_files()
-                _f_raw = self._append_process(_f_raw_done, _f_raw_new)
-                _f_qc = self._append_process(_f_qc_done, _f_qc_new)
+                _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
+                _f_qc = self._timeIndex_process(_f_qc_done, append_df=_f_qc_new)
            else:
                _f_raw, _f_qc = _f_raw_done, _f_qc_done
+                return _f_qc if self.qc else _f_raw
 
        else:
-            print(f"\n{dtm.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mRAW DATA\033[0m "
-                  f"from {_start} to {_end}\n")
+            print(f"\n{datetime.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mRAW DATA\033[0m "
+                  f"from {user_start} to {user_end}\n")
+
            _f_raw, _f_qc = self._read_raw_files()
 
        # process time index
-        _f_raw, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw)
-        _f_qc, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc)
+        data_start, data_end = _f_raw.index.sort_values()[[0, -1]]
 
+        _f_raw = self._timeIndex_process(_f_raw, user_start, user_end)
+        _f_qc = self._timeIndex_process(_f_qc, user_start, user_end)
        _f_qc = self._outlier_process(_f_qc)
 
        # save
        self._save_data(_f_raw, _f_qc)
 
        self.logger.info(f"{'=' * 60}")
-        self.logger.info(f"Raw data time : {_start_raw} to {_end_raw}")
-        self.logger.info(f"Output time : {_start} to {_end}")
+        self.logger.info(f"Raw data time : {data_start} to {data_end}")
+        self.logger.info(f"Output time : {user_start} to {user_end}")
        self.logger.info(f"{'-' * 60}")
 
        if self.rate:
-            self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)
+            self._rate_calculate(_f_raw, _f_qc)
 
        return _f_qc if self.qc else _f_raw
+
+    @staticmethod
+    def reorder_dataframe_columns(df, order_lists, others_col=False):
+        new_order = []
+
+        for order in order_lists:
+            # Only add columns that exist in the DataFrame, without duplicates
+            new_order.extend([col for col in order if col in df.columns and col not in new_order])
+
+        if others_col:
+            # Append all remaining original columns, preserving their original order
+            new_order.extend([col for col in df.columns if col not in new_order])
+
+        return df[new_order]
+
+    @staticmethod
+    def n_sigma_QC(df: DataFrame, std_range: int = 5) -> DataFrame:
+        df_ave, df_std = df.mean(), df.std()
+        df_lowb, df_highb = df < (df_ave - df_std * std_range), df > (df_ave + df_std * std_range)
+
+        return df.mask(df_lowb | df_highb).copy()
+
+    # Inter-quartile Range (IQR) method
+    @staticmethod
+    def IQR_QC(df: DataFrame, log_dist=False) -> tuple[DataFrame, DataFrame]:
+        df = np.log10(df) if log_dist else df
+
+        _df_qua = df.quantile([.25, .75])
+        _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
+        _df_iqr = _df_q3 - _df_q1
+
+        _se = concat([_df_q1 - 1.5 * _df_iqr] * len(df), axis=1).T.set_index(df.index)
+        _le = concat([_df_q3 + 1.5 * _df_iqr] * len(df), axis=1).T.set_index(df.index)
+
+        return (10 ** _se, 10 ** _le) if log_dist else (_se, _le)
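
The QC helpers introduced above (`n_sigma_QC`, `IQR_QC`) are plain static methods and can be exercised directly. A minimal sketch on synthetic data; the column name, seed, and injected spike are illustrative only:

```python
import numpy as np
import pandas as pd

from AeroViz.rawDataReader.core import AbstractReader

rng = np.random.default_rng(0)
idx = pd.date_range("2024-01-01", periods=500, freq="1min")
df = pd.DataFrame({"BC1": rng.lognormal(1.0, 0.3, 500)}, index=idx)
df.iloc[10, 0] = 1e4  # inject one obvious spike

# n_sigma_QC masks values outside mean +/- std_range * std
clean = AbstractReader.n_sigma_QC(df, std_range=5)
assert np.isnan(clean.iloc[10, 0])

# IQR_QC returns per-timestamp lower/upper fences (Q1 - 1.5*IQR, Q3 + 1.5*IQR),
# optionally computed in log10 space for lognormal-like concentrations
lower, upper = AbstractReader.IQR_QC(df, log_dist=True)
masked = df.mask((df < lower) | (df > upper))
```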
@@ -27,4 +27,4 @@ class Reader(AbstractReader):
         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7']].mask((_df < 0).copy())
 
         # QC data in 1h
-        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
+        return _df.resample('1h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()
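
The `resample('1h').apply(self.n_sigma_QC)` idiom above recurs across the reader scripts: values are outlier-masked within each hourly window, then the survivors are averaged up to the instrument's `meta` frequency. An equivalent, self-contained sketch of those two steps written with an explicit groupby (hypothetical column and data):

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2024-01-01", periods=360, freq="1min")
df = pd.DataFrame({"BC1": np.random.lognormal(1.0, 0.2, 360)}, index=idx)

# step 1: within each hourly window, mask values outside mean +/- 5 * std
masked = df.groupby(pd.Grouper(freq="1h")).transform(
    lambda s: s.mask((s - s.mean()).abs() > 5 * s.std())
)

# step 2: average what survives up to the output frequency
hourly = masked.resample("1h").mean()
```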
@@ -28,4 +28,4 @@ class Reader(AbstractReader):
         _df = _df.mask((_df < 0).copy())
 
         # QC data in 1h
-        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
+        return _df.resample('1h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()
@@ -37,4 +37,4 @@ class Reader(AbstractReader):
         _df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
 
         # QC data in 1h
-        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
+        return _df.resample('1h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()
@@ -39,4 +39,4 @@ class Reader(AbstractReader):
         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']].mask((_df < 0).copy())
 
         # QC data in 1h
-        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
+        return _df.resample('1h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()
@@ -0,0 +1,39 @@
+from pandas import read_csv
+
+from AeroViz.rawDataReader.core import AbstractReader
+
+desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC',
+                  'CH4', 'PM10', 'PM2.5', 'PM1', 'WS', 'WD', 'AT', 'RH']
+
+desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene']
+
+
+class Reader(AbstractReader):
+    nam = 'EPA'
+
+    def _raw_reader(self, file):
+        # Accepts 查詢小時值(測項).csv and 查詢小時值(直式).csv exports (with or without valid-value filtering)
+        df = read_csv(file, encoding='big5', encoding_errors='ignore', index_col=0, parse_dates=True,
+                      on_bad_lines='skip')
+
+        if len(df.groupby('測站')) > 1:
+            raise ValueError(f'Multiple stations found in the file: {df['測站'].unique()}')
+        else:
+            if '測站' in df.columns:
+                df.drop(columns=['測站'], inplace=True)
+
+            if '測項' in df.columns:
+                df = df.pivot(columns='測項', values='資料')
+
+        df.rename(columns={'AMB_TEMP': 'AT', 'WIND_SPEED': 'WS', 'WIND_DIREC': 'WD'}, inplace=True)
+        df.index.name = 'Time'
+
+        # If invalid values were not removed before export, replace strings containing # or L with '#' or '_'
+        df = df.replace(to_replace=r'\d*[#]\b', value='#', regex=True)
+        df = df.replace(to_replace=r'\d*[L]\b', value='_', regex=True)
+
+        # Reorder columns
+        return self.reorder_dataframe_columns(df, [desired_order1])
+
+    def _QC(self, _df):
+        return _df.resample('6h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()
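
A minimal usage sketch for the new EPA reader, following the `AbstractReader.__init__` and `__call__` signatures from the core diff above; the path and date range are placeholders, and the directory is assumed to already hold the downloaded CSV exports:

```python
from datetime import datetime
from pathlib import Path

from AeroViz.rawDataReader.script.EPA import Reader

# assumes data/epa_station/ exists and contains the *.csv exports
reader = Reader(path=Path("data/epa_station"), qc=True, rate=True)
df = reader(start=datetime(2024, 1, 1), end=datetime(2024, 2, 1), mean_freq="1h")
```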
@@ -25,4 +25,4 @@ class Reader(AbstractReader):
 
     def _QC(self, _df):
         # QC data in 1h
-        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
+        return _df.resample('1h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()
@@ -1,8 +1,7 @@
 # read meteorological data from google sheet
 
 
-import numpy as np
-from pandas import read_csv, concat, to_numeric
+from pandas import read_csv, to_numeric
 
 from AeroViz.rawDataReader.core import AbstractReader
 
@@ -35,24 +34,8 @@ class Reader(AbstractReader):
             'SO42-': 0.08,
         }
 
-        # _mdl.update(self._oth_set.get('mdl', {}))
-
-        def _se_le(_df_, _log=False):
-            _df_ = np.log10(_df_) if _log else _df_
-
-            _df_qua = _df_.quantile([.25, .75])
-            _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
-            _df_iqr = _df_q3 - _df_q1
-
-            _se = concat([_df_q1 - 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
-            _le = concat([_df_q3 + 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
-
-            if _log:
-                return 10 ** _se, 10 ** _le
-            return _se, _le
-
         _cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
-                                  ['Cl-', 'NO2-', 'NO3-', 'SO42-', ],
+                                  ['Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-', ],
                                   ['SO42-', 'NO3-', 'NH4+'])
 
         _df_salt = _df[_mdl.keys()].copy()
@@ -68,23 +51,23 @@
 
         # calculate SE LE
         # salt < LE
-        _se, _le = _se_le(_df_salt, _log=True)
+        _se, _le = self.IQR_QC(_df_salt, log_dist=True)
         _df_salt = _df_salt.mask(_df_salt > _le).copy()
 
         # C/A, A/C
         _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
         _rat_AC = (1 / _rat_CA).copy()
 
-        _se, _le = _se_le(_rat_CA, )
+        _se, _le = self.IQR_QC(_rat_CA, )
         _cond_CA = (_rat_CA < _le) & (_rat_CA > 0)
 
-        _se, _le = _se_le(_rat_AC, )
+        _se, _le = self.IQR_QC(_rat_AC, )
         _cond_AC = (_rat_AC < _le) & (_rat_AC > 0)
 
         _df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()
 
         # conc. of main salt > SE
-        _se, _le = _se_le(_df_salt[_main], _log=True)
+        _se, _le = self.IQR_QC(_df_salt[_main], log_dist=True)
         _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()
 
         return _df_salt.reindex(_df.index)
@@ -35,4 +35,4 @@ class Reader(AbstractReader):
         _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())
 
         # QC data in 1h
-        return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
+        return _df.resample('1h').apply(self.n_sigma_QC).resample(self.meta.get("freq")).mean()