AeroViz 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of AeroViz has been flagged as possibly problematic.

Files changed (89)
  1. AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
  2. AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
  3. AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
  4. AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
  5. AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
  6. AeroViz/dataProcess/Optical/_absorption.py +2 -0
  7. AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
  8. AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
  9. AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
  10. AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
  11. AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
  12. AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
  13. AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
  14. AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
  15. AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
  16. AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
  17. AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
  18. AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
  19. AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
  20. AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
  21. AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
  22. AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
  23. AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
  24. AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
  25. AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
  26. AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
  27. AeroViz/plot/templates/__init__.py +1 -1
  28. AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
  29. AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
  30. AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
  31. AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
  32. AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
  33. AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
  34. AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
  35. AeroViz/plot/templates/corr_matrix.py +168 -2
  36. AeroViz/plot/templates/koschmieder.py +1 -1
  37. AeroViz/plot/templates/metal_heatmap.py +15 -6
  38. AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
  39. AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
  40. AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
  41. AeroViz/plot/timeseries/timeseries.py +96 -52
  42. AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  43. AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
  44. AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
  45. AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
  46. AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
  47. AeroViz/rawDataReader/__init__.py +154 -59
  48. AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
  49. AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
  50. AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
  51. AeroViz/rawDataReader/config/supported_instruments.py +7 -4
  52. AeroViz/rawDataReader/core/__init__.py +176 -86
  53. AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
  54. AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
  55. AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
  56. AeroViz/rawDataReader/core/logger.py +14 -10
  57. AeroViz/rawDataReader/core/qc.py +1 -1
  58. AeroViz/rawDataReader/script/AE33.py +1 -1
  59. AeroViz/rawDataReader/script/BAM1020.py +35 -0
  60. AeroViz/rawDataReader/script/NEPH.py +6 -10
  61. AeroViz/rawDataReader/script/SMPS.py +20 -6
  62. AeroViz/rawDataReader/script/TEOM.py +15 -3
  63. AeroViz/rawDataReader/script/__init__.py +1 -0
  64. AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
  65. AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
  66. AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
  67. AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
  68. AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
  69. AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
  70. AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
  71. AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
  72. AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
  73. AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
  74. AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
  75. AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
  76. AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
  77. AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
  78. AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
  79. AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
  80. AeroViz/rawDataReader/script/__pycache__/XRF.cpython-312.pyc +0 -0
  81. AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
  82. AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  83. AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
  84. AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
  85. {AeroViz-0.1.13.dist-info → AeroViz-0.1.15.dist-info}/METADATA +14 -15
  86. {AeroViz-0.1.13.dist-info → AeroViz-0.1.15.dist-info}/RECORD +89 -87
  87. {AeroViz-0.1.13.dist-info → AeroViz-0.1.15.dist-info}/WHEEL +1 -1
  88. {AeroViz-0.1.13.dist-info → AeroViz-0.1.15.dist-info}/LICENSE +0 -0
  89. {AeroViz-0.1.13.dist-info → AeroViz-0.1.15.dist-info}/top_level.txt +0 -0
AeroViz/rawDataReader/core/__init__.py
@@ -1,19 +1,18 @@
  import json
  from abc import ABC, abstractmethod
  from contextlib import contextmanager
- from datetime import datetime
+ from datetime import datetime, timedelta
  from pathlib import Path
- from typing import Optional, Generator
+ from typing import Generator

  import numpy as np
  import pandas as pd
- from pandas import DataFrame, concat, read_pickle, to_numeric
  from rich.console import Console
- from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn
+ from rich.progress import Progress, TextColumn, BarColumn, SpinnerColumn, TaskProgressColumn

  from AeroViz.rawDataReader.config.supported_instruments import meta
  from AeroViz.rawDataReader.core.logger import ReaderLogger
- from AeroViz.rawDataReader.core.qc import DataQualityControl
+ from AeroViz.rawDataReader.core.qc import QualityControl

  __all__ = ['AbstractReader']

@@ -32,45 +31,44 @@ class AbstractReader(ABC):

  def __init__(self,
  path: Path | str,
- reset: bool = False,
- qc: bool = True,
- qc_freq: Optional[str] = None,
- rate: bool = True,
- append_data: bool = False,
+ reset: bool | str = False,
+ qc: bool | str = True,
  **kwargs):

  self.path = Path(path)
  self.meta = meta[self.nam]
- self.logger = ReaderLogger(self.nam, self.path)
-
- self.reset = reset
- self.qc = qc
- self.qc_freq = qc_freq
- self.rate = rate
- self.append = append_data and reset
-
- self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
- self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
- self.pkl_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.pkl'
- self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
- self.csv_out = self.path / f'output_{self.nam.lower()}.csv'
-
- self.size_range = kwargs.get('size_range', (11.8, 593.5))
+ output_folder = self.path / f'{self.nam.lower()}_outputs'
+ output_folder.mkdir(parents=True, exist_ok=True)
+
+ self.logger = ReaderLogger(
+ self.nam, output_folder,
+ kwargs.get('log_level').upper() if not kwargs.get('suppress_warnings') else 'ERROR')
+
+ self.reset = reset is True
+ self.append = reset == 'append'
+ self.qc = qc  # if qc, then calculate rate
+ self.qc_freq = qc if isinstance(qc, str) else None
+ self.kwargs = kwargs
+
+ self.pkl_nam = output_folder / f'_read_{self.nam.lower()}.pkl'
+ self.csv_nam = output_folder / f'_read_{self.nam.lower()}.csv'
+ self.pkl_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.pkl'
+ self.csv_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.csv'
+ self.csv_out = output_folder / f'output_{self.nam.lower()}.csv'
+ self.report_out = output_folder / 'report.json'

  def __call__(self,
  start: datetime,
  end: datetime,
  mean_freq: str = '1h',
- csv_out: bool = True,
- ) -> DataFrame:
+ ) -> pd.DataFrame:

  data = self._run(start, end)

  if data is not None:
- if mean_freq:
- data = data.resample(mean_freq).mean()
- if csv_out:
- data.to_csv(self.csv_out)
+ data = data.resample(mean_freq).mean()
+
+ data.to_csv(self.csv_out)

  return data

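The constructor change above collapses 0.1.13's five flags (reset, qc, qc_freq, rate, append_data) into two: reset now accepts True or 'append', and qc accepts a bool or a pandas frequency string that doubles as qc_freq. A minimal sketch of driving a concrete reader under the new signature (the data path and dates are hypothetical; note that kwargs.get('log_level').upper() has no fallback above, so log_level appears to be required unless suppress_warnings is set):

    from datetime import datetime
    from AeroViz.rawDataReader.script.BAM1020 import Reader

    reader = Reader(
        path='data/NZ_BAM1020',  # outputs land in data/NZ_BAM1020/bam1020_outputs/
        reset='append',          # True: rebuild from raw files; 'append': merge new files into the pickle
        qc='1MS',                # truthy: run QC; a freq string additionally sets qc_freq for per-period rates
        log_level='INFO',
    )
    df = reader(start=datetime(2024, 1, 1), end=datetime(2024, 6, 30), mean_freq='1h')
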
@@ -79,58 +77,147 @@ class AbstractReader(ABC):
  pass

  @abstractmethod
- def _QC(self, df: DataFrame) -> DataFrame:
+ def _QC(self, df: pd.DataFrame) -> pd.DataFrame:
  return df

- def _rate_calculate(self, raw_data, qc_data) -> None:
- def __base_rate(raw_data, qc_data):
- period_size = len(raw_data.resample('1h').mean().index)
+ def __calculate_rates(self, raw_data, qc_data, all_keys=False, with_log=False):
+ """Calculate the acquisition rate, yield rate, and total rate

- for _nam, _key in self.meta['deter_key'].items():
- _columns_key, _drop_how = (qc_data.keys(), 'all') if _key == ['all'] else (_key, 'any')
+ Args:
+ raw_data: the raw data
+ qc_data: the data after QC
+ all_keys: whether to compute every deter_key
+ with_log: whether to log the computed rates
+ """
+ if raw_data.empty or qc_data.empty:
+ return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}

- sample_size = len(raw_data[_columns_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
- qc_size = len(qc_data[_columns_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
+ def _calculate_single_key(key_name, key_columns):
+ columns, drop_how = (qc_data.keys(), 'all') if key_columns == ['all'] else (key_columns, 'any')

- # validate rate calculation
- if period_size == 0 or sample_size == 0 or qc_size == 0:
+ # Resample and count the valid data points
+ period_size = len(raw_data.resample('1h').mean().index)
+ sample_size = len(raw_data[columns].resample('1h').mean().dropna(how=drop_how).index)
+ qc_size = len(qc_data[columns].resample('1h').mean().dropna(how=drop_how).index)
+
+ # Validate the counts
+ if any([
+ period_size == 0 or sample_size == 0 or qc_size == 0,
+ period_size < sample_size,
+ sample_size < qc_size
+ ]):
+ if with_log:
  self.logger.warning(f'\t\t No data for this period... skip')
- continue
- if period_size < sample_size:
- self.logger.warning(f'\t\tError: Sample({sample_size}) > Period({period_size})... skip')
- continue
- if sample_size < qc_size:
- self.logger.warning(f'\t\tError: QC({qc_size}) > Sample({sample_size})... skip')
- continue
-
- else:
- _sample_rate = round((sample_size / period_size) * 100, 1)
- _valid_rate = round((qc_size / sample_size) * 100, 1)
- _total_rate = round((qc_size / period_size) * 100, 1)
-
- self.logger.info(f"\t\t{self.logger.CYAN}{self.logger.ARROW} {_nam}{self.logger.RESET}")
+ return None
+
+ # Compute the rates
+ sample_rate = round((sample_size / period_size) * 100, 1)
+ valid_rate = round((qc_size / sample_size) * 100, 1)
+ total_rate = round((qc_size / period_size) * 100, 1)
+
+ if with_log:
+ self.logger.info(f"\t\t> {key_name}")
  self.logger.info(
- f"\t\t\t├─ {'Sample Rate':15}: {self.logger.BLUE}{_sample_rate:>6.1f}%{self.logger.RESET}")
+ f"\t\t\t> {'Sample Rate':13}: {self.logger.BLUE}{sample_rate:>6.1f}%{self.logger.RESET}")
  self.logger.info(
- f"\t\t\t├─ {'Valid Rate':15}: {self.logger.BLUE}{_valid_rate:>6.1f}%{self.logger.RESET}")
+ f"\t\t\t> {'Valid Rate':13}: {self.logger.BLUE}{valid_rate:>6.1f}%{self.logger.RESET}")
  self.logger.info(
- f"\t\t\t└─ {'Total Rate':15}: {self.logger.BLUE}{_total_rate:>6.1f}%{self.logger.RESET}")
+ f"\t\t\t> {'Total Rate':13}: {self.logger.BLUE}{total_rate:>6.1f}%{self.logger.RESET}")
+
+ return {
+ 'acquisition_rate': sample_rate,
+ 'yield_rate': valid_rate,
+ 'total_rate': total_rate
+ }
+
+ if all_keys:
+ # Compute every key and collect all results (for log output)
+ all_results = []
+ for name, columns in self.meta['deter_key'].items():
+ result = _calculate_single_key(name, columns)
+ if result:
+ all_results.append(result)
+
+ if not all_results:
+ return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
+
+ # Return the lowest rate across all results
+ return {
+ 'acquisition_rate': min(r['acquisition_rate'] for r in all_results),
+ 'yield_rate': min(r['yield_rate'] for r in all_results),
+ 'total_rate': min(r['total_rate'] for r in all_results)
+ }
+ else:
+ # Compute every key but return only the lowest rates
+ min_rates = {'acquisition_rate': 200, 'yield_rate': 200, 'total_rate': 200}
+
+ for name, columns in self.meta['deter_key'].items():
+ result = _calculate_single_key(name, columns)
+ if result:
+ min_rates['acquisition_rate'] = min(min_rates['acquisition_rate'], result['acquisition_rate'])
+ min_rates['yield_rate'] = min(min_rates['yield_rate'], result['yield_rate'])
+ min_rates['total_rate'] = min(min_rates['total_rate'], result['total_rate'])
+
+ # If there were no valid results, return 0
+ if min_rates['acquisition_rate'] == 200 and min_rates['yield_rate'] == 200:
+ return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}

+ return min_rates
+
+ def _rate_calculate(self, raw_data, qc_data) -> None:
  if self.meta['deter_key'] is not None:
- # use qc_freq to calculate each period rate
  if self.qc_freq is not None:
  raw_data_grouped = raw_data.groupby(pd.Grouper(freq=self.qc_freq))
  qc_data_grouped = qc_data.groupby(pd.Grouper(freq=self.qc_freq))

  for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped):
  self.logger.info(
- f"\t{self.logger.BLUE}{self.logger.ARROW} Processing: {_sub_raw_data.index[0].strftime('%F')}"
+ f"\t{self.logger.BLUE}> Processing: {_sub_raw_data.index[0].strftime('%F')}"
  f" to {_sub_raw_data.index[-1].strftime('%F')}{self.logger.RESET}")

- __base_rate(_sub_raw_data, _sub_qc_data)
-
+ self.__calculate_rates(_sub_raw_data, _sub_qc_data, all_keys=True, with_log=True)
  else:
- __base_rate(raw_data, qc_data)
+ self.__calculate_rates(raw_data, qc_data, all_keys=True, with_log=True)
+
+ # Build the weekly and monthly data windows
+ current_time = datetime.now()
+ week_mask = raw_data.index >= current_time - timedelta(days=7)
+ month_mask = raw_data.index >= current_time - timedelta(days=30)
+
+ # Generate the report
+ self.__generate_report(
+ current_time,
+ raw_data[week_mask], qc_data[week_mask],
+ raw_data[month_mask], qc_data[month_mask]
+ )
+
+ def __generate_report(self, current_time, week_raw_data, week_qc_data, month_raw_data, month_qc_data):
+ """Generate the acquisition-rate and yield-rate report"""
+ report = {
+ "report_time": current_time.strftime('%Y-%m-%d %H:%M:%S'),
+ "instrument_info": {
+ "station": self.path.name[:2],
+ "instrument": self.nam
+ },
+ "rates": {
+ "weekly": self.__calculate_rates(week_raw_data, week_qc_data),
+ "monthly": self.__calculate_rates(month_raw_data, month_qc_data),
+ },
+ "details": {
+ "weekly": {
+ "start_time": (current_time - timedelta(days=7)).strftime('%Y-%m-%d %H:%M:%S'),
+ "end_time": current_time.strftime('%Y-%m-%d %H:%M:%S')
+ },
+ "monthly": {
+ "start_time": (current_time - timedelta(days=30)).strftime('%Y-%m-%d %H:%M:%S'),
+ "end_time": current_time.strftime('%Y-%m-%d %H:%M:%S')
+ }
+ }
+ }
+
+ # Write the report
+ with open(self.report_out, 'w') as f:
+ json.dump(report, f, indent=4)

  def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None):
  """
@@ -182,7 +269,7 @@ class AbstractReader(ABC):

  return _df

- def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
+ def _save_data(self, raw_data: pd.DataFrame, qc_data: pd.DataFrame) -> None:
  try:
  raw_data.to_pickle(self.pkl_nam_raw)
  raw_data.to_csv(self.csv_nam_raw)
@@ -205,15 +292,15 @@

  try:
  with Progress(
- TextColumn("[bold blue]{task.description}", style="bold blue"),
+ SpinnerColumn(finished_text=""),
  BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
- TaskProgressColumn(),
- TimeRemainingColumn(),
- TextColumn("{task.fields[filename]}", style="yellow"),
+ TaskProgressColumn(style="bold", text_format="[bright_green]{task.percentage:>3.0f}%"),
+ TextColumn("{task.description}", style="bold blue"),
+ TextColumn("{task.fields[filename]}", style="bold blue"),
  console=Console(force_terminal=True, color_system="auto", width=120),
  expand=False
  ) as progress:
- task = progress.add_task(f"{self.logger.ARROW} Reading {self.nam} files", total=len(files), filename="")
+ task = progress.add_task(f"Reading {self.nam} files:", total=len(files), filename="")
  yield progress, task
  finally:
  # Restore logger method and output message
@@ -222,7 +309,7 @@
  for msg in msgs:
  original[level](msg)

- def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
+ def _read_raw_files(self) -> tuple[pd.DataFrame | None, pd.DataFrame | None]:
  files = [f
  for file_pattern in self.meta['pattern']
  for pattern in {file_pattern.lower(), file_pattern.upper(), file_pattern}
@@ -242,7 +329,7 @@
  if (df := self._raw_reader(file)) is not None and not df.empty:
  df_list.append(df)
  else:
- self.logger.warning(f"\tFile {file.name} produced an empty DataFrame or None.")
+ self.logger.debug(f"\tFile {file.name} produced an empty DataFrame or None.")

  except Exception as e:
  self.logger.error(f"Error reading {file.name}: {e}")
@@ -250,25 +337,27 @@

  if not df_list:
  raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m")

- raw_data = concat(df_list, axis=0).groupby(level=0).first()
+ raw_data = pd.concat(df_list, axis=0).groupby(level=0).first()

- if self.nam == 'SMPS':
+ if self.nam in ['SMPS', 'APS', 'GRIMM']:
  raw_data = raw_data.sort_index(axis=1, key=lambda x: x.astype(float))

- raw_data = self._timeIndex_process(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)
- qc_data = self._QC(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)
+ raw_data = self._timeIndex_process(raw_data)
+
+ raw_data = raw_data.apply(pd.to_numeric, errors='coerce').copy(deep=True)
+ qc_data = self._QC(raw_data).apply(pd.to_numeric, errors='coerce').copy(deep=True)

  return raw_data, qc_data

  def _run(self, user_start, user_end):
  # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
  if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
- self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}", color_part="PICKLE")
+ self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}")

- _f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam)
+ _f_raw_done, _f_qc_done = pd.read_pickle(self.pkl_nam_raw), pd.read_pickle(self.pkl_nam)

  if self.append:
- self.logger.info_box(f"Appending New data from {user_start} to {user_end}", color_part="New data")
+ self.logger.info_box(f"Appending New data from {user_start} to {user_end}")

  _f_raw_new, _f_qc_new = self._read_raw_files()
  _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
@@ -280,7 +369,7 @@

  return _f_qc if self.qc else _f_raw

  else:
- self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}", color_part="RAW DATA")
+ self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}")

  _f_raw, _f_qc = self._read_raw_files()

@@ -292,25 +381,26 @@
  # save
  self._save_data(_f_raw, _f_qc)

- if self.rate:
- self._rate_calculate(_f_raw.apply(to_numeric, errors='coerce'), _f_qc.apply(to_numeric, errors='coerce'))
+ if self.qc:
+ self._rate_calculate(_f_raw.apply(pd.to_numeric, errors='coerce'),
+ _f_qc.apply(pd.to_numeric, errors='coerce'))

  return _f_qc if self.qc else _f_raw

  @staticmethod
- def reorder_dataframe_columns(df, order_lists, others_col=False):
+ def reorder_dataframe_columns(df, order_lists: list[list], keep_others: bool = False):
  new_order = []

  for order in order_lists:
- # Only add columns that exist in the DataFrame, without duplicate additions
+ # Only add columns that exist in the DataFrame and do not add them repeatedly
  new_order.extend([col for col in order if col in df.columns and col not in new_order])

- if others_col:
- # Add all original columns not in the new-order list, preserving their original order
+ if keep_others:
+ # Add all original fields not in the new order list, keeping their original order
  new_order.extend([col for col in df.columns if col not in new_order])

  return df[new_order]

  @staticmethod
  def time_aware_IQR_QC(df: pd.DataFrame, time_window='1D', log_dist=False) -> pd.DataFrame:
- return DataQualityControl().time_aware_iqr(df, time_window=time_window, log_dist=log_dist)
+ return QualityControl().time_aware_iqr(df, time_window=time_window, log_dist=log_dist)
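reorder_dataframe_columns keeps its behavior but gains type hints and renames others_col to keep_others, a breaking change for keyword callers. A quick sketch of the call pattern (the column names are made up; 'status' is silently skipped because it is absent from the frame):

    import pandas as pd
    from AeroViz.rawDataReader.core import AbstractReader

    df = pd.DataFrame(columns=['RH', 'PM_Total', 'noise', 'PM_NV'])
    ordered = AbstractReader.reorder_dataframe_columns(
        df, order_lists=[['PM_NV', 'PM_Total'], ['status']], keep_others=True)
    print(list(ordered.columns))  # ['PM_NV', 'PM_Total', 'RH', 'noise']
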
AeroViz/rawDataReader/core/logger.py
@@ -8,9 +8,10 @@ from pathlib import Path


  class ReaderLogger:
- def __init__(self, name: str, log_path: Path):
+ def __init__(self, name: str, log_path: Path, log_level: str = 'INFO'):
  self.name = name
  self.log_path = log_path
+ self._log_level = getattr(logging, log_level)

  # Check whether color output is supported
  self.color_support = self._check_color_support()
@@ -32,16 +33,16 @@ class ReaderLogger:
  self.RESET = ''

  # Check Unicode support
- self.unicode_support = self._setup_unicode()
+ self.unicode_support = self._check_unicode_support()

  # Set the box-drawing characters
  if self.unicode_support:
- self.BOX_TOP_LEFT = ""
- self.BOX_TOP_RIGHT = ""
- self.BOX_BOTTOM_LEFT = ""
- self.BOX_BOTTOM_RIGHT = ""
- self.BOX_HORIZONTAL = ""
- self.BOX_VERTICAL = ""
+ self.BOX_TOP_LEFT = ""
+ self.BOX_TOP_RIGHT = ""
+ self.BOX_BOTTOM_LEFT = ""
+ self.BOX_BOTTOM_RIGHT = ""
+ self.BOX_HORIZONTAL = ""
+ self.BOX_VERTICAL = ""
  self.ARROW = "▶"
  else:
  self.BOX_TOP_LEFT = "+"
@@ -74,7 +75,7 @@ class ReaderLogger:
  # Other system checks
  return hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()

- def _setup_unicode(self) -> bool:
+ def _check_unicode_support(self) -> bool:
  """Check Unicode support"""
  if platform.system().lower() == 'windows':
  try:
@@ -92,7 +93,7 @@ class ReaderLogger:
  def _setup_logger(self) -> logging.Logger:
  """Set up the logger"""
  logger = logging.getLogger(self.name)
- logger.setLevel(logging.INFO)
+ logger.setLevel(self._log_level)

  # Remove the existing handlers
  for handler in logger.handlers[:]:
@@ -135,6 +136,9 @@ class ReaderLogger:
  text = text.encode('ascii', 'replace').decode('ascii')
  return text

+ def debug(self, msg: str):
+ self.logger.debug(self._safe_print(msg))
+
  def info(self, msg: str):
  self.logger.info(self._safe_print(msg))

AeroViz/rawDataReader/core/qc.py
@@ -2,7 +2,7 @@ import numpy as np
  import pandas as pd


- class DataQualityControl:
+ class QualityControl:
  """A class providing various methods for data quality control and outlier detection"""

  @staticmethod
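The rename from DataQualityControl to QualityControl breaks any external import of the old name; the time_aware_iqr method itself is untouched, as the unchanged wrapper in AbstractReader.time_aware_IQR_QC shows. A sketch of calling it directly, with a synthetic series invented for illustration:

    import numpy as np
    import pandas as pd
    from AeroViz.rawDataReader.core.qc import QualityControl  # was DataQualityControl in 0.1.13

    idx = pd.date_range('2024-01-01', periods=48, freq='h')
    df = pd.DataFrame({'PM_NV': np.random.default_rng(0).normal(20, 3, 48)}, index=idx)
    cleaned = QualityControl().time_aware_iqr(df, time_window='1D', log_dist=False)
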
AeroViz/rawDataReader/script/AE33.py
@@ -8,7 +8,7 @@ class Reader(AbstractReader):

  def _raw_reader(self, file):
  if file.stat().st_size / 1024 < 550:
- self.logger.info(f'\t {file.name} may not be a whole daily data. Make sure the file is correct.')
+ self.logger.warning(f'\t {file.name} may not be a whole daily data. Make sure the file is correct.')

  _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
  delimiter=r'\s+', skiprows=5, usecols=range(67))
AeroViz/rawDataReader/script/BAM1020.py (new file)
@@ -0,0 +1,35 @@
+ from pandas import read_csv, to_numeric, NA
+
+ from AeroViz.rawDataReader.core import AbstractReader
+
+
+ class Reader(AbstractReader):
+ nam = 'BAM1020'
+
+ def _raw_reader(self, file):
+ PM = 'Conc'
+
+ _df = read_csv(file, parse_dates=True, index_col=0, usecols=range(0, 21))
+ _df.rename(columns={'Conc (mg/m3)': PM}, inplace=True)
+
+ # remove data when Conc = 1 or 0
+ _df[PM] = _df[PM].replace(1, NA)
+
+ _df = _df[[PM]].apply(to_numeric, errors='coerce')
+
+ # convert unit from mg/m3 to ug/m3
+ _df = _df * 1000
+
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
+
+ def _QC(self, _df):
+ _index = _df.index.copy()
+
+ # remove negative values
+ _df = _df.mask((_df <= 0) | (_df > 500))
+
+ # use IQR_QC
+ _df = self.time_aware_IQR_QC(_df, time_window='1h')
+
+ # make sure all columns have values, otherwise set to nan
+ return _df.dropna(how='any').reindex(_index)
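The new BAM1020 reader follows the package's two-stage pattern: _raw_reader normalizes one CSV (renaming 'Conc (mg/m3)', dropping the flag value 1, converting mg/m3 to ug/m3), and _QC masks values outside (0, 500] ug/m3 before the time-aware IQR filter. Together with its meta entry in supported_instruments.py (changed above) and the __all__ registration shown in the last hunk below, it should be reachable through the package's RawDataReader entry point; the factory signature sketched here is an assumption, not part of this diff:

    from datetime import datetime
    from AeroViz import RawDataReader  # call signature below is assumed

    pm25 = RawDataReader('BAM1020', 'data/NZ_BAM1020',
                         reset=True, qc=True,
                         start=datetime(2024, 1, 1), end=datetime(2024, 3, 31))
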
AeroViz/rawDataReader/script/NEPH.py
@@ -1,4 +1,4 @@
- from pandas import to_datetime, read_csv, DataFrame, to_numeric
+ from pandas import to_datetime, read_csv, to_numeric

  from AeroViz.rawDataReader.core import AbstractReader

@@ -46,17 +46,13 @@ class Reader(AbstractReader):

  return _df.loc[~_df.index.duplicated() & _df.index.notna()]

- except ValueError:
- # Define valid groups and find invalid indices
+ except ValueError:  # Define valid groups and find invalid indices
  invalid_indices = _df[~_df[0].isin({'B', 'G', 'R', 'D', 'T', 'Y', 'Z'})].index
- print("Invalid values and their indices:")
- print("\n".join([f"Index: {idx}, Value: {_df.at[idx, 0]}" for idx in invalid_indices]))
+ self.logger.warning(
+ f"\tInvalid values in {file.name}: {', '.join(f'{_}:{_df.at[_, 0]}' for _ in invalid_indices)}."
+ f" Skipping file.")

- # Return an empty DataFrame with specified columns if there's a length mismatch
- _df_out = DataFrame(index=_idx_tm, columns=['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH'])
- _df_out.index.name = 'Time'
- print(f'\n\t\t\t Length mismatch in {file} data. Returning an empty DataFrame.')
- return _df_out
+ return None

  def _QC(self, _df):
  MDL_sensitivity = {'B': .1, 'G': .1, 'R': .3}
AeroViz/rawDataReader/script/SMPS.py
@@ -36,7 +36,16 @@ class Reader(AbstractReader):
  skip = find_header_row(f, delimiter)
  f.seek(0)

- _df = read_csv(f, sep=delimiter, skiprows=skip)
+ _df = read_csv(f, sep=delimiter, skiprows=skip, low_memory=False)
+
+ if 'Date' not in _df.columns:  # the data needs to be transposed
+ try:
+ _df = _df.T  # transpose
+ _df.columns = _df.iloc[0]  # use the first row as the column names
+ _df = _df.iloc[1:]  # drop the first row (it has become the header)
+ _df = _df.reset_index(drop=True)  # reset the index
+ except:
+ raise NotImplementedError('Not supported date format')

  for date_format in date_formats:
  _time_index = parse_date(_df, date_format)
@@ -56,9 +65,12 @@ class Reader(AbstractReader):
  _df_smps.columns = _df_smps.columns.astype(float)
  _df_smps = _df_smps.loc[_df_smps.index.dropna().copy()]

- if _df_smps.columns[0] != self.size_range[0] or _df_smps.columns[-1] != self.size_range[1]:
- self.logger.info(f'\tSMPS file: {file.name} is not match the default size range {self.size_range}, '
- f'it is ({_df_smps.columns[0]}, {_df_smps.columns[-1]})')
+ size_range = self.kwargs.get('size_range') or (11.8, 593.5)
+
+ if _df_smps.columns[0] != size_range[0] or _df_smps.columns[-1] != size_range[1]:
+ self.logger.warning(f'\tSMPS file: {file.name} is not match the setting size range {size_range}, '
+ f'it is ({_df_smps.columns[0]}, {_df_smps.columns[-1]}). '
+ f'Please run by another RawDataReader instance, and set the correct size range')
  return None

  return _df_smps.apply(to_numeric, errors='coerce')
@@ -68,8 +80,10 @@ class Reader(AbstractReader):
  _df = _df.copy()
  _index = _df.index.copy()

- size_range_mask = (_df.columns.astype(float) >= self.size_range[0]) & (
- _df.columns.astype(float) <= self.size_range[1])
+ size_range = self.kwargs.get('size_range') or (11.8, 593.5)
+
+ size_range_mask = (_df.columns.astype(float) >= size_range[0]) & (
+ _df.columns.astype(float) <= size_range[1])
  _df = _df.loc[:, size_range_mask]

  # mask out the data size lower than 7
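In 0.1.13, size_range was set in AbstractReader.__init__ with a default of (11.8, 593.5); it now comes from self.kwargs at call time, and a file whose bin range disagrees is skipped with a warning telling the user to read it with a separate RawDataReader instance configured for that range. A sketch of passing a non-default range through the constructor's **kwargs (the path and range values are hypothetical):

    from AeroViz.rawDataReader.script.SMPS import Reader

    smps = Reader(path='data/NZ_SMPS', size_range=(10.9, 593.5),
                  log_level='INFO')  # log_level appears required by the new ReaderLogger call
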
AeroViz/rawDataReader/script/TEOM.py
@@ -1,3 +1,4 @@
+ import pandas as pd
  from pandas import to_datetime, read_csv, Timedelta, to_numeric

  from AeroViz.rawDataReader.core import AbstractReader
@@ -7,9 +8,9 @@ class Reader(AbstractReader):
  nam = 'TEOM'

  def _raw_reader(self, file):
- with open(file, 'r', encoding='utf-8', errors='ignore') as f:
- _df = read_csv(f, skiprows=3, index_col=False)
+ _df = read_csv(file, skiprows=3, index_col=False)

+ if 'Time Stamp' in _df.columns:  # remote download
  _df = _df.rename(columns={'Time Stamp': 'time',
  'System status': 'status',
  'PM-2.5 base MC': 'PM_NV',
@@ -25,8 +26,19 @@ class Reader(AbstractReader):

  _df = _df.set_index(to_datetime(_tm_idx, errors='coerce', format='%d - %m - %Y %X'))

- _df = _df.where(_df['status'] < 1)
+ elif 'tmoStatusCondition_0' in _df.columns:  # usb download
+ _df['time'] = pd.to_datetime(_df['Date'] + ' ' + _df['Time'], errors='coerce', format='%Y-%m-%d %H:%M:%S')
+ _df.drop(columns=['Date', 'Time'], inplace=True)
+ _df.set_index('time', inplace=True)

+ _df = _df.rename(columns={'tmoStatusCondition_0': 'status',
+ 'tmoTEOMABaseMC_0': 'PM_NV',
+ 'tmoTEOMAMC_0': 'PM_Total',
+ 'tmoTEOMANoise_0': 'noise', })
+ else:
+ raise NotImplementedError
+
+ _df = _df.where(_df['status'] < 1)
  _df = _df[['PM_NV', 'PM_Total', 'noise']].apply(to_numeric, errors='coerce')

  return _df.loc[~_df.index.duplicated() & _df.index.notna()]
AeroViz/rawDataReader/script/__init__.py
@@ -8,6 +8,7 @@ __all__ = [
  'AE43',
  'BC1054',
  'MA350',
+ 'BAM1020',
  'TEOM',
  'OCEC',
  'IGAC',