AeroViz 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AeroViz might be problematic. Click here for more details.

Files changed (83)
  1. AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
  2. AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
  3. AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
  4. AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
  5. AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
  6. AeroViz/dataProcess/Optical/_absorption.py +2 -0
  7. AeroViz/dataProcess/Optical/_scattering.py +1 -1
  8. AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
  9. AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
  10. AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
  11. AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
  12. AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
  13. AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
  14. AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
  15. AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
  16. AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
  17. AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
  18. AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
  19. AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
  20. AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
  21. AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
  22. AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
  23. AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
  24. AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
  25. AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
  26. AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
  27. AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
  28. AeroViz/plot/templates/__init__.py +1 -1
  29. AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
  30. AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
  31. AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
  32. AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
  33. AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
  34. AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
  35. AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
  36. AeroViz/plot/templates/corr_matrix.py +168 -2
  37. AeroViz/plot/templates/metal_heatmap.py +15 -6
  38. AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
  39. AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
  40. AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
  41. AeroViz/plot/timeseries/timeseries.py +96 -52
  42. AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  43. AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
  44. AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
  45. AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
  46. AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
  47. AeroViz/rawDataReader/__init__.py +35 -5
  48. AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
  49. AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
  50. AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
  51. AeroViz/rawDataReader/core/__init__.py +185 -44
  52. AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
  53. AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
  54. AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
  55. AeroViz/rawDataReader/core/logger.py +9 -9
  56. AeroViz/rawDataReader/script/SMPS.py +9 -0
  57. AeroViz/rawDataReader/script/TEOM.py +54 -17
  58. AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
  59. AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
  60. AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
  61. AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
  62. AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
  63. AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
  64. AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
  65. AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
  66. AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
  67. AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
  68. AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
  69. AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
  70. AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
  71. AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
  72. AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
  73. AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
  74. AeroViz/rawDataReader/script/__pycache__/XRF.cpython-312.pyc +0 -0
  75. AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
  76. AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  77. AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
  78. AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
  79. {AeroViz-0.1.14.dist-info → AeroViz-0.1.16.dist-info}/METADATA +10 -12
  80. {AeroViz-0.1.14.dist-info → AeroViz-0.1.16.dist-info}/RECORD +83 -83
  81. {AeroViz-0.1.14.dist-info → AeroViz-0.1.16.dist-info}/WHEEL +1 -1
  82. {AeroViz-0.1.14.dist-info → AeroViz-0.1.16.dist-info}/LICENSE +0 -0
  83. {AeroViz-0.1.14.dist-info → AeroViz-0.1.16.dist-info}/top_level.txt +0 -0
@@ -19,10 +19,10 @@ SIZE_RANGE_INSTRUMENTS = ['SMPS', 'APS', 'GRIMM']
19
19
 
20
20
  def RawDataReader(instrument: str,
21
21
  path: Path | str,
22
- reset: bool = False,
22
+ reset: bool | str = False,
23
23
  qc: bool | str = True,
24
- start: datetime = None,
25
- end: datetime = None,
24
+ start: datetime | str = None,
25
+ end: datetime | str = None,
26
26
  mean_freq: str = '1h',
27
27
  size_range: tuple[float, float] | None = None,
28
28
  suppress_warnings: bool = False,
@@ -94,9 +94,22 @@ def RawDataReader(instrument: str,
94
94
 
95
95
  Examples
96
96
  --------
97
+ >>> from AeroViz import RawDataReader
98
+ >>>
99
+ >>> # Using string inputs
100
+ >>> df_ae33 = RawDataReader(
101
+ ... instrument='AE33',
102
+ ... path='/path/to/your/data/folder',
103
+ ... reset=True,
104
+ ... qc='1MS',
105
+ ... start='2024-01-01',
106
+ ... end='2024-06-30',
107
+ ... mean_freq='1h',
108
+ ... )
109
+
110
+ >>> # Using Path and datetime objects
97
111
  >>> from pathlib import Path
98
112
  >>> from datetime import datetime
99
- >>> from AeroViz import RawDataReader
100
113
  >>>
101
114
  >>> df_ae33 = RawDataReader(
102
115
  ... instrument='AE33',
@@ -130,9 +143,26 @@ def RawDataReader(instrument: str,
130
143
  raise ValueError(f"Invalid frequency: {qc}. Must be one of: "
131
144
  f"W (week), MS (month start), QS (quarter start), YS (year start)")
132
145
 
133
- # Verify input times
146
+ # Convert and verify input times
134
147
  if not (start and end):
135
148
  raise ValueError("Both start and end times must be provided.")
149
+
150
+ # Convert start time if it's a string
151
+ if isinstance(start, str):
152
+ try:
153
+ start = datetime.fromisoformat(start.replace('Z', '+00:00'))
154
+ except ValueError as e:
155
+ raise ValueError(
156
+ f"Invalid start time format. Please use ISO format (YYYY-MM-DD or YYYY-MM-DD HH:MM:SS): {e}")
157
+
158
+ # Convert end time if it's a string
159
+ if isinstance(end, str):
160
+ try:
161
+ end = datetime.fromisoformat(end.replace('Z', '+00:00'))
162
+ except ValueError as e:
163
+ raise ValueError(
164
+ f"Invalid end time format. Please use ISO format (YYYY-MM-DD or YYYY-MM-DD HH:MM:SS): {e}")
165
+
136
166
  if end <= start:
137
167
  raise ValueError(f"Invalid time range: start {start} is after end {end}")
138
168
 
@@ -8,7 +8,7 @@ from typing import Generator
8
8
  import numpy as np
9
9
  import pandas as pd
10
10
  from rich.console import Console
11
- from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn
11
+ from rich.progress import Progress, TextColumn, BarColumn, SpinnerColumn, TaskProgressColumn
12
12
 
13
13
  from AeroViz.rawDataReader.config.supported_instruments import meta
14
14
  from AeroViz.rawDataReader.core.logger import ReaderLogger
@@ -55,6 +55,7 @@ class AbstractReader(ABC):
55
55
  self.pkl_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.pkl'
56
56
  self.csv_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.csv'
57
57
  self.csv_out = output_folder / f'output_{self.nam.lower()}.csv'
58
+ self.report_out = output_folder / 'report.json'
58
59
 
59
60
  def __call__(self,
60
61
  start: datetime,
@@ -79,55 +80,180 @@ class AbstractReader(ABC):
79
80
  def _QC(self, df: pd.DataFrame) -> pd.DataFrame:
80
81
  return df
81
82
 
82
- def _rate_calculate(self, raw_data, qc_data) -> None:
83
- def __base_rate(raw_data, qc_data):
84
- period_size = len(raw_data.resample('1h').mean().index)
83
+ def __calculate_rates(self, raw_data, qc_data, all_keys=False, with_log=False):
84
+ """計算獲取率、良率和總比率
85
85
 
86
- for _nam, _key in self.meta['deter_key'].items():
87
- _columns_key, _drop_how = (qc_data.keys(), 'all') if _key == ['all'] else (_key, 'any')
86
+ Args:
87
+ raw_data: 原始數據
88
+ qc_data: QC後的數據
89
+ all_keys: 是否計算所有 deter_key
90
+ with_log: 是否輸出計算日誌
91
+ """
92
+ if raw_data.empty or qc_data.empty:
93
+ return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
88
94
 
89
- sample_size = len(raw_data[_columns_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
90
- qc_size = len(qc_data[_columns_key].resample('1h').mean().copy().dropna(how=_drop_how).index)
95
+ def _calculate_single_key(key_name, key_columns):
96
+ columns, drop_how = (qc_data.keys(), 'all') if key_columns == ['all'] else (key_columns, 'any')
91
97
 
92
- # validate rate calculation
93
- if period_size == 0 or sample_size == 0 or qc_size == 0:
98
+ # 重採樣並計算有效數據量
99
+ period_size = len(raw_data.resample('1h').mean().index)
100
+ sample_size = len(raw_data[columns].resample('1h').mean().dropna(how=drop_how).index)
101
+ qc_size = len(qc_data[columns].resample('1h').mean().dropna(how=drop_how).index)
102
+
103
+ # 驗證計算
104
+ if any([
105
+ period_size == 0 or sample_size == 0 or qc_size == 0,
106
+ period_size < sample_size,
107
+ sample_size < qc_size
108
+ ]):
109
+ if with_log:
94
110
  self.logger.warning(f'\t\t No data for this period... skip')
95
- continue
96
- if period_size < sample_size:
97
- self.logger.warning(f'\t\tError: Sample({sample_size}) > Period({period_size})... skip')
98
- continue
99
- if sample_size < qc_size:
100
- self.logger.warning(f'\t\tError: QC({qc_size}) > Sample({sample_size})... skip')
101
- continue
102
-
103
- else:
104
- _sample_rate = round((sample_size / period_size) * 100, 1)
105
- _valid_rate = round((qc_size / sample_size) * 100, 1)
106
- _total_rate = round((qc_size / period_size) * 100, 1)
107
-
108
- self.logger.info(f"\t\t{self.logger.CYAN}{self.logger.ARROW} {_nam}{self.logger.RESET}")
111
+ return None
112
+
113
+ # 計算比率
114
+ sample_rate = round((sample_size / period_size) * 100, 1)
115
+ valid_rate = round((qc_size / sample_size) * 100, 1)
116
+ total_rate = round((qc_size / period_size) * 100, 1)
117
+
118
+ if with_log:
119
+ self.logger.info(f"\t\t> {key_name}")
109
120
  self.logger.info(
110
- f"\t\t\t├─ {'Sample Rate':15}: {self.logger.BLUE}{_sample_rate:>6.1f}%{self.logger.RESET}")
121
+ f"\t\t\t> {'Sample Rate':13}: {self.logger.BLUE}{sample_rate:>6.1f}%{self.logger.RESET}")
111
122
  self.logger.info(
112
- f"\t\t\t├─ {'Valid Rate':15}: {self.logger.BLUE}{_valid_rate:>6.1f}%{self.logger.RESET}")
123
+ f"\t\t\t> {'Valid Rate':13}: {self.logger.BLUE}{valid_rate:>6.1f}%{self.logger.RESET}")
113
124
  self.logger.info(
114
- f"\t\t\t└─ {'Total Rate':15}: {self.logger.BLUE}{_total_rate:>6.1f}%{self.logger.RESET}")
125
+ f"\t\t\t> {'Total Rate':13}: {self.logger.BLUE}{total_rate:>6.1f}%{self.logger.RESET}")
126
+
127
+ return {
128
+ 'acquisition_rate': sample_rate,
129
+ 'yield_rate': valid_rate,
130
+ 'total_rate': total_rate
131
+ }
132
+
133
+ if all_keys:
134
+ # 計算所有 key 並回傳所有結果(用於日誌輸出)
135
+ all_results = []
136
+ for name, columns in self.meta['deter_key'].items():
137
+ result = _calculate_single_key(name, columns)
138
+ if result:
139
+ all_results.append(result)
140
+
141
+ if not all_results:
142
+ return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
143
+
144
+ # 回傳所有結果中比率最低的
145
+ return {
146
+ 'acquisition_rate': min(r['acquisition_rate'] for r in all_results),
147
+ 'yield_rate': min(r['yield_rate'] for r in all_results),
148
+ 'total_rate': min(r['total_rate'] for r in all_results)
149
+ }
150
+ else:
151
+ # 計算所有 key 但只回傳最低的比率
152
+ min_rates = {'acquisition_rate': 200, 'yield_rate': 200, 'total_rate': 200}
115
153
 
154
+ for name, columns in self.meta['deter_key'].items():
155
+ result = _calculate_single_key(name, columns)
156
+ if result:
157
+ min_rates['acquisition_rate'] = min(min_rates['acquisition_rate'], result['acquisition_rate'])
158
+ min_rates['yield_rate'] = min(min_rates['yield_rate'], result['yield_rate'])
159
+ min_rates['total_rate'] = min(min_rates['total_rate'], result['total_rate'])
160
+
161
+ # 如果沒有任何有效結果,回傳 0
162
+ if min_rates['acquisition_rate'] == 200 and min_rates['yield_rate'] == 200:
163
+ return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
164
+
165
+ return min_rates
166
+
167
+ def _rate_calculate(self, raw_data, qc_data) -> None:
116
168
  if self.meta['deter_key'] is not None:
117
- # use qc_freq to calculate each period rate
118
169
  if self.qc_freq is not None:
119
170
  raw_data_grouped = raw_data.groupby(pd.Grouper(freq=self.qc_freq))
120
171
  qc_data_grouped = qc_data.groupby(pd.Grouper(freq=self.qc_freq))
121
172
 
122
173
  for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped):
123
174
  self.logger.info(
124
- f"\t{self.logger.BLUE}{self.logger.ARROW} Processing: {_sub_raw_data.index[0].strftime('%F')}"
175
+ f"\t{self.logger.BLUE}> Processing: {_sub_raw_data.index[0].strftime('%F')}"
125
176
  f" to {_sub_raw_data.index[-1].strftime('%F')}{self.logger.RESET}")
126
177
 
127
- __base_rate(_sub_raw_data, _sub_qc_data)
128
-
178
+ self.__calculate_rates(_sub_raw_data, _sub_qc_data, all_keys=True, with_log=True)
129
179
  else:
130
- __base_rate(raw_data, qc_data)
180
+ self.__calculate_rates(raw_data, qc_data, all_keys=True, with_log=True)
181
+
182
+ # 使用 Grouper 對數據按週和月進行分組
183
+ current_time = datetime.now()
184
+
185
+ # 按週分組 (使用星期一作為每週的開始)
186
+ weekly_raw_groups = raw_data.groupby(pd.Grouper(freq='W-MON'))
187
+ weekly_qc_groups = qc_data.groupby(pd.Grouper(freq='W-MON'))
188
+
189
+ # 按月分組 (使用月初作為每月的開始)
190
+ monthly_raw_groups = raw_data.groupby(pd.Grouper(freq='MS'))
191
+ monthly_qc_groups = qc_data.groupby(pd.Grouper(freq='MS'))
192
+
193
+ # 生成報告
194
+ self.__generate_grouped_report(
195
+ current_time,
196
+ weekly_raw_groups, weekly_qc_groups,
197
+ monthly_raw_groups, monthly_qc_groups
198
+ )
199
+
200
+ def __generate_grouped_report(self, current_time, weekly_raw_groups, weekly_qc_groups,
201
+ monthly_raw_groups, monthly_qc_groups):
202
+ """生成基於分組數據的獲取率和良率報告"""
203
+ report = {
204
+ "report_time": current_time.strftime('%Y-%m-%d %H:%M:%S'),
205
+ "instrument_info": {
206
+ "station": self.path.name[:2],
207
+ "instrument": self.nam
208
+ },
209
+ "rates": {
210
+ "weekly": {},
211
+ "monthly": {}
212
+ }
213
+ }
214
+
215
+ # 處理週數據 - 使用標準週時間範圍
216
+ for week_start, week_raw_data in weekly_raw_groups:
217
+ # 獲取對應的QC數據
218
+ week_qc_data = weekly_qc_groups.get_group(
219
+ week_start) if week_start in weekly_qc_groups.groups else pd.DataFrame()
220
+
221
+ if not week_raw_data.empty:
222
+ # 計算標準週結束時間(週日23:59:59)
223
+ week_end = week_start + pd.Timedelta(days=6, hours=23, minutes=59, seconds=59)
224
+
225
+ # 使用週的開始日期作為鍵
226
+ period_key = week_start.strftime('%Y-%m-%d')
227
+
228
+ report["rates"]["weekly"][period_key] = {
229
+ "start_time": week_start.strftime('%Y-%m-%d %H:%M:%S'),
230
+ "end_time": week_end.strftime('%Y-%m-%d %H:%M:%S'),
231
+ "rates": self.__calculate_rates(week_raw_data, week_qc_data)
232
+ }
233
+
234
+ # 處理月數據 - 使用標準月時間範圍
235
+ for month_start, month_raw_data in monthly_raw_groups:
236
+ # 獲取對應的QC數據
237
+ month_qc_data = monthly_qc_groups.get_group(
238
+ month_start) if month_start in monthly_qc_groups.groups else pd.DataFrame()
239
+
240
+ if not month_raw_data.empty:
241
+ # 計算標準月結束時間(月末23:59:59)
242
+ next_month_start = (month_start + pd.Timedelta(days=32)).replace(day=1)
243
+ month_end = next_month_start - pd.Timedelta(seconds=1)
244
+
245
+ # 使用月份作為鍵
246
+ period_key = month_start.strftime('%Y-%m')
247
+
248
+ report["rates"]["monthly"][period_key] = {
249
+ "start_time": month_start.strftime('%Y-%m-%d %H:%M:%S'),
250
+ "end_time": month_end.strftime('%Y-%m-%d %H:%M:%S'),
251
+ "rates": self.__calculate_rates(month_raw_data, month_qc_data)
252
+ }
253
+
254
+ # 寫入報告
255
+ with open(self.report_out, 'w') as f:
256
+ json.dump(report, f, indent=4)
131
257
 
132
258
  def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None):
133
259
  """
@@ -157,13 +283,28 @@ class AbstractReader(ABC):
157
283
  # Create new time index
158
284
  new_index = pd.date_range(user_start or df_start, user_end or df_end, freq=freq, name='time')
159
285
 
160
- # Process data: convert to numeric, resample, and reindex
286
+ # Process data: convert to numeric, resample, and reindex with controlled tolerance
161
287
  if freq in ['1min', 'min', 'T']:
162
- return _df.reindex(new_index, method='nearest', tolerance='1min')
288
+ # 對於分鐘級數據,使用較小的tolerance,例如30秒
289
+ return _df.reindex(new_index, method='nearest', tolerance='30s')
163
290
  elif freq in ['1h', 'h', 'H']:
164
- return _df.reindex(new_index, method='nearest', tolerance='1h')
291
+ # 對於小時級數據,使用30分鐘作為tolerance
292
+ # 這樣08:20會匹配到08:00,但不會匹配到09:00
293
+ return _df.reindex(new_index, method='nearest', tolerance='30min')
165
294
  else:
166
- return _df.reindex(new_index, method='nearest', tolerance=freq)
295
+ # 對於其他頻率,tolerance設置為頻率的一半
296
+ if isinstance(freq, str) and freq[-1].isalpha():
297
+ # 如果freq格式為'數字+單位',例如'2h','3min'
298
+ try:
299
+ num = int(freq[:-1])
300
+ unit = freq[-1]
301
+ half_freq = f"{num // 2}{unit}" if num > 1 else f"30{'min' if unit == 'h' else 's'}"
302
+ return _df.reindex(new_index, method='nearest', tolerance=half_freq)
303
+ except ValueError:
304
+ # 無法解析freq,使用默認值
305
+ return _df.reindex(new_index, method='nearest', tolerance=freq)
306
+ else:
307
+ return _df.reindex(new_index, method='nearest', tolerance=freq)
167
308
 
168
309
  def _outlier_process(self, _df):
169
310
  outlier_file = self.path / 'outlier.json'
@@ -202,15 +343,15 @@ class AbstractReader(ABC):
202
343
 
203
344
  try:
204
345
  with Progress(
205
- TextColumn("[bold blue]{task.description}", style="bold blue"),
346
+ SpinnerColumn(finished_text=""),
206
347
  BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
207
- TaskProgressColumn(),
208
- TimeRemainingColumn(),
209
- TextColumn("{task.fields[filename]}", style="yellow"),
348
+ TaskProgressColumn(style="bold", text_format="[bright_green]{task.percentage:>3.0f}%"),
349
+ TextColumn("{task.description}", style="bold blue"),
350
+ TextColumn("{task.fields[filename]}", style="bold blue"),
210
351
  console=Console(force_terminal=True, color_system="auto", width=120),
211
352
  expand=False
212
353
  ) as progress:
213
- task = progress.add_task(f"{self.logger.ARROW} Reading {self.nam} files", total=len(files), filename="")
354
+ task = progress.add_task(f"Reading {self.nam} files:", total=len(files), filename="")
214
355
  yield progress, task
215
356
  finally:
216
357
  # Restore logger method and output message
@@ -262,12 +403,12 @@ class AbstractReader(ABC):
262
403
  def _run(self, user_start, user_end):
263
404
  # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
264
405
  if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
265
- self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}", color_part="PICKLE")
406
+ self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}")
266
407
 
267
408
  _f_raw_done, _f_qc_done = pd.read_pickle(self.pkl_nam_raw), pd.read_pickle(self.pkl_nam)
268
409
 
269
410
  if self.append:
270
- self.logger.info_box(f"Appending New data from {user_start} to {user_end}", color_part="New data")
411
+ self.logger.info_box(f"Appending New data from {user_start} to {user_end}")
271
412
 
272
413
  _f_raw_new, _f_qc_new = self._read_raw_files()
273
414
  _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
@@ -279,7 +420,7 @@ class AbstractReader(ABC):
279
420
  return _f_qc if self.qc else _f_raw
280
421
 
281
422
  else:
282
- self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}", color_part="RAW DATA")
423
+ self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}")
283
424
 
284
425
  _f_raw, _f_qc = self._read_raw_files()
285
426
 
@@ -8,7 +8,7 @@ from pathlib import Path
8
8
 
9
9
 
10
10
  class ReaderLogger:
11
- def __init__(self, name: str, log_path: Path, log_level: str = 'WARNING'):
11
+ def __init__(self, name: str, log_path: Path, log_level: str = 'INFO'):
12
12
  self.name = name
13
13
  self.log_path = log_path
14
14
  self._log_level = getattr(logging, log_level)
@@ -33,16 +33,16 @@ class ReaderLogger:
33
33
  self.RESET = ''
34
34
 
35
35
  # 檢查 Unicode 支持
36
- self.unicode_support = self._setup_unicode()
36
+ self.unicode_support = self._check_unicode_support()
37
37
 
38
38
  # 設置框架字符
39
39
  if self.unicode_support:
40
- self.BOX_TOP_LEFT = ""
41
- self.BOX_TOP_RIGHT = ""
42
- self.BOX_BOTTOM_LEFT = ""
43
- self.BOX_BOTTOM_RIGHT = ""
44
- self.BOX_HORIZONTAL = ""
45
- self.BOX_VERTICAL = ""
40
+ self.BOX_TOP_LEFT = ""
41
+ self.BOX_TOP_RIGHT = ""
42
+ self.BOX_BOTTOM_LEFT = ""
43
+ self.BOX_BOTTOM_RIGHT = ""
44
+ self.BOX_HORIZONTAL = ""
45
+ self.BOX_VERTICAL = ""
46
46
  self.ARROW = "▶"
47
47
  else:
48
48
  self.BOX_TOP_LEFT = "+"
@@ -75,7 +75,7 @@ class ReaderLogger:
75
75
  # 其他系統檢查
76
76
  return hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
77
77
 
78
- def _setup_unicode(self) -> bool:
78
+ def _check_unicode_support(self) -> bool:
79
79
  """設置 Unicode 支持"""
80
80
  if platform.system().lower() == 'windows':
81
81
  try:
@@ -38,6 +38,15 @@ class Reader(AbstractReader):
38
38
 
39
39
  _df = read_csv(f, sep=delimiter, skiprows=skip, low_memory=False)
40
40
 
41
+ if 'Date' not in _df.columns: # 資料需要轉置
42
+ try:
43
+ _df = _df.T # 轉置
44
+ _df.columns = _df.iloc[0] # 使用第一列作為欄位名稱
45
+ _df = _df.iloc[1:] # 移除第一列(因為已經變成欄位名稱)
46
+ _df = _df.reset_index(drop=True) # 重設索引
47
+ except:
48
+ raise NotImplementedError('Not supported date format')
49
+
41
50
  for date_format in date_formats:
42
51
  _time_index = parse_date(_df, date_format)
43
52
  if not _time_index.isna().all():
@@ -7,40 +7,77 @@ from AeroViz.rawDataReader.core import AbstractReader
7
7
  class Reader(AbstractReader):
8
8
  nam = 'TEOM'
9
9
 
10
+ # TEOM Output Data Formats
11
+ # There are three data formats from TEOM instrument output:
12
+ #
13
+ # 1. Remote download format
14
+ # - Identified by 'Time Stamp' column
15
+ # - Date format: 'DD - MM - YYYY HH:MM:SS'
16
+ # - May contain Chinese month names requiring conversion
17
+ # - Maps columns: Time Stamp → time, System status → status,
18
+ # PM-2.5 base MC → PM_NV, PM-2.5 MC → PM_Total, PM-2.5 TEOM noise → noise
19
+ #
20
+ # 2. USB download or auto export format
21
+ # - Identified by 'tmoStatusCondition_0' column
22
+ # - Two possible time formats:
23
+ # a) Standard: 'Date' and 'Time' columns (YYYY-MM-DD HH:MM:SS)
24
+ # b) Alternative: 'time_stamp' column (similar to remote format)
25
+ # - Maps columns: tmoStatusCondition_0 → status, tmoTEOMABaseMC_0 → PM_NV,
26
+ # tmoTEOMAMC_0 → PM_Total, tmoTEOMANoise_0 → noise
27
+ #
28
+ # 3. Other formats
29
+ # - Not implemented, raises NotImplementedError
30
+ #
31
+ # All formats are standardized to the same column names with timestamp as index
32
+
10
33
  def _raw_reader(self, file):
34
+ # TEOM Data Format Handler
35
+ # Standardizes different TEOM data formats (remote download and USB/auto export)
11
36
  _df = read_csv(file, skiprows=3, index_col=False)
12
37
 
13
- if 'Time Stamp' in _df.columns: # remote download
14
- _df = _df.rename(columns={'Time Stamp': 'time',
15
- 'System status': 'status',
16
- 'PM-2.5 base MC': 'PM_NV',
17
- 'PM-2.5 MC': 'PM_Total',
18
- 'PM-2.5 TEOM noise': 'noise', })
19
-
20
- _time_replace = {'十一月': '11', '十二月': '12', '一月': '01', '二月': '02', '三月': '03', '四月': '04',
21
- '五月': '05', '六月': '06', '七月': '07', '八月': '08', '九月': '09', '十月': '10'}
22
-
38
+ # Chinese month name conversion dictionary
39
+ _time_replace = {'十一月': '11', '十二月': '12', '一月': '01', '二月': '02', '三月': '03', '四月': '04',
40
+ '五月': '05', '六月': '06', '七月': '07', '八月': '08', '九月': '09', '十月': '10'}
41
+
42
+ # Try both naming conventions (will ignore columns that don't exist)
43
+ _df = _df.rename(columns={
44
+ # Remote download format
45
+ 'Time Stamp': 'time',
46
+ 'System status': 'status',
47
+ 'PM-2.5 base MC': 'PM_NV',
48
+ 'PM-2.5 MC': 'PM_Total',
49
+ 'PM-2.5 TEOM noise': 'noise',
50
+ # USB/auto export format
51
+ 'time_stamp': 'time',
52
+ 'tmoStatusCondition_0': 'status',
53
+ 'tmoTEOMABaseMC_0': 'PM_NV',
54
+ 'tmoTEOMAMC_0': 'PM_Total',
55
+ 'tmoTEOMANoise_0': 'noise'
56
+ })
57
+
58
+ # Handle different time formats
59
+ if 'time' in _df.columns: # Remote download or auto export with time column
23
60
  _tm_idx = _df.time
61
+ # Convert Chinese month names if present
24
62
  for _ori, _rpl in _time_replace.items():
25
63
  _tm_idx = _tm_idx.str.replace(_ori, _rpl)
26
64
 
27
65
  _df = _df.set_index(to_datetime(_tm_idx, errors='coerce', format='%d - %m - %Y %X'))
28
66
 
29
- elif 'tmoStatusCondition_0' in _df.columns: # usb download
30
- _df['time'] = pd.to_datetime(_df['Date'] + ' ' + _df['Time'], errors='coerce', format='%Y-%m-%d %H:%M:%S')
67
+ elif 'Date' in _df.columns and 'Time' in _df.columns: # USB download format
68
+ _df['time'] = pd.to_datetime(_df['Date'] + ' ' + _df['Time'],
69
+ errors='coerce', format='%Y-%m-%d %H:%M:%S')
31
70
  _df.drop(columns=['Date', 'Time'], inplace=True)
32
71
  _df.set_index('time', inplace=True)
33
72
 
34
- _df = _df.rename(columns={'tmoStatusCondition_0': 'status',
35
- 'tmoTEOMABaseMC_0': 'PM_NV',
36
- 'tmoTEOMAMC_0': 'PM_Total',
37
- 'tmoTEOMANoise_0': 'noise', })
38
73
  else:
39
- raise NotImplementedError
74
+ raise NotImplementedError("Unsupported TEOM data format")
40
75
 
76
+ # Filter and clean data
41
77
  _df = _df.where(_df['status'] < 1)
42
78
  _df = _df[['PM_NV', 'PM_Total', 'noise']].apply(to_numeric, errors='coerce')
43
79
 
80
+ # Remove duplicates and NaN indices
44
81
  return _df.loc[~_df.index.duplicated() & _df.index.notna()]
45
82
 
46
83
  # QC data