AeroViz-0.1.14-py3-none-any.whl → AeroViz-0.1.16-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of AeroViz might be problematic.
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/_absorption.py +2 -0
- AeroViz/dataProcess/Optical/_scattering.py +1 -1
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__init__.py +1 -1
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/templates/corr_matrix.py +168 -2
- AeroViz/plot/templates/metal_heatmap.py +15 -6
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/timeseries.py +96 -52
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/__init__.py +35 -5
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__init__.py +185 -44
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +9 -9
- AeroViz/rawDataReader/script/SMPS.py +9 -0
- AeroViz/rawDataReader/script/TEOM.py +54 -17
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/XRF.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- {AeroViz-0.1.14.dist-info → AeroViz-0.1.16.dist-info}/METADATA +10 -12
- {AeroViz-0.1.14.dist-info → AeroViz-0.1.16.dist-info}/RECORD +83 -83
- {AeroViz-0.1.14.dist-info → AeroViz-0.1.16.dist-info}/WHEEL +1 -1
- {AeroViz-0.1.14.dist-info → AeroViz-0.1.16.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.14.dist-info → AeroViz-0.1.16.dist-info}/top_level.txt +0 -0
AeroViz/rawDataReader/__init__.py

@@ -19,10 +19,10 @@ SIZE_RANGE_INSTRUMENTS = ['SMPS', 'APS', 'GRIMM']
 
 def RawDataReader(instrument: str,
                   path: Path | str,
-                  reset: bool = False,
+                  reset: bool | str = False,
                   qc: bool | str = True,
-                  start: datetime = None,
-                  end: datetime = None,
+                  start: datetime | str = None,
+                  end: datetime | str = None,
                   mean_freq: str = '1h',
                   size_range: tuple[float, float] | None = None,
                   suppress_warnings: bool = False,
@@ -94,9 +94,22 @@ def RawDataReader(instrument: str,
 
     Examples
     --------
+    >>> from AeroViz import RawDataReader
+    >>>
+    >>> # Using string inputs
+    >>> df_ae33 = RawDataReader(
+    ...     instrument='AE33',
+    ...     path='/path/to/your/data/folder',
+    ...     reset=True,
+    ...     qc='1MS',
+    ...     start='2024-01-01',
+    ...     end='2024-06-30',
+    ...     mean_freq='1h',
+    ... )
+
+    >>> # Using Path and datetime objects
     >>> from pathlib import Path
     >>> from datetime import datetime
-    >>> from AeroViz import RawDataReader
    >>>
     >>> df_ae33 = RawDataReader(
     ...     instrument='AE33',
@@ -130,9 +143,26 @@ def RawDataReader(instrument: str,
         raise ValueError(f"Invalid frequency: {qc}. Must be one of: "
                          f"W (week), MS (month start), QS (quarter start), YS (year start)")
 
-    #
+    # Convert and verify input times
     if not (start and end):
         raise ValueError("Both start and end times must be provided.")
+
+    # Convert start time if it's a string
+    if isinstance(start, str):
+        try:
+            start = datetime.fromisoformat(start.replace('Z', '+00:00'))
+        except ValueError as e:
+            raise ValueError(
+                f"Invalid start time format. Please use ISO format (YYYY-MM-DD or YYYY-MM-DD HH:MM:SS): {e}")
+
+    # Convert end time if it's a string
+    if isinstance(end, str):
+        try:
+            end = datetime.fromisoformat(end.replace('Z', '+00:00'))
+        except ValueError as e:
+            raise ValueError(
+                f"Invalid end time format. Please use ISO format (YYYY-MM-DD or YYYY-MM-DD HH:MM:SS): {e}")
+
     if end <= start:
         raise ValueError(f"Invalid time range: start {start} is after end {end}")
 
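A quick standalone check of the accepted string formats (a sketch, not part of the package): `datetime.fromisoformat` only accepts a trailing 'Z' from Python 3.11 onwards, which is why the code above first rewrites it to the equivalent '+00:00' offset.

```python
from datetime import datetime

# Mirrors the conversion in RawDataReader: a trailing 'Z' becomes '+00:00'
# so that fromisoformat can parse it on Python versions before 3.11 too.
for ts in ('2024-01-01', '2024-06-30 23:00:00', '2024-06-30T23:00:00Z'):
    print(datetime.fromisoformat(ts.replace('Z', '+00:00')))
```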
AeroViz/rawDataReader/core/__init__.py

@@ -8,7 +8,7 @@ from typing import Generator
 import numpy as np
 import pandas as pd
 from rich.console import Console
-from rich.progress import Progress, TextColumn, BarColumn,
+from rich.progress import Progress, TextColumn, BarColumn, SpinnerColumn, TaskProgressColumn
 
 from AeroViz.rawDataReader.config.supported_instruments import meta
 from AeroViz.rawDataReader.core.logger import ReaderLogger
@@ -55,6 +55,7 @@ class AbstractReader(ABC):
         self.pkl_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.pkl'
         self.csv_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.csv'
         self.csv_out = output_folder / f'output_{self.nam.lower()}.csv'
+        self.report_out = output_folder / 'report.json'
 
     def __call__(self,
                  start: datetime,
@@ -79,55 +80,180 @@ class AbstractReader(ABC):
     def _QC(self, df: pd.DataFrame) -> pd.DataFrame:
         return df
 
-    def
-        period_size = len(raw_data.resample('1h').mean().index)
+    def __calculate_rates(self, raw_data, qc_data, all_keys=False, with_log=False):
+        """Compute the acquisition rate, yield rate and total rate.
 
+        Args:
+            raw_data: the raw data
+            qc_data: the data after QC
+            all_keys: whether to compute every deter_key
+            with_log: whether to log the computed rates
+        """
+        if raw_data.empty or qc_data.empty:
+            return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
 
+        def _calculate_single_key(key_name, key_columns):
+            columns, drop_how = (qc_data.keys(), 'all') if key_columns == ['all'] else (key_columns, 'any')
 
+            # Resample and count the valid data points
+            period_size = len(raw_data.resample('1h').mean().index)
+            sample_size = len(raw_data[columns].resample('1h').mean().dropna(how=drop_how).index)
+            qc_size = len(qc_data[columns].resample('1h').mean().dropna(how=drop_how).index)
+
+            # Validate the counts
+            if any([
+                period_size == 0 or sample_size == 0 or qc_size == 0,
+                period_size < sample_size,
+                sample_size < qc_size
+            ]):
+                if with_log:
                     self.logger.warning(f'\t\t No data for this period... skip')
-        _sample_rate = round((sample_size / period_size) * 100, 1)
-        _valid_rate = round((qc_size / sample_size) * 100, 1)
-        _total_rate = round((qc_size / period_size) * 100, 1)
-        self.logger.info(f"\t\t{self.logger.CYAN}{self.logger.ARROW} {_nam}{self.logger.RESET}")
+                return None
+
+            # Compute the rates
+            sample_rate = round((sample_size / period_size) * 100, 1)
+            valid_rate = round((qc_size / sample_size) * 100, 1)
+            total_rate = round((qc_size / period_size) * 100, 1)
+
+            if with_log:
+                self.logger.info(f"\t\t> {key_name}")
                 self.logger.info(
-                    f"\t\t\t
+                    f"\t\t\t> {'Sample Rate':13}: {self.logger.BLUE}{sample_rate:>6.1f}%{self.logger.RESET}")
                 self.logger.info(
-                    f"\t\t\t
+                    f"\t\t\t> {'Valid Rate':13}: {self.logger.BLUE}{valid_rate:>6.1f}%{self.logger.RESET}")
                 self.logger.info(
-                    f"\t\t\t
+                    f"\t\t\t> {'Total Rate':13}: {self.logger.BLUE}{total_rate:>6.1f}%{self.logger.RESET}")
+
+            return {
+                'acquisition_rate': sample_rate,
+                'yield_rate': valid_rate,
+                'total_rate': total_rate
+            }
+
+        if all_keys:
+            # Compute every key and keep all results (for log output)
+            all_results = []
+            for name, columns in self.meta['deter_key'].items():
+                result = _calculate_single_key(name, columns)
+                if result:
+                    all_results.append(result)
+
+            if not all_results:
+                return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
+
+            # Return the lowest rates found across all results
+            return {
+                'acquisition_rate': min(r['acquisition_rate'] for r in all_results),
+                'yield_rate': min(r['yield_rate'] for r in all_results),
+                'total_rate': min(r['total_rate'] for r in all_results)
+            }
+        else:
+            # Compute every key but return only the lowest rates
+            min_rates = {'acquisition_rate': 200, 'yield_rate': 200, 'total_rate': 200}
 
+            for name, columns in self.meta['deter_key'].items():
+                result = _calculate_single_key(name, columns)
+                if result:
+                    min_rates['acquisition_rate'] = min(min_rates['acquisition_rate'], result['acquisition_rate'])
+                    min_rates['yield_rate'] = min(min_rates['yield_rate'], result['yield_rate'])
+                    min_rates['total_rate'] = min(min_rates['total_rate'], result['total_rate'])
+
+            # If there were no valid results at all, return 0
+            if min_rates['acquisition_rate'] == 200 and min_rates['yield_rate'] == 200:
+                return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
+
+            return min_rates
+
+    def _rate_calculate(self, raw_data, qc_data) -> None:
         if self.meta['deter_key'] is not None:
-            # use qc_freq to calculate each period rate
             if self.qc_freq is not None:
                 raw_data_grouped = raw_data.groupby(pd.Grouper(freq=self.qc_freq))
                 qc_data_grouped = qc_data.groupby(pd.Grouper(freq=self.qc_freq))
 
                 for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped):
                     self.logger.info(
-                        f"\t{self.logger.BLUE}
+                        f"\t{self.logger.BLUE}> Processing: {_sub_raw_data.index[0].strftime('%F')}"
                         f" to {_sub_raw_data.index[-1].strftime('%F')}{self.logger.RESET}")
 
+                    self.__calculate_rates(_sub_raw_data, _sub_qc_data, all_keys=True, with_log=True)
             else:
+                self.__calculate_rates(raw_data, qc_data, all_keys=True, with_log=True)
+
+            # Group the data by week and by month using pd.Grouper
+            current_time = datetime.now()
+
+            # Weekly groups (weeks start on Monday)
+            weekly_raw_groups = raw_data.groupby(pd.Grouper(freq='W-MON'))
+            weekly_qc_groups = qc_data.groupby(pd.Grouper(freq='W-MON'))
+
+            # Monthly groups (anchored at the start of the month)
+            monthly_raw_groups = raw_data.groupby(pd.Grouper(freq='MS'))
+            monthly_qc_groups = qc_data.groupby(pd.Grouper(freq='MS'))
+
+            # Generate the report
+            self.__generate_grouped_report(
+                current_time,
+                weekly_raw_groups, weekly_qc_groups,
+                monthly_raw_groups, monthly_qc_groups
+            )
+
+    def __generate_grouped_report(self, current_time, weekly_raw_groups, weekly_qc_groups,
+                                  monthly_raw_groups, monthly_qc_groups):
+        """Generate the acquisition- and yield-rate report from the grouped data."""
+        report = {
+            "report_time": current_time.strftime('%Y-%m-%d %H:%M:%S'),
+            "instrument_info": {
+                "station": self.path.name[:2],
+                "instrument": self.nam
+            },
+            "rates": {
+                "weekly": {},
+                "monthly": {}
+            }
+        }
+
+        # Weekly data: use the standard week time range
+        for week_start, week_raw_data in weekly_raw_groups:
+            # Get the corresponding QC data
+            week_qc_data = weekly_qc_groups.get_group(
+                week_start) if week_start in weekly_qc_groups.groups else pd.DataFrame()
+
+            if not week_raw_data.empty:
+                # Standard week end time (Sunday 23:59:59)
+                week_end = week_start + pd.Timedelta(days=6, hours=23, minutes=59, seconds=59)
+
+                # Key each entry by the week's start date
+                period_key = week_start.strftime('%Y-%m-%d')
+
+                report["rates"]["weekly"][period_key] = {
+                    "start_time": week_start.strftime('%Y-%m-%d %H:%M:%S'),
+                    "end_time": week_end.strftime('%Y-%m-%d %H:%M:%S'),
+                    "rates": self.__calculate_rates(week_raw_data, week_qc_data)
+                }
+
+        # Monthly data: use the standard month time range
+        for month_start, month_raw_data in monthly_raw_groups:
+            # Get the corresponding QC data
+            month_qc_data = monthly_qc_groups.get_group(
+                month_start) if month_start in monthly_qc_groups.groups else pd.DataFrame()
+
+            if not month_raw_data.empty:
+                # Standard month end time (last second of the month)
+                next_month_start = (month_start + pd.Timedelta(days=32)).replace(day=1)
+                month_end = next_month_start - pd.Timedelta(seconds=1)
+
+                # Key each entry by the month
+                period_key = month_start.strftime('%Y-%m')
+
+                report["rates"]["monthly"][period_key] = {
+                    "start_time": month_start.strftime('%Y-%m-%d %H:%M:%S'),
+                    "end_time": month_end.strftime('%Y-%m-%d %H:%M:%S'),
+                    "rates": self.__calculate_rates(month_raw_data, month_qc_data)
+                }
+
+        # Write out the report
+        with open(self.report_out, 'w') as f:
+            json.dump(report, f, indent=4)
 
     def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None):
         """
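For reference, the three rates reduce to hourly counts: acquisition rate = hours with raw data / hours in the period, yield rate = hours surviving QC / hours with raw data, and total rate = their product. A standalone toy calculation (made-up data; the column name merely mimics a deter_key entry):

```python
import numpy as np
import pandas as pd

# Two days of minute data: the first six hours are missing, and a
# pretend QC step wipes out three more hours.
idx = pd.date_range('2024-01-01', periods=48 * 60, freq='min')
raw = pd.DataFrame({'PM_Total': np.random.rand(len(idx))}, index=idx)
raw.iloc[:6 * 60] = np.nan
qc = raw.copy()
qc.loc['2024-01-02 00:00':'2024-01-02 02:59'] = np.nan

period_size = len(raw.resample('1h').mean().index)           # 48 hours in the period
sample_size = len(raw.resample('1h').mean().dropna().index)  # 42 hours with raw data
qc_size = len(qc.resample('1h').mean().dropna().index)       # 39 hours surviving QC

print(f"acquisition {sample_size / period_size:.1%}, "  # 87.5%
      f"yield {qc_size / sample_size:.1%}, "            # 92.9%
      f"total {qc_size / period_size:.1%}")             # 81.2%
```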
@@ -157,13 +283,28 @@ class AbstractReader(ABC):
         # Create new time index
         new_index = pd.date_range(user_start or df_start, user_end or df_end, freq=freq, name='time')
 
-        # Process data: convert to numeric, resample, and reindex
+        # Process data: convert to numeric, resample, and reindex with controlled tolerance
         if freq in ['1min', 'min', 'T']:
+            # For minute-level data, use a small tolerance such as 30 seconds
+            return _df.reindex(new_index, method='nearest', tolerance='30s')
         elif freq in ['1h', 'h', 'H']:
+            # For hourly data, use a 30-minute tolerance,
+            # so 08:20 matches 08:00 but not 09:00
+            return _df.reindex(new_index, method='nearest', tolerance='30min')
         else:
+            # For other frequencies, set the tolerance to half the frequency
+            if isinstance(freq, str) and freq[-1].isalpha():
+                # freq has the form '<number><unit>', e.g. '2h', '3min'
+                try:
+                    num = int(freq[:-1])
+                    unit = freq[-1]
+                    half_freq = f"{num // 2}{unit}" if num > 1 else f"30{'min' if unit == 'h' else 's'}"
+                    return _df.reindex(new_index, method='nearest', tolerance=half_freq)
+                except ValueError:
+                    # freq could not be parsed; fall back to the default
+                    return _df.reindex(new_index, method='nearest', tolerance=freq)
+            else:
+                return _df.reindex(new_index, method='nearest', tolerance=freq)
 
     def _outlier_process(self, _df):
         outlier_file = self.path / 'outlier.json'
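The bounded reindex above is easy to see on toy data (standalone sketch, made-up values): with a 30-minute tolerance an 08:20 reading fills the 08:00 slot, while slots with no reading within 30 minutes stay NaN instead of borrowing a distant value.

```python
import pandas as pd

obs = pd.DataFrame({'PM_Total': [10.0, 12.0]},
                   index=pd.to_datetime(['2024-01-01 08:20', '2024-01-01 11:55']))
hourly = pd.date_range('2024-01-01 08:00', '2024-01-01 12:00', freq='1h', name='time')
print(obs.reindex(hourly, method='nearest', tolerance='30min'))
# 08:00 -> 10.0; 09:00, 10:00, 11:00 -> NaN; 12:00 -> 12.0
```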
@@ -202,15 +343,15 @@ class AbstractReader(ABC):
 
         try:
             with Progress(
+                    SpinnerColumn(finished_text="✓"),
                     BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
-                    TaskProgressColumn(),
-                    TextColumn("{task.fields[filename]}", style="
+                    TaskProgressColumn(style="bold", text_format="[bright_green]{task.percentage:>3.0f}%"),
+                    TextColumn("{task.description}", style="bold blue"),
+                    TextColumn("{task.fields[filename]}", style="bold blue"),
                     console=Console(force_terminal=True, color_system="auto", width=120),
                     expand=False
             ) as progress:
-                task = progress.add_task(f"
+                task = progress.add_task(f"Reading {self.nam} files:", total=len(files), filename="")
                 yield progress, task
         finally:
             # Restore logger method and output message
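The restored column layout can be previewed outside the reader class with a minimal standalone sketch (the file names are stand-ins):

```python
from rich.console import Console
from rich.progress import (BarColumn, Progress, SpinnerColumn,
                           TaskProgressColumn, TextColumn)

files = ['a.csv', 'b.csv', 'c.csv']  # hypothetical file list
with Progress(
        SpinnerColumn(finished_text="✓"),
        BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
        TaskProgressColumn(style="bold", text_format="[bright_green]{task.percentage:>3.0f}%"),
        TextColumn("{task.description}", style="bold blue"),
        TextColumn("{task.fields[filename]}", style="bold blue"),
        console=Console(force_terminal=True, color_system="auto", width=120),
        expand=False
) as progress:
    task = progress.add_task("Reading TEOM files:", total=len(files), filename="")
    for name in files:
        progress.update(task, advance=1, filename=name)
```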
@@ -262,12 +403,12 @@ class AbstractReader(ABC):
     def _run(self, user_start, user_end):
         # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
         if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
-            self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}"
+            self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}")
 
             _f_raw_done, _f_qc_done = pd.read_pickle(self.pkl_nam_raw), pd.read_pickle(self.pkl_nam)
 
             if self.append:
-                self.logger.info_box(f"Appending New data from {user_start} to {user_end}"
+                self.logger.info_box(f"Appending New data from {user_start} to {user_end}")
 
                 _f_raw_new, _f_qc_new = self._read_raw_files()
                 _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
@@ -279,7 +420,7 @@ class AbstractReader(ABC):
             return _f_qc if self.qc else _f_raw
 
         else:
-            self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}"
+            self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}")
 
             _f_raw, _f_qc = self._read_raw_files()
 
AeroViz/rawDataReader/core/logger.py

@@ -8,7 +8,7 @@ from pathlib import Path
 
 
 class ReaderLogger:
-    def __init__(self, name: str, log_path: Path, log_level: str = '
+    def __init__(self, name: str, log_path: Path, log_level: str = 'INFO'):
         self.name = name
         self.log_path = log_path
         self._log_level = getattr(logging, log_level)
@@ -33,16 +33,16 @@ class ReaderLogger:
             self.RESET = ''
 
         # Check Unicode support
-        self.unicode_support = self.
+        self.unicode_support = self._check_unicode_support()
 
         # Set up the box-drawing characters
         if self.unicode_support:
-            self.BOX_TOP_LEFT = "
-            self.BOX_TOP_RIGHT = "
-            self.BOX_BOTTOM_LEFT = "
-            self.BOX_BOTTOM_RIGHT = "
-            self.BOX_HORIZONTAL = "
-            self.BOX_VERTICAL = "
+            self.BOX_TOP_LEFT = "╭"
+            self.BOX_TOP_RIGHT = "╮"
+            self.BOX_BOTTOM_LEFT = "╰"
+            self.BOX_BOTTOM_RIGHT = "╯"
+            self.BOX_HORIZONTAL = "─"
+            self.BOX_VERTICAL = "│"
             self.ARROW = "▶"
         else:
             self.BOX_TOP_LEFT = "+"
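The effect of the restored characters, as a standalone toy rather than the ReaderLogger API:

```python
# Unicode box-drawing characters when supported, ASCII fallbacks otherwise.
unicode_support = True
tl, tr, bl, br, hz, vt = (("╭", "╮", "╰", "╯", "─", "│") if unicode_support
                          else ("+", "+", "+", "+", "-", "|"))
msg = " Reading TEOM RAW DATA "
print(f"{tl}{hz * len(msg)}{tr}\n{vt}{msg}{vt}\n{bl}{hz * len(msg)}{br}")
```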
@@ -75,7 +75,7 @@ class ReaderLogger:
         # Other system checks
         return hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
 
-    def
+    def _check_unicode_support(self) -> bool:
         """Set up Unicode support"""
         if platform.system().lower() == 'windows':
             try:
AeroViz/rawDataReader/script/SMPS.py

@@ -38,6 +38,15 @@ class Reader(AbstractReader):
 
         _df = read_csv(f, sep=delimiter, skiprows=skip, low_memory=False)
 
+        if 'Date' not in _df.columns:  # the table needs to be transposed
+            try:
+                _df = _df.T  # transpose
+                _df.columns = _df.iloc[0]  # use the first row as the column names
+                _df = _df.iloc[1:]  # drop the first row (it became the header)
+                _df = _df.reset_index(drop=True)  # reset the index
+            except:
+                raise NotImplementedError('Not supported date format')
+
         for date_format in date_formats:
             _time_index = parse_date(_df, date_format)
             if not _time_index.isna().all():
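Some SMPS exports arrive with the samples laid out as columns rather than rows, which is what the transpose branch above fixes up. A standalone toy (all values made up; size-bin diameters become the column names):

```python
import pandas as pd

wide = pd.DataFrame({0: ['Date', '11.1', '11.8'],
                     1: ['2024-01-01 00:00', '153.2', '160.4'],
                     2: ['2024-01-01 00:05', '148.7', '155.9']})
df = wide.T                   # flip the samples back into rows
df.columns = df.iloc[0]       # first row becomes the header: Date, 11.1, 11.8
df = df.iloc[1:].reset_index(drop=True)
print(df)                     # now has a 'Date' column, as the reader expects
```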
AeroViz/rawDataReader/script/TEOM.py

@@ -7,40 +7,77 @@ from AeroViz.rawDataReader.core import AbstractReader
 class Reader(AbstractReader):
     nam = 'TEOM'
 
+    # TEOM Output Data Formats
+    # There are three data formats from TEOM instrument output:
+    #
+    # 1. Remote download format
+    #    - Identified by 'Time Stamp' column
+    #    - Date format: 'DD - MM - YYYY HH:MM:SS'
+    #    - May contain Chinese month names requiring conversion
+    #    - Maps columns: Time Stamp → time, System status → status,
+    #      PM-2.5 base MC → PM_NV, PM-2.5 MC → PM_Total, PM-2.5 TEOM noise → noise
+    #
+    # 2. USB download or auto export format
+    #    - Identified by 'tmoStatusCondition_0' column
+    #    - Two possible time formats:
+    #      a) Standard: 'Date' and 'Time' columns (YYYY-MM-DD HH:MM:SS)
+    #      b) Alternative: 'time_stamp' column (similar to remote format)
+    #    - Maps columns: tmoStatusCondition_0 → status, tmoTEOMABaseMC_0 → PM_NV,
+    #      tmoTEOMAMC_0 → PM_Total, tmoTEOMANoise_0 → noise
+    #
+    # 3. Other formats
+    #    - Not implemented, raises NotImplementedError
+    #
+    # All formats are standardized to the same column names with timestamp as index
+
     def _raw_reader(self, file):
+        # TEOM Data Format Handler
+        # Standardizes different TEOM data formats (remote download and USB/auto export)
         _df = read_csv(file, skiprows=3, index_col=False)
 
+        # Chinese month name conversion dictionary
+        _time_replace = {'十一月': '11', '十二月': '12', '一月': '01', '二月': '02', '三月': '03', '四月': '04',
+                         '五月': '05', '六月': '06', '七月': '07', '八月': '08', '九月': '09', '十月': '10'}
+
+        # Try both naming conventions (will ignore columns that don't exist)
+        _df = _df.rename(columns={
+            # Remote download format
+            'Time Stamp': 'time',
+            'System status': 'status',
+            'PM-2.5 base MC': 'PM_NV',
+            'PM-2.5 MC': 'PM_Total',
+            'PM-2.5 TEOM noise': 'noise',
+            # USB/auto export format
+            'time_stamp': 'time',
+            'tmoStatusCondition_0': 'status',
+            'tmoTEOMABaseMC_0': 'PM_NV',
+            'tmoTEOMAMC_0': 'PM_Total',
+            'tmoTEOMANoise_0': 'noise'
+        })
+
+        # Handle different time formats
+        if 'time' in _df.columns:  # Remote download or auto export with time column
             _tm_idx = _df.time
+            # Convert Chinese month names if present
             for _ori, _rpl in _time_replace.items():
                 _tm_idx = _tm_idx.str.replace(_ori, _rpl)
 
             _df = _df.set_index(to_datetime(_tm_idx, errors='coerce', format='%d - %m - %Y %X'))
 
-        elif '
-        _df['time'] = pd.to_datetime(_df['Date'] + ' ' + _df['Time'],
+        elif 'Date' in _df.columns and 'Time' in _df.columns:  # USB download format
+            _df['time'] = pd.to_datetime(_df['Date'] + ' ' + _df['Time'],
+                                         errors='coerce', format='%Y-%m-%d %H:%M:%S')
             _df.drop(columns=['Date', 'Time'], inplace=True)
             _df.set_index('time', inplace=True)
 
-        _df = _df.rename(columns={'tmoStatusCondition_0': 'status',
-                                  'tmoTEOMABaseMC_0': 'PM_NV',
-                                  'tmoTEOMAMC_0': 'PM_Total',
-                                  'tmoTEOMANoise_0': 'noise', })
         else:
-            raise NotImplementedError
+            raise NotImplementedError("Unsupported TEOM data format")
 
+        # Filter and clean data
         _df = _df.where(_df['status'] < 1)
         _df = _df[['PM_NV', 'PM_Total', 'noise']].apply(to_numeric, errors='coerce')
 
+        # Remove duplicates and NaN indices
         return _df.loc[~_df.index.duplicated() & _df.index.notna()]
 
     # QC data
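A standalone check of the month-name conversion above (sample timestamp made up). Note that the full dictionary lists 十一月 and 十二月 before the single-character months, so iteration, which follows insertion order, never corrupts them by replacing 一月 or 二月 first:

```python
from pandas import Series, to_datetime

_time_replace = {'十一月': '11', '十二月': '12', '三月': '03'}  # excerpt of the dict above
ts = Series(['05 - 三月 - 2024 08:00:00'])
for _ori, _rpl in _time_replace.items():
    ts = ts.str.replace(_ori, _rpl)
print(to_datetime(ts, errors='coerce', format='%d - %m - %Y %X'))
# 0   2024-03-05 08:00:00
```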