AeroViz 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AeroViz might be problematic. Click here for more details.
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/_absorption.py +2 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__init__.py +1 -1
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/templates/corr_matrix.py +168 -2
- AeroViz/plot/templates/koschmieder.py +1 -1
- AeroViz/plot/templates/metal_heatmap.py +15 -6
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/timeseries.py +96 -52
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/__init__.py +154 -59
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/supported_instruments.py +7 -4
- AeroViz/rawDataReader/core/__init__.py +176 -86
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +14 -10
- AeroViz/rawDataReader/core/qc.py +1 -1
- AeroViz/rawDataReader/script/AE33.py +1 -1
- AeroViz/rawDataReader/script/BAM1020.py +35 -0
- AeroViz/rawDataReader/script/NEPH.py +6 -10
- AeroViz/rawDataReader/script/SMPS.py +20 -6
- AeroViz/rawDataReader/script/TEOM.py +15 -3
- AeroViz/rawDataReader/script/__init__.py +1 -0
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/XRF.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- {AeroViz-0.1.13.dist-info → AeroViz-0.1.15.dist-info}/METADATA +14 -15
- {AeroViz-0.1.13.dist-info → AeroViz-0.1.15.dist-info}/RECORD +89 -87
- {AeroViz-0.1.13.dist-info → AeroViz-0.1.15.dist-info}/WHEEL +1 -1
- {AeroViz-0.1.13.dist-info → AeroViz-0.1.15.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.13.dist-info → AeroViz-0.1.15.dist-info}/top_level.txt +0 -0
|
@@ -1,19 +1,18 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
3
|
from contextlib import contextmanager
|
|
4
|
-
from datetime import datetime
|
|
4
|
+
from datetime import datetime, timedelta
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Generator
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
import pandas as pd
|
|
10
|
-
from pandas import DataFrame, concat, read_pickle, to_numeric
|
|
11
10
|
from rich.console import Console
|
|
12
|
-
from rich.progress import Progress, TextColumn, BarColumn,
|
|
11
|
+
from rich.progress import Progress, TextColumn, BarColumn, SpinnerColumn, TaskProgressColumn
|
|
13
12
|
|
|
14
13
|
from AeroViz.rawDataReader.config.supported_instruments import meta
|
|
15
14
|
from AeroViz.rawDataReader.core.logger import ReaderLogger
|
|
16
|
-
from AeroViz.rawDataReader.core.qc import
|
|
15
|
+
from AeroViz.rawDataReader.core.qc import QualityControl
|
|
17
16
|
|
|
18
17
|
__all__ = ['AbstractReader']
|
|
19
18
|
|
|
@@ -32,45 +31,44 @@ class AbstractReader(ABC):
|
|
|
32
31
|
|
|
33
32
|
def __init__(self,
|
|
34
33
|
path: Path | str,
|
|
35
|
-
reset: bool = False,
|
|
36
|
-
qc: bool = True,
|
|
37
|
-
qc_freq: Optional[str] = None,
|
|
38
|
-
rate: bool = True,
|
|
39
|
-
append_data: bool = False,
|
|
34
|
+
reset: bool | str = False,
|
|
35
|
+
qc: bool | str = True,
|
|
40
36
|
**kwargs):
|
|
41
37
|
|
|
42
38
|
self.path = Path(path)
|
|
43
39
|
self.meta = meta[self.nam]
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
self.
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
self.
|
|
53
|
-
self.
|
|
54
|
-
self.
|
|
55
|
-
self.
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
self.
|
|
40
|
+
output_folder = self.path / f'{self.nam.lower()}_outputs'
|
|
41
|
+
output_folder.mkdir(parents=True, exist_ok=True)
|
|
42
|
+
|
|
43
|
+
self.logger = ReaderLogger(
|
|
44
|
+
self.nam, output_folder,
|
|
45
|
+
kwargs.get('log_level').upper() if not kwargs.get('suppress_warnings') else 'ERROR')
|
|
46
|
+
|
|
47
|
+
self.reset = reset is True
|
|
48
|
+
self.append = reset == 'append'
|
|
49
|
+
self.qc = qc # if qc, then calculate rate
|
|
50
|
+
self.qc_freq = qc if isinstance(qc, str) else None
|
|
51
|
+
self.kwargs = kwargs
|
|
52
|
+
|
|
53
|
+
self.pkl_nam = output_folder / f'_read_{self.nam.lower()}.pkl'
|
|
54
|
+
self.csv_nam = output_folder / f'_read_{self.nam.lower()}.csv'
|
|
55
|
+
self.pkl_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.pkl'
|
|
56
|
+
self.csv_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.csv'
|
|
57
|
+
self.csv_out = output_folder / f'output_{self.nam.lower()}.csv'
|
|
58
|
+
self.report_out = output_folder / 'report.json'
|
|
59
59
|
|
|
60
60
|
def __call__(self,
|
|
61
61
|
start: datetime,
|
|
62
62
|
end: datetime,
|
|
63
63
|
mean_freq: str = '1h',
|
|
64
|
-
|
|
65
|
-
) -> DataFrame:
|
|
64
|
+
) -> pd.DataFrame:
|
|
66
65
|
|
|
67
66
|
data = self._run(start, end)
|
|
68
67
|
|
|
69
68
|
if data is not None:
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
data.to_csv(self.csv_out)
|
|
69
|
+
data = data.resample(mean_freq).mean()
|
|
70
|
+
|
|
71
|
+
data.to_csv(self.csv_out)
|
|
74
72
|
|
|
75
73
|
return data
|
|
76
74
|
|
|
@@ -79,58 +77,147 @@ class AbstractReader(ABC):
|
|
|
79
77
|
pass
|
|
80
78
|
|
|
81
79
|
@abstractmethod
|
|
82
|
-
def _QC(self, df: DataFrame) -> DataFrame:
|
|
80
|
+
def _QC(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
83
81
|
return df
|
|
84
82
|
|
|
85
|
-
def
|
|
86
|
-
|
|
87
|
-
period_size = len(raw_data.resample('1h').mean().index)
|
|
83
|
+
def __calculate_rates(self, raw_data, qc_data, all_keys=False, with_log=False):
|
|
84
|
+
"""計算獲取率、良率和總比率
|
|
88
85
|
|
|
89
|
-
|
|
90
|
-
|
|
86
|
+
Args:
|
|
87
|
+
raw_data: 原始數據
|
|
88
|
+
qc_data: QC後的數據
|
|
89
|
+
all_keys: 是否計算所有 deter_key
|
|
90
|
+
with_log: 是否輸出計算日誌
|
|
91
|
+
"""
|
|
92
|
+
if raw_data.empty or qc_data.empty:
|
|
93
|
+
return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
|
|
91
94
|
|
|
92
|
-
|
|
93
|
-
|
|
95
|
+
def _calculate_single_key(key_name, key_columns):
|
|
96
|
+
columns, drop_how = (qc_data.keys(), 'all') if key_columns == ['all'] else (key_columns, 'any')
|
|
94
97
|
|
|
95
|
-
|
|
96
|
-
|
|
98
|
+
# 重採樣並計算有效數據量
|
|
99
|
+
period_size = len(raw_data.resample('1h').mean().index)
|
|
100
|
+
sample_size = len(raw_data[columns].resample('1h').mean().dropna(how=drop_how).index)
|
|
101
|
+
qc_size = len(qc_data[columns].resample('1h').mean().dropna(how=drop_how).index)
|
|
102
|
+
|
|
103
|
+
# 驗證計算
|
|
104
|
+
if any([
|
|
105
|
+
period_size == 0 or sample_size == 0 or qc_size == 0,
|
|
106
|
+
period_size < sample_size,
|
|
107
|
+
sample_size < qc_size
|
|
108
|
+
]):
|
|
109
|
+
if with_log:
|
|
97
110
|
self.logger.warning(f'\t\t No data for this period... skip')
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
_sample_rate = round((sample_size / period_size) * 100, 1)
|
|
108
|
-
_valid_rate = round((qc_size / sample_size) * 100, 1)
|
|
109
|
-
_total_rate = round((qc_size / period_size) * 100, 1)
|
|
110
|
-
|
|
111
|
-
self.logger.info(f"\t\t{self.logger.CYAN}{self.logger.ARROW} {_nam}{self.logger.RESET}")
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
# 計算比率
|
|
114
|
+
sample_rate = round((sample_size / period_size) * 100, 1)
|
|
115
|
+
valid_rate = round((qc_size / sample_size) * 100, 1)
|
|
116
|
+
total_rate = round((qc_size / period_size) * 100, 1)
|
|
117
|
+
|
|
118
|
+
if with_log:
|
|
119
|
+
self.logger.info(f"\t\t> {key_name}")
|
|
112
120
|
self.logger.info(
|
|
113
|
-
f"\t\t\t
|
|
121
|
+
f"\t\t\t> {'Sample Rate':13}: {self.logger.BLUE}{sample_rate:>6.1f}%{self.logger.RESET}")
|
|
114
122
|
self.logger.info(
|
|
115
|
-
f"\t\t\t
|
|
123
|
+
f"\t\t\t> {'Valid Rate':13}: {self.logger.BLUE}{valid_rate:>6.1f}%{self.logger.RESET}")
|
|
116
124
|
self.logger.info(
|
|
117
|
-
f"\t\t\t
|
|
125
|
+
f"\t\t\t> {'Total Rate':13}: {self.logger.BLUE}{total_rate:>6.1f}%{self.logger.RESET}")
|
|
126
|
+
|
|
127
|
+
return {
|
|
128
|
+
'acquisition_rate': sample_rate,
|
|
129
|
+
'yield_rate': valid_rate,
|
|
130
|
+
'total_rate': total_rate
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
if all_keys:
|
|
134
|
+
# 計算所有 key 並回傳所有結果(用於日誌輸出)
|
|
135
|
+
all_results = []
|
|
136
|
+
for name, columns in self.meta['deter_key'].items():
|
|
137
|
+
result = _calculate_single_key(name, columns)
|
|
138
|
+
if result:
|
|
139
|
+
all_results.append(result)
|
|
140
|
+
|
|
141
|
+
if not all_results:
|
|
142
|
+
return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
|
|
143
|
+
|
|
144
|
+
# 回傳所有結果中比率最低的
|
|
145
|
+
return {
|
|
146
|
+
'acquisition_rate': min(r['acquisition_rate'] for r in all_results),
|
|
147
|
+
'yield_rate': min(r['yield_rate'] for r in all_results),
|
|
148
|
+
'total_rate': min(r['total_rate'] for r in all_results)
|
|
149
|
+
}
|
|
150
|
+
else:
|
|
151
|
+
# 計算所有 key 但只回傳最低的比率
|
|
152
|
+
min_rates = {'acquisition_rate': 200, 'yield_rate': 200, 'total_rate': 200}
|
|
153
|
+
|
|
154
|
+
for name, columns in self.meta['deter_key'].items():
|
|
155
|
+
result = _calculate_single_key(name, columns)
|
|
156
|
+
if result:
|
|
157
|
+
min_rates['acquisition_rate'] = min(min_rates['acquisition_rate'], result['acquisition_rate'])
|
|
158
|
+
min_rates['yield_rate'] = min(min_rates['yield_rate'], result['yield_rate'])
|
|
159
|
+
min_rates['total_rate'] = min(min_rates['total_rate'], result['total_rate'])
|
|
160
|
+
|
|
161
|
+
# 如果沒有任何有效結果,回傳 0
|
|
162
|
+
if min_rates['acquisition_rate'] == 200 and min_rates['yield_rate'] == 200:
|
|
163
|
+
return {'acquisition_rate': 0, 'yield_rate': 0, 'total_rate': 0}
|
|
118
164
|
|
|
165
|
+
return min_rates
|
|
166
|
+
|
|
167
|
+
def _rate_calculate(self, raw_data, qc_data) -> None:
|
|
119
168
|
if self.meta['deter_key'] is not None:
|
|
120
|
-
# use qc_freq to calculate each period rate
|
|
121
169
|
if self.qc_freq is not None:
|
|
122
170
|
raw_data_grouped = raw_data.groupby(pd.Grouper(freq=self.qc_freq))
|
|
123
171
|
qc_data_grouped = qc_data.groupby(pd.Grouper(freq=self.qc_freq))
|
|
124
172
|
|
|
125
173
|
for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped):
|
|
126
174
|
self.logger.info(
|
|
127
|
-
f"\t{self.logger.BLUE}
|
|
175
|
+
f"\t{self.logger.BLUE}> Processing: {_sub_raw_data.index[0].strftime('%F')}"
|
|
128
176
|
f" to {_sub_raw_data.index[-1].strftime('%F')}{self.logger.RESET}")
|
|
129
177
|
|
|
130
|
-
|
|
131
|
-
|
|
178
|
+
self.__calculate_rates(_sub_raw_data, _sub_qc_data, all_keys=True, with_log=True)
|
|
132
179
|
else:
|
|
133
|
-
|
|
180
|
+
self.__calculate_rates(raw_data, qc_data, all_keys=True, with_log=True)
|
|
181
|
+
|
|
182
|
+
# 計算週和月的數據
|
|
183
|
+
current_time = datetime.now()
|
|
184
|
+
week_mask = raw_data.index >= current_time - timedelta(days=7)
|
|
185
|
+
month_mask = raw_data.index >= current_time - timedelta(days=30)
|
|
186
|
+
|
|
187
|
+
# 生成報告
|
|
188
|
+
self.__generate_report(
|
|
189
|
+
current_time,
|
|
190
|
+
raw_data[week_mask], qc_data[week_mask],
|
|
191
|
+
raw_data[month_mask], qc_data[month_mask]
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
def __generate_report(self, current_time, week_raw_data, week_qc_data, month_raw_data, month_qc_data):
|
|
195
|
+
"""生成獲取率和良率的報告"""
|
|
196
|
+
report = {
|
|
197
|
+
"report_time": current_time.strftime('%Y-%m-%d %H:%M:%S'),
|
|
198
|
+
"instrument_info": {
|
|
199
|
+
"station": self.path.name[:2],
|
|
200
|
+
"instrument": self.nam
|
|
201
|
+
},
|
|
202
|
+
"rates": {
|
|
203
|
+
"weekly": self.__calculate_rates(week_raw_data, week_qc_data),
|
|
204
|
+
"monthly": self.__calculate_rates(month_raw_data, month_qc_data),
|
|
205
|
+
},
|
|
206
|
+
"details": {
|
|
207
|
+
"weekly": {
|
|
208
|
+
"start_time": (current_time - timedelta(days=7)).strftime('%Y-%m-%d %H:%M:%S'),
|
|
209
|
+
"end_time": current_time.strftime('%Y-%m-%d %H:%M:%S')
|
|
210
|
+
},
|
|
211
|
+
"monthly": {
|
|
212
|
+
"start_time": (current_time - timedelta(days=30)).strftime('%Y-%m-%d %H:%M:%S'),
|
|
213
|
+
"end_time": current_time.strftime('%Y-%m-%d %H:%M:%S')
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
# 寫入報告
|
|
219
|
+
with open(self.report_out, 'w') as f:
|
|
220
|
+
json.dump(report, f, indent=4)
|
|
134
221
|
|
|
135
222
|
def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None):
|
|
136
223
|
"""
|
|
@@ -182,7 +269,7 @@ class AbstractReader(ABC):
|
|
|
182
269
|
|
|
183
270
|
return _df
|
|
184
271
|
|
|
185
|
-
def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
|
|
272
|
+
def _save_data(self, raw_data: pd.DataFrame, qc_data: pd.DataFrame) -> None:
|
|
186
273
|
try:
|
|
187
274
|
raw_data.to_pickle(self.pkl_nam_raw)
|
|
188
275
|
raw_data.to_csv(self.csv_nam_raw)
|
|
@@ -205,15 +292,15 @@ class AbstractReader(ABC):
|
|
|
205
292
|
|
|
206
293
|
try:
|
|
207
294
|
with Progress(
|
|
208
|
-
|
|
295
|
+
SpinnerColumn(finished_text="✓"),
|
|
209
296
|
BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
|
|
210
|
-
TaskProgressColumn(),
|
|
211
|
-
|
|
212
|
-
TextColumn("{task.fields[filename]}", style="
|
|
297
|
+
TaskProgressColumn(style="bold", text_format="[bright_green]{task.percentage:>3.0f}%"),
|
|
298
|
+
TextColumn("{task.description}", style="bold blue"),
|
|
299
|
+
TextColumn("{task.fields[filename]}", style="bold blue"),
|
|
213
300
|
console=Console(force_terminal=True, color_system="auto", width=120),
|
|
214
301
|
expand=False
|
|
215
302
|
) as progress:
|
|
216
|
-
task = progress.add_task(f"
|
|
303
|
+
task = progress.add_task(f"Reading {self.nam} files:", total=len(files), filename="")
|
|
217
304
|
yield progress, task
|
|
218
305
|
finally:
|
|
219
306
|
# Restore logger method and output message
|
|
@@ -222,7 +309,7 @@ class AbstractReader(ABC):
|
|
|
222
309
|
for msg in msgs:
|
|
223
310
|
original[level](msg)
|
|
224
311
|
|
|
225
|
-
def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
|
|
312
|
+
def _read_raw_files(self) -> tuple[pd.DataFrame | None, pd.DataFrame | None]:
|
|
226
313
|
files = [f
|
|
227
314
|
for file_pattern in self.meta['pattern']
|
|
228
315
|
for pattern in {file_pattern.lower(), file_pattern.upper(), file_pattern}
|
|
@@ -242,7 +329,7 @@ class AbstractReader(ABC):
|
|
|
242
329
|
if (df := self._raw_reader(file)) is not None and not df.empty:
|
|
243
330
|
df_list.append(df)
|
|
244
331
|
else:
|
|
245
|
-
self.logger.
|
|
332
|
+
self.logger.debug(f"\tFile {file.name} produced an empty DataFrame or None.")
|
|
246
333
|
|
|
247
334
|
except Exception as e:
|
|
248
335
|
self.logger.error(f"Error reading {file.name}: {e}")
|
|
@@ -250,25 +337,27 @@ class AbstractReader(ABC):
|
|
|
250
337
|
if not df_list:
|
|
251
338
|
raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m")
|
|
252
339
|
|
|
253
|
-
raw_data = concat(df_list, axis=0).groupby(level=0).first()
|
|
340
|
+
raw_data = pd.concat(df_list, axis=0).groupby(level=0).first()
|
|
254
341
|
|
|
255
|
-
if self.nam
|
|
342
|
+
if self.nam in ['SMPS', 'APS', 'GRIMM']:
|
|
256
343
|
raw_data = raw_data.sort_index(axis=1, key=lambda x: x.astype(float))
|
|
257
344
|
|
|
258
|
-
raw_data = self._timeIndex_process(raw_data)
|
|
259
|
-
|
|
345
|
+
raw_data = self._timeIndex_process(raw_data)
|
|
346
|
+
|
|
347
|
+
raw_data = raw_data.apply(pd.to_numeric, errors='coerce').copy(deep=True)
|
|
348
|
+
qc_data = self._QC(raw_data).apply(pd.to_numeric, errors='coerce').copy(deep=True)
|
|
260
349
|
|
|
261
350
|
return raw_data, qc_data
|
|
262
351
|
|
|
263
352
|
def _run(self, user_start, user_end):
|
|
264
353
|
# read pickle if pickle file exists and 'reset=False' or process raw data or append new data
|
|
265
354
|
if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
|
|
266
|
-
self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}"
|
|
355
|
+
self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}")
|
|
267
356
|
|
|
268
|
-
_f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam)
|
|
357
|
+
_f_raw_done, _f_qc_done = pd.read_pickle(self.pkl_nam_raw), pd.read_pickle(self.pkl_nam)
|
|
269
358
|
|
|
270
359
|
if self.append:
|
|
271
|
-
self.logger.info_box(f"Appending New data from {user_start} to {user_end}"
|
|
360
|
+
self.logger.info_box(f"Appending New data from {user_start} to {user_end}")
|
|
272
361
|
|
|
273
362
|
_f_raw_new, _f_qc_new = self._read_raw_files()
|
|
274
363
|
_f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
|
|
@@ -280,7 +369,7 @@ class AbstractReader(ABC):
|
|
|
280
369
|
return _f_qc if self.qc else _f_raw
|
|
281
370
|
|
|
282
371
|
else:
|
|
283
|
-
self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}"
|
|
372
|
+
self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}")
|
|
284
373
|
|
|
285
374
|
_f_raw, _f_qc = self._read_raw_files()
|
|
286
375
|
|
|
@@ -292,25 +381,26 @@ class AbstractReader(ABC):
|
|
|
292
381
|
# save
|
|
293
382
|
self._save_data(_f_raw, _f_qc)
|
|
294
383
|
|
|
295
|
-
if self.
|
|
296
|
-
self._rate_calculate(_f_raw.apply(to_numeric, errors='coerce'),
|
|
384
|
+
if self.qc:
|
|
385
|
+
self._rate_calculate(_f_raw.apply(pd.to_numeric, errors='coerce'),
|
|
386
|
+
_f_qc.apply(pd.to_numeric, errors='coerce'))
|
|
297
387
|
|
|
298
388
|
return _f_qc if self.qc else _f_raw
|
|
299
389
|
|
|
300
390
|
@staticmethod
|
|
301
|
-
def reorder_dataframe_columns(df, order_lists,
|
|
391
|
+
def reorder_dataframe_columns(df, order_lists: list[list], keep_others: bool = False):
|
|
302
392
|
new_order = []
|
|
303
393
|
|
|
304
394
|
for order in order_lists:
|
|
305
|
-
#
|
|
395
|
+
# Only add column that exist in the DataFrame and do not add them repeatedly
|
|
306
396
|
new_order.extend([col for col in order if col in df.columns and col not in new_order])
|
|
307
397
|
|
|
308
|
-
if
|
|
309
|
-
#
|
|
398
|
+
if keep_others:
|
|
399
|
+
# Add all original fields not in the new order list, keeping their original order
|
|
310
400
|
new_order.extend([col for col in df.columns if col not in new_order])
|
|
311
401
|
|
|
312
402
|
return df[new_order]
|
|
313
403
|
|
|
314
404
|
@staticmethod
|
|
315
405
|
def time_aware_IQR_QC(df: pd.DataFrame, time_window='1D', log_dist=False) -> pd.DataFrame:
|
|
316
|
-
return
|
|
406
|
+
return QualityControl().time_aware_iqr(df, time_window=time_window, log_dist=log_dist)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -8,9 +8,10 @@ from pathlib import Path
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class ReaderLogger:
|
|
11
|
-
def __init__(self, name: str, log_path: Path):
|
|
11
|
+
def __init__(self, name: str, log_path: Path, log_level: str = 'INFO'):
|
|
12
12
|
self.name = name
|
|
13
13
|
self.log_path = log_path
|
|
14
|
+
self._log_level = getattr(logging, log_level)
|
|
14
15
|
|
|
15
16
|
# 檢查是否支持顏色輸出
|
|
16
17
|
self.color_support = self._check_color_support()
|
|
@@ -32,16 +33,16 @@ class ReaderLogger:
|
|
|
32
33
|
self.RESET = ''
|
|
33
34
|
|
|
34
35
|
# 檢查 Unicode 支持
|
|
35
|
-
self.unicode_support = self.
|
|
36
|
+
self.unicode_support = self._check_unicode_support()
|
|
36
37
|
|
|
37
38
|
# 設置框架字符
|
|
38
39
|
if self.unicode_support:
|
|
39
|
-
self.BOX_TOP_LEFT = "
|
|
40
|
-
self.BOX_TOP_RIGHT = "
|
|
41
|
-
self.BOX_BOTTOM_LEFT = "
|
|
42
|
-
self.BOX_BOTTOM_RIGHT = "
|
|
43
|
-
self.BOX_HORIZONTAL = "
|
|
44
|
-
self.BOX_VERTICAL = "
|
|
40
|
+
self.BOX_TOP_LEFT = "╭"
|
|
41
|
+
self.BOX_TOP_RIGHT = "╮"
|
|
42
|
+
self.BOX_BOTTOM_LEFT = "╰"
|
|
43
|
+
self.BOX_BOTTOM_RIGHT = "╯"
|
|
44
|
+
self.BOX_HORIZONTAL = "─"
|
|
45
|
+
self.BOX_VERTICAL = "│"
|
|
45
46
|
self.ARROW = "▶"
|
|
46
47
|
else:
|
|
47
48
|
self.BOX_TOP_LEFT = "+"
|
|
@@ -74,7 +75,7 @@ class ReaderLogger:
|
|
|
74
75
|
# 其他系統檢查
|
|
75
76
|
return hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
|
|
76
77
|
|
|
77
|
-
def
|
|
78
|
+
def _check_unicode_support(self) -> bool:
|
|
78
79
|
"""設置 Unicode 支持"""
|
|
79
80
|
if platform.system().lower() == 'windows':
|
|
80
81
|
try:
|
|
@@ -92,7 +93,7 @@ class ReaderLogger:
|
|
|
92
93
|
def _setup_logger(self) -> logging.Logger:
|
|
93
94
|
"""設置logger"""
|
|
94
95
|
logger = logging.getLogger(self.name)
|
|
95
|
-
logger.setLevel(
|
|
96
|
+
logger.setLevel(self._log_level)
|
|
96
97
|
|
|
97
98
|
# 移除現有的 handlers
|
|
98
99
|
for handler in logger.handlers[:]:
|
|
@@ -135,6 +136,9 @@ class ReaderLogger:
|
|
|
135
136
|
text = text.encode('ascii', 'replace').decode('ascii')
|
|
136
137
|
return text
|
|
137
138
|
|
|
139
|
+
def debug(self, msg: str):
|
|
140
|
+
self.logger.debug(self._safe_print(msg))
|
|
141
|
+
|
|
138
142
|
def info(self, msg: str):
|
|
139
143
|
self.logger.info(self._safe_print(msg))
|
|
140
144
|
|
AeroViz/rawDataReader/core/qc.py
CHANGED
|
@@ -8,7 +8,7 @@ class Reader(AbstractReader):
|
|
|
8
8
|
|
|
9
9
|
def _raw_reader(self, file):
|
|
10
10
|
if file.stat().st_size / 1024 < 550:
|
|
11
|
-
self.logger.
|
|
11
|
+
self.logger.warning(f'\t {file.name} may not be a whole daily data. Make sure the file is correct.')
|
|
12
12
|
|
|
13
13
|
_df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
|
|
14
14
|
delimiter=r'\s+', skiprows=5, usecols=range(67))
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from pandas import read_csv, to_numeric, NA
|
|
2
|
+
|
|
3
|
+
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Reader(AbstractReader):
|
|
7
|
+
nam = 'BAM1020'
|
|
8
|
+
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
PM = 'Conc'
|
|
11
|
+
|
|
12
|
+
_df = read_csv(file, parse_dates=True, index_col=0, usecols=range(0, 21))
|
|
13
|
+
_df.rename(columns={'Conc (mg/m3)': PM}, inplace=True)
|
|
14
|
+
|
|
15
|
+
# remove data when Conc = 1 or 0
|
|
16
|
+
_df[PM] = _df[PM].replace(1, NA)
|
|
17
|
+
|
|
18
|
+
_df = _df[[PM]].apply(to_numeric, errors='coerce')
|
|
19
|
+
|
|
20
|
+
# tranfer unit from mg/m3 to ug/m3
|
|
21
|
+
_df = _df * 1000
|
|
22
|
+
|
|
23
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
24
|
+
|
|
25
|
+
def _QC(self, _df):
|
|
26
|
+
_index = _df.index.copy()
|
|
27
|
+
|
|
28
|
+
# remove negative value
|
|
29
|
+
_df = _df.mask((_df <= 0) | (_df > 500))
|
|
30
|
+
|
|
31
|
+
# use IQR_QC
|
|
32
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
33
|
+
|
|
34
|
+
# make sure all columns have values, otherwise set to nan
|
|
35
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pandas import to_datetime, read_csv,
|
|
1
|
+
from pandas import to_datetime, read_csv, to_numeric
|
|
2
2
|
|
|
3
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
4
|
|
|
@@ -46,17 +46,13 @@ class Reader(AbstractReader):
|
|
|
46
46
|
|
|
47
47
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
48
48
|
|
|
49
|
-
except ValueError:
|
|
50
|
-
# Define valid groups and find invalid indices
|
|
49
|
+
except ValueError: # Define valid groups and find invalid indices
|
|
51
50
|
invalid_indices = _df[~_df[0].isin({'B', 'G', 'R', 'D', 'T', 'Y', 'Z'})].index
|
|
52
|
-
|
|
53
|
-
|
|
51
|
+
self.logger.warning(
|
|
52
|
+
f"\tInvalid values in {file.name}: {', '.join(f'{_}:{_df.at[_, 0]}' for _ in invalid_indices)}."
|
|
53
|
+
f" Skipping file.")
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
_df_out = DataFrame(index=_idx_tm, columns=['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH'])
|
|
57
|
-
_df_out.index.name = 'Time'
|
|
58
|
-
print(f'\n\t\t\t Length mismatch in {file} data. Returning an empty DataFrame.')
|
|
59
|
-
return _df_out
|
|
55
|
+
return None
|
|
60
56
|
|
|
61
57
|
def _QC(self, _df):
|
|
62
58
|
MDL_sensitivity = {'B': .1, 'G': .1, 'R': .3}
|
|
@@ -36,7 +36,16 @@ class Reader(AbstractReader):
|
|
|
36
36
|
skip = find_header_row(f, delimiter)
|
|
37
37
|
f.seek(0)
|
|
38
38
|
|
|
39
|
-
_df = read_csv(f, sep=delimiter, skiprows=skip)
|
|
39
|
+
_df = read_csv(f, sep=delimiter, skiprows=skip, low_memory=False)
|
|
40
|
+
|
|
41
|
+
if 'Date' not in _df.columns: # 資料需要轉置
|
|
42
|
+
try:
|
|
43
|
+
_df = _df.T # 轉置
|
|
44
|
+
_df.columns = _df.iloc[0] # 使用第一列作為欄位名稱
|
|
45
|
+
_df = _df.iloc[1:] # 移除第一列(因為已經變成欄位名稱)
|
|
46
|
+
_df = _df.reset_index(drop=True) # 重設索引
|
|
47
|
+
except:
|
|
48
|
+
raise NotImplementedError('Not supported date format')
|
|
40
49
|
|
|
41
50
|
for date_format in date_formats:
|
|
42
51
|
_time_index = parse_date(_df, date_format)
|
|
@@ -56,9 +65,12 @@ class Reader(AbstractReader):
|
|
|
56
65
|
_df_smps.columns = _df_smps.columns.astype(float)
|
|
57
66
|
_df_smps = _df_smps.loc[_df_smps.index.dropna().copy()]
|
|
58
67
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
68
|
+
size_range = self.kwargs.get('size_range') or (11.8, 593.5)
|
|
69
|
+
|
|
70
|
+
if _df_smps.columns[0] != size_range[0] or _df_smps.columns[-1] != size_range[1]:
|
|
71
|
+
self.logger.warning(f'\tSMPS file: {file.name} is not match the setting size range {size_range}, '
|
|
72
|
+
f'it is ({_df_smps.columns[0]}, {_df_smps.columns[-1]}). '
|
|
73
|
+
f'Please run by another RawDataReader instance, and set the correct size range')
|
|
62
74
|
return None
|
|
63
75
|
|
|
64
76
|
return _df_smps.apply(to_numeric, errors='coerce')
|
|
@@ -68,8 +80,10 @@ class Reader(AbstractReader):
|
|
|
68
80
|
_df = _df.copy()
|
|
69
81
|
_index = _df.index.copy()
|
|
70
82
|
|
|
71
|
-
|
|
72
|
-
|
|
83
|
+
size_range = self.kwargs.get('size_range') or (11.8, 593.5)
|
|
84
|
+
|
|
85
|
+
size_range_mask = (_df.columns.astype(float) >= size_range[0]) & (
|
|
86
|
+
_df.columns.astype(float) <= size_range[1])
|
|
73
87
|
_df = _df.loc[:, size_range_mask]
|
|
74
88
|
|
|
75
89
|
# mask out the data size lower than 7
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import pandas as pd
|
|
1
2
|
from pandas import to_datetime, read_csv, Timedelta, to_numeric
|
|
2
3
|
|
|
3
4
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
@@ -7,9 +8,9 @@ class Reader(AbstractReader):
|
|
|
7
8
|
nam = 'TEOM'
|
|
8
9
|
|
|
9
10
|
def _raw_reader(self, file):
|
|
10
|
-
|
|
11
|
-
_df = read_csv(f, skiprows=3, index_col=False)
|
|
11
|
+
_df = read_csv(file, skiprows=3, index_col=False)
|
|
12
12
|
|
|
13
|
+
if 'Time Stamp' in _df.columns: # remote download
|
|
13
14
|
_df = _df.rename(columns={'Time Stamp': 'time',
|
|
14
15
|
'System status': 'status',
|
|
15
16
|
'PM-2.5 base MC': 'PM_NV',
|
|
@@ -25,8 +26,19 @@ class Reader(AbstractReader):
|
|
|
25
26
|
|
|
26
27
|
_df = _df.set_index(to_datetime(_tm_idx, errors='coerce', format='%d - %m - %Y %X'))
|
|
27
28
|
|
|
28
|
-
|
|
29
|
+
elif 'tmoStatusCondition_0' in _df.columns: # usb download
|
|
30
|
+
_df['time'] = pd.to_datetime(_df['Date'] + ' ' + _df['Time'], errors='coerce', format='%Y-%m-%d %H:%M:%S')
|
|
31
|
+
_df.drop(columns=['Date', 'Time'], inplace=True)
|
|
32
|
+
_df.set_index('time', inplace=True)
|
|
29
33
|
|
|
34
|
+
_df = _df.rename(columns={'tmoStatusCondition_0': 'status',
|
|
35
|
+
'tmoTEOMABaseMC_0': 'PM_NV',
|
|
36
|
+
'tmoTEOMAMC_0': 'PM_Total',
|
|
37
|
+
'tmoTEOMANoise_0': 'noise', })
|
|
38
|
+
else:
|
|
39
|
+
raise NotImplementedError
|
|
40
|
+
|
|
41
|
+
_df = _df.where(_df['status'] < 1)
|
|
30
42
|
_df = _df[['PM_NV', 'PM_Total', 'noise']].apply(to_numeric, errors='coerce')
|
|
31
43
|
|
|
32
44
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
Binary file
|
|
Binary file
|