AeroViz 0.1.9.4__py3-none-any.whl → 0.1.9.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AeroViz might be problematic. Click here for more details.
- AeroViz/__init__.py +1 -2
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/{plot/optical → dataProcess/Optical}/PyMieScatt_update.py +4 -11
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/prop.py +62 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__init__.py +0 -1
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/{meteorology.py → CBPF.py} +150 -122
- AeroViz/plot/meteorology/__init__.py +3 -1
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/wind_rose.py +77 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/optical/optical.py +2 -3
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/plt_utils.py +1 -1
- AeroViz/rawDataReader/__init__.py +4 -2
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__init__.py +71 -72
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +78 -0
- AeroViz/rawDataReader/script/Aurora.py +1 -1
- AeroViz/rawDataReader/script/EPA.py +1 -1
- AeroViz/rawDataReader/script/Minion.py +4 -3
- AeroViz/rawDataReader/script/NEPH.py +1 -1
- AeroViz/rawDataReader/script/OCEC.py +1 -1
- AeroViz/rawDataReader/script/SMPS.py +9 -7
- AeroViz/rawDataReader/script/TEOM.py +1 -1
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS_3321.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/XRF.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__init__.py +0 -1
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- {AeroViz-0.1.9.4.dist-info → AeroViz-0.1.9.6.dist-info}/METADATA +40 -78
- AeroViz-0.1.9.6.dist-info/RECORD +169 -0
- {AeroViz-0.1.9.4.dist-info → AeroViz-0.1.9.6.dist-info}/WHEEL +1 -1
- AeroViz/plot/hysplit/__init__.py +0 -1
- AeroViz/tools/datareader.py +0 -66
- AeroViz-0.1.9.4.dist-info/RECORD +0 -102
- /AeroViz/{plot/optical → dataProcess/Optical}/mie_theory.py +0 -0
- /AeroViz/plot/{hysplit → meteorology}/hysplit.py +0 -0
- {AeroViz-0.1.9.4.dist-info → AeroViz-0.1.9.6.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.9.4.dist-info → AeroViz-0.1.9.6.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
from pathlib import Path
|
|
3
|
+
from typing import Any
|
|
3
4
|
|
|
4
5
|
from pandas import Grouper, Timedelta
|
|
5
6
|
|
|
@@ -25,7 +26,7 @@ def RawDataReader(instrument_name: str,
|
|
|
25
26
|
end: datetime = None,
|
|
26
27
|
mean_freq: str = '1h',
|
|
27
28
|
csv_out: bool = True,
|
|
28
|
-
):
|
|
29
|
+
**kwargs: Any):
|
|
29
30
|
"""
|
|
30
31
|
Factory function to instantiate the appropriate reader module for a given instrument and
|
|
31
32
|
return the processed data over the specified time range.
|
|
@@ -107,7 +108,8 @@ def RawDataReader(instrument_name: str,
|
|
|
107
108
|
qc=qc,
|
|
108
109
|
qc_freq=qc_freq,
|
|
109
110
|
rate=rate,
|
|
110
|
-
append_data=append_data
|
|
111
|
+
append_data=append_data,
|
|
112
|
+
**kwargs
|
|
111
113
|
)
|
|
112
114
|
return reader_module(
|
|
113
115
|
start=start,
|
|
Binary file
|
|
Binary file
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import json
|
|
2
|
-
import logging
|
|
3
2
|
from abc import ABC, abstractmethod
|
|
3
|
+
from contextlib import contextmanager
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import Optional
|
|
6
|
+
from typing import Optional, Generator
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
import pandas as pd
|
|
@@ -12,6 +12,7 @@ from rich.console import Console
|
|
|
12
12
|
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn
|
|
13
13
|
|
|
14
14
|
from AeroViz.rawDataReader.config.supported_instruments import meta
|
|
15
|
+
from AeroViz.rawDataReader.core.logger import ReaderLogger
|
|
15
16
|
from AeroViz.rawDataReader.core.qc import DataQualityControl
|
|
16
17
|
|
|
17
18
|
__all__ = ['AbstractReader']
|
|
@@ -35,11 +36,12 @@ class AbstractReader(ABC):
|
|
|
35
36
|
qc: bool = True,
|
|
36
37
|
qc_freq: Optional[str] = None,
|
|
37
38
|
rate: bool = True,
|
|
38
|
-
append_data: bool = False
|
|
39
|
+
append_data: bool = False,
|
|
40
|
+
**kwargs):
|
|
39
41
|
|
|
40
42
|
self.path = Path(path)
|
|
41
43
|
self.meta = meta[self.nam]
|
|
42
|
-
self.logger = self.
|
|
44
|
+
self.logger = ReaderLogger(self.nam, self.path)
|
|
43
45
|
|
|
44
46
|
self.reset = reset
|
|
45
47
|
self.qc = qc
|
|
@@ -53,6 +55,8 @@ class AbstractReader(ABC):
|
|
|
53
55
|
self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
|
|
54
56
|
self.csv_out = self.path / f'output_{self.nam.lower()}.csv'
|
|
55
57
|
|
|
58
|
+
self.size_range = kwargs.get('size_range', (11.8, 593.5))
|
|
59
|
+
|
|
56
60
|
def __call__(self,
|
|
57
61
|
start: datetime,
|
|
58
62
|
end: datetime,
|
|
@@ -78,20 +82,6 @@ class AbstractReader(ABC):
|
|
|
78
82
|
def _QC(self, df: DataFrame) -> DataFrame:
|
|
79
83
|
return df
|
|
80
84
|
|
|
81
|
-
def _setup_logger(self) -> logging.Logger:
|
|
82
|
-
logger = logging.getLogger(self.nam)
|
|
83
|
-
logger.setLevel(logging.INFO)
|
|
84
|
-
|
|
85
|
-
for handler in logger.handlers[:]:
|
|
86
|
-
handler.close()
|
|
87
|
-
logger.removeHandler(handler)
|
|
88
|
-
|
|
89
|
-
handler = logging.FileHandler(self.path / f'{self.nam}.log')
|
|
90
|
-
handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
|
|
91
|
-
logger.addHandler(handler)
|
|
92
|
-
|
|
93
|
-
return logger
|
|
94
|
-
|
|
95
85
|
def _rate_calculate(self, raw_data, qc_data) -> None:
|
|
96
86
|
def __base_rate(raw_data, qc_data):
|
|
97
87
|
period_size = len(raw_data.resample('1h').mean().index)
|
|
@@ -104,28 +94,27 @@ class AbstractReader(ABC):
|
|
|
104
94
|
|
|
105
95
|
# validate rate calculation
|
|
106
96
|
if period_size == 0 or sample_size == 0 or qc_size == 0:
|
|
107
|
-
|
|
97
|
+
self.logger.warning(f'\t\t No data for this period... skip')
|
|
108
98
|
continue
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
99
|
+
if period_size < sample_size:
|
|
100
|
+
self.logger.warning(f'\t\tError: Sample({sample_size}) > Period({period_size})... skip')
|
|
101
|
+
continue
|
|
102
|
+
if sample_size < qc_size:
|
|
103
|
+
self.logger.warning(f'\t\tError: QC({qc_size}) > Sample({sample_size})... skip')
|
|
113
104
|
continue
|
|
114
105
|
|
|
115
106
|
else:
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
self.logger.info(f
|
|
121
|
-
self.logger.info(
|
|
122
|
-
|
|
123
|
-
self.logger.info(
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
print(f'\t\tacquisition rate | yield rate -> OEE rate : '
|
|
128
|
-
f'\033[91m{_acq_rate}% | {_yid_rate}% -> {_OEE_rate}%\033[0m')
|
|
107
|
+
_sample_rate = round((sample_size / period_size) * 100, 1)
|
|
108
|
+
_valid_rate = round((qc_size / sample_size) * 100, 1)
|
|
109
|
+
_total_rate = round((qc_size / period_size) * 100, 1)
|
|
110
|
+
|
|
111
|
+
self.logger.info(f"\t\t{self.logger.CYAN}▶ {_nam}{self.logger.RESET}")
|
|
112
|
+
self.logger.info(
|
|
113
|
+
f"\t\t\t├─ {'Sample Rate':15}: {self.logger.BLUE}{_sample_rate:>6.1f}%{self.logger.RESET}")
|
|
114
|
+
self.logger.info(
|
|
115
|
+
f"\t\t\t├─ {'Valid Rate':15}: {self.logger.BLUE}{_valid_rate:>6.1f}%{self.logger.RESET}")
|
|
116
|
+
self.logger.info(
|
|
117
|
+
f"\t\t\t└─ {'Total Rate':15}: {self.logger.BLUE}{_total_rate:>6.1f}%{self.logger.RESET}")
|
|
129
118
|
|
|
130
119
|
if self.meta['deter_key'] is not None:
|
|
131
120
|
# use qc_freq to calculate each period rate
|
|
@@ -135,9 +124,8 @@ class AbstractReader(ABC):
|
|
|
135
124
|
|
|
136
125
|
for (month, _sub_raw_data), (_, _sub_qc_data) in zip(raw_data_grouped, qc_data_grouped):
|
|
137
126
|
self.logger.info(
|
|
138
|
-
f"\
|
|
139
|
-
|
|
140
|
-
f"\n\tProcessing: {_sub_raw_data.index[0].strftime('%F')} to {_sub_raw_data.index[-1].strftime('%F')}")
|
|
127
|
+
f"\t{self.logger.BLUE}▶ Processing: {_sub_raw_data.index[0].strftime('%F')}"
|
|
128
|
+
f" to {_sub_raw_data.index[-1].strftime('%F')}{self.logger.RESET}")
|
|
141
129
|
|
|
142
130
|
__base_rate(_sub_raw_data, _sub_qc_data)
|
|
143
131
|
|
|
@@ -201,6 +189,34 @@ class AbstractReader(ABC):
|
|
|
201
189
|
except Exception as e:
|
|
202
190
|
raise IOError(f"Error saving data. {e}")
|
|
203
191
|
|
|
192
|
+
@contextmanager
|
|
193
|
+
def progress_reading(self, files: list) -> Generator:
|
|
194
|
+
# Create message temporary storage and replace logger method
|
|
195
|
+
logs = {level: [] for level in ['info', 'warning', 'error']}
|
|
196
|
+
original = {level: getattr(self.logger, level) for level in logs}
|
|
197
|
+
|
|
198
|
+
for level, msgs in logs.items():
|
|
199
|
+
setattr(self.logger, level, msgs.append)
|
|
200
|
+
|
|
201
|
+
try:
|
|
202
|
+
with Progress(
|
|
203
|
+
TextColumn("[bold blue]{task.description}", style="bold blue"),
|
|
204
|
+
BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
|
|
205
|
+
TaskProgressColumn(),
|
|
206
|
+
TimeRemainingColumn(),
|
|
207
|
+
TextColumn("{task.fields[filename]}", style="yellow"),
|
|
208
|
+
console=Console(force_terminal=True, color_system="auto", width=120),
|
|
209
|
+
expand=False
|
|
210
|
+
) as progress:
|
|
211
|
+
task = progress.add_task(f"▶ Reading {self.nam} files", total=len(files), filename="")
|
|
212
|
+
yield progress, task
|
|
213
|
+
finally:
|
|
214
|
+
# Restore logger method and output message
|
|
215
|
+
for level, msgs in logs.items():
|
|
216
|
+
setattr(self.logger, level, original[level])
|
|
217
|
+
for msg in msgs:
|
|
218
|
+
original[level](msg)
|
|
219
|
+
|
|
204
220
|
def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
|
|
205
221
|
files = [f
|
|
206
222
|
for file_pattern in self.meta['pattern']
|
|
@@ -212,37 +228,28 @@ class AbstractReader(ABC):
|
|
|
212
228
|
raise FileNotFoundError(f"No files in '{self.path}' could be read. Please check the current path.")
|
|
213
229
|
|
|
214
230
|
df_list = []
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
TaskProgressColumn(),
|
|
219
|
-
TimeRemainingColumn(),
|
|
220
|
-
TextColumn("{task.fields[filename]}", style="yellow"),
|
|
221
|
-
console=Console(force_terminal=True, color_system="auto"),
|
|
222
|
-
expand=False
|
|
223
|
-
) as progress:
|
|
224
|
-
task = progress.add_task(f"Reading {self.nam} files", total=len(files), filename="")
|
|
231
|
+
|
|
232
|
+
# Context manager for progress bar display
|
|
233
|
+
with self.progress_reading(files) as (progress, task):
|
|
225
234
|
for file in files:
|
|
226
235
|
progress.update(task, advance=1, filename=file.name)
|
|
227
236
|
try:
|
|
228
|
-
df
|
|
229
|
-
|
|
230
|
-
if df is not None and not df.empty:
|
|
237
|
+
if (df := self._raw_reader(file)) is not None and not df.empty:
|
|
231
238
|
df_list.append(df)
|
|
232
239
|
else:
|
|
233
|
-
self.logger.warning(f"
|
|
234
|
-
|
|
235
|
-
except pd.errors.ParserError as e:
|
|
236
|
-
self.logger.error(f"Error tokenizing data: {e}")
|
|
240
|
+
self.logger.warning(f"\tFile {file.name} produced an empty DataFrame or None.")
|
|
237
241
|
|
|
238
242
|
except Exception as e:
|
|
239
243
|
self.logger.error(f"Error reading {file.name}: {e}")
|
|
240
244
|
|
|
241
245
|
if not df_list:
|
|
242
|
-
raise ValueError("
|
|
246
|
+
raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m")
|
|
243
247
|
|
|
244
248
|
raw_data = concat(df_list, axis=0).groupby(level=0).first()
|
|
245
249
|
|
|
250
|
+
if self.nam == 'SMPS':
|
|
251
|
+
raw_data = raw_data.sort_index(axis=1, key=lambda x: x.astype(float))
|
|
252
|
+
|
|
246
253
|
raw_data = self._timeIndex_process(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)
|
|
247
254
|
qc_data = self._QC(raw_data).apply(to_numeric, errors='coerce').copy(deep=True)
|
|
248
255
|
|
|
@@ -251,29 +258,28 @@ class AbstractReader(ABC):
|
|
|
251
258
|
def _run(self, user_start, user_end):
|
|
252
259
|
# read pickle if pickle file exists and 'reset=False' or process raw data or append new data
|
|
253
260
|
if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
|
|
254
|
-
|
|
255
|
-
f"from {user_start} to {user_end}\n")
|
|
261
|
+
self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}", color_part="PICKLE")
|
|
256
262
|
|
|
257
263
|
_f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam)
|
|
258
264
|
|
|
259
265
|
if self.append:
|
|
260
|
-
|
|
266
|
+
self.logger.info_box(f"Appending New data from {user_start} to {user_end}", color_part="New data")
|
|
267
|
+
|
|
261
268
|
_f_raw_new, _f_qc_new = self._read_raw_files()
|
|
262
269
|
_f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
|
|
263
270
|
_f_qc = self._timeIndex_process(_f_qc_done, append_df=_f_qc_new)
|
|
271
|
+
|
|
264
272
|
else:
|
|
265
273
|
_f_raw, _f_qc = _f_raw_done, _f_qc_done
|
|
274
|
+
|
|
266
275
|
return _f_qc if self.qc else _f_raw
|
|
267
276
|
|
|
268
277
|
else:
|
|
269
|
-
|
|
270
|
-
f"from {user_start} to {user_end}\n")
|
|
278
|
+
self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}", color_part="RAW DATA")
|
|
271
279
|
|
|
272
280
|
_f_raw, _f_qc = self._read_raw_files()
|
|
273
281
|
|
|
274
282
|
# process time index
|
|
275
|
-
data_start, data_end = _f_raw.index.sort_values()[[0, -1]]
|
|
276
|
-
|
|
277
283
|
_f_raw = self._timeIndex_process(_f_raw, user_start, user_end)
|
|
278
284
|
_f_qc = self._timeIndex_process(_f_qc, user_start, user_end)
|
|
279
285
|
_f_qc = self._outlier_process(_f_qc)
|
|
@@ -281,15 +287,8 @@ class AbstractReader(ABC):
|
|
|
281
287
|
# save
|
|
282
288
|
self._save_data(_f_raw, _f_qc)
|
|
283
289
|
|
|
284
|
-
self.logger.info(f"{'=' * 60}")
|
|
285
|
-
self.logger.info(f"Raw data time : {data_start} to {data_end}")
|
|
286
|
-
self.logger.info(f"Output time : {user_start} to {user_end}")
|
|
287
|
-
self.logger.info(f"{'-' * 60}")
|
|
288
|
-
|
|
289
290
|
if self.rate:
|
|
290
|
-
_f_raw =
|
|
291
|
-
_f_qc = _f_qc.apply(to_numeric, errors='coerce')
|
|
292
|
-
self._rate_calculate(_f_raw, _f_qc)
|
|
291
|
+
self._rate_calculate(_f_raw.apply(to_numeric, errors='coerce'), _f_qc.apply(to_numeric, errors='coerce'))
|
|
293
292
|
|
|
294
293
|
return _f_qc if self.qc else _f_raw
|
|
295
294
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ReaderLogger:
|
|
8
|
+
def __init__(self, name: str, log_path: Path):
|
|
9
|
+
self.name = name
|
|
10
|
+
self.log_path = log_path
|
|
11
|
+
|
|
12
|
+
# ANSI color codes
|
|
13
|
+
self.CYAN = '\033[96m'
|
|
14
|
+
self.BLUE = '\033[94m'
|
|
15
|
+
self.GREEN = '\033[92m'
|
|
16
|
+
self.YELLOW = '\033[93m'
|
|
17
|
+
self.RED = '\033[91m'
|
|
18
|
+
self.RESET = '\033[0m'
|
|
19
|
+
|
|
20
|
+
self.logger = self._setup_logger()
|
|
21
|
+
|
|
22
|
+
def _setup_logger(self) -> logging.Logger:
|
|
23
|
+
logger = logging.getLogger(self.name)
|
|
24
|
+
logger.setLevel(logging.INFO)
|
|
25
|
+
|
|
26
|
+
# Remove existing handlers
|
|
27
|
+
for handler in logger.handlers[:]:
|
|
28
|
+
handler.close()
|
|
29
|
+
logger.removeHandler(handler)
|
|
30
|
+
|
|
31
|
+
# clean ANSI formatter (for log file)
|
|
32
|
+
class CleanFormatter(logging.Formatter):
|
|
33
|
+
def format(self, record):
|
|
34
|
+
formatted_msg = super().format(record)
|
|
35
|
+
return re.sub(r'\033\[[0-9;]*m', '', formatted_msg)
|
|
36
|
+
|
|
37
|
+
# Set up handlers
|
|
38
|
+
file_handler = logging.FileHandler(self.log_path / f'{self.name}.log')
|
|
39
|
+
file_handler.setFormatter(CleanFormatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
|
|
40
|
+
|
|
41
|
+
console_handler = logging.StreamHandler(sys.stdout)
|
|
42
|
+
console_handler.setFormatter(logging.Formatter('%(message)s'))
|
|
43
|
+
|
|
44
|
+
logger.addHandler(file_handler)
|
|
45
|
+
logger.addHandler(console_handler)
|
|
46
|
+
|
|
47
|
+
return logger
|
|
48
|
+
|
|
49
|
+
def info(self, msg: str):
|
|
50
|
+
self.logger.info(msg)
|
|
51
|
+
|
|
52
|
+
def warning(self, msg: str):
|
|
53
|
+
self.logger.warning(msg)
|
|
54
|
+
|
|
55
|
+
def error(self, msg: str):
|
|
56
|
+
self.logger.error(msg)
|
|
57
|
+
|
|
58
|
+
def info_box(self, text: str, color_part: str = None, width: int = 80):
|
|
59
|
+
"""
|
|
60
|
+
Create a boxed message with optional colored text
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
text: Base text format (e.g., "Reading {} RAW DATA from {} to {}")
|
|
64
|
+
color_part: Part of text to be colored (e.g., "RAW DATA")
|
|
65
|
+
width: Box width
|
|
66
|
+
"""
|
|
67
|
+
display_text = text.replace(color_part, " " * len(color_part)) if color_part else text
|
|
68
|
+
|
|
69
|
+
left_padding = " " * ((width - len(display_text)) // 2)
|
|
70
|
+
right_padding = " " * (width - len(display_text) - len(left_padding))
|
|
71
|
+
|
|
72
|
+
content = text.replace(color_part, f"{self.CYAN}{color_part}{self.RESET}") if color_part else text
|
|
73
|
+
|
|
74
|
+
__content__ = f"{left_padding}{content}{right_padding}"
|
|
75
|
+
|
|
76
|
+
self.info(f"╔{'═' * width}╗")
|
|
77
|
+
self.info(f"║{__content__}║")
|
|
78
|
+
self.info(f"╚{'═' * width}╝")
|
|
@@ -38,7 +38,7 @@ class Reader(AbstractReader):
|
|
|
38
38
|
_df = _df.loc[(_df['B'] > _df['G']) & (_df['G'] > _df['R'])]
|
|
39
39
|
|
|
40
40
|
# use IQR_QC
|
|
41
|
-
_df = self.time_aware_IQR_QC(_df)
|
|
41
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
42
42
|
|
|
43
43
|
# make sure all columns have values, otherwise set to nan
|
|
44
44
|
return _df.dropna(how='any').reindex(_index)
|
|
@@ -18,7 +18,7 @@ class Reader(AbstractReader):
|
|
|
18
18
|
on_bad_lines='skip')
|
|
19
19
|
|
|
20
20
|
if len(df.groupby('測站')) > 1:
|
|
21
|
-
raise ValueError(f
|
|
21
|
+
raise ValueError(f"Multiple stations found in the file: {df['測站'].unique()}")
|
|
22
22
|
else:
|
|
23
23
|
if '測站' in df.columns:
|
|
24
24
|
df.drop(columns=['測站'], inplace=True)
|
|
@@ -149,7 +149,7 @@ class Reader(AbstractReader):
|
|
|
149
149
|
columns_to_convert = [col for col in MDL.keys() if col in df.columns]
|
|
150
150
|
df[columns_to_convert] = df[columns_to_convert].div(1000)
|
|
151
151
|
|
|
152
|
-
self.logger.info(f"XRF QAQC summary: transform values below MDL to {MDL_replace}")
|
|
152
|
+
self.logger.info(f"\t{'XRF QAQC summary':21}: transform values below MDL to {MDL_replace}")
|
|
153
153
|
|
|
154
154
|
return df
|
|
155
155
|
|
|
@@ -206,9 +206,10 @@ class Reader(AbstractReader):
|
|
|
206
206
|
# 計算保留的数據的百分比
|
|
207
207
|
retained_percentage = (valid_mask.sum() / len(df)) * 100
|
|
208
208
|
|
|
209
|
-
self.logger.info(
|
|
209
|
+
self.logger.info(
|
|
210
|
+
f"\t{'Ions balance summary':21}: {retained_percentage.__round__(0)}% within tolerance ± {tolerance}")
|
|
210
211
|
|
|
211
212
|
if retained_percentage < 70:
|
|
212
|
-
self.logger.warning("
|
|
213
|
+
self.logger.warning("\tWarning: The percentage of retained data is less than 70%")
|
|
213
214
|
|
|
214
215
|
return df
|
|
@@ -74,7 +74,7 @@ class Reader(AbstractReader):
|
|
|
74
74
|
# _df = _df.loc[(_df['B'] > _df['G']) & (_df['G'] > _df['R'])]
|
|
75
75
|
|
|
76
76
|
# use IQR_QC
|
|
77
|
-
_df = self.time_aware_IQR_QC(_df)
|
|
77
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
78
78
|
|
|
79
79
|
# make sure all columns have values, otherwise set to nan
|
|
80
80
|
return _df.dropna(how='any').reindex(_index)
|
|
@@ -87,6 +87,6 @@ class Reader(AbstractReader):
|
|
|
87
87
|
_df.loc[_df[col] <= threshold, col] = np.nan
|
|
88
88
|
|
|
89
89
|
# use IQR_QC
|
|
90
|
-
_df = self.time_aware_IQR_QC(_df
|
|
90
|
+
_df = self.time_aware_IQR_QC(_df)
|
|
91
91
|
|
|
92
92
|
return _df.dropna(subset=['Thermal_OC', 'Optical_OC']).reindex(_index)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import csv
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
|
-
from pandas import to_datetime, to_numeric, read_csv
|
|
4
|
+
from pandas import to_datetime, to_numeric, read_csv
|
|
5
5
|
|
|
6
6
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
7
7
|
|
|
@@ -40,7 +40,7 @@ class Reader(AbstractReader):
|
|
|
40
40
|
|
|
41
41
|
for date_format in date_formats:
|
|
42
42
|
_time_index = parse_date(_df, date_format)
|
|
43
|
-
if not isna(
|
|
43
|
+
if not _time_index.isna().all():
|
|
44
44
|
break
|
|
45
45
|
else:
|
|
46
46
|
raise ValueError("Unable to parse dates with given formats")
|
|
@@ -56,14 +56,17 @@ class Reader(AbstractReader):
|
|
|
56
56
|
_df_smps.columns = _df_smps.columns.astype(float)
|
|
57
57
|
_df_smps = _df_smps.loc[_df_smps.index.dropna().copy()]
|
|
58
58
|
|
|
59
|
-
if _df_smps.columns[0] !=
|
|
60
|
-
|
|
61
|
-
|
|
59
|
+
if _df_smps.columns[0] != self.size_range[0] or _df_smps.columns[-1] != self.size_range[1]:
|
|
60
|
+
self.logger.info(f'\tSMPS file: {file.name} is not match the default size range {self.size_range}, '
|
|
61
|
+
f'it is ({_df_smps.columns[0]}, {_df_smps.columns[-1]})')
|
|
62
62
|
|
|
63
63
|
return _df_smps.apply(to_numeric, errors='coerce')
|
|
64
64
|
|
|
65
65
|
# QC data
|
|
66
66
|
def _QC(self, _df):
|
|
67
|
+
size_range_mask = (_df.columns.astype(float) >= self.size_range[0]) & (
|
|
68
|
+
_df.columns.astype(float) <= self.size_range[1])
|
|
69
|
+
_df = _df.loc[:, size_range_mask]
|
|
67
70
|
|
|
68
71
|
# mask out the data size lower than 7
|
|
69
72
|
_df['total'] = _df.sum(axis=1, min_count=1) * (np.diff(np.log(_df.keys().to_numpy(float)))).mean()
|
|
@@ -74,8 +77,7 @@ class Reader(AbstractReader):
|
|
|
74
77
|
_df = _df.mask(_df['total'] < 2000)
|
|
75
78
|
|
|
76
79
|
# remove the bin over 400 nm which num. conc. larger than 4000
|
|
77
|
-
_df_remv_ky = _df.keys()[:-
|
|
78
|
-
|
|
80
|
+
_df_remv_ky = _df.keys()[:-1][_df.keys()[:-1] >= 400.]
|
|
79
81
|
_df[_df_remv_ky] = _df[_df_remv_ky].copy().mask(_df[_df_remv_ky] > 4000.)
|
|
80
82
|
|
|
81
83
|
return _df[_df.keys()[:-1]]
|
|
@@ -40,7 +40,7 @@ class Reader(AbstractReader):
|
|
|
40
40
|
|
|
41
41
|
# QC data in 1 hr
|
|
42
42
|
# use time_aware_IQR_QC
|
|
43
|
-
_df = self.time_aware_IQR_QC(_df, time_window='
|
|
43
|
+
_df = self.time_aware_IQR_QC(_df, time_window='6h')
|
|
44
44
|
|
|
45
45
|
# remove data where size < 50% in 1-hr
|
|
46
46
|
points_per_hour = Timedelta('1h') / Timedelta(self.meta['freq'])
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
AeroViz/tools/__init__.py
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|