aeroviz-0.1.21-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AeroViz/__init__.py +13 -0
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/data/DEFAULT_DATA.csv +1417 -0
- AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
- AeroViz/data/hysplit_example_data.txt +101 -0
- AeroViz/dataProcess/Chemistry/__init__.py +149 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
- AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
- AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
- AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
- AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
- AeroViz/dataProcess/Optical/__init__.py +281 -0
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/_derived.py +518 -0
- AeroViz/dataProcess/Optical/_extinction.py +123 -0
- AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
- AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
- AeroViz/dataProcess/Optical/coefficient.py +72 -0
- AeroViz/dataProcess/Optical/fRH.pkl +0 -0
- AeroViz/dataProcess/Optical/mie_theory.py +260 -0
- AeroViz/dataProcess/README.md +271 -0
- AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
- AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
- AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
- AeroViz/dataProcess/SizeDistr/prop.py +62 -0
- AeroViz/dataProcess/VOC/__init__.py +14 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/VOC/_potential_par.py +108 -0
- AeroViz/dataProcess/VOC/support_voc.json +446 -0
- AeroViz/dataProcess/__init__.py +66 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__init__.py +272 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/mcp_server.py +352 -0
- AeroViz/plot/__init__.py +13 -0
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/bar.py +126 -0
- AeroViz/plot/box.py +69 -0
- AeroViz/plot/distribution/__init__.py +1 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/distribution.py +576 -0
- AeroViz/plot/meteorology/CBPF.py +295 -0
- AeroViz/plot/meteorology/__init__.py +3 -0
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/hysplit.py +93 -0
- AeroViz/plot/meteorology/wind_rose.py +77 -0
- AeroViz/plot/optical/__init__.py +1 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/optical/optical.py +388 -0
- AeroViz/plot/pie.py +210 -0
- AeroViz/plot/radar.py +184 -0
- AeroViz/plot/regression.py +200 -0
- AeroViz/plot/scatter.py +174 -0
- AeroViz/plot/templates/__init__.py +6 -0
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/contour.py +47 -0
- AeroViz/plot/templates/corr_matrix.py +267 -0
- AeroViz/plot/templates/diurnal_pattern.py +61 -0
- AeroViz/plot/templates/koschmieder.py +95 -0
- AeroViz/plot/templates/metal_heatmap.py +164 -0
- AeroViz/plot/timeseries/__init__.py +2 -0
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +446 -0
- AeroViz/plot/utils/__init__.py +4 -0
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/_color.py +71 -0
- AeroViz/plot/utils/_unit.py +55 -0
- AeroViz/plot/utils/fRH.json +390 -0
- AeroViz/plot/utils/plt_utils.py +92 -0
- AeroViz/plot/utils/sklearn_utils.py +49 -0
- AeroViz/plot/utils/units.json +89 -0
- AeroViz/plot/violin.py +80 -0
- AeroViz/rawDataReader/FLOW.md +138 -0
- AeroViz/rawDataReader/__init__.py +220 -0
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__init__.py +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/supported_instruments.py +135 -0
- AeroViz/rawDataReader/core/__init__.py +658 -0
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +171 -0
- AeroViz/rawDataReader/core/pre_process.py +308 -0
- AeroViz/rawDataReader/core/qc.py +961 -0
- AeroViz/rawDataReader/core/report.py +579 -0
- AeroViz/rawDataReader/script/AE33.py +173 -0
- AeroViz/rawDataReader/script/AE43.py +151 -0
- AeroViz/rawDataReader/script/APS.py +339 -0
- AeroViz/rawDataReader/script/Aurora.py +191 -0
- AeroViz/rawDataReader/script/BAM1020.py +90 -0
- AeroViz/rawDataReader/script/BC1054.py +161 -0
- AeroViz/rawDataReader/script/EPA.py +79 -0
- AeroViz/rawDataReader/script/GRIMM.py +68 -0
- AeroViz/rawDataReader/script/IGAC.py +140 -0
- AeroViz/rawDataReader/script/MA350.py +179 -0
- AeroViz/rawDataReader/script/Minion.py +218 -0
- AeroViz/rawDataReader/script/NEPH.py +199 -0
- AeroViz/rawDataReader/script/OCEC.py +173 -0
- AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
- AeroViz/rawDataReader/script/SMPS.py +389 -0
- AeroViz/rawDataReader/script/TEOM.py +181 -0
- AeroViz/rawDataReader/script/VOC.py +106 -0
- AeroViz/rawDataReader/script/Xact.py +244 -0
- AeroViz/rawDataReader/script/__init__.py +28 -0
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__init__.py +2 -0
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- AeroViz/tools/database.py +95 -0
- AeroViz/tools/dataclassifier.py +117 -0
- AeroViz/tools/dataprinter.py +58 -0
- aeroviz-0.1.21.dist-info/METADATA +294 -0
- aeroviz-0.1.21.dist-info/RECORD +180 -0
- aeroviz-0.1.21.dist-info/WHEEL +5 -0
- aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
- aeroviz-0.1.21.dist-info/top_level.txt +1 -0
--- /dev/null
+++ b/AeroViz/rawDataReader/core/__init__.py
@@ -0,0 +1,658 @@
import json
from abc import ABC, abstractmethod
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
from typing import Generator

import numpy as np
import pandas as pd
from rich.console import Console
from rich.progress import Progress, TextColumn, BarColumn, SpinnerColumn, TaskProgressColumn

from AeroViz.rawDataReader.config.supported_instruments import meta
from AeroViz.rawDataReader.core.logger import ReaderLogger
from AeroViz.rawDataReader.core.qc import QualityControl, QCRule, QCFlagBuilder
from AeroViz.rawDataReader.core.report import calculate_rates, process_rates_report, process_timeline_report, print_timeline_visual

__all__ = ['AbstractReader', 'QCRule', 'QCFlagBuilder']


class AbstractReader(ABC):
    """
    Abstract class for reading raw data from different instruments.

    This class serves as a base class for reading raw data from various instruments. Each instrument
    should have a separate class that inherits from this class and implements the abstract methods.
    The abstract methods are `_raw_reader` and `_QC`.

    The class handles file management, including reading from and writing to pickle files, and
    implements quality control measures. It can process data in both batch and streaming modes.

    Attributes
    ----------
    nam : str
        Name identifier for the reader class
    path : Path
        Path to the raw data files
    meta : dict
        Metadata configuration for the instrument
    logger : ReaderLogger
        Custom logger instance for the reader
    reset : bool
        Flag to indicate whether to reset existing processed data
    append : bool
        Flag to indicate whether to append new data to existing processed data
    qc : bool or str
        Quality control settings
    qc_freq : str or None
        Frequency for quality control calculations
    """

    nam = 'AbstractReader'

    def __init__(self,
                 path: Path | str,
                 reset: bool | str = False,
                 qc: bool | str = True,
                 **kwargs):
        """
        Initialize the AbstractReader.

        Parameters
        ----------
        path : Path or str
            Path to the directory containing raw data files
        reset : bool or str, default=False
            If True, forces re-reading of raw data.
            If 'append', appends new data to existing processed data.
        qc : bool or str, default=True
            If True, performs quality control.
            If str, specifies the frequency for QC calculations.
        **kwargs : dict
            Additional keyword arguments:
            log_level : str
                Logging level for the reader
            suppress_warnings : bool
                If True, suppresses warning messages

        Notes
        -----
        Creates necessary output directories and initializes logging system.
        Sets up paths for pickle files, CSV files, and report outputs.
        """
        self.path = Path(path)
        self.meta = meta[self.nam]
        output_folder = self.path / f'{self.nam.lower()}_outputs'
        output_folder.mkdir(parents=True, exist_ok=True)

        # Default to 'INFO' when log_level is not supplied (otherwise .upper() would fail on None)
        self.logger = ReaderLogger(
            self.nam, output_folder,
            kwargs.get('log_level', 'INFO').upper() if not kwargs.get('suppress_warnings') else 'ERROR')

        self.reset = reset is True
        self.append = reset == 'append'
        self.qc = qc  # if qc, then calculate rate
        self.qc_freq = qc if isinstance(qc, str) else None
        self.kwargs = kwargs

        self.pkl_nam = output_folder / f'_read_{self.nam.lower()}_qc.pkl'
        self.csv_nam = output_folder / f'_read_{self.nam.lower()}_qc.csv'
        self.pkl_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.pkl'
        self.csv_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.csv'
        self.csv_out = output_folder / f'output_{self.nam.lower()}.csv'
        self.report_out = output_folder / 'report.json'

    def __call__(self,
                 start: datetime,
                 end: datetime,
                 mean_freq: str = '1h',
                 ) -> pd.DataFrame:
        """
        Process data for a specified time range.

        Parameters
        ----------
        start : datetime
            Start time for data processing
        end : datetime
            End time for data processing
        mean_freq : str, default='1h'
            Frequency for resampling the data

        Returns
        -------
        pd.DataFrame
            Processed and resampled data for the specified time range.
            If `qc` is False, the raw (unresampled) data is returned instead.

        Notes
        -----
        The processed data is also saved to a CSV file.
        """

        _f_raw, _f_qc = self._run(start, end)

        if not self.qc: return _f_raw

        # Extract QC_Flag before processing
        qc_flag = _f_qc['QC_Flag'].copy() if 'QC_Flag' in _f_qc else None

        # Process QC_Flag
        if 'QC_Flag' in _f_qc:
            # Set rows with QC_Flag != "Valid" to NaN while preserving index
            invalid_mask = _f_qc['QC_Flag'] != 'Valid'
            if invalid_mask.any():
                # Get all numeric columns (excluding QC_Flag column)
                numeric_columns = [col for col in _f_qc.columns if col != 'QC_Flag']
                # Set invalid data to NaN
                _f_qc.loc[invalid_mask, numeric_columns] = np.nan

            # Drop QC_Flag column
            _f_qc.drop(columns=['QC_Flag'], inplace=True)

        # Generate data acquisition and quality rate report (instrument time resolution)
        self._generate_report(_f_raw.apply(pd.to_numeric, errors='coerce'),
                              _f_qc.apply(pd.to_numeric, errors='coerce'),
                              qc_flag=qc_flag)

        _f_qc = _f_qc.resample(mean_freq).mean().round(4)

        _f_qc.to_csv(self.csv_out)

        # Generate timeline data (hourly values)
        report_dict = process_timeline_report(self.report_dict, _f_qc)

        # Write report
        with open(self.report_out, 'w') as f:
            json.dump(report_dict, f, indent=4)

        return _f_qc
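For reference, the invalid-row masking and resampling that `__call__` performs, shown in isolation. A minimal standalone sketch; the `PM25` column and the flag strings are made up:

```python
import numpy as np
import pandas as pd

# Rows whose QC_Flag is not 'Valid' get their numeric columns set to NaN,
# then the flag column is dropped and the data is resampled to mean_freq.
idx = pd.date_range('2024-01-01', periods=3, freq='h', name='time')
df = pd.DataFrame({'PM25': [10.0, 11.0, 12.0],
                   'QC_Flag': ['Valid', 'Range, Spike', 'Valid']}, index=idx)

invalid = df['QC_Flag'] != 'Valid'
df.loc[invalid, [c for c in df.columns if c != 'QC_Flag']] = np.nan
print(df.drop(columns='QC_Flag').resample('1h').mean().round(4))
```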
    @abstractmethod
    def _raw_reader(self, file):
        """
        Abstract method to read raw data files.

        Parameters
        ----------
        file : Path or str
            Path to the raw data file

        Returns
        -------
        pd.DataFrame
            Raw data read from the file

        Notes
        -----
        Must be implemented by child classes to handle specific file formats.
        """
        pass

    @abstractmethod
    def _QC(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Abstract method for quality control processing.

        Parameters
        ----------
        df : pd.DataFrame
            Input DataFrame containing raw data

        Returns
        -------
        pd.DataFrame
            Quality controlled data with QC_Flag column

        Notes
        -----
        Must be implemented by child classes to handle instrument-specific QC.
        This method should only check raw data quality (status, range, completeness).
        Derived parameter validation should be done in _process().
        """
        return df
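A minimal subclass sketch of the contract these two abstract methods define. Illustrative only: the `MyInstrument` name, the CSV layout, and the `scatter_coeff` column are assumptions, and a real reader's `nam` must match a key in `supported_instruments.meta`:

```python
import pandas as pd
from AeroViz.rawDataReader.core import AbstractReader

class MyInstrumentReader(AbstractReader):
    # Hypothetical reader: 'MyInstrument' would need an entry in
    # supported_instruments.meta (pattern, freq, ...) to actually run.
    nam = 'MyInstrument'

    def _raw_reader(self, file):
        # Assume a plain CSV with a parseable 'time' column.
        return pd.read_csv(file, parse_dates=['time'], index_col='time')

    def _QC(self, df: pd.DataFrame) -> pd.DataFrame:
        df['QC_Flag'] = 'Valid'
        # Raw-data checks only (a range check here); derived checks belong in _process()
        return self.update_qc_flag(df, df['scatter_coeff'] < 0, 'BelowZero')
```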
    def _process(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Process data to calculate derived parameters.

        This method is called after _QC() to calculate instrument-specific
        derived parameters (e.g., absorption coefficients, AAE, SAE).

        Parameters
        ----------
        df : pd.DataFrame
            Quality-controlled DataFrame with QC_Flag column

        Returns
        -------
        pd.DataFrame
            DataFrame with derived parameters added and QC_Flag updated

        Notes
        -----
        Default implementation returns the input unchanged.
        Override in child classes to implement instrument-specific processing.

        The method should:
        1. Skip calculation for rows where QC_Flag != 'Valid' (optional optimization)
        2. Calculate derived parameters
        3. Validate derived parameters and update QC_Flag if invalid
        """
        return df
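A matching `_process` override following the three steps listed above, continuing the hypothetical subclass from the earlier sketch. The wavelength columns and the 0.5–3 plausibility window are placeholders, though the AAE expression itself is the standard two-wavelength form:

```python
import numpy as np
import pandas as pd

class MyAethReader(MyInstrumentReader):
    def _process(self, df: pd.DataFrame) -> pd.DataFrame:
        # 2. Derived parameter: Absorption Angstrom Exponent from two wavelengths
        df['AAE'] = -np.log(df['abs_470'] / df['abs_660']) / np.log(470 / 660)
        # 3. Validate it and flag rows where the value is implausible
        return self.update_qc_flag(df, ~df['AAE'].between(0.5, 3.0), 'AAE_OutOfRange')
```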
    def _generate_report(self, raw_data, qc_data, qc_flag=None) -> None:
        """
        Calculate and log data quality rates for different time periods.

        Parameters
        ----------
        raw_data : pd.DataFrame
            Raw data before quality control
        qc_data : pd.DataFrame
            Data after quality control
        qc_flag : pd.Series, optional
            QC flag series indicating validity of each row

        Notes
        -----
        Calculates rates for specified QC frequency if set.
        Updates the quality report with calculated rates.
        """
        if qc_flag is not None:
            # Add blank line before rate section
            self.logger.info("")

            if self.qc_freq is not None:
                raw_data_grouped = raw_data.groupby(pd.Grouper(freq=self.qc_freq))
                qc_flag_grouped = qc_flag.groupby(pd.Grouper(freq=self.qc_freq))

                for (month, _sub_raw_data), (_, _sub_qc_flag) in zip(raw_data_grouped, qc_flag_grouped):
                    self.logger.info(
                        f"{self.logger.BLUE}Period: {_sub_raw_data.index[0].strftime('%Y-%m-%d')} ~ "
                        f"{_sub_raw_data.index[-1].strftime('%Y-%m-%d')}{self.logger.RESET}")

                    calculate_rates(self.logger, _sub_raw_data, _sub_qc_flag, with_log=True)
            else:
                calculate_rates(self.logger, raw_data, qc_flag, with_log=True)

        # Group the data by week and by month using Grouper
        current_time = datetime.now()

        # Group by week (weeks start on Monday)
        weekly_raw_groups = raw_data.groupby(pd.Grouper(freq='W-MON', label="left", closed="left"))
        weekly_flag_groups = qc_flag.groupby(pd.Grouper(freq='W-MON', label="left", closed="left"))

        # Group by month (periods start at the beginning of each month)
        monthly_raw_groups = raw_data.groupby(pd.Grouper(freq='MS'))
        monthly_flag_groups = qc_flag.groupby(pd.Grouper(freq='MS'))

        # Basic report information
        report_dict = {
            'startDate': qc_data.index.min().strftime('%Y/%m/%d %H:%M'),
            'endDate': qc_data.index.max().strftime('%Y/%m/%d %H:%M'),
            "report_time": current_time.strftime('%Y-%m-%d %H:%M:%S'),
            "instrument_id": f"{self.path.name[:2]}_{self.nam}",
            "instrument": self.nam,
        }

        # Generate the report data
        self.report_dict = process_rates_report(
            self.logger, report_dict,
            weekly_raw_groups, monthly_raw_groups,
            weekly_flag_groups, monthly_flag_groups
        )
    def _timeIndex_process(self, _df, user_start=None, user_end=None, append_df=None):
        """
        Process time index of the DataFrame.

        Parameters
        ----------
        _df : pd.DataFrame
            Input DataFrame to process
        user_start : datetime, optional
            User-specified start time
        user_end : datetime, optional
            User-specified end time
        append_df : pd.DataFrame, optional
            DataFrame to append to

        Returns
        -------
        pd.DataFrame
            DataFrame with processed time index

        Notes
        -----
        Handles time range filtering and data appending.
        """
        # Round timestamps and remove duplicates
        _df = _df.groupby(_df.index.floor('1min')).first()

        # Determine frequency
        freq = _df.index.inferred_freq or self.meta['freq']

        # Append new data if provided
        if append_df is not None:
            append_df.index = append_df.index.round('1min')
            _df = pd.concat([append_df.dropna(how='all'), _df.dropna(how='all')])
            _df = _df.loc[~_df.index.duplicated()]

        # Determine time range
        df_start, df_end = _df.index.sort_values()[[0, -1]]

        # Create new time index
        new_index = pd.date_range(user_start or df_start, user_end or df_end, freq=freq, name='time')

        # Reindex onto the new index, with a tolerance matched to the frequency
        if freq in ['1min', 'min', 'T']:
            # For minute-level data, use a smaller tolerance, e.g., 30 seconds
            return _df.reindex(new_index, method='nearest', tolerance='30s')
        elif freq in ['1h', 'h', 'H']:
            # For hourly data, use 30 minutes as tolerance
            # This way 08:20 matches to 08:00, but not to 09:00
            return _df.reindex(new_index, method='nearest', tolerance='30min')
        else:
            # For other frequencies, set tolerance to half the frequency
            if isinstance(freq, str) and freq[-1].isalpha():
                # If freq format is 'number+unit', e.g., '2h', '3min'
                try:
                    num = int(freq[:-1])
                    unit = freq[-1]
                    half_freq = f"{num // 2}{unit}" if num > 1 else f"30{'min' if unit == 'h' else 's'}"
                    return _df.reindex(new_index, method='nearest', tolerance=half_freq)
                except ValueError:
                    # Cannot parse freq; use the frequency itself as the tolerance
                    return _df.reindex(new_index, method='nearest', tolerance=freq)
            else:
                return _df.reindex(new_index, method='nearest', tolerance=freq)
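The hourly branch above in miniature: with a 30-minute tolerance, a 08:20 sample snaps to the 08:00 slot and a 08:40 sample to 09:00, so no observation is pulled across more than half an interval (self-contained sketch):

```python
import pandas as pd

obs = pd.DataFrame({'val': [1.0, 2.0]},
                   index=pd.to_datetime(['2024-01-01 08:20', '2024-01-01 08:40']))
hourly = pd.date_range('2024-01-01 08:00', '2024-01-01 09:00', freq='1h', name='time')
# 08:20 -> 08:00 and 08:40 -> 09:00; anything further than 30min becomes NaN
print(obs.reindex(hourly, method='nearest', tolerance='30min'))
```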
    def _outlier_process(self, _df):
        """
        Process outliers in the data.

        Parameters
        ----------
        _df : pd.DataFrame
            Input DataFrame containing potential outliers

        Returns
        -------
        pd.DataFrame
            DataFrame with outliers processed

        Notes
        -----
        Reads time ranges from an optional `outlier.json` file in the data
        directory and sets the corresponding rows to NaN. Returns the input
        unchanged if no such file exists.
        """
        outlier_file = self.path / 'outlier.json'

        if not outlier_file.exists():
            return _df

        with outlier_file.open('r', encoding='utf-8', errors='ignore') as f:
            outliers = json.load(f)

        for _st, _ed in outliers.values():
            _df.loc[_st:_ed] = np.nan

        return _df
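The expected `outlier.json` layout follows from the loop above: a JSON object whose values are `[start, end]` pairs and whose keys are free-form labels. A hypothetical example, written from Python for concreteness:

```python
import json
from pathlib import Path

# Hypothetical labels and ranges; every row between start and end is set to NaN.
Path('outlier.json').write_text(json.dumps({
    "power_outage": ["2024-03-05 10:00", "2024-03-06 08:00"],
    "sensor_cleaning": ["2024-04-12 09:00", "2024-04-12 11:00"]
}, indent=4))
```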
    def _save_data(self, raw_data: pd.DataFrame, qc_data: pd.DataFrame) -> None:
        """
        Save processed data to files.

        Parameters
        ----------
        raw_data : pd.DataFrame
            Raw data to save
        qc_data : pd.DataFrame
            Quality controlled data to save

        Notes
        -----
        Saves data in both pickle and CSV formats.
        """
        try:
            raw_data.to_pickle(self.pkl_nam_raw)
            raw_data.to_csv(self.csv_nam_raw)
            qc_data.to_pickle(self.pkl_nam)
            qc_data.to_csv(self.csv_nam)

        except Exception as e:
            raise IOError(f"Error saving data. {e}")
    @contextmanager
    def progress_reading(self, files: list) -> Generator:
        """
        Context manager for tracking file reading progress.

        Parameters
        ----------
        files : list
            List of files to process

        Yields
        ------
        tuple
            The rich Progress instance and the task ID used to update it

        Notes
        -----
        Uses rich library for progress display. Logger output is buffered
        while the progress bar is active and flushed afterwards.
        """
        # Create temporary message storage and replace logger methods
        logs = {level: [] for level in ['info', 'warning', 'error']}
        original = {level: getattr(self.logger, level) for level in logs}

        for level, msgs in logs.items():
            setattr(self.logger, level, msgs.append)

        try:
            with Progress(
                    SpinnerColumn(finished_text="✓"),
                    BarColumn(bar_width=25, complete_style="green", finished_style="bright_green"),
                    TaskProgressColumn(style="bold", text_format="[bright_green]{task.percentage:>3.0f}%"),
                    TextColumn("{task.description}", style="bold blue"),
                    TextColumn("{task.fields[filename]}", style="bold blue"),
                    console=Console(force_terminal=True, color_system="auto", width=120),
                    expand=False
            ) as progress:
                task = progress.add_task(f"Reading {self.nam} files:", total=len(files), filename="")
                yield progress, task
        finally:
            # Restore logger methods and emit the buffered messages
            for level, msgs in logs.items():
                setattr(self.logger, level, original[level])
                for msg in msgs:
                    original[level](msg)
    def _read_raw_files(self) -> tuple[pd.DataFrame | None, pd.DataFrame | None]:
        """
        Read and process raw data files.

        Returns
        -------
        tuple[pd.DataFrame | None, pd.DataFrame | None]
            Tuple containing:
            - Raw data DataFrame or None
            - Quality controlled DataFrame or None

        Notes
        -----
        Handles file reading and initial processing.
        """
        files = [f
                 for file_pattern in self.meta['pattern']
                 for pattern in {file_pattern.lower(), file_pattern.upper(), file_pattern}
                 for f in self.path.glob(pattern)
                 if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]

        if not files:
            raise FileNotFoundError(f"No files in '{self.path}' could be read. Please check the current path.")

        df_list = []

        # Context manager for progress bar display
        with self.progress_reading(files) as (progress, task):
            for file in files:
                progress.update(task, advance=1, filename=file.name)
                try:
                    if (df := self._raw_reader(file)) is not None and not df.empty:
                        df_list.append(df)
                    else:
                        self.logger.debug(f"File {file.name} produced an empty DataFrame or None.")

                except Exception as e:
                    self.logger.error(f"Error reading {file.name}: {e}")

        if not df_list:
            raise ValueError("\033[41m\033[97mAll files were either empty or failed to read.\033[0m")

        raw_data = pd.concat(df_list, axis=0).groupby(level=0).first()

        if self.nam in ['SMPS', 'APS', 'GRIMM']:
            raw_data = raw_data.sort_index(axis=1, key=lambda x: x.astype(float))

        raw_data = self._timeIndex_process(raw_data)

        raw_data = raw_data.apply(pd.to_numeric, errors='coerce').copy(deep=True)

        # Perform QC processing (raw data quality checks only)
        qc_data = self._QC(raw_data.copy(deep=True))

        # Perform processing (calculate derived parameters + validate)
        qc_data = self._process(qc_data)

        # Only convert numeric columns to numeric, preserve QC_Flag column string values
        if 'QC_Flag' in qc_data.columns:
            numeric_columns = qc_data.select_dtypes(exclude=['object', 'string']).columns
            qc_data[numeric_columns] = qc_data[numeric_columns].apply(pd.to_numeric, errors='coerce')
        else:
            qc_data = qc_data.apply(pd.to_numeric, errors='coerce')

        # Make a deep copy to ensure data integrity
        qc_data_copy = qc_data.copy(deep=True)

        return raw_data, qc_data_copy
    def _run(self, user_start, user_end):
        """
        Main execution method for data processing.

        Parameters
        ----------
        user_start : datetime
            Start time for processing
        user_end : datetime
            End time for processing

        Returns
        -------
        tuple[pd.DataFrame, pd.DataFrame]
            Raw and quality-controlled data for the specified time range

        Notes
        -----
        Coordinates the entire data processing workflow.
        """
        # Read pickle if the pickle files exist and reset=False; otherwise process raw data or append new data
        if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
            self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}")

            _f_raw_done, _f_qc_done = pd.read_pickle(self.pkl_nam_raw), pd.read_pickle(self.pkl_nam)

            if self.append:
                self.logger.info_box(f"Appending New data from {user_start} to {user_end}")

                _f_raw_new, _f_qc_new = self._read_raw_files()
                _f_raw = self._timeIndex_process(_f_raw_done, append_df=_f_raw_new)
                _f_qc = self._timeIndex_process(_f_qc_done, append_df=_f_qc_new)

            else:
                _f_raw, _f_qc = _f_raw_done, _f_qc_done

            return _f_raw, _f_qc

        else:
            self.logger.info_box(f"Reading {self.nam} RAW DATA from {user_start} to {user_end}")

            _f_raw, _f_qc = self._read_raw_files()

            # Process time index
            _f_raw = self._timeIndex_process(_f_raw, user_start, user_end)
            _f_qc = self._timeIndex_process(_f_qc, user_start, user_end)

            # Process outliers
            _f_qc = self._outlier_process(_f_qc)

            # Save
            self._save_data(_f_raw, _f_qc)

            return _f_raw, _f_qc
    @staticmethod
    def reorder_dataframe_columns(df, order_lists: list[list], keep_others: bool = False):
        """
        Reorder DataFrame columns according to specified lists.

        Parameters
        ----------
        df : pd.DataFrame
            Input DataFrame
        order_lists : list[list]
            Lists specifying column order
        keep_others : bool, default=False
            If True, keeps unspecified columns at the end

        Returns
        -------
        pd.DataFrame
            DataFrame with reordered columns
        """
        new_order = []

        for order in order_lists:
            # Only add columns that exist in the DataFrame, without adding them repeatedly
            new_order.extend([col for col in order if col in df.columns and col not in new_order])

        if keep_others:
            # Add all original columns not in the new order list, keeping their original order
            new_order.extend([col for col in df.columns if col not in new_order])

        return df[new_order]
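Usage sketch for the helper above (toy column names):

```python
import pandas as pd
from AeroViz.rawDataReader.core import AbstractReader

df = pd.DataFrame(columns=['T', 'RH', 'PM10', 'PM25'])
ordered = AbstractReader.reorder_dataframe_columns(
    df, order_lists=[['PM25', 'PM10'], ['T']], keep_others=True)
print(list(ordered.columns))  # ['PM25', 'PM10', 'T', 'RH']
```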
    @staticmethod
    def QC_control():
        return QualityControl()

    @staticmethod
    def update_qc_flag(df: pd.DataFrame, mask: pd.Series, flag_name: str) -> pd.DataFrame:
        """
        Update QC_Flag column for rows matching the mask.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with QC_Flag column
        mask : pd.Series
            Boolean mask indicating rows to flag
        flag_name : str
            Name of the flag to add

        Returns
        -------
        pd.DataFrame
            DataFrame with updated QC_Flag column
        """
        if 'QC_Flag' not in df.columns:
            df['QC_Flag'] = 'Valid'

        # For rows that are already Valid, set to flag_name
        # For rows that already have flags, append the new flag
        valid_mask = df['QC_Flag'] == 'Valid'
        df.loc[mask & valid_mask, 'QC_Flag'] = flag_name
        df.loc[mask & ~valid_mask, 'QC_Flag'] = df.loc[mask & ~valid_mask, 'QC_Flag'] + ', ' + flag_name

        return df
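Finally, how the flag accumulation behaves (standalone sketch with toy values; a row caught by two checks would read 'BelowZero, AboveRange'):

```python
import pandas as pd
from AeroViz.rawDataReader.core import AbstractReader

df = pd.DataFrame({'PM25': [8.0, -2.0, 1500.0]})
df = AbstractReader.update_qc_flag(df, df['PM25'] < 0, 'BelowZero')
df = AbstractReader.update_qc_flag(df, df['PM25'] > 1000, 'AboveRange')
print(df['QC_Flag'].tolist())  # ['Valid', 'BelowZero', 'AboveRange']
```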