AeroViz 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of AeroViz has been flagged as potentially problematic.
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +1 -1
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/templates/koschmieder.py +1 -1
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/__init__.py +121 -56
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/supported_instruments.py +7 -4
- AeroViz/rawDataReader/core/__init__.py +42 -42
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +6 -2
- AeroViz/rawDataReader/core/qc.py +1 -1
- AeroViz/rawDataReader/script/AE33.py +1 -1
- AeroViz/rawDataReader/script/APS.py +13 -9
- AeroViz/rawDataReader/script/BAM1020.py +35 -0
- AeroViz/rawDataReader/script/NEPH.py +6 -10
- AeroViz/rawDataReader/script/OCEC.py +2 -2
- AeroViz/rawDataReader/script/SMPS.py +36 -16
- AeroViz/rawDataReader/script/TEOM.py +15 -3
- AeroViz/rawDataReader/script/__init__.py +1 -0
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/XRF.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- {AeroViz-0.1.12.dist-info → AeroViz-0.1.14.dist-info}/METADATA +7 -6
- {AeroViz-0.1.12.dist-info → AeroViz-0.1.14.dist-info}/RECORD +87 -85
- {AeroViz-0.1.12.dist-info → AeroViz-0.1.14.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.12.dist-info → AeroViz-0.1.14.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.12.dist-info → AeroViz-0.1.14.dist-info}/top_level.txt +0 -0
AeroViz/dataProcess/Optical/_IMPROVE.py CHANGED

@@ -41,7 +41,7 @@ def _revised(_df_mass, _df_RH):

     _df['AS'] = 2.2 * _frhs * _df_mass['S_AS'] + 4.8 * _frhl * _df_mass['L_AS']
     _df['AN'] = 2.4 * _frhs * _df_mass['S_AN'] + 5.1 * _frhl * _df_mass['L_AN']
-    _df['OM'] = 2.8 * _df_mass['S_OM'] + 6.1 *
+    _df['OM'] = 2.8 * _df_mass['S_OM'] + 6.1 * _df_mass['L_OM']
     _df['Soil'] = _df_mass['Soil']
     _df['SS'] = 1.7 * _frhss * _df_mass['SS']
     _df['EC'] = 10 * _df_mass['EC']
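The fixed line above restores the large-mode organic term of the revised IMPROVE extinction reconstruction, in which mode-split species masses are weighted by dry mass-scattering efficiencies and, for the hygroscopic species, by f(RH) growth factors. A minimal standalone sketch of that sum follows; the mass values and the `frh`-style growth factors are made up for illustration (AeroViz derives the real ones from relative humidity), and only the coefficients are taken from the diff.

```python
import pandas as pd

# Hypothetical mass concentrations (ug/m3), split into small (S_) and large (L_) modes,
# using the same column names as _IMPROVE.py.
mass = pd.DataFrame({
    'S_AS': [1.2], 'L_AS': [0.8],   # ammonium sulfate
    'S_AN': [0.9], 'L_AN': [0.4],   # ammonium nitrate
    'S_OM': [2.0], 'L_OM': [1.0],   # organic mass
    'Soil': [0.5], 'SS': [0.3], 'EC': [0.7],
})

# Placeholder f(RH) growth factors; AeroViz looks these up from relative humidity.
frh_s, frh_l, frh_ss = 1.5, 1.3, 2.0

ext = (2.2 * frh_s * mass['S_AS'] + 4.8 * frh_l * mass['L_AS']      # sulfate
       + 2.4 * frh_s * mass['S_AN'] + 5.1 * frh_l * mass['L_AN']    # nitrate
       + 2.8 * mass['S_OM'] + 6.1 * mass['L_OM']                    # organics (the term completed in 0.1.14)
       + mass['Soil'] + 1.7 * frh_ss * mass['SS'] + 10 * mass['EC'])
print(ext.iloc[0])  # reconstructed extinction, Mm-1
```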
AeroViz/rawDataReader/__init__.py CHANGED

@@ -1,6 +1,6 @@
 from datetime import datetime
 from pathlib import Path
-from typing import
+from typing import Literal

 from pandas import Grouper, Timedelta

@@ -10,66 +10,111 @@ from AeroViz.rawDataReader.script import *
 __all__ = ['RawDataReader']

 SUPPORTED_INSTRUMENTS = [
-    NEPH, Aurora, SMPS,
-    MA350, TEOM, OCEC, IGAC, VOC, EPA, Minion
+    NEPH, Aurora, SMPS, APS, GRIMM, AE33, AE43, BC1054,
+    MA350, BAM1020, TEOM, OCEC, IGAC, VOC, EPA, Minion
 ]

+SIZE_RANGE_INSTRUMENTS = ['SMPS', 'APS', 'GRIMM']

-def RawDataReader(instrument_name: str,
+
+def RawDataReader(instrument: str,
                   path: Path | str,
                   reset: bool = False,
                   qc: bool | str = True,
-                  qc_freq: str | None = None,
-                  rate: bool = True,
-                  append_data: bool = False,
                   start: datetime = None,
                   end: datetime = None,
                   mean_freq: str = '1h',
-…
+                  size_range: tuple[float, float] | None = None,
+                  suppress_warnings: bool = False,
+                  log_level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR'] = 'INFO',
+                  **kwargs):
     """
     Factory function to instantiate the appropriate reader module for a given instrument and
     return the processed data over the specified time range.

-…
+    Parameters
+    ----------
+    instrument : str
+        The instrument name for which to read data, must be a valid key in the meta dictionary
+
+    path : Path or str
+        The directory where raw data files for the instrument are stored
+
+    reset : bool or str
+        Data processing control mode:
+        False (default) - Use existing processed data if available
+        True - Force reprocess all data from raw files
+        'append' - Add new data to existing processed data
+
+    qc : bool or str
+        Quality control and rate calculation mode:
+        True (default) - Apply QC and calculate overall rates
+        False - Skip QC and return raw data only
+        str - Calculate rates at specified intervals:
+            'W' - Weekly rates
+            'MS' - Month start rates
+            'QS' - Quarter start rates
+            'YS' - Year start rates
+            Can add number prefix (e.g., '2MS' for bi-monthly)
+
+    start : datetime
+        Start time for filtering the data
+
+    end : datetime
+        End time for filtering the data
+
+    mean_freq : str
+        Resampling frequency for averaging the data (e.g., '1h' for hourly mean)
+
+    size_range : tuple[float, float], optional
+        Size range in nanometers (min_size, max_size) for SMPS/APS data filtering
+
+    suppress_warnings : bool, optional
+        Whether to suppress warning messages (default: False)
+
+    log_level : {'DEBUG', 'INFO', 'WARNING', 'ERROR'}
+        Logging level (default: 'INFO')
+
+    **kwargs
+        Additional arguments to pass to the reader module
+
+    Returns
+    -------
+    pd.DataFrame
+        Processed data with specified QC and time range
+
+    Raises
+    ------
+    ValueError
+        If instrument name is invalid
+        If path does not exist
+        If QC frequency is invalid
+        If time range is invalid
+        If mean_freq format is invalid
+
+    Examples
+    --------
     >>> from pathlib import Path
     >>> from datetime import datetime
+    >>> from AeroViz import RawDataReader
     >>>
-    >>>
-    ...
-    ...     path=Path('/path/to/data'),
-    ...
-    ...
+    >>> df_ae33 = RawDataReader(
+    ...     instrument='AE33',
+    ...     path=Path('/path/to/your/data/folder'),
+    ...     reset=True,
+    ...     qc='1MS',
+    ...     start=datetime(2024, 1, 1),
+    ...     end=datetime(2024, 6, 30),
+    ...     mean_freq='1h',
+    ... )
     """
+
     # Mapping of instrument names to their respective classes
     instrument_class_map = {cls.__name__.split('.')[-1]: cls for cls in SUPPORTED_INSTRUMENTS}

     # Check if the instrument name is in the map
-    if
-        raise ValueError(f"Instrument name '{
+    if instrument not in meta.keys():
+        raise ValueError(f"Instrument name '{instrument}' is not valid. \nMust be one of: {list(meta.keys())}")

     # Check if path exists and is a directory
     if not isinstance(path, Path):
@@ -78,22 +123,21 @@ def RawDataReader(instrument_name: str,
         raise FileNotFoundError(f"The specified path '{path}' does not exist or is not a directory.")

     # Validate the QC frequency
-    if
+    if isinstance(qc, str):
         try:
-            Grouper(freq=
-        except ValueError
-            raise ValueError(f"Invalid frequency: {
-…
-    if start and end:
-        if end.hour == 0 and end.minute == 0 and end.second == 0:
-            end = end.replace(hour=23, minute=59, second=59)
-    else:
+            Grouper(freq=qc)
+        except (ValueError, TypeError):
+            raise ValueError(f"Invalid frequency: {qc}. Must be one of: "
+                             f"W (week), MS (month start), QS (quarter start), YS (year start)")
+
+    # Verify input times
+    if not (start and end):
         raise ValueError("Both start and end times must be provided.")
     if end <= start:
         raise ValueError(f"Invalid time range: start {start} is after end {end}")

+    end = end.replace(hour=23, minute=59, second=59) if end.hour == 0 and end.minute == 0 else end
+
     # Verify that mean_freq format
     try:
         Timedelta(mean_freq)
@@ -101,19 +145,40 @@
         raise ValueError(
             f"Invalid mean_freq: '{mean_freq}'. It should be a valid frequency string (e.g., '1h', '30min', '1D').")

+    # Validate size range
+    if size_range is not None:
+        if instrument not in SIZE_RANGE_INSTRUMENTS:
+            raise ValueError(f"Size range filtering is only supported for {SIZE_RANGE_INSTRUMENTS}")
+
+        min_size, max_size = size_range
+        if not isinstance(min_size, (int, float)) or not isinstance(max_size, (int, float)):
+            raise ValueError("Size range values must be numeric")
+        if min_size >= max_size:
+            raise ValueError("Minimum size must be less than maximum size")
+
+        if instrument == 'SMPS':
+            if not (1 <= min_size <= 1000) or not (1 <= max_size <= 1000):
+                raise ValueError("SMPS size range must be between 1 and 1000 nm")
+        elif instrument == 'APS':
+            if not (500 <= min_size <= 20000) or not (500 <= max_size <= 20000):
+                raise ValueError("APS size range must be between 500 and 20000 nm")
+
+        kwargs.update({'size_range': size_range})
+
+    kwargs.update({
+        'suppress_warnings': suppress_warnings,
+        'log_level': log_level
+    })
+
     # Instantiate the class and return the instance
-    reader_module = instrument_class_map[
+    reader_module = instrument_class_map[instrument].Reader(
         path=path,
         reset=reset,
         qc=qc,
-        qc_freq=qc_freq,
-        rate=rate,
-        append_data=append_data,
         **kwargs
     )
     return reader_module(
         start=start,
         end=end,
         mean_freq=mean_freq,
-        csv_out=csv_out,
     )
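Putting the new pieces together, a call that exercises the 0.1.14 options might look like the sketch below. The folder path and the size bounds are illustrative only; per the validation added above, `size_range` is accepted for SMPS, APS and GRIMM, must satisfy min < max, and is checked against 1-1000 nm for SMPS and 500-20000 nm for APS before being forwarded to the reader through `**kwargs`.

```python
from datetime import datetime
from pathlib import Path

from AeroViz import RawDataReader

# Illustrative SMPS read; the path and the mobility-diameter bounds are examples only.
df_smps = RawDataReader(
    instrument='SMPS',
    path=Path('/path/to/smps/raw'),   # folder holding the raw SMPS exports
    reset='append',                   # False | True | 'append'
    qc='MS',                          # report acquisition/yield rates per month start
    start=datetime(2024, 1, 1),
    end=datetime(2024, 6, 30),
    mean_freq='1h',
    size_range=(11.8, 593.5),         # nm; must satisfy min < max and lie within 1-1000 for SMPS
)
```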
AeroViz/rawDataReader/config/supported_instruments.py CHANGED

@@ -58,6 +58,13 @@ meta = {
         "deter_key": {"BC Mass Conc. (880 nm)": ["BC5"]},
     },

+    "BAM1020": {
+        "pattern": ["*.csv"],
+        "freq": "1h",
+        "deter_key": {
+            "Mass Conc.": ["Conc"]},
+    },
+
     "TEOM": {
         "pattern": ["*.csv"],
         "freq": "6min",
@@ -71,10 +78,6 @@ meta = {
         "pattern": ["*LCRes.csv"],
         "freq": "1h",
         "deter_key": {
-            "Thermal OC": ["Thermal_OC"],
-            "Thermal EC": ["Thermal_EC"],
-            "Optical OC": ["Optical_OC"],
-            "Optical EC": ["Optical_EC"],
             "Thermal OC & EC": ["Thermal_OC", "Thermal_EC"],
             "Optical OC & EC": ["Optical_OC", "Optical_EC"],
         },
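The new `BAM1020` entry follows the same schema as the other instruments: `pattern` is the raw-file glob, `freq` the native sampling interval, and `deter_key` maps a report label to the columns whose acquisition rate is assessed. The helper below is a hypothetical sketch of how such an entry could drive a simple completeness check; it is not AeroViz's own rate calculation.

```python
from pathlib import Path

import pandas as pd

# The new BAM1020 entry from supported_instruments.py, reproduced for illustration.
bam1020_meta = {
    "pattern": ["*.csv"],
    "freq": "1h",
    "deter_key": {"Mass Conc.": ["Conc"]},
}


def acquisition_rate(folder: Path, meta: dict) -> dict[str, float]:
    """Hypothetical sketch: fraction of expected records present per deter_key group."""
    files = [f for pattern in meta["pattern"] for f in folder.glob(pattern)]
    frames = [pd.read_csv(f, index_col=0, parse_dates=True) for f in files]
    df = pd.concat(frames).sort_index()
    expected = pd.date_range(df.index.min(), df.index.max(), freq=meta["freq"]).size
    return {label: round(df[cols].dropna().shape[0] / expected, 3)
            for label, cols in meta["deter_key"].items()}
```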
AeroViz/rawDataReader/core/__init__.py CHANGED

@@ -3,17 +3,16 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from datetime import datetime
 from pathlib import Path
-from typing import
+from typing import Generator

 import numpy as np
 import pandas as pd
-from pandas import DataFrame, concat, read_pickle, to_numeric
 from rich.console import Console
 from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn

 from AeroViz.rawDataReader.config.supported_instruments import meta
 from AeroViz.rawDataReader.core.logger import ReaderLogger
-from AeroViz.rawDataReader.core.qc import
+from AeroViz.rawDataReader.core.qc import QualityControl

 __all__ = ['AbstractReader']

@@ -32,45 +31,43 @@ class AbstractReader(ABC):

     def __init__(self,
                  path: Path | str,
-                 reset: bool = False,
-                 qc: bool = True,
-                 qc_freq: Optional[str] = None,
-                 rate: bool = True,
-                 append_data: bool = False,
+                 reset: bool | str = False,
+                 qc: bool | str = True,
                  **kwargs):

         self.path = Path(path)
         self.meta = meta[self.nam]
-…
+        output_folder = self.path / f'{self.nam.lower()}_outputs'
+        output_folder.mkdir(parents=True, exist_ok=True)

-        self.
-…
-        self.rate = rate
-        self.append = append_data and reset
+        self.logger = ReaderLogger(
+            self.nam, output_folder,
+            kwargs.get('log_level').upper() if not kwargs.get('suppress_warnings') else 'ERROR')

-        self.
-        self.
-        self.
-        self.
-        self.
+        self.reset = reset is True
+        self.append = reset == 'append'
+        self.qc = qc  # if qc, then calculate rate
+        self.qc_freq = qc if isinstance(qc, str) else None
+        self.kwargs = kwargs

-        self.
+        self.pkl_nam = output_folder / f'_read_{self.nam.lower()}.pkl'
+        self.csv_nam = output_folder / f'_read_{self.nam.lower()}.csv'
+        self.pkl_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.pkl'
+        self.csv_nam_raw = output_folder / f'_read_{self.nam.lower()}_raw.csv'
+        self.csv_out = output_folder / f'output_{self.nam.lower()}.csv'

     def __call__(self,
                  start: datetime,
                  end: datetime,
                  mean_freq: str = '1h',
-…
-                 ) -> DataFrame:
+                 ) -> pd.DataFrame:

         data = self._run(start, end)

         if data is not None:
-…
-            data.to_csv(self.csv_out)
+            data = data.resample(mean_freq).mean()
+
+            data.to_csv(self.csv_out)

         return data

@@ -79,7 +76,7 @@ class AbstractReader(ABC):
         pass

     @abstractmethod
-    def _QC(self, df: DataFrame) -> DataFrame:
+    def _QC(self, df: pd.DataFrame) -> pd.DataFrame:
         return df

     def _rate_calculate(self, raw_data, qc_data) -> None:
@@ -182,7 +179,7 @@ class AbstractReader(ABC):

         return _df

-    def _save_data(self, raw_data: DataFrame, qc_data: DataFrame) -> None:
+    def _save_data(self, raw_data: pd.DataFrame, qc_data: pd.DataFrame) -> None:
         try:
             raw_data.to_pickle(self.pkl_nam_raw)
             raw_data.to_csv(self.csv_nam_raw)
@@ -222,7 +219,7 @@ class AbstractReader(ABC):
             for msg in msgs:
                 original[level](msg)

-    def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
+    def _read_raw_files(self) -> tuple[pd.DataFrame | None, pd.DataFrame | None]:
         files = [f
                  for file_pattern in self.meta['pattern']
                  for pattern in {file_pattern.lower(), file_pattern.upper(), file_pattern}
@@ -242,7 +239,7 @@ class AbstractReader(ABC):
                 if (df := self._raw_reader(file)) is not None and not df.empty:
                     df_list.append(df)
                 else:
-                    self.logger.
+                    self.logger.debug(f"\tFile {file.name} produced an empty DataFrame or None.")

             except Exception as e:
                 self.logger.error(f"Error reading {file.name}: {e}")
@@ -250,13 +247,15 @@ class AbstractReader(ABC):
         if not df_list:
             raise ValueError(f"\033[41m\033[97mAll files were either empty or failed to read.\033[0m")

-        raw_data = concat(df_list, axis=0).groupby(level=0).first()
+        raw_data = pd.concat(df_list, axis=0).groupby(level=0).first()

-        if self.nam
+        if self.nam in ['SMPS', 'APS', 'GRIMM']:
             raw_data = raw_data.sort_index(axis=1, key=lambda x: x.astype(float))

-        raw_data = self._timeIndex_process(raw_data)
-…
+        raw_data = self._timeIndex_process(raw_data)
+
+        raw_data = raw_data.apply(pd.to_numeric, errors='coerce').copy(deep=True)
+        qc_data = self._QC(raw_data).apply(pd.to_numeric, errors='coerce').copy(deep=True)

         return raw_data, qc_data

@@ -265,7 +264,7 @@ class AbstractReader(ABC):
         if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
             self.logger.info_box(f"Reading {self.nam} PICKLE from {user_start} to {user_end}", color_part="PICKLE")

-            _f_raw_done, _f_qc_done = read_pickle(self.pkl_nam_raw), read_pickle(self.pkl_nam)
+            _f_raw_done, _f_qc_done = pd.read_pickle(self.pkl_nam_raw), pd.read_pickle(self.pkl_nam)

         if self.append:
             self.logger.info_box(f"Appending New data from {user_start} to {user_end}", color_part="New data")
@@ -292,25 +291,26 @@ class AbstractReader(ABC):
         # save
         self._save_data(_f_raw, _f_qc)

-        if self.
-            self._rate_calculate(_f_raw.apply(to_numeric, errors='coerce'),
+        if self.qc:
+            self._rate_calculate(_f_raw.apply(pd.to_numeric, errors='coerce'),
+                                 _f_qc.apply(pd.to_numeric, errors='coerce'))

         return _f_qc if self.qc else _f_raw

     @staticmethod
-    def reorder_dataframe_columns(df, order_lists,
+    def reorder_dataframe_columns(df, order_lists: list[list], keep_others: bool = False):
         new_order = []

         for order in order_lists:
-            #
+            # Only add column that exist in the DataFrame and do not add them repeatedly
             new_order.extend([col for col in order if col in df.columns and col not in new_order])

-        if
-            #
+        if keep_others:
+            # Add all original fields not in the new order list, keeping their original order
             new_order.extend([col for col in df.columns if col not in new_order])

         return df[new_order]

     @staticmethod
     def time_aware_IQR_QC(df: pd.DataFrame, time_window='1D', log_dist=False) -> pd.DataFrame:
-        return
+        return QualityControl().time_aware_iqr(df, time_window=time_window, log_dist=log_dist)
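The constructor now folds the former `qc_freq`, `rate` and `append_data` flags into the two public arguments: `reset` accepts False, True or 'append', and `qc` accepts False, True or a frequency string. The sketch below reproduces just that mapping as a standalone function; it mirrors the assignments in the hunk above but is not the actual `AbstractReader` code.

```python
def resolve_modes(reset: bool | str = False, qc: bool | str = True) -> dict:
    """Sketch of the 0.1.14 flag mapping; not the actual AbstractReader code."""
    return {
        'reset': reset is True,                          # full reprocess only when literally True
        'append': reset == 'append',                     # incremental append mode
        'qc': bool(qc),                                  # any truthy qc value enables QC + rate reporting
        'qc_freq': qc if isinstance(qc, str) else None,  # e.g. 'W', 'MS', '2MS'
    }


print(resolve_modes(reset='append', qc='MS'))
# {'reset': False, 'append': True, 'qc': True, 'qc_freq': 'MS'}
```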
AeroViz/rawDataReader/core/logger.py CHANGED

@@ -8,9 +8,10 @@ from pathlib import Path


 class ReaderLogger:
-    def __init__(self, name: str, log_path: Path):
+    def __init__(self, name: str, log_path: Path, log_level: str = 'WARNING'):
         self.name = name
         self.log_path = log_path
+        self._log_level = getattr(logging, log_level)

         # 檢查是否支持顏色輸出
         self.color_support = self._check_color_support()
@@ -92,7 +93,7 @@ class ReaderLogger:
     def _setup_logger(self) -> logging.Logger:
         """設置logger"""
         logger = logging.getLogger(self.name)
-        logger.setLevel(
+        logger.setLevel(self._log_level)

         # 移除現有的 handlers
         for handler in logger.handlers[:]:
@@ -135,6 +136,9 @@ class ReaderLogger:
             text = text.encode('ascii', 'replace').decode('ascii')
         return text

+    def debug(self, msg: str):
+        self.logger.debug(self._safe_print(msg))
+
     def info(self, msg: str):
         self.logger.info(self._safe_print(msg))

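`ReaderLogger` now takes a `log_level` string and resolves it with `getattr(logging, ...)`, while the reader constructor forces 'ERROR' whenever `suppress_warnings` is set. A minimal, self-contained reproduction of that level-resolution logic, assembled from the two hunks above rather than copied from AeroViz:

```python
import logging


def resolve_log_level(log_level: str = 'INFO', suppress_warnings: bool = False) -> int:
    """Assumed level-resolution logic: suppress_warnings wins, otherwise use log_level."""
    effective = 'ERROR' if suppress_warnings else log_level.upper()
    return getattr(logging, effective)  # e.g. logging.DEBUG == 10, logging.ERROR == 40


logger = logging.getLogger('AE33')
logger.setLevel(resolve_log_level('DEBUG'))
assert resolve_log_level(suppress_warnings=True) == logging.ERROR
```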
AeroViz/rawDataReader/core/qc.py CHANGED

AeroViz/rawDataReader/script/AE33.py CHANGED

@@ -8,7 +8,7 @@ class Reader(AbstractReader):

     def _raw_reader(self, file):
         if file.stat().st_size / 1024 < 550:
-            self.logger.
+            self.logger.warning(f'\t {file.name} may not be a whole daily data. Make sure the file is correct.')

         _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
                          delimiter=r'\s+', skiprows=5, usecols=range(67))
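The AE33 hunk above warns on undersized files and then parses the whitespace-delimited raw export by merging the first two columns into a timestamp index, skipping five header lines, and keeping 67 columns. The snippet below mirrors that `read_table` call on a small synthetic file; the file layout and column names are invented for the example, and `usecols` is trimmed to the toy file's width.

```python
import tempfile
from pathlib import Path

from pandas import read_table

# Synthetic file mimicking the whitespace-delimited layout assumed by the hunk above:
# five junk header lines, a column-name row, then date + time followed by data columns.
sample = "\n".join(
    ["; AE33 header"] * 5
    + ["Date Time BC1 BC2 BC3",
       "2024/01/01 00:01 100 110 120",
       "2024/01/01 00:02 101 111 121"]
) + "\n"

with tempfile.TemporaryDirectory() as tmp:
    f = Path(tmp) / "AE33_sample.dat"
    f.write_text(sample)
    # Same call pattern as the reader: columns 0 and 1 are combined into the 'time' index.
    df = read_table(f, parse_dates={'time': [0, 1]}, index_col='time',
                    delimiter=r'\s+', skiprows=5, usecols=range(5))
    print(df)
```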