AeroViz-0.1.4-py3-none-any.whl → AeroViz-0.1.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AeroViz/dataProcess/Chemistry/__init__.py +21 -20
- AeroViz/dataProcess/Chemistry/_isoropia.py +9 -12
- AeroViz/dataProcess/Chemistry/_mass_volume.py +4 -3
- AeroViz/dataProcess/Chemistry/_ocec.py +20 -45
- AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +2 -3
- AeroViz/dataProcess/Optical/fRH.pkl +0 -0
- AeroViz/dataProcess/SizeDistr/__init__.py +6 -10
- AeroViz/dataProcess/VOC/__init__.py +1 -6
- AeroViz/dataProcess/VOC/_potential_par.py +71 -37
- AeroViz/dataProcess/VOC/{voc_par.json → support_voc.json} +321 -339
- AeroViz/rawDataReader/__init__.py +52 -5
- AeroViz/rawDataReader/config/supported_instruments.py +45 -53
- AeroViz/rawDataReader/core/__init__.py +113 -98
- AeroViz/rawDataReader/script/AE33.py +3 -3
- AeroViz/rawDataReader/script/AE43.py +2 -2
- AeroViz/rawDataReader/script/APS_3321.py +4 -4
- AeroViz/rawDataReader/script/Aurora.py +5 -2
- AeroViz/rawDataReader/script/BC1054.py +2 -2
- AeroViz/rawDataReader/script/EPA_vertical.py +2 -2
- AeroViz/rawDataReader/script/GRIMM.py +4 -4
- AeroViz/rawDataReader/script/IGAC.py +2 -2
- AeroViz/rawDataReader/script/MA350.py +2 -2
- AeroViz/rawDataReader/script/Minion.py +2 -2
- AeroViz/rawDataReader/script/NEPH.py +9 -14
- AeroViz/rawDataReader/script/{Sunset_OCEC.py → OCEC.py} +24 -18
- AeroViz/rawDataReader/script/SMPS.py +76 -0
- AeroViz/rawDataReader/script/TEOM.py +2 -2
- AeroViz/rawDataReader/script/Table.py +3 -3
- AeroViz/rawDataReader/script/VOC.py +16 -9
- AeroViz/rawDataReader/script/__init__.py +2 -4
- {AeroViz-0.1.4.dist-info → AeroViz-0.1.6.dist-info}/METADATA +13 -10
- {AeroViz-0.1.4.dist-info → AeroViz-0.1.6.dist-info}/RECORD +36 -36
- AeroViz/rawDataReader/script/SMPS_TH.py +0 -41
- AeroViz/rawDataReader/script/SMPS_aim11.py +0 -51
- AeroViz/rawDataReader/script/SMPS_genr.py +0 -51
- {AeroViz-0.1.4.dist-info → AeroViz-0.1.6.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.4.dist-info → AeroViz-0.1.6.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.4.dist-info → AeroViz-0.1.6.dist-info}/top_level.txt +0 -0
AeroViz/rawDataReader/__init__.py

@@ -12,20 +12,67 @@ def RawDataReader(instrument_name: str,
                   qc: bool = True,
                   csv_raw: bool = True,
                   reset: bool = False,
-                  rate: bool = False,
+                  rate: bool = True,
                   append_data: bool = False,
                   start: datetime | None = None,
                   end: datetime | None = None,
                   mean_freq='1h',
                   csv_out=True,
                   ):
+    """
+    Factory function that instantiates the appropriate reader module for a given instrument and
+    returns the processed data over the specified time range.
+
+    Parameters
+    ----------
+    instrument_name : str
+        The name of the instrument for which to read data. Must be a valid key in the `meta` dictionary.
+    path : Path
+        The directory where raw data files for the instrument are stored.
+    qc : bool, optional (default=True)
+        If True, apply quality control (QC) to the raw data.
+    csv_raw : bool, optional (default=True)
+        If True, read raw data from CSV files.
+    reset : bool, optional (default=False)
+        If True, reset the state and reprocess the data from scratch.
+    rate : bool, optional (default=True)
+        If True, calculate acquisition and yield rates from the data.
+    append_data : bool, optional (default=False)
+        If True, append new data to the existing dataset instead of overwriting it.
+    start : datetime, optional
+        Start time for filtering the data. If None, no start-time filtering is applied.
+    end : datetime, optional
+        End time for filtering the data. If None, no end-time filtering is applied.
+    mean_freq : str, optional (default='1h')
+        Resampling frequency for averaging the data, e.g. '1h' for hourly means.
+    csv_out : bool, optional (default=True)
+        If True, output the processed data as a CSV file.
+
+    Returns
+    -------
+    reader_module : Reader
+        An instance of the reader module corresponding to the specified instrument, which processes
+        the data and returns it in a usable format.
+
+    Raises
+    ------
+    ValueError
+        If the `instrument_name` provided is not a valid key in the `meta` dictionary.
+
+    Examples
+    --------
+    To read and process data for the BC1054 instrument:
+
+    >>> from pathlib import Path
+    >>> from datetime import datetime
+    >>> data = RawDataReader(instrument_name='BC1054', path=Path('/path/to/data'),
+    ...                      start=datetime(2024, 1, 1), end=datetime(2024, 2, 1))
+    """
     # Mapping of instrument names to their respective classes
     instrument_class_map = {
         'NEPH': NEPH,
         'Aurora': Aurora,
-        'SMPS_genr': SMPS_genr,
-        'SMPS_aim11': SMPS_aim11,
-        'SMPS_TH': SMPS_TH,
+        'SMPS': SMPS,
         'GRIMM': GRIMM,
         'APS_3321': APS_3321,
         'AE33': AE33,

@@ -33,7 +80,7 @@ def RawDataReader(instrument_name: str,
         'BC1054': BC1054,
         'MA350': MA350,
         'TEOM': TEOM,
-        'Sunset_OCEC': Sunset_OCEC,
+        'OCEC': OCEC,
         'IGAC': IGAC,
         'VOC': VOC,
         'Table': Table,
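The three SMPS readers collapse into a single 'SMPS' key and 'Sunset_OCEC' becomes 'OCEC', so 0.1.4 call sites need their instrument_name updated; note that rate now defaults to True. A minimal migration sketch (the top-level import and data directories are assumptions, not taken from the diff):

    from datetime import datetime
    from pathlib import Path

    from AeroViz import RawDataReader  # assumed top-level export

    # 0.1.4: instrument_name was 'SMPS_TH', 'SMPS_aim11' or 'SMPS_genr';
    # 0.1.6: one 'SMPS' reader covers both *.txt and *.csv exports
    smps = RawDataReader(instrument_name='SMPS', path=Path('/data/smps'),
                         start=datetime(2024, 1, 1), end=datetime(2024, 2, 1))

    # 0.1.4: instrument_name='Sunset_OCEC'  ->  0.1.6: 'OCEC'
    ocec = RawDataReader(instrument_name='OCEC', path=Path('/data/ocec'))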
AeroViz/rawDataReader/config/supported_instruments.py

@@ -2,76 +2,64 @@

 meta = {
     "NEPH": {
-        "pattern": "*.dat",
+        "pattern": ["*.dat"],
         "freq": "5min",
         "deter_key": {"Scatter Coe. (550 nm)": ["G"]},
     },

     "Aurora": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1min",
         "deter_key": {"Scatter Coe. (550 nm)": ["G"]},
     },

-    "SMPS_TH": {
-        "pattern": "*.txt",
-        "freq": "6min",
-        "deter_key": {"Bins": ["all"]},
-    },
-
-    "SMPS_genr": {
-        "pattern": "*.txt",
-        "freq": "6min",
-        "deter_key": {"Bins": ["all"]},
-    },
-
-    "SMPS_aim11": {
-        "pattern": "*.csv",
+    "SMPS": {
+        "pattern": ["*.txt", "*.csv"],
         "freq": "6min",
         "deter_key": {"Bins": ["all"]},
     },

     "GRIMM": {
-        "pattern": "*.dat",
+        "pattern": ["*.dat"],
         "freq": "6min",
         "deter_key": {"Bins": ["all"]},
     },

     "APS_3321": {
-        "pattern": "*.txt",
+        "pattern": ["*.txt"],
         "freq": "6min",
         "deter_key": {"Bins": ["all"]},
     },

     "AE33": {
-        "pattern": "[!ST|!CT|!FV]*[!log]_AE33*.dat",
+        "pattern": ["[!ST|!CT|!FV]*[!log]_AE33*.dat"],
         "freq": "1min",
         "deter_key": {"BC Mass Conc. (880 nm)": ["BC6"]},
         "error_state": [],
     },

     "AE43": {
-        "pattern": "[!ST|!CT|!FV]*[!log]_AE43*.dat",
+        "pattern": ["[!ST|!CT|!FV]*[!log]_AE43*.dat"],
         "freq": "1min",
         "deter_key": {"BC Mass Conc. (880 nm)": ["BC6"]},
         "error_state": [],
     },

     "BC1054": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1min",
         "deter_key": {"BC Mass Conc. (880 nm)": ["BC9"]},
         "error_state": [1, 2, 4, 8, 16, 32, 65536],
     },

     "MA350": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1min",
         "deter_key": {"BC Mass Conc. (880 nm)": ["BC5"]},
     },

     "TEOM": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "6min",
         "deter_key": {
             "PM1.0 Mass Conc.": ["PM_Total"],

@@ -79,21 +67,19 @@ meta = {
         },
     },

-    "Sunset_OCEC": {
-        "pattern": "*LCRes.csv",
+    "OCEC": {
+        "pattern": ["*LCRes.csv"],
         "freq": "1h",
         "deter_key": {
-            "Thermal OC/EC": ["Thermal_EC", "Thermal_OC"],
             "Thermal OC": ["Thermal_OC"],
             "Thermal EC": ["Thermal_EC"],
-            "Optical OC/EC": ["Optical_EC", "Optical_OC"],
             "Optical OC": ["Optical_OC"],
             "Optical EC": ["Optical_EC"],
         },
     },

     "IGAC": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1h",
         "deter_key": {
             "Na+": ["Na+"],

@@ -110,46 +96,52 @@ meta = {
         },
     },

     "VOC": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1h",
-        "key": [
-            …
-            'Propene', '1.3-Butadiene', 'Isoprene', '1-Octene',
-            'Benzene', 'Toluene', 'Ethylbenzene', 'm.p-Xylene', 'o-Xylene', 'Iso-Propylbenzene', 'Styrene',
-            'n-Propylbenzene', '3.4-Ethyltoluene', '1.3.5-TMB', '2-Ethyltoluene', '1.2.4-TMB', '1.2.3-TMB',
-            'Acetaldehyde', 'Ethanol', 'Acetone', 'IPA', 'Ethyl Acetate', 'Butyl Acetate',
-            'VCM', 'TCE', 'PCE', '1.4-DCB', '1.2-DCB'],
+        "key": [
+            'Benzene', 'Toluene', 'Ethylbenzene', 'm/p-Xylene', 'o-Xylene', 'Ethane', 'Propane', 'Isobutane',
+            'n-Butane', 'Isopentane', 'n-Pentane', 'n-Hexane', 'n-Heptane', 'n-Octane', 'n-Nonane', 'n-Decane',
+            'n-Undecane', 'n-Dodecane', 'Ethylene', 'Propylene', '1-Butene', 't-2-Butene', 'cis-2-Butene',
+            '1-Pentene', 't-2-Pentene', 'cis-2-Pentene', '1-Hexene', 'Acetylene', 'Cyclopentane', 'Methylcyclopentane',
+            'Cyclohexane', 'Methylcyclohexane', 'Isoprene', '2,2-Dimethylbutane', '2,3-Dimethylbutane',
+            '2-Methylpentane', '3-Methylpentane', '2,4-Dimethylpentane', '2-Methylhexane', '2,3-Dimethylpentane',
+            '3-Methylheptane', '2,2,4-Trimethylpentane', '2,3,4-Trimethylpentane', '2-Methylheptane', '3-Methylhexane',
+            'Styrene', 'Isopropylbenzene', 'n-Propylbenzene', 'm-Ethyltoluene', 'p-Ethyltoluene', 'm-Diethylbenzene',
+            'p-Diethylbenzene', '1,3,5-Trimethylbenzene', 'o-Ethyltoluene', '1,2,4-Trimethylbenzene',
+            '1,2,3-Trimethylbenzene',
+            '1.2-DCB', '1.4-DCB', '1.3-Butadiene', '1-Octene', '2-Ethyltoluene', '3.4-Ethyltoluene', 'Acetaldehyde',
+            'Acetone', 'Butyl Acetate', 'Ethanol', 'Ethyl Acetate', 'Hexane', 'IPA', 'Iso-Propylbenzene',
+            'PCE', 'Propene', 'TCE', 'VCM',
+        ],
         "deter_key": None,
     },

     "Table": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1h",
         "deter_key": None,
     },

     "EPA_vertical": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1h",
         "deter_key": None,
     },

     "Minion": {
-        "pattern": "*.csv",
+        "pattern": ["*.csv"],
         "freq": "1h",
-        "deter_key": …
+        "deter_key": {
+            "Na+": ["Na+"],
+            "NH4+": ["NH4+"],
+            "K+": ["K+"],
+            "Mg2+": ["Mg2+"],
+            "Ca2+": ["Ca2+"],
+            "Cl-": ["Cl-"],
+            "NO2-": ["NO2-"],
+            "NO3-": ["NO3-"],
+            "SO42-": ["SO42-"],
+            "Main Salt (NH4+, NO3-, SO42-)": ["NO3-", "SO42-", "NH4+"],
+        },
     },
 }
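Every "pattern" value is now a list, and the core reader (diffed below) expands each entry into lower-, upper- and original-case variants before globbing. A standalone sketch of that expansion, mirroring the comprehension added in core/__init__.py (the helper name and directory are illustrative):

    from pathlib import Path

    def expand_patterns(path: Path, patterns: list[str]) -> list[Path]:
        # dedupe the case variants with a set, then glob each one
        return [f
                for file_pattern in patterns
                for pattern in {file_pattern.lower(), file_pattern.upper(), file_pattern}
                for f in path.glob(pattern)]

    # the new SMPS entry matches both text and csv exports in one reader
    files = expand_patterns(Path('/data/smps'), ["*.txt", "*.csv"])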
AeroViz/rawDataReader/core/__init__.py

@@ -1,4 +1,4 @@
-import json
+import json
 import logging
 import pickle as pkl
 from abc import ABC, abstractmethod

@@ -7,29 +7,37 @@ from pathlib import Path
 from typing import Any

 import numpy as np
+import pandas as pd
 from pandas import DataFrame, date_range, concat, to_numeric, to_datetime
+from rich.console import Console
+from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, TaskProgressColumn

 from ..config.supported_instruments import meta

 __all__ = ['AbstractReader']


+console = Console(force_terminal=True, color_system="auto")
+
+
 class AbstractReader(ABC):
-
+    """
+    Abstract class for reading raw data from different instruments. Each instrument should have a separate class
+    that inherits from this class and implements the abstract methods `_raw_reader` and `_QC`.

-
-
+    Lists the files in the path and reads the pickle file if it exists; otherwise it reads the raw data and dumps
+    a pickle file. The pickle is generated the first time the raw data is read; to re-read the raw data, set
+    'reset=True'.
+    """

-
-    # pickle file will be generated after read raw data first time, if you want to re-read the rawdata, please set
-    # 'reset=True'
+    nam = 'AbstractReader'

     def __init__(self,
                  path: Path | str,
                  qc: bool = True,
                  csv_raw: bool = True,
                  reset: bool = False,
-                 rate: bool = False,
+                 rate: bool = True,
                  append_data: bool = False):

         self.path = Path(path)

@@ -40,7 +48,7 @@ class AbstractReader(ABC):
         self.rate = rate
         self.qc = qc
         self.csv = csv_raw
-        self.apnd = append_data
+        self.append = append_data and reset

         self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
         self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'

@@ -48,21 +56,12 @@ class AbstractReader(ABC):
         self.csv_nam_raw = self.path / f'_read_{self.nam.lower()}_raw.csv'
         self.csv_out = self.path / f'output_{self.nam.lower()}.csv'

-    # dependency injection function, customize each instrument
-    @abstractmethod
-    def _raw_reader(self, _file):
-        pass
-
-    @abstractmethod
-    def _QC(self, df: DataFrame):
-        return df
-
     def __call__(self,
                  start: dtm | None = None,
                  end: dtm | None = None,
                  mean_freq: str = '1h',
                  csv_out: bool = True,
-                 ) -> DataFrame
+                 ) -> DataFrame:

         if start and end and end <= start:
             raise ValueError(f"Invalid time range: start {start} is after end {end}")

@@ -77,6 +76,14 @@ class AbstractReader(ABC):

         return data

+    @abstractmethod
+    def _raw_reader(self, file):
+        pass
+
+    @abstractmethod
+    def _QC(self, df: DataFrame):
+        return df
+
     @staticmethod
     def basic_QC(df: DataFrame):
         df_ave, df_std = df.mean(), df.std()

@@ -84,40 +91,25 @@ class AbstractReader(ABC):

         return df.mask(df_lowb | df_highb).copy()

-    # set each to true datetime(18:30:01 -> 18:30:00) and rindex data
-    def _raw_process(self, _df):
-        # get time from df and set time to whole time to create time index
-        _st, _ed = _df.index.sort_values()[[0, -1]]
-        _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
-                               freq=self.meta['freq'])
-        _tm_index.name = 'time'
-
-        return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)
-
     def _setup_logger(self) -> logging.Logger:
         logger = logging.getLogger(self.nam)
         logger.setLevel(logging.INFO)
+
+        for handler in logger.handlers[:]:
+            logger.removeHandler(handler)
+
         handler = logging.FileHandler(self.path / f'{self.nam}.log')
         handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
         logger.addHandler(handler)
         return logger

-
-    def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw):
+    def _rate_calculate(self, _fout_raw, _fout_qc, _st_raw, _ed_raw) -> None:
         if self.meta['deter_key'] is not None:
             _start, _end = _fout_qc.index[[0, -1]]

             _drop_how = 'any'
             _the_size = len(_fout_raw.resample('1h').mean().index)

-            self.logger.info(f"{'=' * 60}")
-            self.logger.info(
-                f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} ~ {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
-            self.logger.info(
-                f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} ~ {_end.strftime('%Y-%m-%d %H:%M:%S')}")
-            self.logger.info(f"{'-' * 60}")
-            print(f"\n\n\t\tfrom {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}\n")
-
             for _nam, _key in self.meta['deter_key'].items():
                 if _key == ['all']:
                     _key, _drop_how = _fout_qc.keys(), 'all'
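_setup_logger now removes any handlers already attached to the named logger before adding its FileHandler; without that, constructing the same reader twice in one session (e.g. in a notebook) would duplicate every log line. A minimal standalone sketch of the idiom (logger name and file are illustrative):

    import logging

    logger = logging.getLogger('NEPH')
    logger.setLevel(logging.INFO)

    # iterate over a copy, since removeHandler mutates logger.handlers
    for handler in logger.handlers[:]:
        logger.removeHandler(handler)

    logger.addHandler(logging.FileHandler('NEPH.log'))
    assert len(logger.handlers) == 1  # holds no matter how often this block runs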
@@ -136,9 +128,19 @@ class AbstractReader(ABC):
                 self.logger.info(f'\tYield rate: {_yid_rate}%')
                 self.logger.info(f"{'=' * 60}")

-                print(f'\…
-                print(f'\t\…
-                print(f'\t\…
+                print(f'\n\t{_nam} : ')
+                print(f'\t\tacquisition rate : \033[91m{_acq_rate}%\033[0m')
+                print(f'\t\tyield rate : \033[91m{_yid_rate}%\033[0m')
+
+    # set each to true datetime (18:30:01 -> 18:30:00) and reindex data
+    def _raw_process(self, _df):
+        # get time from df and set time to whole time to create time index
+        _st, _ed = _df.index.sort_values()[[0, -1]]
+        _tm_index = date_range(_st.strftime('%Y%m%d %H00'), _ed.floor('h').strftime('%Y%m%d %H00'),
+                               freq=self.meta['freq'])
+        _tm_index.name = 'time'
+
+        return _df.apply(to_numeric, errors='coerce').resample(self.meta['freq']).mean().reindex(_tm_index)

     # process time index
     @staticmethod

@@ -152,7 +154,7 @@ class AbstractReader(ABC):

     # append new data to existing pkl
     @staticmethod
-    def _apnd_prcs(_df_done, _df_apnd):
+    def _append_process(_df_done, _df_apnd):

         if _df_apnd is not None:
             _df = concat([_df_apnd.dropna(how='all').copy(), _df_done.dropna(how='all').copy()])

@@ -164,16 +166,16 @@ class AbstractReader(ABC):

         return _df_done

-    …
+    def _outlier_process(self, _df):
+        outlier_file = self.path / 'outlier.json'

-        if …
+        if not outlier_file.exists():
             return _df

-        with …
+        with outlier_file.open('r', encoding='utf-8', errors='ignore') as f:
+            outliers = json.load(f)

-        for _st, _ed in …
+        for _st, _ed in outliers.values():
             _df.loc[_st:_ed] = np.nan

         return _df
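The rewritten _outlier_process looks for an optional outlier.json next to the raw data and blanks every listed window. The loop reads outliers.values() as (start, end) pairs, so a file shaped like the sketch below should work; the keys are free-form labels, and this schema is inferred from the loop rather than documented in the diff:

    import json
    from pathlib import Path

    # hypothetical outlier.json content, matching `for _st, _ed in outliers.values()`
    outliers = {
        "power_failure": ["2024-01-03 10:00", "2024-01-03 18:00"],
        "span_check": ["2024-01-15 08:00", "2024-01-15 09:00"],
    }
    Path('outlier.json').write_text(json.dumps(outliers, indent=2))
    # each window is then masked with _df.loc[start:end] = np.nan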
@@ -191,14 +193,13 @@ class AbstractReader(ABC):

     @staticmethod
     def _safe_pickle_dump(file_path: Path, data: Any) -> None:
-        …
-            input('\t\t\33[41m Please close the file and press "Enter" \33[0m\n')
+        try:
+            with file_path.open('wb') as f:
+                pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
+        except PermissionError as e:
+            raise IOError(f"Unable to write to {file_path}. The file may be in use or you may not have permission: {e}")
+        except Exception as e:
+            raise IOError(f"Error writing to {file_path}: {e}")

     # read pickle file
     def _read_pkl(self):
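_safe_pickle_dump used to block on input() until the user closed the locked file; it now raises IOError so callers can react programmatically. A sketch of handling the new failure mode (the function body mirrors the diff; the call site is illustrative):

    import pickle as pkl
    from pathlib import Path

    def safe_pickle_dump(file_path: Path, data) -> None:
        # same shape as the 0.1.6 implementation: raise instead of prompting
        try:
            with file_path.open('wb') as f:
                pkl.dump(data, f, protocol=pkl.HIGHEST_PROTOCOL)
        except PermissionError as e:
            raise IOError(f"Unable to write to {file_path}. The file may be in use: {e}")

    try:
        safe_pickle_dump(Path('_read_neph.pkl'), {'ok': True})
    except IOError as err:
        print(err)  # retry or skip instead of hanging on input()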
@@ -206,72 +207,86 @@ class AbstractReader(ABC):
             return pkl.load(raw_data), pkl.load(qc_data)

     def _read_raw_files(self) -> tuple[DataFrame | None, DataFrame | None]:
-        …
+        files = [f
+                 for file_pattern in self.meta['pattern']
+                 for pattern in {file_pattern.lower(), file_pattern.upper(), file_pattern}
+                 for f in self.path.glob(pattern)
                  if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]

         if not files:
-            …
-            return None, None
+            raise FileNotFoundError(f"No files in '{self.path}' could be read. Please check the current path.")

         df_list = []
-        …
+        with Progress(
+                TextColumn("[bold blue]{task.description}", style="bold blue"),
+                BarColumn(bar_width=18, complete_style="green", finished_style="bright_green"),
+                TaskProgressColumn(),
+                TimeRemainingColumn(),
+                TextColumn("{task.fields[filename]}", style="yellow"),
+                console=console,
+                expand=False
+        ) as progress:
+            task = progress.add_task(f"Reading {self.nam} files", total=len(files), filename="")
+            for file in files:
+                progress.update(task, advance=1, filename=file.name)
+                try:
+                    df = self._raw_reader(file)
+
+                    if df is not None and not df.empty:
+                        df_list.append(df)
+                    else:
+                        self.logger.warning(f"File {file.name} produced an empty DataFrame or None.")
+
+                except pd.errors.ParserError as e:
+                    self.logger.error(f"Error tokenizing data: {e}")
+
+                except Exception as e:
+                    self.logger.error(f"Error reading {file.name}: {e}")

         if not df_list:
-            …
+            raise ValueError("All files were either empty or failed to read.")

         raw_data = self._raw_process(concat(df_list))
         qc_data = self._QC(raw_data)

         return raw_data, qc_data

-    # main flow
     def _run(self, _start, _end):
-        _f_raw_done, _f_qc_done = None, None
-
         # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
-        if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and …
-        print(f"\n…
+        if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and not self.reset:
+            print(f"\n{dtm.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mPICKLE\033[0m "
+                  f"from {_start} to {_end}\n")

             _f_raw_done, _f_qc_done = self._read_pkl()

-            if …
-            …
-            self._rate_calculate(_f_raw_done, _f_qc_done, _start_raw, _end_raw)
-
-            return _f_qc_done if self.qc else _f_raw_done
+            if self.append:
+                print(f"Appending new data from {_start} to {_end}")
+                _f_raw_new, _f_qc_new = self._read_raw_files()
+                _f_raw = self._append_process(_f_raw_done, _f_raw_new)
+                _f_qc = self._append_process(_f_qc_done, _f_qc_new)
+            else:
+                _f_raw, _f_qc = _f_raw_done, _f_qc_done

-        …
+        else:
+            print(f"\n{dtm.now().strftime('%m/%d %X')} : Reading {self.nam} \033[96mRAW DATA\033[0m "
+                  f"from {_start} to {_end}\n")
+            _f_raw, _f_qc = self._read_raw_files()

-        …
-        # append new data and pickle data
-        if self.apnd and self.pkl_nam.exists():
-            _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
-            _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)
+        # process time index
+        _f_raw, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw)
+        _f_qc, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc)

-        _f_qc = self.…
+        _f_qc = self._outlier_process(_f_qc)

         # save
         self._save_data(_f_raw, _f_qc)

-        …
+        self.logger.info(f"{'=' * 60}")
+        self.logger.info(f"Raw data time : {_start_raw} to {_end_raw}")
+        self.logger.info(f"Output time : {_start} to {_end}")
+        self.logger.info(f"{'-' * 60}")

-        self.…
+        if self.rate:
+            self._rate_calculate(_f_raw, _f_qc, _start_raw, _end_raw)

         return _f_qc if self.qc else _f_raw
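File reading is now wrapped in a rich progress bar instead of bare prints. A self-contained sketch with the same column layout (Progress, TextColumn, BarColumn, TaskProgressColumn and TimeRemainingColumn are rich's real API; the file list is fabricated):

    from rich.console import Console
    from rich.progress import (BarColumn, Progress, TaskProgressColumn,
                               TextColumn, TimeRemainingColumn)

    console = Console(force_terminal=True, color_system="auto")
    files = [f"day_{i:02d}.csv" for i in range(10)]  # stand-in for self.path.glob(...)

    with Progress(
            TextColumn("[bold blue]{task.description}"),
            BarColumn(bar_width=18, complete_style="green"),
            TaskProgressColumn(),
            TimeRemainingColumn(),
            TextColumn("{task.fields[filename]}", style="yellow"),
            console=console,
    ) as progress:
        task = progress.add_task("Reading NEPH files", total=len(files), filename="")
        for name in files:
            progress.update(task, advance=1, filename=name)  # per-file advance, as in _read_raw_files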
AeroViz/rawDataReader/script/AE33.py

@@ -6,11 +6,11 @@ from AeroViz.rawDataReader.core import AbstractReader
 class Reader(AbstractReader):
     nam = 'AE33'

-    def _raw_reader(self, _file):
-        if _file.stat().st_size / 1024 < 550:
+    def _raw_reader(self, file):
+        if file.stat().st_size / 1024 < 550:
             print('\t It may not be a whole daily data.')

-        _df = read_table(_file, parse_dates={'time': [0, 1]}, index_col='time',
+        _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
                          delimiter=r'\s+', skiprows=5, usecols=range(67))
         _df.columns = _df.columns.str.strip(';')
AeroViz/rawDataReader/script/AE43.py

@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
 class Reader(AbstractReader):
     nam = 'AE43'

-    def _raw_reader(self, _file):
-        _df = read_csv(_file, parse_dates={'time': ['StartTime']}, index_col='time')
+    def _raw_reader(self, file):
+        _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
         _df_id = _df['SetupID'].iloc[-1]

         # get last SetupID data
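Per-instrument scripts stay thin: subclass AbstractReader, set nam, and implement _raw_reader (which now receives file rather than _file) and _QC. A skeleton for a hypothetical new instrument, following the AE43 pattern above (the instrument name and CSV layout are invented; a real reader would also need entries in meta and instrument_class_map):

    from pandas import DataFrame, read_csv

    from AeroViz.rawDataReader.core import AbstractReader


    class Reader(AbstractReader):
        nam = 'MyInstrument'  # hypothetical key

        def _raw_reader(self, file):
            # parse one raw file into a time-indexed DataFrame
            return read_csv(file, parse_dates=['time'], index_col='time')

        def _QC(self, df: DataFrame) -> DataFrame:
            # start from the shared mean±std mask, then add instrument-specific rules
            return self.basic_QC(df)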
|