AeroViz 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AeroViz might be problematic. Click here for more details.
- AeroViz/dataProcess/Chemistry/__init__.py +21 -20
- AeroViz/dataProcess/Chemistry/_isoropia.py +9 -12
- AeroViz/dataProcess/Chemistry/_ocec.py +20 -45
- AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
- AeroViz/dataProcess/Optical/fRH.pkl +0 -0
- AeroViz/dataProcess/VOC/__init__.py +1 -6
- AeroViz/dataProcess/VOC/_potential_par.py +71 -37
- AeroViz/dataProcess/VOC/{voc_par.json → support_voc.json} +321 -339
- AeroViz/rawDataReader/__init__.py +49 -0
- AeroViz/rawDataReader/config/supported_instruments.py +15 -22
- AeroViz/rawDataReader/core/__init__.py +25 -15
- AeroViz/rawDataReader/script/AE33.py +3 -3
- AeroViz/rawDataReader/script/AE43.py +2 -2
- AeroViz/rawDataReader/script/APS_3321.py +2 -2
- AeroViz/rawDataReader/script/Aurora.py +5 -2
- AeroViz/rawDataReader/script/BC1054.py +2 -2
- AeroViz/rawDataReader/script/EPA_vertical.py +2 -2
- AeroViz/rawDataReader/script/GRIMM.py +4 -4
- AeroViz/rawDataReader/script/IGAC.py +2 -2
- AeroViz/rawDataReader/script/MA350.py +2 -2
- AeroViz/rawDataReader/script/Minion.py +2 -2
- AeroViz/rawDataReader/script/NEPH.py +9 -14
- AeroViz/rawDataReader/script/SMPS_TH.py +2 -2
- AeroViz/rawDataReader/script/SMPS_aim11.py +2 -2
- AeroViz/rawDataReader/script/SMPS_genr.py +2 -2
- AeroViz/rawDataReader/script/Sunset_OCEC.py +23 -17
- AeroViz/rawDataReader/script/TEOM.py +2 -2
- AeroViz/rawDataReader/script/Table.py +3 -3
- AeroViz/rawDataReader/script/VOC.py +16 -9
- {AeroViz-0.1.4.dist-info → AeroViz-0.1.5.dist-info}/METADATA +1 -1
- {AeroViz-0.1.4.dist-info → AeroViz-0.1.5.dist-info}/RECORD +34 -32
- {AeroViz-0.1.4.dist-info → AeroViz-0.1.5.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.4.dist-info → AeroViz-0.1.5.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.4.dist-info → AeroViz-0.1.5.dist-info}/top_level.txt +0 -0
|
@@ -19,6 +19,55 @@ def RawDataReader(instrument_name: str,
|
|
|
19
19
|
mean_freq='1h',
|
|
20
20
|
csv_out=True,
|
|
21
21
|
):
|
|
22
|
+
"""
|
|
23
|
+
Factory function to instantiate the appropriate reader module for a given instrument and
|
|
24
|
+
return the processed data over the specified time range.
|
|
25
|
+
|
|
26
|
+
Parameters
|
|
27
|
+
----------
|
|
28
|
+
instrument_name : str
|
|
29
|
+
The name of the instrument for which to read data. Must be a valid key in the `meta` dictionary.
|
|
30
|
+
path : Path
|
|
31
|
+
The directory where raw data files for the instrument are stored.
|
|
32
|
+
qc : bool, optional (default=True)
|
|
33
|
+
If True, apply quality control (QC) to the raw data.
|
|
34
|
+
csv_raw : bool, optional (default=True)
|
|
35
|
+
If True, read raw data from CSV files.
|
|
36
|
+
reset : bool, optional (default=False)
|
|
37
|
+
If True, reset the state and reprocess the data from scratch.
|
|
38
|
+
rate : bool, optional (default=False)
|
|
39
|
+
If True, calculate rates from the data.
|
|
40
|
+
append_data : bool, optional (default=False)
|
|
41
|
+
If True, append new data to the existing dataset instead of overwriting it.
|
|
42
|
+
start : datetime, optional
|
|
43
|
+
Start time for filtering the data. If None, no start time filtering will be applied.
|
|
44
|
+
end : datetime, optional
|
|
45
|
+
End time for filtering the data. If None, no end time filtering will be applied.
|
|
46
|
+
mean_freq : str, optional (default='1h')
|
|
47
|
+
Resampling frequency for averaging the data. Example: '1h' for hourly mean.
|
|
48
|
+
csv_out : bool, optional (default=True)
|
|
49
|
+
If True, output the processed data as a CSV file.
|
|
50
|
+
|
|
51
|
+
Return
|
|
52
|
+
------
|
|
53
|
+
reader_module : Reader
|
|
54
|
+
An instance of the reader module corresponding to the specified instrument, which processes
|
|
55
|
+
the data and returns it in a usable format.
|
|
56
|
+
|
|
57
|
+
Raises
|
|
58
|
+
------
|
|
59
|
+
ValueError
|
|
60
|
+
If the `instrument_name` provided is not a valid key in the `meta` dictionary.
|
|
61
|
+
|
|
62
|
+
Example
|
|
63
|
+
-------
|
|
64
|
+
To read and process data for the BC1054 instrument:
|
|
65
|
+
|
|
66
|
+
>>> from pathlib import Path
|
|
67
|
+
>>> from datetime import datetime
|
|
68
|
+
>>> data = RawDataReader(instrument_name='BC1054', path=Path('/path/to/data'),
|
|
69
|
+
>>> start=datetime(2024, 1, 1), end=datetime(2024, 2, 1))
|
|
70
|
+
"""
|
|
22
71
|
# Mapping of instrument names to their respective classes
|
|
23
72
|
instrument_class_map = {
|
|
24
73
|
'NEPH': NEPH,
|
|
@@ -83,10 +83,8 @@ meta = {
|
|
|
83
83
|
"pattern": "*LCRes.csv",
|
|
84
84
|
"freq": "1h",
|
|
85
85
|
"deter_key": {
|
|
86
|
-
"Thermal OC/EC": ["Thermal_EC", "Thermal_OC"],
|
|
87
86
|
"Thermal OC": ["Thermal_OC"],
|
|
88
87
|
"Thermal EC": ["Thermal_EC"],
|
|
89
|
-
"Optical OC/EC": ["Optical_EC", "Optical_OC"],
|
|
90
88
|
"Optical OC": ["Optical_OC"],
|
|
91
89
|
"Optical EC": ["Optical_EC"],
|
|
92
90
|
},
|
|
@@ -112,26 +110,21 @@ meta = {
|
|
|
112
110
|
"VOC": {
|
|
113
111
|
"pattern": "*.csv",
|
|
114
112
|
"freq": "1h",
|
|
115
|
-
"key": [
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
'Propene', '1.3-Butadiene', 'Isoprene', '1-Octene',
|
|
131
|
-
'Benzene', 'Toluene', 'Ethylbenzene', 'm.p-Xylene', 'o-Xylene', 'Iso-Propylbenzene', 'Styrene',
|
|
132
|
-
'n-Propylbenzene', '3.4-Ethyltoluene', '1.3.5-TMB', '2-Ethyltoluene', '1.2.4-TMB', '1.2.3-TMB',
|
|
133
|
-
'Acetaldehyde', 'Ethanol', 'Acetone', 'IPA', 'Ethyl Acetate', 'Butyl Acetate',
|
|
134
|
-
'VCM', 'TCE', 'PCE', '1.4-DCB', '1.2-DCB'],
|
|
113
|
+
"key": [
|
|
114
|
+
'Benzene', 'Toluene', 'Ethylbenzene', 'm/p-Xylene', 'o-Xylene', 'Ethane', 'Propane', 'Isobutane',
|
|
115
|
+
'n-Butane', 'Isopentane', 'n-Pentane', 'n-Hexane', 'n-Heptane', 'n-Octane', 'n-Nonane', 'n-Decane',
|
|
116
|
+
'n-Undecane', 'n-Dodecane', 'Ethylene', 'Propylene', '1-Butene', 't-2-Butene', 'cis-2-Butene',
|
|
117
|
+
'1-Pentene', 't-2-Pentene', 'cis-2-Pentene', '1-Hexene', 'Acetylene', 'Cyclopentane', 'Methylcyclopentane',
|
|
118
|
+
'Cyclohexane', 'Methylcyclohexane', 'Isoprene', '2,2-Dimethylbutane', '2,3-Dimethylbutane',
|
|
119
|
+
'2-Methylpentane', '3-Methylpentane', '2,4-Dimethylpentane', '2-Methylhexane', '2,3-Dimethylpentane',
|
|
120
|
+
'3-Methylheptane', '2,2,4-Trimethylpentane', '2,3,4-Trimethylpentane', '2-Methylheptane', '3-Methylhexane',
|
|
121
|
+
'Styrene', 'Isopropylbenzene', 'n-Propylbenzene', 'm-Ethyltoluene', 'p-Ethyltoluene', 'm-Diethylbenzene',
|
|
122
|
+
'p-Diethylbenzene', '1,3,5-Trimethylbenzene', 'o-Ethyltoluene', '1,2,4-Trimethylbenzene',
|
|
123
|
+
'1,2,3-Trimethylbenzene',
|
|
124
|
+
'1.2-DCB', '1.4-DCB', '1.3-Butadiene', '1-Octene', '2-Ethyltoluene', '3.4-Ethyltoluene', 'Acetaldehyde',
|
|
125
|
+
'Acetone', 'Butyl Acetate', 'Ethanol', 'Ethyl Acetate', 'Hexane', 'IPA', 'Iso-Propylbenzene',
|
|
126
|
+
'PCE', 'Propene', 'TCE', 'VCM',
|
|
127
|
+
],
|
|
135
128
|
"deter_key": None,
|
|
136
129
|
},
|
|
137
130
|
|
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
10
11
|
from pandas import DataFrame, date_range, concat, to_numeric, to_datetime
|
|
11
12
|
|
|
12
13
|
from ..config.supported_instruments import meta
|
|
@@ -40,7 +41,7 @@ class AbstractReader(ABC):
|
|
|
40
41
|
self.rate = rate
|
|
41
42
|
self.qc = qc
|
|
42
43
|
self.csv = csv_raw
|
|
43
|
-
self.
|
|
44
|
+
self.append = append_data & reset
|
|
44
45
|
|
|
45
46
|
self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
|
|
46
47
|
self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
|
|
@@ -50,7 +51,7 @@ class AbstractReader(ABC):
|
|
|
50
51
|
|
|
51
52
|
# dependency injection function, customize each instrument
|
|
52
53
|
@abstractmethod
|
|
53
|
-
def _raw_reader(self,
|
|
54
|
+
def _raw_reader(self, file):
|
|
54
55
|
pass
|
|
55
56
|
|
|
56
57
|
@abstractmethod
|
|
@@ -112,9 +113,9 @@ class AbstractReader(ABC):
|
|
|
112
113
|
|
|
113
114
|
self.logger.info(f"{'=' * 60}")
|
|
114
115
|
self.logger.info(
|
|
115
|
-
f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')}
|
|
116
|
+
f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} to {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
|
|
116
117
|
self.logger.info(
|
|
117
|
-
f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')}
|
|
118
|
+
f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}")
|
|
118
119
|
self.logger.info(f"{'-' * 60}")
|
|
119
120
|
print(f"\n\n\t\tfrom {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}\n")
|
|
120
121
|
|
|
@@ -211,18 +212,29 @@ class AbstractReader(ABC):
|
|
|
211
212
|
if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]
|
|
212
213
|
|
|
213
214
|
if not files:
|
|
214
|
-
|
|
215
|
-
|
|
215
|
+
raise FileNotFoundError(f"\t\t\033[31mNo files in '{self.path}' could be read."
|
|
216
|
+
f"Please check the current path.\033[0m")
|
|
216
217
|
|
|
217
218
|
df_list = []
|
|
218
219
|
for file in files:
|
|
219
220
|
print(f"\r\t\treading {file.name}", end='')
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
221
|
+
|
|
222
|
+
try:
|
|
223
|
+
df = self._raw_reader(file)
|
|
224
|
+
|
|
225
|
+
if df is not None and not df.empty:
|
|
226
|
+
df_list.append(df)
|
|
227
|
+
else:
|
|
228
|
+
self.logger.warning(f"File {file.name} produced an empty DataFrame or None.")
|
|
229
|
+
|
|
230
|
+
except pd.errors.ParserError as e:
|
|
231
|
+
self.logger.error(f"Error tokenizing data: {e}")
|
|
232
|
+
|
|
233
|
+
except Exception as e:
|
|
234
|
+
self.logger.error(f"Error reading {file.name}: {e}")
|
|
223
235
|
|
|
224
236
|
if not df_list:
|
|
225
|
-
|
|
237
|
+
raise ValueError("All files were either empty or failed to read.")
|
|
226
238
|
|
|
227
239
|
raw_data = self._raw_process(concat(df_list))
|
|
228
240
|
qc_data = self._QC(raw_data)
|
|
@@ -234,12 +246,12 @@ class AbstractReader(ABC):
|
|
|
234
246
|
_f_raw_done, _f_qc_done = None, None
|
|
235
247
|
|
|
236
248
|
# read pickle if pickle file exists and 'reset=False' or process raw data or append new data
|
|
237
|
-
if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.
|
|
249
|
+
if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.append):
|
|
238
250
|
print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
|
|
239
251
|
|
|
240
252
|
_f_raw_done, _f_qc_done = self._read_pkl()
|
|
241
253
|
|
|
242
|
-
if not self.
|
|
254
|
+
if not self.append:
|
|
243
255
|
_f_raw_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw_done)
|
|
244
256
|
_f_qc_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc_done)
|
|
245
257
|
|
|
@@ -254,11 +266,9 @@ class AbstractReader(ABC):
|
|
|
254
266
|
print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
|
|
255
267
|
|
|
256
268
|
_f_raw, _f_qc = self._read_raw_files()
|
|
257
|
-
if _f_raw is None:
|
|
258
|
-
return None
|
|
259
269
|
|
|
260
270
|
# append new data and pickle data
|
|
261
|
-
if self.
|
|
271
|
+
if self.append and self.pkl_nam.exists():
|
|
262
272
|
_f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
|
|
263
273
|
_f_qc = self._apnd_prcs(_f_qc_done, _f_qc)
|
|
264
274
|
|
|
@@ -6,11 +6,11 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'AE33'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
if
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
if file.stat().st_size / 1024 < 550:
|
|
11
11
|
print('\t It may not be a whole daily data.')
|
|
12
12
|
|
|
13
|
-
_df = read_table(
|
|
13
|
+
_df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
|
|
14
14
|
delimiter=r'\s+', skiprows=5, usecols=range(67))
|
|
15
15
|
_df.columns = _df.columns.str.strip(';')
|
|
16
16
|
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'AE43'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
_df = read_csv(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
_df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
|
|
11
11
|
_df_id = _df['SetupID'].iloc[-1]
|
|
12
12
|
|
|
13
13
|
# get last SetupID data
|
|
@@ -7,8 +7,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
7
7
|
class Reader(AbstractReader):
|
|
8
8
|
nam = 'APS_3321'
|
|
9
9
|
|
|
10
|
-
def _raw_reader(self,
|
|
11
|
-
with open(
|
|
10
|
+
def _raw_reader(self, file):
|
|
11
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
12
12
|
_df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
|
|
13
13
|
_key = list(_df.keys()[3:54]) ## 542 ~ 1981
|
|
14
14
|
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'Aurora'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
11
11
|
_df = read_csv(f, low_memory=False, index_col=0)
|
|
12
12
|
|
|
13
13
|
_df.index = to_datetime(_df.index, errors='coerce')
|
|
@@ -33,5 +33,8 @@ class Reader(AbstractReader):
|
|
|
33
33
|
# remove negative value
|
|
34
34
|
_df = _df.mask((_df <= 0) | (_df > 2000)).copy()
|
|
35
35
|
|
|
36
|
+
# total scattering is larger than back scattering
|
|
37
|
+
_df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
|
|
38
|
+
|
|
36
39
|
# QC data in 1h
|
|
37
40
|
return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'BC1054'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with open(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
11
|
_df = read_csv(f, parse_dates=True, index_col=0)
|
|
12
12
|
|
|
13
13
|
_df.columns = _df.columns.str.replace(' ', '')
|
|
@@ -7,8 +7,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
7
7
|
class Reader(AbstractReader):
|
|
8
8
|
nam = 'EPA_vertical'
|
|
9
9
|
|
|
10
|
-
def _raw_reader(self,
|
|
11
|
-
with
|
|
10
|
+
def _raw_reader(self, file):
|
|
11
|
+
with file.open('r', encoding='ascii', errors='ignore') as f:
|
|
12
12
|
# 有、無輸出有效值都可以
|
|
13
13
|
# read 查詢小時值(測項).csv
|
|
14
14
|
df = read_csv(f, encoding='ascii', encoding_errors='ignore', index_col=0, parse_dates=True,
|
|
@@ -6,19 +6,19 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'GRIMM'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
10
|
|
|
11
|
-
_df = read_csv(
|
|
11
|
+
_df = read_csv(file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1',
|
|
12
12
|
dayfirst=True).rename_axis("Time")
|
|
13
13
|
_df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True)
|
|
14
14
|
|
|
15
|
-
if
|
|
15
|
+
if file.name.startswith("A407ST"):
|
|
16
16
|
_df.drop(_df.columns[0:11].tolist() + _df.columns[128:].tolist(), axis=1, inplace=True)
|
|
17
17
|
else:
|
|
18
18
|
_df.drop(_df.columns[0:11].tolist() + _df.columns[-5:].tolist(), axis=1, inplace=True)
|
|
19
19
|
|
|
20
20
|
if _df.empty:
|
|
21
|
-
print(
|
|
21
|
+
print(file, "is empty")
|
|
22
22
|
return None
|
|
23
23
|
|
|
24
24
|
return _df / 0.035
|
|
@@ -10,9 +10,9 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
10
10
|
class Reader(AbstractReader):
|
|
11
11
|
nam = 'IGAC'
|
|
12
12
|
|
|
13
|
-
def _raw_reader(self,
|
|
13
|
+
def _raw_reader(self, file):
|
|
14
14
|
|
|
15
|
-
with
|
|
15
|
+
with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
16
16
|
_df = read_csv(f, parse_dates=True, index_col=0, na_values='-').apply(to_numeric, errors='coerce')
|
|
17
17
|
|
|
18
18
|
_df.columns = _df.keys().str.strip(' ')
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'MA350'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
_df = read_csv(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
_df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time")
|
|
11
11
|
|
|
12
12
|
_df = _df.rename(columns={
|
|
13
13
|
'UV BCc': 'BC1',
|
|
@@ -7,8 +7,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
7
7
|
class Reader(AbstractReader):
|
|
8
8
|
nam = 'Minion'
|
|
9
9
|
|
|
10
|
-
def _raw_reader(self,
|
|
11
|
-
with
|
|
10
|
+
def _raw_reader(self, file):
|
|
11
|
+
with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
12
12
|
_df = read_csv(f, low_memory=False, index_col=0)
|
|
13
13
|
|
|
14
14
|
_df.index = to_datetime(_df.index, errors='coerce')
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'NEPH'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with file.open('r', encoding='utf-8', errors='ignore') as f:
|
|
11
11
|
_df = read_csv(f, header=None, names=range(11))
|
|
12
12
|
|
|
13
13
|
_df_grp = _df.groupby(0)
|
|
@@ -47,23 +47,15 @@ class Reader(AbstractReader):
|
|
|
47
47
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
48
48
|
|
|
49
49
|
except ValueError:
|
|
50
|
-
group_sizes = _df_grp.size()
|
|
51
|
-
print(group_sizes)
|
|
52
|
-
|
|
53
50
|
# Define valid groups and find invalid indices
|
|
54
|
-
|
|
55
|
-
invalid_indices = _df[~_df[0].isin(valid_groups)].index
|
|
56
|
-
|
|
57
|
-
# Print invalid indices and values
|
|
51
|
+
invalid_indices = _df[~_df[0].isin({'B', 'G', 'R', 'D', 'T', 'Y', 'Z'})].index
|
|
58
52
|
print("Invalid values and their indices:")
|
|
59
|
-
for idx in invalid_indices
|
|
60
|
-
print(f"Index: {idx}, Value: {_df.at[idx, 0]}")
|
|
53
|
+
print("\n".join([f"Index: {idx}, Value: {_df.at[idx, 0]}" for idx in invalid_indices]))
|
|
61
54
|
|
|
62
55
|
# Return an empty DataFrame with specified columns if there's a length mismatch
|
|
63
|
-
|
|
64
|
-
_df_out = DataFrame(index=_idx_tm, columns=columns)
|
|
56
|
+
_df_out = DataFrame(index=_idx_tm, columns=['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH'])
|
|
65
57
|
_df_out.index.name = 'Time'
|
|
66
|
-
print(f'\n\t\t\t Length mismatch in {
|
|
58
|
+
print(f'\n\t\t\t Length mismatch in {file} data. Returning an empty DataFrame.')
|
|
67
59
|
return _df_out
|
|
68
60
|
|
|
69
61
|
# QC data
|
|
@@ -71,5 +63,8 @@ class Reader(AbstractReader):
|
|
|
71
63
|
# remove negative value
|
|
72
64
|
_df = _df.mask((_df <= 5).copy())
|
|
73
65
|
|
|
66
|
+
# total scattering is larger than back scattering
|
|
67
|
+
_df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
|
|
68
|
+
|
|
74
69
|
# QC data in 1h
|
|
75
70
|
return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'SMPS_TH'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with open(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
11
|
_df = read_table(f, skiprows=18, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
|
|
12
12
|
_key = list(_df.keys()[6:-26])
|
|
13
13
|
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'SMPS_aim11'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with open(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
11
|
|
|
12
12
|
skiprows = 0
|
|
13
13
|
for _line in f:
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'SMPS_genr'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with open(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
11
|
|
|
12
12
|
skiprows = 0
|
|
13
13
|
for _line in f:
|
|
@@ -6,38 +6,44 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'Sunset_OCEC'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with open(
|
|
11
|
-
_df = read_csv(f, skiprows=3)
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
|
+
_df = read_csv(f, skiprows=3, nrows=25)
|
|
12
12
|
|
|
13
13
|
_df['Start Date/Time'] = _df['Start Date/Time'].str.strip()
|
|
14
14
|
_df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce')
|
|
15
15
|
_df = _df.set_index('time')
|
|
16
|
+
_df.index = _df.index.round('1h')
|
|
16
17
|
|
|
17
18
|
_df = _df.rename(columns={
|
|
18
19
|
'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC',
|
|
19
|
-
'OC ugC/m^3 (Thermal/Optical)': 'Thermal_OC',
|
|
20
|
-
|
|
21
20
|
'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC',
|
|
22
|
-
'EC ugC/m^3 (Thermal/Optical)': 'Thermal_EC',
|
|
23
|
-
|
|
24
21
|
'OC=TC-BC (ugC/LCm^3)': 'Optical_OC',
|
|
25
|
-
'OC by diff ugC (TC-OptEC)': 'Optical_OC',
|
|
26
|
-
|
|
27
22
|
'BC (ugC/LCm^3)': 'Optical_EC',
|
|
23
|
+
'TC (ugC/LCm^3)': 'TC',
|
|
24
|
+
|
|
25
|
+
'OC ugC/m^3 (Thermal/Optical)': 'Thermal_OC',
|
|
26
|
+
'EC ugC/m^3 (Thermal/Optical)': 'Thermal_EC',
|
|
27
|
+
'OC by diff ugC (TC-OptEC)': 'Optical_OC',
|
|
28
28
|
'OptEC ugC/m^3': 'Optical_EC',
|
|
29
|
+
'TC ugC/m^3': 'TC',
|
|
29
30
|
|
|
30
31
|
'Sample Volume Local Condition Actual m^3': 'Sample_Volume',
|
|
31
|
-
|
|
32
|
-
'
|
|
33
|
-
'
|
|
34
|
-
'
|
|
35
|
-
'
|
|
36
|
-
'
|
|
37
|
-
'
|
|
32
|
+
|
|
33
|
+
'OCPk1-ug C': 'OC1_raw',
|
|
34
|
+
'OCPk2-ug C': 'OC2_raw',
|
|
35
|
+
'OCPk3-ug C': 'OC3_raw',
|
|
36
|
+
'OCPk4-ug C': 'OC4_raw',
|
|
37
|
+
'ECPk1-ug C': 'EC1_raw',
|
|
38
|
+
'ECPk2-ug C': 'EC2_raw',
|
|
39
|
+
'ECPk3-ug C': 'EC3_raw',
|
|
40
|
+
'ECPk4-ug C': 'EC4_raw',
|
|
41
|
+
'ECPk5-ug C': 'EC5_raw',
|
|
38
42
|
})
|
|
39
43
|
|
|
40
|
-
_df = _df[['Thermal_OC', 'Optical_OC', 'Thermal_EC', 'Optical_EC', 'TC', '
|
|
44
|
+
_df = _df[['Thermal_OC', 'Optical_OC', 'Thermal_EC', 'Optical_EC', 'TC', 'Sample_Volume',
|
|
45
|
+
'OC1_raw', 'OC2_raw', 'OC3_raw', 'OC4_raw', 'EC1_raw', 'EC2_raw', 'EC3_raw', 'EC4_raw',
|
|
46
|
+
'EC5_raw']]
|
|
41
47
|
|
|
42
48
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
43
49
|
|
|
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
6
6
|
class Reader(AbstractReader):
|
|
7
7
|
nam = 'TEOM'
|
|
8
8
|
|
|
9
|
-
def _raw_reader(self,
|
|
10
|
-
with open(
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
11
|
_df = read_csv(f, skiprows=3, index_col=False)
|
|
12
12
|
|
|
13
13
|
_df = _df.rename(columns={'Time Stamp': 'time',
|
|
@@ -8,11 +8,11 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
8
8
|
class Reader(AbstractReader):
|
|
9
9
|
nam = 'Table'
|
|
10
10
|
|
|
11
|
-
def _raw_reader(self,
|
|
12
|
-
with
|
|
11
|
+
def _raw_reader(self, file):
|
|
12
|
+
with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
13
13
|
_df = read_csv(f, low_memory=False, index_col=0)
|
|
14
14
|
|
|
15
|
-
_df.index = to_datetime(_df.index, errors='coerce'
|
|
15
|
+
_df.index = to_datetime(_df.index, errors='coerce')
|
|
16
16
|
_df.index.name = 'time'
|
|
17
17
|
|
|
18
18
|
_df.columns = _df.keys().str.strip(' ')
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
from pandas import read_csv
|
|
3
2
|
|
|
4
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
@@ -7,20 +6,28 @@ from AeroViz.rawDataReader.core import AbstractReader
|
|
|
7
6
|
class Reader(AbstractReader):
|
|
8
7
|
nam = 'VOC'
|
|
9
8
|
|
|
10
|
-
def _raw_reader(self,
|
|
11
|
-
with
|
|
12
|
-
_df = read_csv(f, parse_dates=
|
|
9
|
+
def _raw_reader(self, file):
|
|
10
|
+
with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
11
|
+
_df = read_csv(f, parse_dates=True, index_col=0, na_values=('-', 'N.D.'))
|
|
13
12
|
|
|
14
13
|
_df.columns = _df.keys().str.strip(' ')
|
|
15
14
|
_df.index.name = 'time'
|
|
16
15
|
|
|
17
|
-
|
|
18
|
-
|
|
16
|
+
support_voc = set(self.meta["key"])
|
|
17
|
+
|
|
18
|
+
valid_keys = [key for key in _df.keys() if key in support_voc]
|
|
19
|
+
invalid_keys = [key for key in _df.keys() if key not in support_voc]
|
|
19
20
|
|
|
20
|
-
|
|
21
|
-
|
|
21
|
+
if invalid_keys:
|
|
22
|
+
self.logger.warning(f'{invalid_keys} are not supported keys.')
|
|
23
|
+
print(f'\n\t\t{invalid_keys} are not supported keys.'
|
|
24
|
+
f'\n\t\tPlease check the\033[91m support_voc.md\033[0m file to use the correct name.')
|
|
22
25
|
|
|
23
|
-
|
|
26
|
+
if valid_keys:
|
|
27
|
+
return _df[valid_keys].loc[~_df.index.duplicated() & _df.index.notna()]
|
|
28
|
+
else:
|
|
29
|
+
self.logger.warning("沒有找到匹配的鍵。返回原始DataFrame並移除含NaN的行。")
|
|
30
|
+
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
24
31
|
|
|
25
32
|
def _QC(self, _df):
|
|
26
33
|
return _df
|