AeroViz 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AeroViz might be problematic. Click here for more details.

Files changed (34)
  1. AeroViz/dataProcess/Chemistry/__init__.py +21 -20
  2. AeroViz/dataProcess/Chemistry/_isoropia.py +9 -12
  3. AeroViz/dataProcess/Chemistry/_ocec.py +20 -45
  4. AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
  5. AeroViz/dataProcess/Optical/fRH.pkl +0 -0
  6. AeroViz/dataProcess/VOC/__init__.py +1 -6
  7. AeroViz/dataProcess/VOC/_potential_par.py +71 -37
  8. AeroViz/dataProcess/VOC/{voc_par.json → support_voc.json} +321 -339
  9. AeroViz/rawDataReader/__init__.py +49 -0
  10. AeroViz/rawDataReader/config/supported_instruments.py +15 -22
  11. AeroViz/rawDataReader/core/__init__.py +25 -15
  12. AeroViz/rawDataReader/script/AE33.py +3 -3
  13. AeroViz/rawDataReader/script/AE43.py +2 -2
  14. AeroViz/rawDataReader/script/APS_3321.py +2 -2
  15. AeroViz/rawDataReader/script/Aurora.py +5 -2
  16. AeroViz/rawDataReader/script/BC1054.py +2 -2
  17. AeroViz/rawDataReader/script/EPA_vertical.py +2 -2
  18. AeroViz/rawDataReader/script/GRIMM.py +4 -4
  19. AeroViz/rawDataReader/script/IGAC.py +2 -2
  20. AeroViz/rawDataReader/script/MA350.py +2 -2
  21. AeroViz/rawDataReader/script/Minion.py +2 -2
  22. AeroViz/rawDataReader/script/NEPH.py +9 -14
  23. AeroViz/rawDataReader/script/SMPS_TH.py +2 -2
  24. AeroViz/rawDataReader/script/SMPS_aim11.py +2 -2
  25. AeroViz/rawDataReader/script/SMPS_genr.py +2 -2
  26. AeroViz/rawDataReader/script/Sunset_OCEC.py +23 -17
  27. AeroViz/rawDataReader/script/TEOM.py +2 -2
  28. AeroViz/rawDataReader/script/Table.py +3 -3
  29. AeroViz/rawDataReader/script/VOC.py +16 -9
  30. {AeroViz-0.1.4.dist-info → AeroViz-0.1.5.dist-info}/METADATA +1 -1
  31. {AeroViz-0.1.4.dist-info → AeroViz-0.1.5.dist-info}/RECORD +34 -32
  32. {AeroViz-0.1.4.dist-info → AeroViz-0.1.5.dist-info}/LICENSE +0 -0
  33. {AeroViz-0.1.4.dist-info → AeroViz-0.1.5.dist-info}/WHEEL +0 -0
  34. {AeroViz-0.1.4.dist-info → AeroViz-0.1.5.dist-info}/top_level.txt +0 -0
@@ -19,6 +19,55 @@ def RawDataReader(instrument_name: str,
19
19
  mean_freq='1h',
20
20
  csv_out=True,
21
21
  ):
22
+ """
23
+ Factory function to instantiate the appropriate reader module for a given instrument and
24
+ return the processed data over the specified time range.
25
+
26
+ Parameters
27
+ ----------
28
+ instrument_name : str
29
+ The name of the instrument for which to read data. Must be a valid key in the `meta` dictionary.
30
+ path : Path
31
+ The directory where raw data files for the instrument are stored.
32
+ qc : bool, optional (default=True)
33
+ If True, apply quality control (QC) to the raw data.
34
+ csv_raw : bool, optional (default=True)
35
+ If True, read raw data from CSV files.
36
+ reset : bool, optional (default=False)
37
+ If True, reset the state and reprocess the data from scratch.
38
+ rate : bool, optional (default=False)
39
+ If True, calculate rates from the data.
40
+ append_data : bool, optional (default=False)
41
+ If True, append new data to the existing dataset instead of overwriting it.
42
+ start : datetime, optional
43
+ Start time for filtering the data. If None, no start time filtering will be applied.
44
+ end : datetime, optional
45
+ End time for filtering the data. If None, no end time filtering will be applied.
46
+ mean_freq : str, optional (default='1h')
47
+ Resampling frequency for averaging the data. Example: '1h' for hourly mean.
48
+ csv_out : bool, optional (default=True)
49
+ If True, output the processed data as a CSV file.
50
+
51
+ Returns
52
+ -------
53
+ reader_module : Reader
54
+ An instance of the reader module corresponding to the specified instrument, which processes
55
+ the data and returns it in a usable format.
56
+
57
+ Raises
58
+ ------
59
+ ValueError
60
+ If the `instrument_name` provided is not a valid key in the `meta` dictionary.
61
+
62
+ Example
63
+ -------
64
+ To read and process data for the BC1054 instrument:
65
+
66
+ >>> from pathlib import Path
67
+ >>> from datetime import datetime
68
+ >>> data = RawDataReader(instrument_name='BC1054', path=Path('/path/to/data'),
69
+ ... start=datetime(2024, 1, 1), end=datetime(2024, 2, 1))
70
+ """
22
71
  # Mapping of instrument names to their respective classes
23
72
  instrument_class_map = {
24
73
  'NEPH': NEPH,
@@ -83,10 +83,8 @@ meta = {
83
83
  "pattern": "*LCRes.csv",
84
84
  "freq": "1h",
85
85
  "deter_key": {
86
- "Thermal OC/EC": ["Thermal_EC", "Thermal_OC"],
87
86
  "Thermal OC": ["Thermal_OC"],
88
87
  "Thermal EC": ["Thermal_EC"],
89
- "Optical OC/EC": ["Optical_EC", "Optical_OC"],
90
88
  "Optical OC": ["Optical_OC"],
91
89
  "Optical EC": ["Optical_EC"],
92
90
  },
@@ -112,26 +110,21 @@ meta = {
112
110
  "VOC": {
113
111
  "pattern": "*.csv",
114
112
  "freq": "1h",
115
- "key": ['Ethane', 'Propane', 'Isobutane', 'n-Butane', 'Cyclopentane', 'Isopentane',
116
- 'n-Pentane', '2,2-Dimethylbutane', '2,3-Dimethylbutane', '2-Methylpentane',
117
- '3-Methylpentane', 'n-Hexane', 'Methylcyclopentane', '2,4-Dimethylpentane',
118
- 'Cyclohexane', '2-Methylhexane', '2-Methylhexane', '3-Methylheptane',
119
- '2,2,4-Trimethylpentane', 'n-Heptane', 'Methylcyclohexane',
120
- '2,3,4-Trimethylpentane', '2-Methylheptane', '3-Methylhexane', 'n-Octane',
121
- 'n-Nonane', 'n-Decane', 'n-Undecane', 'Ethylene', 'Propylene', 't-2-Butene',
122
- '1-Butene', 'cis-2-Butene', 't-2-Pentene', '1-Pentene', 'cis-2-Pentene',
123
- 'isoprene', 'Acetylene', 'Benzene', 'Toluene', 'Ethylbenzene', 'm,p-Xylene',
124
- 'Styrene', 'o-Xylene', 'Isopropylbenzene', 'n-Propylbenzene', 'm-Ethyltoluene',
125
- 'p-Ethyltoluene', '1,3,5-Trimethylbenzene', 'o-Ethyltoluene',
126
- '1,2,4-Trimethylbenzene', '1,2,3-Trimethylbenzene', 'm-Diethylbenzene',
127
- 'p-Diethylbenzene'],
128
-
129
- "key_2": ['Isopentane', 'Hexane', '2-Methylhexane', '3-Methylhexane', '2-Methylheptane', '3-Methylheptane',
130
- 'Propene', '1.3-Butadiene', 'Isoprene', '1-Octene',
131
- 'Benzene', 'Toluene', 'Ethylbenzene', 'm.p-Xylene', 'o-Xylene', 'Iso-Propylbenzene', 'Styrene',
132
- 'n-Propylbenzene', '3.4-Ethyltoluene', '1.3.5-TMB', '2-Ethyltoluene', '1.2.4-TMB', '1.2.3-TMB',
133
- 'Acetaldehyde', 'Ethanol', 'Acetone', 'IPA', 'Ethyl Acetate', 'Butyl Acetate',
134
- 'VCM', 'TCE', 'PCE', '1.4-DCB', '1.2-DCB'],
113
+ "key": [
114
+ 'Benzene', 'Toluene', 'Ethylbenzene', 'm/p-Xylene', 'o-Xylene', 'Ethane', 'Propane', 'Isobutane',
115
+ 'n-Butane', 'Isopentane', 'n-Pentane', 'n-Hexane', 'n-Heptane', 'n-Octane', 'n-Nonane', 'n-Decane',
116
+ 'n-Undecane', 'n-Dodecane', 'Ethylene', 'Propylene', '1-Butene', 't-2-Butene', 'cis-2-Butene',
117
+ '1-Pentene', 't-2-Pentene', 'cis-2-Pentene', '1-Hexene', 'Acetylene', 'Cyclopentane', 'Methylcyclopentane',
118
+ 'Cyclohexane', 'Methylcyclohexane', 'Isoprene', '2,2-Dimethylbutane', '2,3-Dimethylbutane',
119
+ '2-Methylpentane', '3-Methylpentane', '2,4-Dimethylpentane', '2-Methylhexane', '2,3-Dimethylpentane',
120
+ '3-Methylheptane', '2,2,4-Trimethylpentane', '2,3,4-Trimethylpentane', '2-Methylheptane', '3-Methylhexane',
121
+ 'Styrene', 'Isopropylbenzene', 'n-Propylbenzene', 'm-Ethyltoluene', 'p-Ethyltoluene', 'm-Diethylbenzene',
122
+ 'p-Diethylbenzene', '1,3,5-Trimethylbenzene', 'o-Ethyltoluene', '1,2,4-Trimethylbenzene',
123
+ '1,2,3-Trimethylbenzene',
124
+ '1.2-DCB', '1.4-DCB', '1.3-Butadiene', '1-Octene', '2-Ethyltoluene', '3.4-Ethyltoluene', 'Acetaldehyde',
125
+ 'Acetone', 'Butyl Acetate', 'Ethanol', 'Ethyl Acetate', 'Hexane', 'IPA', 'Iso-Propylbenzene',
126
+ 'PCE', 'Propene', 'TCE', 'VCM',
127
+ ],
135
128
  "deter_key": None,
136
129
  },
137
130
 
@@ -7,6 +7,7 @@ from pathlib import Path
7
7
  from typing import Any
8
8
 
9
9
  import numpy as np
10
+ import pandas as pd
10
11
  from pandas import DataFrame, date_range, concat, to_numeric, to_datetime
11
12
 
12
13
  from ..config.supported_instruments import meta
@@ -40,7 +41,7 @@ class AbstractReader(ABC):
40
41
  self.rate = rate
41
42
  self.qc = qc
42
43
  self.csv = csv_raw
43
- self.apnd = append_data & reset
44
+ self.append = append_data & reset
44
45
 
45
46
  self.pkl_nam = self.path / f'_read_{self.nam.lower()}.pkl'
46
47
  self.csv_nam = self.path / f'_read_{self.nam.lower()}.csv'
@@ -50,7 +51,7 @@ class AbstractReader(ABC):
50
51
 
51
52
  # dependency injection function, customize each instrument
52
53
  @abstractmethod
53
- def _raw_reader(self, _file):
54
+ def _raw_reader(self, file):
54
55
  pass
55
56
 
56
57
  @abstractmethod
@@ -112,9 +113,9 @@ class AbstractReader(ABC):
112
113
 
113
114
  self.logger.info(f"{'=' * 60}")
114
115
  self.logger.info(
115
- f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} ~ {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
116
+ f"Raw data time : {_st_raw.strftime('%Y-%m-%d %H:%M:%S')} to {_ed_raw.strftime('%Y-%m-%d %H:%M:%S')}")
116
117
  self.logger.info(
117
- f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} ~ {_end.strftime('%Y-%m-%d %H:%M:%S')}")
118
+ f"Output time : {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}")
118
119
  self.logger.info(f"{'-' * 60}")
119
120
  print(f"\n\n\t\tfrom {_start.strftime('%Y-%m-%d %H:%M:%S')} to {_end.strftime('%Y-%m-%d %H:%M:%S')}\n")
120
121
 
@@ -211,18 +212,29 @@ class AbstractReader(ABC):
211
212
  if f.name not in [self.csv_out.name, self.csv_nam.name, self.csv_nam_raw.name, f'{self.nam}.log']]
212
213
 
213
214
  if not files:
214
- print(f"\t\t\033[31mNo files in '{self.path}' could be read. Please check the current path.\033[0m")
215
- return None, None
215
+ raise FileNotFoundError(f"\t\t\033[31mNo files in '{self.path}' could be read."
216
+ f"Please check the current path.\033[0m")
216
217
 
217
218
  df_list = []
218
219
  for file in files:
219
220
  print(f"\r\t\treading {file.name}", end='')
220
- df = self._raw_reader(file)
221
- if df is not None:
222
- df_list.append(df)
221
+
222
+ try:
223
+ df = self._raw_reader(file)
224
+
225
+ if df is not None and not df.empty:
226
+ df_list.append(df)
227
+ else:
228
+ self.logger.warning(f"File {file.name} produced an empty DataFrame or None.")
229
+
230
+ except pd.errors.ParserError as e:
231
+ self.logger.error(f"Error tokenizing data: {e}")
232
+
233
+ except Exception as e:
234
+ self.logger.error(f"Error reading {file.name}: {e}")
223
235
 
224
236
  if not df_list:
225
- return None, None
237
+ raise ValueError("All files were either empty or failed to read.")
226
238
 
227
239
  raw_data = self._raw_process(concat(df_list))
228
240
  qc_data = self._QC(raw_data)
@@ -234,12 +246,12 @@ class AbstractReader(ABC):
234
246
  _f_raw_done, _f_qc_done = None, None
235
247
 
236
248
  # read pickle if pickle file exists and 'reset=False' or process raw data or append new data
237
- if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.apnd):
249
+ if self.pkl_nam_raw.exists() and self.pkl_nam.exists() and (not self.reset or self.append):
238
250
  print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mPICKLE\033[0m file of {self.nam}")
239
251
 
240
252
  _f_raw_done, _f_qc_done = self._read_pkl()
241
253
 
242
- if not self.apnd:
254
+ if not self.append:
243
255
  _f_raw_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_raw_done)
244
256
  _f_qc_done, _start_raw, _end_raw = self._tmidx_process(_start, _end, _f_qc_done)
245
257
 
@@ -254,11 +266,9 @@ class AbstractReader(ABC):
254
266
  print(f"\n\t{dtm.now().strftime('%m/%d %X')} : Reading \033[96mRAW DATA\033[0m of {self.nam} and process it")
255
267
 
256
268
  _f_raw, _f_qc = self._read_raw_files()
257
- if _f_raw is None:
258
- return None
259
269
 
260
270
  # append new data and pickle data
261
- if self.apnd and self.pkl_nam.exists():
271
+ if self.append and self.pkl_nam.exists():
262
272
  _f_raw = self._apnd_prcs(_f_raw_done, _f_raw)
263
273
  _f_qc = self._apnd_prcs(_f_qc_done, _f_qc)
264
274
 
@@ -6,11 +6,11 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'AE33'
8
8
 
9
- def _raw_reader(self, _file):
10
- if _file.stat().st_size / 1024 < 550:
9
+ def _raw_reader(self, file):
10
+ if file.stat().st_size / 1024 < 550:
11
11
  print('\t It may not be a whole daily data.')
12
12
 
13
- _df = read_table(_file, parse_dates={'time': [0, 1]}, index_col='time',
13
+ _df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
14
14
  delimiter=r'\s+', skiprows=5, usecols=range(67))
15
15
  _df.columns = _df.columns.str.strip(';')
16
16
 
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'AE43'
8
8
 
9
- def _raw_reader(self, _file):
10
- _df = read_csv(_file, parse_dates={'time': ['StartTime']}, index_col='time')
9
+ def _raw_reader(self, file):
10
+ _df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
11
11
  _df_id = _df['SetupID'].iloc[-1]
12
12
 
13
13
  # get last SetupID data
@@ -7,8 +7,8 @@ from AeroViz.rawDataReader.core import AbstractReader
7
7
  class Reader(AbstractReader):
8
8
  nam = 'APS_3321'
9
9
 
10
- def _raw_reader(self, _file):
11
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
10
+ def _raw_reader(self, file):
11
+ with open(file, 'r', encoding='utf-8', errors='ignore') as f:
12
12
  _df = read_table(f, skiprows=6, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
13
13
  _key = list(_df.keys()[3:54]) ## 542 ~ 1981
14
14
 
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'Aurora'
8
8
 
9
- def _raw_reader(self, _file):
10
- with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
9
+ def _raw_reader(self, file):
10
+ with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
11
11
  _df = read_csv(f, low_memory=False, index_col=0)
12
12
 
13
13
  _df.index = to_datetime(_df.index, errors='coerce')
@@ -33,5 +33,8 @@ class Reader(AbstractReader):
33
33
  # remove negative value
34
34
  _df = _df.mask((_df <= 0) | (_df > 2000)).copy()
35
35
 
36
+ # total scattering is larger than back scattering
37
+ _df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
38
+
36
39
  # QC data in 1h
37
40
  return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'BC1054'
8
8
 
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
9
+ def _raw_reader(self, file):
10
+ with open(file, 'r', encoding='utf-8', errors='ignore') as f:
11
11
  _df = read_csv(f, parse_dates=True, index_col=0)
12
12
 
13
13
  _df.columns = _df.columns.str.replace(' ', '')
@@ -7,8 +7,8 @@ from AeroViz.rawDataReader.core import AbstractReader
7
7
  class Reader(AbstractReader):
8
8
  nam = 'EPA_vertical'
9
9
 
10
- def _raw_reader(self, _file):
11
- with _file.open('r', encoding='ascii', errors='ignore') as f:
10
+ def _raw_reader(self, file):
11
+ with file.open('r', encoding='ascii', errors='ignore') as f:
12
12
  # 有、無輸出有效值都可以
13
13
  # read 查詢小時值(測項).csv
14
14
  df = read_csv(f, encoding='ascii', encoding_errors='ignore', index_col=0, parse_dates=True,
@@ -6,19 +6,19 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'GRIMM'
8
8
 
9
- def _raw_reader(self, _file):
9
+ def _raw_reader(self, file):
10
10
 
11
- _df = read_csv(_file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1',
11
+ _df = read_csv(file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1',
12
12
  dayfirst=True).rename_axis("Time")
13
13
  _df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True)
14
14
 
15
- if _file.name.startswith("A407ST"):
15
+ if file.name.startswith("A407ST"):
16
16
  _df.drop(_df.columns[0:11].tolist() + _df.columns[128:].tolist(), axis=1, inplace=True)
17
17
  else:
18
18
  _df.drop(_df.columns[0:11].tolist() + _df.columns[-5:].tolist(), axis=1, inplace=True)
19
19
 
20
20
  if _df.empty:
21
- print(_file, "is empty")
21
+ print(file, "is empty")
22
22
  return None
23
23
 
24
24
  return _df / 0.035
@@ -10,9 +10,9 @@ from AeroViz.rawDataReader.core import AbstractReader
10
10
  class Reader(AbstractReader):
11
11
  nam = 'IGAC'
12
12
 
13
- def _raw_reader(self, _file):
13
+ def _raw_reader(self, file):
14
14
 
15
- with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
15
+ with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
16
16
  _df = read_csv(f, parse_dates=True, index_col=0, na_values='-').apply(to_numeric, errors='coerce')
17
17
 
18
18
  _df.columns = _df.keys().str.strip(' ')
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'MA350'
8
8
 
9
- def _raw_reader(self, _file):
10
- _df = read_csv(_file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time")
9
+ def _raw_reader(self, file):
10
+ _df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time")
11
11
 
12
12
  _df = _df.rename(columns={
13
13
  'UV BCc': 'BC1',
@@ -7,8 +7,8 @@ from AeroViz.rawDataReader.core import AbstractReader
7
7
  class Reader(AbstractReader):
8
8
  nam = 'Minion'
9
9
 
10
- def _raw_reader(self, _file):
11
- with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
10
+ def _raw_reader(self, file):
11
+ with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
12
12
  _df = read_csv(f, low_memory=False, index_col=0)
13
13
 
14
14
  _df.index = to_datetime(_df.index, errors='coerce')
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'NEPH'
8
8
 
9
- def _raw_reader(self, _file):
10
- with _file.open('r', encoding='utf-8', errors='ignore') as f:
9
+ def _raw_reader(self, file):
10
+ with file.open('r', encoding='utf-8', errors='ignore') as f:
11
11
  _df = read_csv(f, header=None, names=range(11))
12
12
 
13
13
  _df_grp = _df.groupby(0)
@@ -47,23 +47,15 @@ class Reader(AbstractReader):
47
47
  return _df.loc[~_df.index.duplicated() & _df.index.notna()]
48
48
 
49
49
  except ValueError:
50
- group_sizes = _df_grp.size()
51
- print(group_sizes)
52
-
53
50
  # Define valid groups and find invalid indices
54
- valid_groups = {'B', 'G', 'R', 'D', 'T', 'Y', 'Z'}
55
- invalid_indices = _df[~_df[0].isin(valid_groups)].index
56
-
57
- # Print invalid indices and values
51
+ invalid_indices = _df[~_df[0].isin({'B', 'G', 'R', 'D', 'T', 'Y', 'Z'})].index
58
52
  print("Invalid values and their indices:")
59
- for idx in invalid_indices:
60
- print(f"Index: {idx}, Value: {_df.at[idx, 0]}")
53
+ print("\n".join([f"Index: {idx}, Value: {_df.at[idx, 0]}" for idx in invalid_indices]))
61
54
 
62
55
  # Return an empty DataFrame with specified columns if there's a length mismatch
63
- columns = ['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH']
64
- _df_out = DataFrame(index=_idx_tm, columns=columns)
56
+ _df_out = DataFrame(index=_idx_tm, columns=['B', 'G', 'R', 'BB', 'BG', 'BR', 'RH'])
65
57
  _df_out.index.name = 'Time'
66
- print(f'\n\t\t\t Length mismatch in {_file} data. Returning an empty DataFrame.')
58
+ print(f'\n\t\t\t Length mismatch in {file} data. Returning an empty DataFrame.')
67
59
  return _df_out
68
60
 
69
61
  # QC data
@@ -71,5 +63,8 @@ class Reader(AbstractReader):
71
63
  # remove negative value
72
64
  _df = _df.mask((_df <= 5).copy())
73
65
 
66
+ # total scattering is larger than back scattering
67
+ _df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
68
+
74
69
  # QC data in 1h
75
70
  return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'SMPS_TH'
8
8
 
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
9
+ def _raw_reader(self, file):
10
+ with open(file, 'r', encoding='utf-8', errors='ignore') as f:
11
11
  _df = read_table(f, skiprows=18, parse_dates={'Time': ['Date', 'Start Time']}).set_index('Time')
12
12
  _key = list(_df.keys()[6:-26])
13
13
 
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'SMPS_aim11'
8
8
 
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
9
+ def _raw_reader(self, file):
10
+ with open(file, 'r', encoding='utf-8', errors='ignore') as f:
11
11
 
12
12
  skiprows = 0
13
13
  for _line in f:
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'SMPS_genr'
8
8
 
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
9
+ def _raw_reader(self, file):
10
+ with open(file, 'r', encoding='utf-8', errors='ignore') as f:
11
11
 
12
12
  skiprows = 0
13
13
  for _line in f:
@@ -6,38 +6,44 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'Sunset_OCEC'
8
8
 
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
11
- _df = read_csv(f, skiprows=3)
9
+ def _raw_reader(self, file):
10
+ with open(file, 'r', encoding='utf-8', errors='ignore') as f:
11
+ _df = read_csv(f, skiprows=3, nrows=25)
12
12
 
13
13
  _df['Start Date/Time'] = _df['Start Date/Time'].str.strip()
14
14
  _df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce')
15
15
  _df = _df.set_index('time')
16
+ _df.index = _df.index.round('1h')
16
17
 
17
18
  _df = _df.rename(columns={
18
19
  'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC',
19
- 'OC ugC/m^3 (Thermal/Optical)': 'Thermal_OC',
20
-
21
20
  'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC',
22
- 'EC ugC/m^3 (Thermal/Optical)': 'Thermal_EC',
23
-
24
21
  'OC=TC-BC (ugC/LCm^3)': 'Optical_OC',
25
- 'OC by diff ugC (TC-OptEC)': 'Optical_OC',
26
-
27
22
  'BC (ugC/LCm^3)': 'Optical_EC',
23
+ 'TC (ugC/LCm^3)': 'TC',
24
+
25
+ 'OC ugC/m^3 (Thermal/Optical)': 'Thermal_OC',
26
+ 'EC ugC/m^3 (Thermal/Optical)': 'Thermal_EC',
27
+ 'OC by diff ugC (TC-OptEC)': 'Optical_OC',
28
28
  'OptEC ugC/m^3': 'Optical_EC',
29
+ 'TC ugC/m^3': 'TC',
29
30
 
30
31
  'Sample Volume Local Condition Actual m^3': 'Sample_Volume',
31
- 'TC (ugC/LCm^3)': 'TC',
32
- 'TC ugC/m^3': 'TC',
33
- 'OCPk1-ug C': 'OC1',
34
- 'OCPk2-ug C': 'OC2',
35
- 'OCPk3-ug C': 'OC3',
36
- 'OCPk4-ug C': 'OC4',
37
- 'Pyrolized C ug': 'PC'
32
+
33
+ 'OCPk1-ug C': 'OC1_raw',
34
+ 'OCPk2-ug C': 'OC2_raw',
35
+ 'OCPk3-ug C': 'OC3_raw',
36
+ 'OCPk4-ug C': 'OC4_raw',
37
+ 'ECPk1-ug C': 'EC1_raw',
38
+ 'ECPk2-ug C': 'EC2_raw',
39
+ 'ECPk3-ug C': 'EC3_raw',
40
+ 'ECPk4-ug C': 'EC4_raw',
41
+ 'ECPk5-ug C': 'EC5_raw',
38
42
  })
39
43
 
40
- _df = _df[['Thermal_OC', 'Optical_OC', 'Thermal_EC', 'Optical_EC', 'TC', 'OC1', 'OC2', 'OC3', 'OC4']]
44
+ _df = _df[['Thermal_OC', 'Optical_OC', 'Thermal_EC', 'Optical_EC', 'TC', 'Sample_Volume',
45
+ 'OC1_raw', 'OC2_raw', 'OC3_raw', 'OC4_raw', 'EC1_raw', 'EC2_raw', 'EC3_raw', 'EC4_raw',
46
+ 'EC5_raw']]
41
47
 
42
48
  return _df.loc[~_df.index.duplicated() & _df.index.notna()]
43
49
 
@@ -6,8 +6,8 @@ from AeroViz.rawDataReader.core import AbstractReader
6
6
  class Reader(AbstractReader):
7
7
  nam = 'TEOM'
8
8
 
9
- def _raw_reader(self, _file):
10
- with open(_file, 'r', encoding='utf-8', errors='ignore') as f:
9
+ def _raw_reader(self, file):
10
+ with open(file, 'r', encoding='utf-8', errors='ignore') as f:
11
11
  _df = read_csv(f, skiprows=3, index_col=False)
12
12
 
13
13
  _df = _df.rename(columns={'Time Stamp': 'time',
@@ -8,11 +8,11 @@ from AeroViz.rawDataReader.core import AbstractReader
8
8
  class Reader(AbstractReader):
9
9
  nam = 'Table'
10
10
 
11
- def _raw_reader(self, _file):
12
- with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
11
+ def _raw_reader(self, file):
12
+ with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
13
13
  _df = read_csv(f, low_memory=False, index_col=0)
14
14
 
15
- _df.index = to_datetime(_df.index, errors='coerce', format=self._oth_set.get('date_format') or 'mixed')
15
+ _df.index = to_datetime(_df.index, errors='coerce')
16
16
  _df.index.name = 'time'
17
17
 
18
18
  _df.columns = _df.keys().str.strip(' ')
@@ -1,4 +1,3 @@
1
-
2
1
  from pandas import read_csv
3
2
 
4
3
  from AeroViz.rawDataReader.core import AbstractReader
@@ -7,20 +6,28 @@ from AeroViz.rawDataReader.core import AbstractReader
7
6
  class Reader(AbstractReader):
8
7
  nam = 'VOC'
9
8
 
10
- def _raw_reader(self, _file):
11
- with _file.open('r', encoding='utf-8-sig', errors='ignore') as f:
12
- _df = read_csv(f, parse_dates=[0], index_col=[0], na_values=('-', 'N.D.'))
9
+ def _raw_reader(self, file):
10
+ with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
11
+ _df = read_csv(f, parse_dates=True, index_col=0, na_values=('-', 'N.D.'))
13
12
 
14
13
  _df.columns = _df.keys().str.strip(' ')
15
14
  _df.index.name = 'time'
16
15
 
17
- try:
18
- _df = _df[self.meta["key"]].loc[_df.index.dropna()]
16
+ support_voc = set(self.meta["key"])
17
+
18
+ valid_keys = [key for key in _df.keys() if key in support_voc]
19
+ invalid_keys = [key for key in _df.keys() if key not in support_voc]
19
20
 
20
- except KeyError:
21
- _df = _df[self.meta["key_2"]].loc[_df.index.dropna()]
21
+ if invalid_keys:
22
+ self.logger.warning(f'{invalid_keys} are not supported keys.')
23
+ print(f'\n\t\t{invalid_keys} are not supported keys.'
24
+ f'\n\t\tPlease check the\033[91m support_voc.md\033[0m file to use the correct name.')
22
25
 
23
- return _df.loc[~_df.index.duplicated() & _df.index.notna()]
26
+ if valid_keys:
27
+ return _df[valid_keys].loc[~_df.index.duplicated() & _df.index.notna()]
28
+ else:
29
+ self.logger.warning("沒有找到匹配的鍵。返回原始DataFrame並移除含NaN的行。")
30
+ return _df.loc[~_df.index.duplicated() & _df.index.notna()]
24
31
 
25
32
  def _QC(self, _df):
26
33
  return _df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: AeroViz
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Aerosol science
5
5
  Home-page: https://github.com/Alex870521/AeroViz
6
6
  Author: alex