AeroViz-0.1.6-py3-none-any.whl → AeroViz-0.1.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AeroViz might be problematic. Click here for more details.

Files changed (57)
  1. AeroViz/data/240228_00.txt +101 -0
  2. AeroViz/dataProcess/Chemistry/_ocec.py +20 -7
  3. AeroViz/plot/__init__.py +2 -0
  4. AeroViz/plot/hysplit/__init__.py +1 -0
  5. AeroViz/plot/hysplit/hysplit.py +79 -0
  6. AeroViz/plot/meteorology/meteorology.py +2 -0
  7. AeroViz/plot/optical/optical.py +60 -59
  8. AeroViz/plot/pie.py +14 -2
  9. AeroViz/plot/radar.py +184 -0
  10. AeroViz/plot/scatter.py +16 -7
  11. AeroViz/plot/templates/diurnal_pattern.py +24 -7
  12. AeroViz/plot/templates/koschmieder.py +11 -8
  13. AeroViz/plot/timeseries/template.py +2 -2
  14. AeroViz/plot/timeseries/timeseries.py +47 -7
  15. AeroViz/rawDataReader/__init__.py +75 -68
  16. AeroViz/rawDataReader/config/supported_instruments.py +52 -19
  17. AeroViz/rawDataReader/core/__init__.py +194 -106
  18. AeroViz/rawDataReader/script/AE33.py +11 -6
  19. AeroViz/rawDataReader/script/AE43.py +10 -5
  20. AeroViz/rawDataReader/script/Aurora.py +14 -10
  21. AeroViz/rawDataReader/script/BC1054.py +10 -6
  22. AeroViz/rawDataReader/script/EPA.py +39 -0
  23. AeroViz/rawDataReader/script/GRIMM.py +1 -2
  24. AeroViz/rawDataReader/script/IGAC.py +6 -23
  25. AeroViz/rawDataReader/script/MA350.py +12 -5
  26. AeroViz/rawDataReader/script/Minion.py +107 -30
  27. AeroViz/rawDataReader/script/NEPH.py +15 -5
  28. AeroViz/rawDataReader/script/OCEC.py +39 -15
  29. AeroViz/rawDataReader/script/SMPS.py +1 -0
  30. AeroViz/rawDataReader/script/TEOM.py +15 -11
  31. AeroViz/rawDataReader/script/VOC.py +1 -1
  32. AeroViz/rawDataReader/script/XRF.py +11 -0
  33. AeroViz/rawDataReader/script/__init__.py +2 -2
  34. {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/METADATA +54 -30
  35. {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/RECORD +40 -51
  36. AeroViz/process/__init__.py +0 -31
  37. AeroViz/process/core/DataProc.py +0 -19
  38. AeroViz/process/core/SizeDist.py +0 -90
  39. AeroViz/process/core/__init__.py +0 -4
  40. AeroViz/process/method/__init__.py +0 -2
  41. AeroViz/process/method/prop.py +0 -62
  42. AeroViz/process/script/AbstractDistCalc.py +0 -143
  43. AeroViz/process/script/Chemical.py +0 -177
  44. AeroViz/process/script/IMPACT.py +0 -49
  45. AeroViz/process/script/IMPROVE.py +0 -161
  46. AeroViz/process/script/Others.py +0 -65
  47. AeroViz/process/script/PSD.py +0 -103
  48. AeroViz/process/script/PSD_dry.py +0 -93
  49. AeroViz/process/script/__init__.py +0 -5
  50. AeroViz/process/script/retrieve_RI.py +0 -69
  51. AeroViz/rawDataReader/script/EPA_vertical.py +0 -46
  52. AeroViz/rawDataReader/script/Table.py +0 -27
  53. /AeroViz/{process/method → plot/optical}/PyMieScatt_update.py +0 -0
  54. /AeroViz/{process/method → plot/optical}/mie_theory.py +0 -0
  55. {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/LICENSE +0 -0
  56. {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/WHEEL +0 -0
  57. {AeroViz-0.1.6.dist-info → AeroViz-0.1.8.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,7 @@
1
1
  # read meteorological data from google sheet
2
2
 
3
3
 
4
- import numpy as np
5
- from pandas import read_csv, concat, to_numeric
4
+ from pandas import read_csv, to_numeric
6
5
 
7
6
  from AeroViz.rawDataReader.core import AbstractReader
8
7
 
@@ -35,24 +34,8 @@ class Reader(AbstractReader):
35
34
  'SO42-': 0.08,
36
35
  }
37
36
 
38
- # _mdl.update(self._oth_set.get('mdl', {}))
39
-
40
- def _se_le(_df_, _log=False):
41
- _df_ = np.log10(_df_) if _log else _df_
42
-
43
- _df_qua = _df_.quantile([.25, .75])
44
- _df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
45
- _df_iqr = _df_q3 - _df_q1
46
-
47
- _se = concat([_df_q1 - 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
48
- _le = concat([_df_q3 + 1.5 * _df_iqr] * len(_df_), axis=1).T.set_index(_df_.index)
49
-
50
- if _log:
51
- return 10 ** _se, 10 ** _le
52
- return _se, _le
53
-
54
37
  _cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
55
- ['Cl-', 'NO2-', 'NO3-', 'SO42-', ],
38
+ ['Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-', ],
56
39
  ['SO42-', 'NO3-', 'NH4+'])
57
40
 
58
41
  _df_salt = _df[_mdl.keys()].copy()
@@ -68,23 +51,23 @@ class Reader(AbstractReader):
68
51
 
69
52
  # calculate SE LE
70
53
  # salt < LE
71
- _se, _le = _se_le(_df_salt, _log=True)
54
+ _se, _le = self.IQR_QC(_df_salt, log_dist=True)
72
55
  _df_salt = _df_salt.mask(_df_salt > _le).copy()
73
56
 
74
57
  # C/A, A/C
75
58
  _rat_CA = (_df_salt[_cation].sum(axis=1) / _df_salt[_anion].sum(axis=1)).to_frame()
76
59
  _rat_AC = (1 / _rat_CA).copy()
77
60
 
78
- _se, _le = _se_le(_rat_CA, )
61
+ _se, _le = self.IQR_QC(_rat_CA, )
79
62
  _cond_CA = (_rat_CA < _le) & (_rat_CA > 0)
80
63
 
81
- _se, _le = _se_le(_rat_AC, )
64
+ _se, _le = self.IQR_QC(_rat_AC, )
82
65
  _cond_AC = (_rat_AC < _le) & (_rat_AC > 0)
83
66
 
84
67
  _df_salt = _df_salt.where((_cond_CA * _cond_AC)[0]).copy()
85
68
 
86
69
  # conc. of main salt > SE
87
- _se, _le = _se_le(_df_salt[_main], _log=True)
70
+ _se, _le = self.IQR_QC(_df_salt[_main], log_dist=True)
88
71
  _df_salt[_main] = _df_salt[_main].mask(_df_salt[_main] < _se).copy()
89
72
 
90
73
  return _df_salt.reindex(_df.index)
@@ -1,4 +1,4 @@
1
- from pandas import read_csv
1
+ from pandas import read_csv, to_numeric
2
2
 
3
3
  from AeroViz.rawDataReader.core import AbstractReader
4
4
 
@@ -7,7 +7,8 @@ class Reader(AbstractReader):
7
7
  nam = 'MA350'
8
8
 
9
9
  def _raw_reader(self, file):
10
- _df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis("Time")
10
+ _df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis(
11
+ "Time").apply(to_numeric, errors='coerce')
11
12
 
12
13
  _df = _df.rename(columns={
13
14
  'UV BCc': 'BC1',
@@ -31,8 +32,14 @@ class Reader(AbstractReader):
31
32
 
32
33
  # QC data
33
34
  def _QC(self, _df):
35
+ _index = _df.index.copy()
36
+
34
37
  # remove negative value
35
- _df = _df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BB mass', 'FF mass', 'AAE', 'BB']].mask((_df < 0).copy())
38
+ _df = _df.mask(
39
+ (_df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5']] <= 0) | (_df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5']] > 20000))
40
+
41
+ # use IQR_QC
42
+ _df = self.time_aware_IQR_QC(_df, time_window='1h')
36
43
 
37
- # QC data in 1h
38
- return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
44
+ # make sure all columns have values, otherwise set to nan
45
+ return _df.dropna(how='any').reindex(_index)
@@ -1,38 +1,80 @@
1
+ from typing import Literal
2
+
1
3
  import numpy as np
2
- from pandas import read_csv, to_datetime, to_numeric
4
+ import pandas
5
+ from pandas import read_excel, to_numeric
3
6
 
4
7
  from AeroViz.rawDataReader.core import AbstractReader
5
8
 
9
+ pandas.set_option("future.no_silent_downcasting", True)
10
+
11
+ desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC',
12
+ 'CH4', 'PM10', 'PM2.5', 'WS', 'WD', 'AT', 'RH']
13
+
14
+ desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene']
15
+
16
+ desired_order3 = ['Al', 'Si', 'P', 'S', 'Cl', 'K', 'Ca', 'Ti', 'V', 'Cr', 'Mn', 'Fe',
17
+ 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Rb', 'Sr',
18
+ 'Y', 'Zr', 'Nb', 'Mo', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te',
19
+ 'Cs', 'Ba', 'La', 'Ce', 'W', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi']
20
+
21
+ desired_order4 = ['NH3', 'HF', 'HCl', 'HNO2', 'HNO3', 'G-SO2',
22
+ 'Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+',
23
+ 'F-', 'Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']
24
+
6
25
 
7
26
  class Reader(AbstractReader):
8
27
  nam = 'Minion'
9
28
 
10
29
  def _raw_reader(self, file):
11
- with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
12
- _df = read_csv(f, low_memory=False, index_col=0)
30
+ # 讀取 Excel 文件
31
+ df = read_excel(file, index_col=0, parse_dates=True)
32
+
33
+ # 重命名列,去除空白
34
+ df = df.rename(columns=lambda x: x.strip())
35
+
36
+ # 保存單位行並給它一個名稱
37
+ units = df.iloc[0].copy()
13
38
 
14
- _df.index = to_datetime(_df.index, errors='coerce')
15
- _df.index.name = 'time'
39
+ # 刪除原始數據中的單位行
40
+ df = df.iloc[1:]
16
41
 
17
- _df.columns = _df.keys().str.strip(' ')
42
+ # 替換特定值
43
+ df = df.replace({'維護校正': '*', np.nan: '-', '0L': '_', 'Nodata': '-'}, inplace=False)
44
+ df = df.replace(to_replace=r'\d*[#]\b', value='#', regex=True)
45
+ df = df.replace(to_replace=r'\d*[L]\b', value='_', regex=True)
18
46
 
19
- return _df.loc[~_df.index.duplicated() & _df.index.notna()]
47
+ # 處理除了'WD'列的 0
48
+ non_wd_columns = [col for col in df.columns if col != 'WD']
49
+ df.loc[:, non_wd_columns] = df.loc[:, non_wd_columns].replace({0: '_'})
50
+
51
+ # 重新排序列
52
+ df = self.reorder_dataframe_columns(df, [desired_order1, desired_order2, desired_order3, desired_order4])
53
+
54
+ # 將單位行添加回 DataFrame
55
+ # df = concat([units.to_frame().T, df])
56
+
57
+ df.index.name = 'Time'
58
+
59
+ return df.loc[~df.index.duplicated() & df.index.notna()]
20
60
 
21
61
  def _QC(self, _df):
62
+ # remove negative value
63
+ _df = _df.mask((_df < 0))
64
+
22
65
  # XRF QAQC
23
66
  _df = self.XRF_QAQC(_df)
24
67
 
25
68
  # ions balance
26
- _df = self.ions_balance(_df)
27
-
28
- # remove negative value
29
- _df = _df.mask((_df < 0).copy())
69
+ _df = self.IGAC_QAQC(_df)
30
70
 
31
71
  # QC data in 6h
32
- return _df.resample('6h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
72
+ _df = self.time_aware_IQR_QC(_df)
73
+
74
+ return _df
33
75
 
34
76
  # base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time
35
- def XRF_QAQC(self, df):
77
+ def XRF_QAQC(self, df, MDL_replace: Literal['nan', '0.5 * MDL'] = 'nan'):
36
78
  MDL = {
37
79
  'Al': 100, 'Si': 18, 'P': 5.2, 'S': 3.2,
38
80
  'Cl': 1.7, 'K': 1.2, 'Ca': 0.3, 'Ti': 1.6,
@@ -40,34 +82,71 @@ class Reader(AbstractReader):
40
82
  'Co': 0.14, 'Ni': 0.096, 'Cu': 0.079, 'Zn': 0.067,
41
83
  'Ga': 0.059, 'Ge': 0.056, 'As': 0.063, 'Se': 0.081,
42
84
  'Br': 0.1, 'Rb': 0.19, 'Sr': 0.22, 'Y': 0.28,
43
- 'Zr': 0.33, 'Nb': 0.41, 'Mo': 0.48, 'Ag': 1.9,
44
- 'Cd': 2.5, 'In': 3.1, 'Sn': 4.1, 'Sb': 5.2,
45
- 'Te': 0.6, 'I': 0.49, 'Cs': 0.37, 'Ba': 0.39,
46
- 'La': 0.36, 'Ce': 0.3, 'Pt': 0.12, 'Au': 0.1,
47
- 'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13, 'Bi': 0.13
85
+ 'Zr': 0.33, 'Nb': 0.41, 'Mo': 0.48, 'Pd': 2.2,
86
+ 'Ag': 1.9, 'Cd': 2.5, 'In': 3.1, 'Sn': 4.1,
87
+ 'Sb': 5.2, 'Te': 0.6, 'Cs': 0.37, 'Ba': 0.39,
88
+ 'La': 0.36, 'Ce': 0.3, 'W': 0.0001, 'Pt': 0.12,
89
+ 'Au': 0.1, 'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13,
90
+ 'Bi': 0.13
48
91
  }
49
- # 將小於 MDL 值的數據替換為 NaN
92
+
93
+ # Br Li internal standard
94
+
95
+ # 將小於 MDL 值的數據替換為 nan or 1/2 MDL
50
96
  for element, threshold in MDL.items():
51
97
  if element in df.columns:
52
- df[element] = df[element].where(df[element] >= threshold, np.nan)
98
+ rep = np.nan if MDL_replace == 'nan' else 0.5 * threshold
99
+ df[element] = df[element].where(df[element] >= threshold, rep)
53
100
 
54
101
  self.logger.info(f"{'=' * 60}")
55
102
  self.logger.info(f"XRF QAQC summary:")
56
103
  self.logger.info("\t\ttransform values below MDL to NaN")
57
104
  self.logger.info(f"{'=' * 60}")
58
105
 
106
+ # 轉換單位 ng/m3 -> ug/m3
107
+ if df.Al.max() > 10 and df.Fe.max() > 10:
108
+ # 確保 MDL.keys() 中的所有列都存在於 _df 中
109
+ columns_to_convert = [col for col in MDL.keys() if col in df.columns]
110
+
111
+ df[columns_to_convert] = df[columns_to_convert].div(1000)
112
+
59
113
  return df
60
114
 
61
- def ions_balance(self, df, tolerance=0.3):
115
+ def IGAC_QAQC(self, df, tolerance=1):
62
116
  """
63
117
  Calculate the balance of ions in the system
64
118
  """
119
+ # https://www.yangyao-env.com/web/product/product_in2.jsp?pd_id=PD1640151884502
120
+ MDL = {
121
+ 'HF': 0.08, 'HCl': 0.05, 'HNO2': 0.01, 'HNO3': 0.05, 'G-SO2': 0.05, 'NH3': 0.1,
122
+ 'Na+': 0.05, 'NH4+': 0.08, 'K+': 0.08, 'Mg2+': 0.05, 'Ca2+': 0.05,
123
+ 'F-': 0.08, 'Cl-': 0.05, 'NO2-': 0.05, 'NO3-': 0.01, 'PO43-': None, 'SO42-': 0.05,
124
+ }
125
+
126
+ MR = {
127
+ 'HF': 200, 'HCl': 200, 'HNO2': 200, 'HNO3': 200, 'G-SO2': 200, 'NH3': 300,
128
+ 'Na+': 300, 'NH4+': 300, 'K+': 300, 'Mg2+': 300, 'Ca2+': 300,
129
+ 'F-': 300, 'Cl-': 300, 'NO2-': 300, 'NO3-': 300, 'PO43-': None, 'SO42-': 300,
130
+ }
131
+
132
+ _cation, _anion, _main = (['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+'],
133
+ ['Cl-', 'NO2-', 'NO3-', 'SO42-'],
134
+ ['SO42-', 'NO3-', 'NH4+'])
135
+ # QC: replace values below MDL with 0.5 * MDL -> ions balance -> PM2.5 > main salt
136
+ # mass tolerance = 0.3, ions balance tolerance = 0.3
137
+
138
+ # # conc. of main salt should be present at the same time (NH4+, SO42-, NO3-)
139
+ # _df_salt = df.mask(df.sum(axis=1, min_count=1) > df.PM25).dropna(subset=_main).copy()
140
+
65
141
  # Define the ions
66
- item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'F-', 'Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']
142
+ item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'Cl-', 'NO2-', 'NO3-', 'SO42-']
67
143
 
68
144
  # Calculate the balance
69
- _df = df[item].copy()
70
- _df = _df.apply(lambda x: to_numeric(x, errors='coerce'))
145
+ _df = df[item].apply(to_numeric, errors='coerce')
146
+
147
+ # for (_key, _df_col) in _df.items():
148
+ # _df[_key] = _df_col.mask(_df_col < MDL[_key], MDL[_key] / 2)
149
+
71
150
  _df['+_mole'] = _df[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']].div([23, 18, 39, (24 / 2), (40 / 2)]).sum(axis=1,
72
151
  skipna=True)
73
152
  _df['-_mole'] = _df[['Cl-', 'NO2-', 'NO3-', 'SO42-']].div([35.5, 46, 62, (96 / 2)]).sum(axis=1, skipna=True)
@@ -79,12 +158,8 @@ class Reader(AbstractReader):
79
158
  lower_bound, upper_bound = 1 - tolerance, 1 + tolerance
80
159
 
81
160
  # 根据ratio决定是否保留原始数据
82
- valid_mask = (
83
- (_df['ratio'] <= upper_bound) &
84
- (_df['ratio'] >= lower_bound) &
85
- ~np.isnan(_df['+_mole']) &
86
- ~np.isnan(_df['-_mole'])
87
- )
161
+ valid_mask = ((_df['ratio'] <= upper_bound) & (_df['ratio'] >= lower_bound) &
162
+ ~np.isnan(_df['+_mole']) & ~np.isnan(_df['-_mole']))
88
163
 
89
164
  # 保留数据或将不符合条件的行设为NaN
90
165
  df.loc[~valid_mask, item] = np.nan
@@ -100,4 +175,6 @@ class Reader(AbstractReader):
100
175
  if retained_percentage < 70:
101
176
  self.logger.warning("Warning: The percentage of retained data is less than 70%")
102
177
 
178
+ # print(f"\tretain {retained_percentage.__round__(0)}% data within tolerance {tolerance}")
179
+
103
180
  return df
@@ -58,13 +58,23 @@ class Reader(AbstractReader):
58
58
  print(f'\n\t\t\t Length mismatch in {file} data. Returning an empty DataFrame.')
59
59
  return _df_out
60
60
 
61
- # QC data
62
61
  def _QC(self, _df):
62
+ MDL_sensitivity = {'B': .1, 'G': .1, 'R': .3}
63
+
64
+ _index = _df.index.copy()
65
+
63
66
  # remove negative value
64
- _df = _df.mask((_df <= 5).copy())
67
+ _df = _df.mask((_df <= 0) | (_df > 2000))
65
68
 
66
69
  # total scattering is larger than back scattering
67
- _df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
70
+ _df = _df.loc[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
71
+
72
+ # blue scattering is larger than green scattering, green scattering is larger than red scattering
73
+ # due to the nephelometer's Green PMT in FS is already aged, this QC may delete too many data
74
+ # _df = _df.loc[(_df['B'] > _df['G']) & (_df['G'] > _df['R'])]
75
+
76
+ # use IQR_QC
77
+ _df = self.time_aware_IQR_QC(_df)
68
78
 
69
- # QC data in 1h
70
- return _df.resample('1h').apply(self.basic_QC).resample(self.meta.get("freq")).mean()
79
+ # make sure all columns have values, otherwise set to nan
80
+ return _df.dropna(how='any').reindex(_index)
@@ -1,4 +1,5 @@
1
- from pandas import to_datetime, read_csv
1
+ import numpy as np
2
+ from pandas import to_datetime, read_csv, to_numeric
2
3
 
3
4
  from AeroViz.rawDataReader.core import AbstractReader
4
5
 
@@ -8,11 +9,18 @@ class Reader(AbstractReader):
8
9
 
9
10
  def _raw_reader(self, file):
10
11
  with open(file, 'r', encoding='utf-8', errors='ignore') as f:
11
- _df = read_csv(f, skiprows=3, nrows=25)
12
+ _df = read_csv(f, skiprows=3).apply(to_numeric, errors='coerce')
12
13
 
13
14
  _df['Start Date/Time'] = _df['Start Date/Time'].str.strip()
14
15
  _df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce')
16
+
17
+ if _df['time'].isna().any():
18
+ _df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %H:%M:%S', errors='coerce')
19
+
15
20
  _df = _df.set_index('time')
21
+
22
+ _df = _df.loc[~_df.index.duplicated() & _df.index.notna()]
23
+
16
24
  _df.index = _df.index.round('1h')
17
25
 
18
26
  _df = _df.rename(columns={
@@ -34,6 +42,8 @@ class Reader(AbstractReader):
34
42
  'OCPk2-ug C': 'OC2_raw',
35
43
  'OCPk3-ug C': 'OC3_raw',
36
44
  'OCPk4-ug C': 'OC4_raw',
45
+ 'Pyrolized C ug': 'PC_raw',
46
+
37
47
  'ECPk1-ug C': 'EC1_raw',
38
48
  'ECPk2-ug C': 'EC2_raw',
39
49
  'ECPk3-ug C': 'EC3_raw',
@@ -41,26 +51,40 @@ class Reader(AbstractReader):
41
51
  'ECPk5-ug C': 'EC5_raw',
42
52
  })
43
53
 
54
+ _df['OC1'] = _df['OC1_raw'] / _df['Sample_Volume']
55
+ _df['OC2'] = _df['OC2_raw'] / _df['Sample_Volume']
56
+ _df['OC3'] = _df['OC3_raw'] / _df['Sample_Volume']
57
+ _df['OC4'] = _df['OC4_raw'] / _df['Sample_Volume']
58
+
59
+ _df['PC'] = _df['Thermal_OC'] - _df['OC1'] - _df['OC2'] - _df['OC3'] - _df['OC4']
60
+
61
+ # _df['EC1'] = _df['EC1_raw'] / _df['Sample_Volume']
62
+ # _df['EC2'] = _df['EC2_raw'] / _df['Sample_Volume']
63
+ # _df['EC3'] = _df['EC3_raw'] / _df['Sample_Volume']
64
+ # _df['EC4'] = _df['EC4_raw'] / _df['Sample_Volume']
65
+ # _df['EC5'] = _df['EC5_raw'] / _df['Sample_Volume']
66
+
44
67
  _df = _df[['Thermal_OC', 'Optical_OC', 'Thermal_EC', 'Optical_EC', 'TC', 'Sample_Volume',
45
- 'OC1_raw', 'OC2_raw', 'OC3_raw', 'OC4_raw', 'EC1_raw', 'EC2_raw', 'EC3_raw', 'EC4_raw',
46
- 'EC5_raw']]
68
+ 'OC1', 'OC2', 'OC3', 'OC4', 'PC']]
47
69
 
48
70
  return _df.loc[~_df.index.duplicated() & _df.index.notna()]
49
71
 
50
72
  # QC data
51
73
  def _QC(self, _df):
52
- import numpy as np
74
+ MDL = {'Thermal_OC': 0.3,
75
+ 'Optical_OC': 0.3,
76
+ 'Thermal_EC': 0.015,
77
+ 'Optical_EC': 0.015
78
+ }
79
+
80
+ _index = _df.index.copy()
53
81
 
54
- _df = _df.where(_df > 0)
82
+ _df = _df.mask((_df <= -5) | (_df > 100))
55
83
 
56
- thresholds = {
57
- 'Thermal_OC': 0.3,
58
- 'Optical_OC': 0.3,
59
- 'Thermal_EC': 0.015,
60
- 'Optical_EC': 0.015
61
- }
84
+ for col, threshold in MDL.items():
85
+ _df.loc[_df[col] <= threshold, col] = np.nan
62
86
 
63
- for col, thresh in thresholds.items():
64
- _df.loc[_df[col] <= thresh, col] = np.nan
87
+ # use IQR_QC
88
+ _df = self.time_aware_IQR_QC(_df, time_window='1h')
65
89
 
66
- return _df
90
+ return _df.dropna(subset=['Thermal_OC', 'Optical_OC']).reindex(_index)
@@ -54,6 +54,7 @@ class Reader(AbstractReader):
54
54
 
55
55
  _df_smps = _df[numeric_cols]
56
56
  _df_smps.columns = _df_smps.columns.astype(float)
57
+ _df_smps = _df_smps.loc[_df_smps.index.dropna().copy()]
57
58
 
58
59
  return _df_smps.apply(to_numeric, errors='coerce')
59
60
 
@@ -1,4 +1,4 @@
1
- from pandas import to_datetime, read_csv
1
+ from pandas import to_datetime, read_csv, Timedelta, to_numeric
2
2
 
3
3
  from AeroViz.rawDataReader.core import AbstractReader
4
4
 
@@ -8,7 +8,7 @@ class Reader(AbstractReader):
8
8
 
9
9
  def _raw_reader(self, file):
10
10
  with open(file, 'r', encoding='utf-8', errors='ignore') as f:
11
- _df = read_csv(f, skiprows=3, index_col=False)
11
+ _df = read_csv(f, skiprows=3, index_col=False).apply(to_numeric, errors='coerce')
12
12
 
13
13
  _df = _df.rename(columns={'Time Stamp': 'time',
14
14
  'System status': 'status',
@@ -25,24 +25,28 @@ class Reader(AbstractReader):
25
25
 
26
26
  _df = _df.set_index(to_datetime(_tm_idx, errors='coerce', format='%d - %m - %Y %X'))
27
27
 
28
- _df = _df.where(_df['status'] < 1e-7)
28
+ _df = _df.where(_df['status'] < 1)
29
29
 
30
- _df = _df[['PM_NV', 'PM_Total', 'noise', ]]
30
+ _df = _df[['PM_NV', 'PM_Total', 'noise']]
31
31
 
32
32
  return _df.loc[~_df.index.duplicated() & _df.index.notna()]
33
33
 
34
34
  # QC data
35
35
  def _QC(self, _df):
36
-
37
- _df_idx = _df.index.copy()
36
+ _index = _df.index.copy()
38
37
 
39
38
  # remove negative value
40
- _df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df <= 0).copy())
39
+ _df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df <= 0))
41
40
 
42
41
  # QC data in 1 hr
43
- # remove data where size < 8 in 1-hr
42
+ # use time_aware_IQR_QC
43
+ _df = self.time_aware_IQR_QC(_df, time_window='1h')
44
+
45
+ # remove data where size < 50% in 1-hr
46
+ points_per_hour = Timedelta('1h') / Timedelta(self.meta['freq'])
44
47
  for _key in ['PM_Total', 'PM_NV']:
45
- _size = _df[_key].dropna().resample('1h').size().reindex(_df_idx).ffill().copy()
46
- _df[_key] = _df[_key].mask(_size < 8)
48
+ _size = _df[_key].dropna().resample('1h').size().reindex(_index).ffill()
49
+ _df[_key] = _df[_key].mask(_size < points_per_hour * 0.5)
47
50
 
48
- return _df.reindex(_df_idx)
51
+ # make sure all columns have values, otherwise set to nan
52
+ return _df.dropna(how='any').reindex(_index)
@@ -26,7 +26,7 @@ class Reader(AbstractReader):
26
26
  if valid_keys:
27
27
  return _df[valid_keys].loc[~_df.index.duplicated() & _df.index.notna()]
28
28
  else:
29
- self.logger.warning("沒有找到匹配的鍵。返回原始DataFrame並移除含NaN的行。")
29
+ self.logger.warning("沒有找到匹配的鍵。返回原始DataFrame")
30
30
  return _df.loc[~_df.index.duplicated() & _df.index.notna()]
31
31
 
32
32
  def _QC(self, _df):
@@ -0,0 +1,11 @@
1
+ from AeroViz.rawDataReader.core import AbstractReader
2
+
3
+
4
+ class Reader(AbstractReader):
5
+ nam = 'XRF'
6
+
7
+ def _raw_reader(self, file):
8
+ pass
9
+
10
+ def _QC(self, _df):
11
+ pass
@@ -11,8 +11,8 @@ __all__ = [
11
11
  'TEOM',
12
12
  'OCEC',
13
13
  'IGAC',
14
+ 'XRF',
14
15
  'VOC',
15
- 'Table',
16
- 'EPA_vertical',
16
+ 'EPA',
17
17
  'Minion'
18
18
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: AeroViz
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: Aerosol science
5
5
  Home-page: https://github.com/Alex870521/AeroViz
6
6
  Author: alex
@@ -28,7 +28,7 @@ Requires-Dist: rich ~=13.7.1
28
28
  <img alt="Static Badge" src="https://img.shields.io/badge/python-3.12-blue?logo=python">
29
29
  <img alt="Static Badge" src="https://img.shields.io/badge/License-MIT-yellow">
30
30
  <img alt="Static Badge" src="https://img.shields.io/badge/github-updating-red?logo=github">
31
- <img src="https://img.shields.io/badge/testing-green?logo=Pytest&logoColor=blue">
31
+ <img alt="Static Badge" src="https://img.shields.io/badge/testing-green?logo=Pytest&logoColor=blue">
32
32
 
33
33
  </p>
34
34
 
@@ -56,41 +56,68 @@ Requires-Dist: rich ~=13.7.1
56
56
  pip install AeroViz
57
57
  ```
58
58
 
59
- ## <div align="center">Usage</div>
59
+ For Windows users: Run `install_windows.bat`
60
60
 
61
- ```python
62
- import AeroViz
61
+ For Linux and Mac users: Run `install_unix.bat`
62
+
63
+ ## <div align="center">Quick Start</div>
63
64
 
65
+ ```python
66
+ from datetime import datetime
67
+ from pathlib import Path
64
68
  from AeroViz import RawDataReader, DataProcess, plot
69
+
70
+ # Read data from a supported instrument
71
+ data = RawDataReader('NEPH', Path('/path/to/data'), start=datetime(2024, 2, 1), end=datetime(2024, 4, 30))
72
+
73
+ # Create a visualization
74
+ plot.timeseries(data, y='scattering_coefficient')
65
75
  ```
66
76
 
67
- ## <div align="center">RawDataReader Supported Instruments</div>
77
+ For more detailed usage instructions, please refer to our [User Guide]().
78
+
79
+ ## <div align="center"> RawDataReader
80
+
81
+ RawDataReader supports a wide range of aerosol instruments, including NEPH, SMPS, AE33, and many more. It handles
82
+ various file types and time resolutions, making data processing efficient and standardized.
83
+
84
+ For a detailed list of supported instruments, file types, and data columns, please refer to
85
+ our [RawDataReader Usage Guide](docs/user_guide/RawDataReader) in the `docs` folder.
86
+
87
+ ### Key Features:
88
+
89
+ - Supports multiple aerosol instruments
90
+ - Applies customizable quality control measures
91
+ - Offers flexible data filtering and resampling options
92
+ - Enables easy data export to CSV format
68
93
 
69
- > [!NOTE]\
70
- > We are continuously working to support more instruments. Please check back for updates or contribute to our project on
71
- > GitHub.
94
+ ### Supported Instruments
72
95
 
73
96
  The AeroViz project currently supports data from the following instruments:
74
97
 
75
- - **SMPS (Scanning Mobility Particle Sizer)**
76
- - **APS (Aerodynamic Particle Sizer)**
77
- - **GRIMM (GRIMM Aerosol Technik)**
78
- - **TEOM (Continuous Ambient Particulate Monitor)**
79
- - **NEPH (Nephelometer)**
80
- - **Aurora (Nephelometer)**
81
- - **AE33 (Aethalometer Model 33)**
82
- - **AE43 (Aethalometer Model 43)**
83
- - **BC1054 (Black Carbon Monitor 1054)**
84
- - **MA350 (MicroAeth MA350)**
85
- - **OCEC (Organic Carbon Elemental Carbon Analyzer)**
86
- - **IGAC (In-situ Gas and Aerosol Compositions monitor)**
87
- - **VOC (Volatile Organic Compounds Monitor)**
88
-
89
- ## <div align="center">DataProcess Supported Method</div>
98
+ - SMPS (Scanning Mobility Particle Sizer)
99
+ - APS (Aerodynamic Particle Sizer)
100
+ - GRIMM (GRIMM Aerosol Technik)
101
+ - TEOM (Continuous Ambient Particulate Monitor)
102
+ - NEPH (Nephelometer)
103
+ - Aurora (Nephelometer)
104
+ - AE33 (Aethalometer Model 33)
105
+ - AE43 (Aethalometer Model 43)
106
+ - BC1054 (Black Carbon Monitor 1054)
107
+ - MA350 (MicroAeth MA350)
108
+ - OCEC (Organic Carbon Elemental Carbon Analyzer)
109
+ - IGAC (In-situ Gas and Aerosol Compositions monitor)
110
+ - XRF (X-ray Fluorescence Spectrometer)
111
+ - VOC (Volatile Organic Compounds Monitor)
112
+
113
+ > **Note:** We are continuously working to support more instruments. Please check back for updates or contribute to our
114
+ > project on GitHub.
115
+
116
+ ## <div align="center">DataProcess</div>
90
117
 
91
118
  The AeroViz project currently supports the following processing methods:
92
119
 
93
- - **Chemistry**
120
+ - **Chemistry**:
94
121
  - **Optical**
95
122
  - **SizeDistr**
96
123
  - **VOC**
@@ -103,15 +130,12 @@ For detailed documentation, please refer to the `docs` folder, which includes:
103
130
 
104
131
  | Documentation | Description |
105
132
  |--------------------------------------------|----------------------------|
106
- | [User Guide](docs/user_guide.md) | Basic usage instructions |
107
- | [Developer Guide](docs/developer_guide.md) | Developer guidelines |
108
- | [API Reference](docs/api_reference.md) | API documentation |
109
- | [FAQ](docs/faq.md) | Frequently Asked Questions |
133
+ | [User Guide](docs/user_guide) | Basic usage instructions |
110
134
  | [Changelog](docs/changelog.md) | List of changes |
111
135
 
112
136
  </div>
113
137
 
114
- ## <div align="center">Related Dependencies</div>
138
+ ## <div align="center">Related Source</div>
115
139
 
116
140
  * #### [PyMieScatt](https://github.com/bsumlin/PyMieScatt.git)
117
141
  * #### [py-smps](https://github.com/quant-aq/py-smps.git)