AeroViz 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AeroViz might be problematic. Click here for more details.
- AeroViz/data/240228_00.txt +101 -0
- AeroViz/plot/__init__.py +1 -0
- AeroViz/plot/hysplit/__init__.py +1 -0
- AeroViz/plot/hysplit/hysplit.py +79 -0
- AeroViz/plot/optical/PyMieScatt_update.py +567 -0
- AeroViz/plot/optical/mie_theory.py +260 -0
- AeroViz/plot/optical/optical.py +60 -59
- AeroViz/plot/templates/diurnal_pattern.py +24 -7
- AeroViz/plot/timeseries/template.py +2 -2
- AeroViz/plot/timeseries/timeseries.py +47 -6
- AeroViz/rawDataReader/__init__.py +3 -3
- AeroViz/rawDataReader/core/__init__.py +77 -14
- AeroViz/rawDataReader/script/AE33.py +11 -6
- AeroViz/rawDataReader/script/AE43.py +10 -5
- AeroViz/rawDataReader/script/Aurora.py +14 -10
- AeroViz/rawDataReader/script/BC1054.py +10 -6
- AeroViz/rawDataReader/script/EPA.py +3 -3
- AeroViz/rawDataReader/script/GRIMM.py +1 -2
- AeroViz/rawDataReader/script/MA350.py +12 -5
- AeroViz/rawDataReader/script/Minion.py +9 -4
- AeroViz/rawDataReader/script/NEPH.py +15 -5
- AeroViz/rawDataReader/script/OCEC.py +39 -15
- AeroViz/rawDataReader/script/TEOM.py +13 -9
- AeroViz/rawDataReader/script/VOC.py +1 -1
- {AeroViz-0.1.7.dist-info → AeroViz-0.1.8.dist-info}/METADATA +11 -9
- {AeroViz-0.1.7.dist-info → AeroViz-0.1.8.dist-info}/RECORD +29 -24
- {AeroViz-0.1.7.dist-info → AeroViz-0.1.8.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.7.dist-info → AeroViz-0.1.8.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.7.dist-info → AeroViz-0.1.8.dist-info}/top_level.txt +0 -0
|
@@ -105,15 +105,17 @@ class AbstractReader(ABC):
|
|
|
105
105
|
|
|
106
106
|
_acq_rate = round((sample_size / period_size) * 100, 1)
|
|
107
107
|
_yid_rate = round((qc_size / sample_size) * 100, 1)
|
|
108
|
+
_OEE_rate = round((qc_size / period_size) * 100, 1)
|
|
108
109
|
|
|
109
110
|
self.logger.info(f'{_nam}:')
|
|
110
111
|
self.logger.info(f"\tAcquisition rate: {_acq_rate}%")
|
|
111
112
|
self.logger.info(f'\tYield rate: {_yid_rate}%')
|
|
113
|
+
self.logger.info(f'\tOEE rate: {_OEE_rate}%')
|
|
112
114
|
self.logger.info(f"{'=' * 60}")
|
|
113
115
|
|
|
114
116
|
print(f'\n\t{_nam} : ')
|
|
115
|
-
print(f'\t\tacquisition rate :
|
|
116
|
-
|
|
117
|
+
print(f'\t\tacquisition rate | yield rate | OEE rate :'
|
|
118
|
+
f' \033[91m{_acq_rate}% | {_yid_rate}% -> {_OEE_rate}%\033[0m')
|
|
117
119
|
|
|
118
120
|
if self.meta['deter_key'] is not None:
|
|
119
121
|
# use qc_freq to calculate each period rate
|
|
@@ -296,22 +298,83 @@ class AbstractReader(ABC):
|
|
|
296
298
|
return df[new_order]
|
|
297
299
|
|
|
298
300
|
@staticmethod
|
|
299
|
-
def n_sigma_QC(df: DataFrame, std_range: int = 5) -> DataFrame:
|
|
300
|
-
|
|
301
|
-
|
|
301
|
+
def n_sigma_QC(df: pd.DataFrame, std_range: int = 5) -> pd.DataFrame:
|
|
302
|
+
# 確保輸入是DataFrame
|
|
303
|
+
df = df.to_frame() if isinstance(df, pd.Series) else df
|
|
302
304
|
|
|
303
|
-
|
|
305
|
+
df_ave = df.mean()
|
|
306
|
+
df_std = df.std()
|
|
307
|
+
|
|
308
|
+
lower_bound = df < (df_ave - df_std * std_range)
|
|
309
|
+
upper_bound = df > (df_ave + df_std * std_range)
|
|
310
|
+
|
|
311
|
+
return df.mask(lower_bound | upper_bound)
|
|
304
312
|
|
|
305
|
-
# "四分位數範圍法"(Inter-quartile Range Method)
|
|
306
313
|
@staticmethod
|
|
307
|
-
def IQR_QC(df: DataFrame, log_dist=False) ->
|
|
308
|
-
|
|
314
|
+
def IQR_QC(df: pd.DataFrame, log_dist=False) -> pd.DataFrame:
|
|
315
|
+
# 確保輸入是DataFrame
|
|
316
|
+
df = df.to_frame() if isinstance(df, pd.Series) else df
|
|
317
|
+
|
|
318
|
+
df_transformed = np.log10(df) if log_dist else df
|
|
319
|
+
|
|
320
|
+
_df_q1 = df_transformed.quantile(0.25)
|
|
321
|
+
_df_q3 = df_transformed.quantile(0.75)
|
|
309
322
|
|
|
310
|
-
_df_qua = df.quantile([.25, .75])
|
|
311
|
-
_df_q1, _df_q3 = _df_qua.loc[.25].copy(), _df_qua.loc[.75].copy()
|
|
312
323
|
_df_iqr = _df_q3 - _df_q1
|
|
313
324
|
|
|
314
|
-
|
|
315
|
-
|
|
325
|
+
# Calculate lower and upper bounds
|
|
326
|
+
lower_bound = df_transformed < (_df_q1 - 1.5 * _df_iqr)
|
|
327
|
+
upper_bound = df_transformed > (_df_q3 + 1.5 * _df_iqr)
|
|
328
|
+
|
|
329
|
+
# Apply the filter to the original dataframe
|
|
330
|
+
return df.mask(lower_bound | upper_bound)
|
|
331
|
+
|
|
332
|
+
@staticmethod
|
|
333
|
+
def rolling_IQR_QC(df: pd.DataFrame, window_size=24, log_dist=False) -> pd.DataFrame:
|
|
334
|
+
df = df.to_frame() if isinstance(df, pd.Series) else df
|
|
335
|
+
df_transformed = np.log10(df) if log_dist else df
|
|
336
|
+
|
|
337
|
+
def iqr_filter(x):
|
|
338
|
+
q1, q3 = x.quantile(0.25), x.quantile(0.75)
|
|
339
|
+
iqr = q3 - q1
|
|
340
|
+
lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
|
|
341
|
+
return (x >= lower) & (x <= upper)
|
|
342
|
+
|
|
343
|
+
mask = df_transformed.rolling(window=window_size, center=True, min_periods=1).apply(iqr_filter)
|
|
344
|
+
return df.where(mask, np.nan)
|
|
345
|
+
|
|
346
|
+
@staticmethod
|
|
347
|
+
def time_aware_IQR_QC(df: pd.DataFrame, time_window='1D', log_dist=False) -> pd.DataFrame:
|
|
348
|
+
df = df.to_frame() if isinstance(df, pd.Series) else df
|
|
349
|
+
df_transformed = np.log10(df) if log_dist else df
|
|
316
350
|
|
|
317
|
-
|
|
351
|
+
def iqr_filter(group):
|
|
352
|
+
q1, q3 = group.quantile(0.25), group.quantile(0.75)
|
|
353
|
+
iqr = q3 - q1
|
|
354
|
+
lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
|
|
355
|
+
return (group >= lower) & (group <= upper)
|
|
356
|
+
|
|
357
|
+
mask = df_transformed.groupby(pd.Grouper(freq=time_window)).transform(iqr_filter)
|
|
358
|
+
return df.where(mask, np.nan)
|
|
359
|
+
|
|
360
|
+
@staticmethod
|
|
361
|
+
def mad_iqr_hybrid_QC(df: pd.DataFrame, mad_threshold=3.5, log_dist=False) -> pd.DataFrame:
|
|
362
|
+
df = df.to_frame() if isinstance(df, pd.Series) else df
|
|
363
|
+
df_transformed = np.log10(df) if log_dist else df
|
|
364
|
+
|
|
365
|
+
# IQR 方法
|
|
366
|
+
q1, q3 = df_transformed.quantile(0.25), df_transformed.quantile(0.75)
|
|
367
|
+
iqr = q3 - q1
|
|
368
|
+
iqr_lower, iqr_upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
|
|
369
|
+
|
|
370
|
+
# MAD 方法
|
|
371
|
+
median = df_transformed.median()
|
|
372
|
+
mad = (df_transformed - median).abs().median()
|
|
373
|
+
mad_lower, mad_upper = median - mad_threshold * mad, median + mad_threshold * mad
|
|
374
|
+
|
|
375
|
+
# 结合两种方法
|
|
376
|
+
lower = np.maximum(iqr_lower, mad_lower)
|
|
377
|
+
upper = np.minimum(iqr_upper, mad_upper)
|
|
378
|
+
|
|
379
|
+
mask = (df_transformed >= lower) & (df_transformed <= upper)
|
|
380
|
+
return df.where(mask, np.nan)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pandas import read_table
|
|
1
|
+
from pandas import read_table, to_numeric
|
|
2
2
|
|
|
3
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
4
|
|
|
@@ -8,10 +8,10 @@ class Reader(AbstractReader):
|
|
|
8
8
|
|
|
9
9
|
def _raw_reader(self, file):
|
|
10
10
|
if file.stat().st_size / 1024 < 550:
|
|
11
|
-
|
|
11
|
+
self.logger.info(f'\t {file} may not be a whole daily data. Make sure the file is correct.')
|
|
12
12
|
|
|
13
13
|
_df = read_table(file, parse_dates={'time': [0, 1]}, index_col='time',
|
|
14
|
-
delimiter=r'\s+', skiprows=5, usecols=range(67))
|
|
14
|
+
delimiter=r'\s+', skiprows=5, usecols=range(67)).apply(to_numeric, errors='coerce')
|
|
15
15
|
_df.columns = _df.columns.str.strip(';')
|
|
16
16
|
|
|
17
17
|
# remove data without Status=0, 128 (Not much filter tape), 256 (Not much filter tape)
|
|
@@ -23,8 +23,13 @@ class Reader(AbstractReader):
|
|
|
23
23
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
24
24
|
|
|
25
25
|
def _QC(self, _df):
|
|
26
|
+
_index = _df.index.copy()
|
|
27
|
+
|
|
26
28
|
# remove negative value
|
|
27
|
-
_df = _df
|
|
29
|
+
_df = _df.mask((_df <= 0) | (_df > 20000))
|
|
30
|
+
|
|
31
|
+
# use IQR_QC
|
|
32
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
28
33
|
|
|
29
|
-
#
|
|
30
|
-
return _df.
|
|
34
|
+
# make sure all columns have values, otherwise set to nan
|
|
35
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pandas import read_csv
|
|
1
|
+
from pandas import read_csv, to_numeric
|
|
2
2
|
|
|
3
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
4
|
|
|
@@ -7,7 +7,7 @@ class Reader(AbstractReader):
|
|
|
7
7
|
nam = 'AE43'
|
|
8
8
|
|
|
9
9
|
def _raw_reader(self, file):
|
|
10
|
-
_df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time')
|
|
10
|
+
_df = read_csv(file, parse_dates={'time': ['StartTime']}, index_col='time').apply(to_numeric, errors='coerce')
|
|
11
11
|
_df_id = _df['SetupID'].iloc[-1]
|
|
12
12
|
|
|
13
13
|
# get last SetupID data
|
|
@@ -24,8 +24,13 @@ class Reader(AbstractReader):
|
|
|
24
24
|
|
|
25
25
|
# QC data
|
|
26
26
|
def _QC(self, _df):
|
|
27
|
+
_index = _df.index.copy()
|
|
28
|
+
|
|
27
29
|
# remove negative value
|
|
28
|
-
_df = _df.mask((_df
|
|
30
|
+
_df = _df.mask((_df <= 0) | (_df > 20000))
|
|
31
|
+
|
|
32
|
+
# use IQR_QC
|
|
33
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
29
34
|
|
|
30
|
-
#
|
|
31
|
-
return _df.
|
|
35
|
+
# make sure all columns have values, otherwise set to nan
|
|
36
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pandas import to_datetime, read_csv
|
|
1
|
+
from pandas import to_datetime, read_csv, to_numeric
|
|
2
2
|
|
|
3
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
4
|
|
|
@@ -8,7 +8,7 @@ class Reader(AbstractReader):
|
|
|
8
8
|
|
|
9
9
|
def _raw_reader(self, file):
|
|
10
10
|
with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
|
|
11
|
-
_df = read_csv(f, low_memory=False, index_col=0)
|
|
11
|
+
_df = read_csv(f, low_memory=False, index_col=0).apply(to_numeric, errors='coerce')
|
|
12
12
|
|
|
13
13
|
_df.index = to_datetime(_df.index, errors='coerce')
|
|
14
14
|
_df.index.name = 'time'
|
|
@@ -24,17 +24,21 @@ class Reader(AbstractReader):
|
|
|
24
24
|
'RH': 'RH'
|
|
25
25
|
})
|
|
26
26
|
|
|
27
|
-
_df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR'
|
|
27
|
+
_df = _df[['B', 'G', 'R', 'BB', 'BG', 'BR']]
|
|
28
28
|
|
|
29
29
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
30
30
|
|
|
31
|
-
# QC data
|
|
32
31
|
def _QC(self, _df):
|
|
33
|
-
|
|
34
|
-
_df = _df.mask((_df <= 0) | (_df > 2000)).copy()
|
|
32
|
+
_index = _df.index.copy()
|
|
35
33
|
|
|
36
|
-
|
|
37
|
-
_df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
|
|
34
|
+
_df = _df.mask((_df <= 0) | (_df > 2000))
|
|
38
35
|
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
_df = _df.loc[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
|
|
37
|
+
|
|
38
|
+
_df = _df.loc[(_df['B'] > _df['G']) & (_df['G'] > _df['R'])]
|
|
39
|
+
|
|
40
|
+
# use IQR_QC
|
|
41
|
+
_df = self.time_aware_IQR_QC(_df)
|
|
42
|
+
|
|
43
|
+
# make sure all columns have values, otherwise set to nan
|
|
44
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pandas import read_csv
|
|
1
|
+
from pandas import read_csv, to_numeric
|
|
2
2
|
|
|
3
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
4
|
|
|
@@ -8,7 +8,7 @@ class Reader(AbstractReader):
|
|
|
8
8
|
|
|
9
9
|
def _raw_reader(self, file):
|
|
10
10
|
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
|
-
_df = read_csv(f, parse_dates=True, index_col=0)
|
|
11
|
+
_df = read_csv(f, parse_dates=True, index_col=0).apply(to_numeric, errors='coerce')
|
|
12
12
|
|
|
13
13
|
_df.columns = _df.columns.str.replace(' ', '')
|
|
14
14
|
|
|
@@ -33,10 +33,14 @@ class Reader(AbstractReader):
|
|
|
33
33
|
|
|
34
34
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
35
35
|
|
|
36
|
-
# QC data
|
|
37
36
|
def _QC(self, _df):
|
|
37
|
+
_index = _df.index.copy()
|
|
38
|
+
|
|
38
39
|
# remove negative value
|
|
39
|
-
_df = _df
|
|
40
|
+
_df = _df.mask((_df <= 0) | (_df > 20000))
|
|
41
|
+
|
|
42
|
+
# use IQR_QC
|
|
43
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
40
44
|
|
|
41
|
-
#
|
|
42
|
-
return _df.
|
|
45
|
+
# make sure all columns have values, otherwise set to nan
|
|
46
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pandas import read_csv
|
|
1
|
+
from pandas import read_csv, to_numeric
|
|
2
2
|
|
|
3
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
4
|
|
|
@@ -14,7 +14,7 @@ class Reader(AbstractReader):
|
|
|
14
14
|
def _raw_reader(self, file):
|
|
15
15
|
# 查詢小時值(測項).csv & 查詢小時值(直式).csv (有、無輸出有效值都可以)
|
|
16
16
|
df = read_csv(file, encoding='big5', encoding_errors='ignore', index_col=0, parse_dates=True,
|
|
17
|
-
on_bad_lines='skip')
|
|
17
|
+
on_bad_lines='skip').apply(to_numeric, errors='coerce')
|
|
18
18
|
|
|
19
19
|
if len(df.groupby('測站')) > 1:
|
|
20
20
|
raise ValueError(f'Multiple stations found in the file: {df['測站'].unique()}')
|
|
@@ -36,4 +36,4 @@ class Reader(AbstractReader):
|
|
|
36
36
|
return self.reorder_dataframe_columns(df, [desired_order1])
|
|
37
37
|
|
|
38
38
|
def _QC(self, _df):
|
|
39
|
-
return _df
|
|
39
|
+
return _df
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pandas import read_csv
|
|
1
|
+
from pandas import read_csv, to_numeric
|
|
2
2
|
|
|
3
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
4
|
|
|
@@ -7,7 +7,8 @@ class Reader(AbstractReader):
|
|
|
7
7
|
nam = 'MA350'
|
|
8
8
|
|
|
9
9
|
def _raw_reader(self, file):
|
|
10
|
-
_df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis(
|
|
10
|
+
_df = read_csv(file, parse_dates=['Date / time local'], index_col='Date / time local').rename_axis(
|
|
11
|
+
"Time").apply(to_numeric, errors='coerce')
|
|
11
12
|
|
|
12
13
|
_df = _df.rename(columns={
|
|
13
14
|
'UV BCc': 'BC1',
|
|
@@ -31,8 +32,14 @@ class Reader(AbstractReader):
|
|
|
31
32
|
|
|
32
33
|
# QC data
|
|
33
34
|
def _QC(self, _df):
|
|
35
|
+
_index = _df.index.copy()
|
|
36
|
+
|
|
34
37
|
# remove negative value
|
|
35
|
-
_df = _df
|
|
38
|
+
_df = _df.mask(
|
|
39
|
+
(_df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5']] <= 0) | (_df[['BC1', 'BC2', 'BC3', 'BC4', 'BC5']] > 20000))
|
|
40
|
+
|
|
41
|
+
# use IQR_QC
|
|
42
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
36
43
|
|
|
37
|
-
#
|
|
38
|
-
return _df.
|
|
44
|
+
# make sure all columns have values, otherwise set to nan
|
|
45
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -60,7 +60,7 @@ class Reader(AbstractReader):
|
|
|
60
60
|
|
|
61
61
|
def _QC(self, _df):
|
|
62
62
|
# remove negative value
|
|
63
|
-
_df = _df.mask((_df < 0)
|
|
63
|
+
_df = _df.mask((_df < 0))
|
|
64
64
|
|
|
65
65
|
# XRF QAQC
|
|
66
66
|
_df = self.XRF_QAQC(_df)
|
|
@@ -69,7 +69,9 @@ class Reader(AbstractReader):
|
|
|
69
69
|
_df = self.IGAC_QAQC(_df)
|
|
70
70
|
|
|
71
71
|
# QC data in 6h
|
|
72
|
-
|
|
72
|
+
_df = self.time_aware_IQR_QC(_df)
|
|
73
|
+
|
|
74
|
+
return _df
|
|
73
75
|
|
|
74
76
|
# base on Xact 625i Minimum Decision Limit (MDL) for XRF in ng/m3, 60 min sample time
|
|
75
77
|
def XRF_QAQC(self, df, MDL_replace: Literal['nan', '0.5 * MDL'] = 'nan'):
|
|
@@ -87,7 +89,10 @@ class Reader(AbstractReader):
|
|
|
87
89
|
'Au': 0.1, 'Hg': 0.12, 'Tl': 0.12, 'Pb': 0.13,
|
|
88
90
|
'Bi': 0.13
|
|
89
91
|
}
|
|
90
|
-
|
|
92
|
+
|
|
93
|
+
# Br Li internal standard
|
|
94
|
+
|
|
95
|
+
# 將小於 MDL 值的數據替換為 nan or 1/2 MDL
|
|
91
96
|
for element, threshold in MDL.items():
|
|
92
97
|
if element in df.columns:
|
|
93
98
|
rep = np.nan if MDL_replace == 'nan' else 0.5 * threshold
|
|
@@ -137,7 +142,7 @@ class Reader(AbstractReader):
|
|
|
137
142
|
item = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'Cl-', 'NO2-', 'NO3-', 'SO42-']
|
|
138
143
|
|
|
139
144
|
# Calculate the balance
|
|
140
|
-
_df = df[item].apply(
|
|
145
|
+
_df = df[item].apply(to_numeric, errors='coerce')
|
|
141
146
|
|
|
142
147
|
# for (_key, _df_col) in _df.items():
|
|
143
148
|
# _df[_key] = _df_col.mask(_df_col < MDL[_key], MDL[_key] / 2)
|
|
@@ -58,13 +58,23 @@ class Reader(AbstractReader):
|
|
|
58
58
|
print(f'\n\t\t\t Length mismatch in {file} data. Returning an empty DataFrame.')
|
|
59
59
|
return _df_out
|
|
60
60
|
|
|
61
|
-
# QC data
|
|
62
61
|
def _QC(self, _df):
|
|
62
|
+
MDL_sensitivity = {'B': .1, 'G': .1, 'R': .3}
|
|
63
|
+
|
|
64
|
+
_index = _df.index.copy()
|
|
65
|
+
|
|
63
66
|
# remove negative value
|
|
64
|
-
_df = _df.mask((_df <=
|
|
67
|
+
_df = _df.mask((_df <= 0) | (_df > 2000))
|
|
65
68
|
|
|
66
69
|
# total scattering is larger than back scattering
|
|
67
|
-
_df = _df[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
|
|
70
|
+
_df = _df.loc[(_df['BB'] < _df['B']) & (_df['BG'] < _df['G']) & (_df['BR'] < _df['R'])]
|
|
71
|
+
|
|
72
|
+
# blue scattering is larger than green scattering, green scattering is larger than red scattering
|
|
73
|
+
# due to the nephelometer's Green PMT in FS is already aged, this QC may delete too many data
|
|
74
|
+
# _df = _df.loc[(_df['B'] > _df['G']) & (_df['G'] > _df['R'])]
|
|
75
|
+
|
|
76
|
+
# use IQR_QC
|
|
77
|
+
_df = self.time_aware_IQR_QC(_df)
|
|
68
78
|
|
|
69
|
-
#
|
|
70
|
-
return _df.
|
|
79
|
+
# make sure all columns have values, otherwise set to nan
|
|
80
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
import numpy as np
|
|
2
|
+
from pandas import to_datetime, read_csv, to_numeric
|
|
2
3
|
|
|
3
4
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
5
|
|
|
@@ -8,11 +9,18 @@ class Reader(AbstractReader):
|
|
|
8
9
|
|
|
9
10
|
def _raw_reader(self, file):
|
|
10
11
|
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
|
-
_df = read_csv(f, skiprows=3,
|
|
12
|
+
_df = read_csv(f, skiprows=3).apply(to_numeric, errors='coerce')
|
|
12
13
|
|
|
13
14
|
_df['Start Date/Time'] = _df['Start Date/Time'].str.strip()
|
|
14
15
|
_df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce')
|
|
16
|
+
|
|
17
|
+
if _df['time'].isna().any():
|
|
18
|
+
_df['time'] = to_datetime(_df['Start Date/Time'], format='%m/%d/%Y %H:%M:%S', errors='coerce')
|
|
19
|
+
|
|
15
20
|
_df = _df.set_index('time')
|
|
21
|
+
|
|
22
|
+
_df = _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
23
|
+
|
|
16
24
|
_df.index = _df.index.round('1h')
|
|
17
25
|
|
|
18
26
|
_df = _df.rename(columns={
|
|
@@ -34,6 +42,8 @@ class Reader(AbstractReader):
|
|
|
34
42
|
'OCPk2-ug C': 'OC2_raw',
|
|
35
43
|
'OCPk3-ug C': 'OC3_raw',
|
|
36
44
|
'OCPk4-ug C': 'OC4_raw',
|
|
45
|
+
'Pyrolized C ug': 'PC_raw',
|
|
46
|
+
|
|
37
47
|
'ECPk1-ug C': 'EC1_raw',
|
|
38
48
|
'ECPk2-ug C': 'EC2_raw',
|
|
39
49
|
'ECPk3-ug C': 'EC3_raw',
|
|
@@ -41,26 +51,40 @@ class Reader(AbstractReader):
|
|
|
41
51
|
'ECPk5-ug C': 'EC5_raw',
|
|
42
52
|
})
|
|
43
53
|
|
|
54
|
+
_df['OC1'] = _df['OC1_raw'] / _df['Sample_Volume']
|
|
55
|
+
_df['OC2'] = _df['OC2_raw'] / _df['Sample_Volume']
|
|
56
|
+
_df['OC3'] = _df['OC3_raw'] / _df['Sample_Volume']
|
|
57
|
+
_df['OC4'] = _df['OC4_raw'] / _df['Sample_Volume']
|
|
58
|
+
|
|
59
|
+
_df['PC'] = _df['Thermal_OC'] - _df['OC1'] - _df['OC2'] - _df['OC3'] - _df['OC4']
|
|
60
|
+
|
|
61
|
+
# _df['EC1'] = _df['EC1_raw'] / _df['Sample_Volume']
|
|
62
|
+
# _df['EC2'] = _df['EC2_raw'] / _df['Sample_Volume']
|
|
63
|
+
# _df['EC3'] = _df['EC3_raw'] / _df['Sample_Volume']
|
|
64
|
+
# _df['EC4'] = _df['EC4_raw'] / _df['Sample_Volume']
|
|
65
|
+
# _df['EC5'] = _df['EC5_raw'] / _df['Sample_Volume']
|
|
66
|
+
|
|
44
67
|
_df = _df[['Thermal_OC', 'Optical_OC', 'Thermal_EC', 'Optical_EC', 'TC', 'Sample_Volume',
|
|
45
|
-
'
|
|
46
|
-
'EC5_raw']]
|
|
68
|
+
'OC1', 'OC2', 'OC3', 'OC4', 'PC']]
|
|
47
69
|
|
|
48
70
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
49
71
|
|
|
50
72
|
# QC data
|
|
51
73
|
def _QC(self, _df):
|
|
52
|
-
|
|
74
|
+
MDL = {'Thermal_OC': 0.3,
|
|
75
|
+
'Optical_OC': 0.3,
|
|
76
|
+
'Thermal_EC': 0.015,
|
|
77
|
+
'Optical_EC': 0.015
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
_index = _df.index.copy()
|
|
53
81
|
|
|
54
|
-
_df = _df.mask((_df <=
|
|
82
|
+
_df = _df.mask((_df <= -5) | (_df > 100))
|
|
55
83
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
'Optical_OC': 0.3,
|
|
59
|
-
'Thermal_EC': 0.015,
|
|
60
|
-
'Optical_EC': 0.015
|
|
61
|
-
}
|
|
84
|
+
for col, threshold in MDL.items():
|
|
85
|
+
_df.loc[_df[col] <= threshold, col] = np.nan
|
|
62
86
|
|
|
63
|
-
|
|
64
|
-
|
|
87
|
+
# use IQR_QC
|
|
88
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
65
89
|
|
|
66
|
-
return _df
|
|
90
|
+
return _df.dropna(subset=['Thermal_OC', 'Optical_OC']).reindex(_index)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pandas import to_datetime, read_csv
|
|
1
|
+
from pandas import to_datetime, read_csv, Timedelta, to_numeric
|
|
2
2
|
|
|
3
3
|
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
4
|
|
|
@@ -8,7 +8,7 @@ class Reader(AbstractReader):
|
|
|
8
8
|
|
|
9
9
|
def _raw_reader(self, file):
|
|
10
10
|
with open(file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
11
|
-
_df = read_csv(f, skiprows=3, index_col=False)
|
|
11
|
+
_df = read_csv(f, skiprows=3, index_col=False).apply(to_numeric, errors='coerce')
|
|
12
12
|
|
|
13
13
|
_df = _df.rename(columns={'Time Stamp': 'time',
|
|
14
14
|
'System status': 'status',
|
|
@@ -33,16 +33,20 @@ class Reader(AbstractReader):
|
|
|
33
33
|
|
|
34
34
|
# QC data
|
|
35
35
|
def _QC(self, _df):
|
|
36
|
-
|
|
37
|
-
_df_idx = _df.index.copy()
|
|
36
|
+
_index = _df.index.copy()
|
|
38
37
|
|
|
39
38
|
# remove negative value
|
|
40
|
-
_df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df <= 0)
|
|
39
|
+
_df = _df.where(_df.noise < 0.01)[['PM_NV', 'PM_Total']].mask((_df <= 0))
|
|
41
40
|
|
|
42
41
|
# QC data in 1 hr
|
|
43
|
-
#
|
|
42
|
+
# use time_aware_IQR_QC
|
|
43
|
+
_df = self.time_aware_IQR_QC(_df, time_window='1h')
|
|
44
|
+
|
|
45
|
+
# remove data where size < 50% in 1-hr
|
|
46
|
+
points_per_hour = Timedelta('1h') / Timedelta(self.meta['freq'])
|
|
44
47
|
for _key in ['PM_Total', 'PM_NV']:
|
|
45
|
-
_size = _df[_key].dropna().resample('1h').size().reindex(
|
|
46
|
-
_df[_key] = _df[_key].mask(_size <
|
|
48
|
+
_size = _df[_key].dropna().resample('1h').size().reindex(_index).ffill()
|
|
49
|
+
_df[_key] = _df[_key].mask(_size < points_per_hour * 0.5)
|
|
47
50
|
|
|
48
|
-
|
|
51
|
+
# make sure all columns have values, otherwise set to nan
|
|
52
|
+
return _df.dropna(how='any').reindex(_index)
|
|
@@ -26,7 +26,7 @@ class Reader(AbstractReader):
|
|
|
26
26
|
if valid_keys:
|
|
27
27
|
return _df[valid_keys].loc[~_df.index.duplicated() & _df.index.notna()]
|
|
28
28
|
else:
|
|
29
|
-
self.logger.warning("沒有找到匹配的鍵。返回原始DataFrame
|
|
29
|
+
self.logger.warning("沒有找到匹配的鍵。返回原始DataFrame。")
|
|
30
30
|
return _df.loc[~_df.index.duplicated() & _df.index.notna()]
|
|
31
31
|
|
|
32
32
|
def _QC(self, _df):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: AeroViz
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: Aerosol science
|
|
5
5
|
Home-page: https://github.com/Alex870521/AeroViz
|
|
6
6
|
Author: alex
|
|
@@ -56,14 +56,19 @@ Requires-Dist: rich ~=13.7.1
|
|
|
56
56
|
pip install AeroViz
|
|
57
57
|
```
|
|
58
58
|
|
|
59
|
+
For Windows users: Run `install_windows.bat`
|
|
60
|
+
|
|
61
|
+
For Linux and Mac users: Run `install_unix.bat`
|
|
62
|
+
|
|
59
63
|
## <div align="center">Quick Start</div>
|
|
60
64
|
|
|
61
65
|
```python
|
|
62
|
-
import
|
|
66
|
+
from datetime import datetime
|
|
67
|
+
from pathlib import Path
|
|
63
68
|
from AeroViz import RawDataReader, DataProcess, plot
|
|
64
69
|
|
|
65
70
|
# Read data from a supported instrument
|
|
66
|
-
data = RawDataReader('NEPH', '/path/to/data', start=
|
|
71
|
+
data = RawDataReader('NEPH', Path('/path/to/data'), start=datetime(2024, 2, 1), end=datetime(2024, 4, 30))
|
|
67
72
|
|
|
68
73
|
# Create a visualization
|
|
69
74
|
plot.timeseries(data, y='scattering_coefficient')
|
|
@@ -71,13 +76,13 @@ plot.timeseries(data, y='scattering_coefficient')
|
|
|
71
76
|
|
|
72
77
|
For more detailed usage instructions, please refer to our [User Guide](docs/user_guide).
|
|
73
78
|
|
|
74
|
-
## RawDataReader
|
|
79
|
+
## <div align="center">RawDataReader</div>
|
|
75
80
|
|
|
76
81
|
RawDataReader supports a wide range of aerosol instruments, including NEPH, SMPS, AE33, and many more. It handles
|
|
77
82
|
various file types and time resolutions, making data processing efficient and standardized.
|
|
78
83
|
|
|
79
84
|
For a detailed list of supported instruments, file types, and data columns, please refer to
|
|
80
|
-
our [RawDataReader Usage Guide](docs/
|
|
85
|
+
our [RawDataReader Usage Guide](docs/user_guide/RawDataReader) in the `docs` folder.
|
|
81
86
|
|
|
82
87
|
### Key Features:
|
|
83
88
|
|
|
@@ -108,7 +113,7 @@ The AeroViz project currently supports data from the following instruments:
|
|
|
108
113
|
> **Note:** We are continuously working to support more instruments. Please check back for updates or contribute to our
|
|
109
114
|
> project on GitHub.
|
|
110
115
|
|
|
111
|
-
## <div align="center">DataProcess
|
|
116
|
+
## <div align="center">DataProcess</div>
|
|
112
117
|
|
|
113
118
|
The AeroViz project currently supports the following processing methods:
|
|
114
119
|
|
|
@@ -126,9 +131,6 @@ For detailed documentation, please refer to the `docs` folder, which includes:
|
|
|
126
131
|
| Documentation | Description |
|
|
127
132
|
|--------------------------------------------|----------------------------|
|
|
128
133
|
| [User Guide](docs/user_guide) | Basic usage instructions |
|
|
129
|
-
| [Developer Guide](docs/developer_guide.md) | Developer guidelines |
|
|
130
|
-
| [API Reference](docs/api_reference.md) | API documentation |
|
|
131
|
-
| [FAQ](docs/faq.md) | Frequently Asked Questions |
|
|
132
134
|
| [Changelog](docs/changelog.md) | List of changes |
|
|
133
135
|
|
|
134
136
|
</div>
|