AeroViz 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AeroViz/__init__.py +13 -0
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/data/DEFAULT_DATA.csv +1417 -0
- AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
- AeroViz/data/hysplit_example_data.txt +101 -0
- AeroViz/dataProcess/Chemistry/__init__.py +149 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
- AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
- AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
- AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
- AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
- AeroViz/dataProcess/Optical/__init__.py +281 -0
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/_derived.py +518 -0
- AeroViz/dataProcess/Optical/_extinction.py +123 -0
- AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
- AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
- AeroViz/dataProcess/Optical/coefficient.py +72 -0
- AeroViz/dataProcess/Optical/fRH.pkl +0 -0
- AeroViz/dataProcess/Optical/mie_theory.py +260 -0
- AeroViz/dataProcess/README.md +271 -0
- AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
- AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
- AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
- AeroViz/dataProcess/SizeDistr/prop.py +62 -0
- AeroViz/dataProcess/VOC/__init__.py +14 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/VOC/_potential_par.py +108 -0
- AeroViz/dataProcess/VOC/support_voc.json +446 -0
- AeroViz/dataProcess/__init__.py +66 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__init__.py +272 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/mcp_server.py +352 -0
- AeroViz/plot/__init__.py +13 -0
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/bar.py +126 -0
- AeroViz/plot/box.py +69 -0
- AeroViz/plot/distribution/__init__.py +1 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/distribution.py +576 -0
- AeroViz/plot/meteorology/CBPF.py +295 -0
- AeroViz/plot/meteorology/__init__.py +3 -0
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/hysplit.py +93 -0
- AeroViz/plot/meteorology/wind_rose.py +77 -0
- AeroViz/plot/optical/__init__.py +1 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/optical/optical.py +388 -0
- AeroViz/plot/pie.py +210 -0
- AeroViz/plot/radar.py +184 -0
- AeroViz/plot/regression.py +200 -0
- AeroViz/plot/scatter.py +174 -0
- AeroViz/plot/templates/__init__.py +6 -0
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/contour.py +47 -0
- AeroViz/plot/templates/corr_matrix.py +267 -0
- AeroViz/plot/templates/diurnal_pattern.py +61 -0
- AeroViz/plot/templates/koschmieder.py +95 -0
- AeroViz/plot/templates/metal_heatmap.py +164 -0
- AeroViz/plot/timeseries/__init__.py +2 -0
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +446 -0
- AeroViz/plot/utils/__init__.py +4 -0
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/_color.py +71 -0
- AeroViz/plot/utils/_unit.py +55 -0
- AeroViz/plot/utils/fRH.json +390 -0
- AeroViz/plot/utils/plt_utils.py +92 -0
- AeroViz/plot/utils/sklearn_utils.py +49 -0
- AeroViz/plot/utils/units.json +89 -0
- AeroViz/plot/violin.py +80 -0
- AeroViz/rawDataReader/FLOW.md +138 -0
- AeroViz/rawDataReader/__init__.py +220 -0
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__init__.py +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/supported_instruments.py +135 -0
- AeroViz/rawDataReader/core/__init__.py +658 -0
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +171 -0
- AeroViz/rawDataReader/core/pre_process.py +308 -0
- AeroViz/rawDataReader/core/qc.py +961 -0
- AeroViz/rawDataReader/core/report.py +579 -0
- AeroViz/rawDataReader/script/AE33.py +173 -0
- AeroViz/rawDataReader/script/AE43.py +151 -0
- AeroViz/rawDataReader/script/APS.py +339 -0
- AeroViz/rawDataReader/script/Aurora.py +191 -0
- AeroViz/rawDataReader/script/BAM1020.py +90 -0
- AeroViz/rawDataReader/script/BC1054.py +161 -0
- AeroViz/rawDataReader/script/EPA.py +79 -0
- AeroViz/rawDataReader/script/GRIMM.py +68 -0
- AeroViz/rawDataReader/script/IGAC.py +140 -0
- AeroViz/rawDataReader/script/MA350.py +179 -0
- AeroViz/rawDataReader/script/Minion.py +218 -0
- AeroViz/rawDataReader/script/NEPH.py +199 -0
- AeroViz/rawDataReader/script/OCEC.py +173 -0
- AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
- AeroViz/rawDataReader/script/SMPS.py +389 -0
- AeroViz/rawDataReader/script/TEOM.py +181 -0
- AeroViz/rawDataReader/script/VOC.py +106 -0
- AeroViz/rawDataReader/script/Xact.py +244 -0
- AeroViz/rawDataReader/script/__init__.py +28 -0
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__init__.py +2 -0
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- AeroViz/tools/database.py +95 -0
- AeroViz/tools/dataclassifier.py +117 -0
- AeroViz/tools/dataprinter.py +58 -0
- aeroviz-0.1.21.dist-info/METADATA +294 -0
- aeroviz-0.1.21.dist-info/RECORD +180 -0
- aeroviz-0.1.21.dist-info/WHEEL +5 -0
- aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
- aeroviz-0.1.21.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Reader for IGAC (In-situ Gas and Aerosol Composition) monitor data files
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
from pandas import read_csv, to_numeric, Series
|
|
5
|
+
|
|
6
|
+
from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Reader(AbstractReader):
    """IGAC (In-situ Gas and Aerosol Composition) Monitor Data Reader

    This class handles the reading and parsing of IGAC monitor data files,
    which provide real-time measurements of water-soluble inorganic ions in
    particulate matter.

    See full documentation at docs/source/instruments/IGAC.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'IGAC'

    # =========================================================================
    # Column Definitions
    # =========================================================================
    CATION_COLUMNS = ['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+']
    ANION_COLUMNS = ['Cl-', 'NO2-', 'NO3-', 'PO43-', 'SO42-']
    MAIN_IONS = ['SO42-', 'NO3-', 'NH4+']

    # =========================================================================
    # Detection Limits (MDL) in ug/m3
    # =========================================================================
    # Only columns listed here are carried into QC; 'PO43-' has no entry and is
    # therefore never part of the QC'd frame even though it is listed as an anion.
    MDL = {
        'Na+': 0.06,
        'NH4+': 0.05,
        'K+': 0.05,
        'Mg2+': 0.12,
        'Ca2+': 0.07,
        'Cl-': 0.07,
        'NO2-': 0.05,
        'NO3-': 0.11,
        'SO42-': 0.08,
    }

    def _raw_reader(self, file):
        """
        Read and parse raw IGAC monitor data files.

        Parameters
        ----------
        file : pathlib.Path
            Path to the IGAC data file. The object must provide ``.open()``;
            a plain ``str`` is not accepted by this implementation.

        Returns
        -------
        pandas.DataFrame
            Frame indexed by the first CSV column (parsed as dates, named
            ``'time'``), with stripped column names and every column coerced
            to numeric. Rows with a duplicated or missing index are dropped.
        """
        # 'utf-8-sig' strips a leading BOM; undecodable bytes are ignored.
        with file.open('r', encoding='utf-8-sig', errors='ignore') as f:
            _df = read_csv(f, parse_dates=True, index_col=0, na_values='-')

        _df.columns = _df.columns.str.strip(' ')
        _df.index.name = 'time'

        # Anything that is not a number becomes NaN.
        _df = _df.apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

    def _below_mdl(self, df, ion_columns):
        """Return a boolean Series marking rows where any ion is below its MDL.

        The comparison is vectorized over the whole frame. NaN values compare
        as ``False`` and therefore never count as "below MDL".
        """
        cols = [col for col in ion_columns if col in df.columns]
        if not cols:
            return Series(False, index=df.index)

        limits = Series({col: self.MDL.get(col, 0) for col in cols})
        # Align the per-ion limits on the column labels, then reduce per row.
        return df[cols].lt(limits, axis='columns').any(axis=1)

    def _QC(self, _df):
        """
        Perform quality control on IGAC ion composition data.

        QC Rules Applied
        ----------------
        1. Mass Closure  : Total ion mass > PM2.5 mass
        2. Missing Main  : Main ions (NH4+, SO42-, NO3-) not present
        3. Below MDL     : Ion concentration below detection limit
        4. Ion Balance   : Cation/Anion ratio outside valid range

        Parameters
        ----------
        _df : pandas.DataFrame
            Output of ``_raw_reader``. Only the columns named in ``MDL`` are
            kept; ``'PM2.5'`` is read for the mass-closure check when present.

        Returns
        -------
        pandas.DataFrame
            Whatever ``QCFlagBuilder.apply`` returns for the ion columns,
            reindexed to the index of the input frame.
        """
        _index = _df.index.copy()

        # Get ion columns that exist in the data
        ion_columns = [col for col in self.MDL if col in _df.columns]
        df_qc = _df[ion_columns].copy()

        # Calculate total ion mass for mass closure check.
        # Without a PM2.5 column the rule can never fire (ions are compared to +inf).
        total_ions = df_qc.sum(axis=1, min_count=1)
        pm25 = _df['PM2.5'] if 'PM2.5' in _df.columns else Series(float('inf'), index=_df.index)

        # Calculate cation/anion ratio for ion balance check.
        # The ratio is taken on the summed column values as stored (no
        # conversion to equivalents is done here).
        cation_cols = [c for c in self.CATION_COLUMNS if c in df_qc.columns]
        anion_cols = [c for c in self.ANION_COLUMNS if c in df_qc.columns]
        cation_sum = df_qc[cation_cols].sum(axis=1, min_count=1) if cation_cols else Series(0, index=df_qc.index)
        anion_sum = df_qc[anion_cols].sum(axis=1, min_count=1) if anion_cols else Series(1, index=df_qc.index)
        # A zero anion sum becomes NaN so the ratio is NaN rather than +/-inf.
        ca_ratio = cation_sum / anion_sum.replace(0, float('nan'))

        # Calculate IQR bounds for ion balance (Tukey fences at 1.5 * IQR)
        q1, q3 = ca_ratio.quantile(0.25), ca_ratio.quantile(0.75)
        iqr = q3 - q1
        ca_lower, ca_upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr

        # Build QC rules declaratively
        qc = QCFlagBuilder()
        qc.add_rules([
            QCRule(
                name='Mass Closure',
                condition=lambda df: total_ions > pm25,
                description='Total ion mass exceeds PM2.5 mass'
            ),
            QCRule(
                name='Missing Main',
                # Only evaluated when all three main-ion columns exist.
                condition=lambda df: df[self.MAIN_IONS].isna().any(axis=1) if all(
                    c in df.columns for c in self.MAIN_IONS) else Series(False, index=df.index),
                description='Missing main ions (NH4+, SO42-, NO3-)'
            ),
            QCRule(
                name='Below MDL',
                condition=lambda df: self._below_mdl(df, ion_columns),
                description='Ion concentration below detection limit'
            ),
            QCRule(
                name='Ion Balance',
                # A NaN ratio (missing or zero anion sum) is also flagged.
                condition=lambda df: (ca_ratio < ca_lower) | (ca_ratio > ca_upper) | ca_ratio.isna(),
                description='Cation/Anion ratio outside valid range'
            ),
        ])

        # Apply all QC rules and get flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Log QC summary
        summary = qc.get_summary(df_qc)
        self.logger.info(f"{self.nam} QC Summary:")
        for _, row in summary.iterrows():
            self.logger.info(f"  {row['Rule']}: {row['Count']} ({row['Percentage']})")

        return df_qc.reindex(_index)
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
from pandas import read_csv, to_numeric, concat, Series
|
|
2
|
+
|
|
3
|
+
from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
|
|
4
|
+
from AeroViz.rawDataReader.core.pre_process import _absCoe
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Reader(AbstractReader):
    """MA350 Aethalometer Data Reader

    Reads MA350 Aethalometer exports (black carbon at five wavelengths plus
    source-apportionment columns), flags suspect rows, and derives absorption
    coefficients, AAE and eBC.

    See full documentation at docs/source/instruments/MA350.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'MA350'

    # ---- Column groups ------------------------------------------------------
    BC_COLUMNS = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5']
    ABS_COLUMNS = ['abs_375', 'abs_470', 'abs_528', 'abs_625', 'abs_880']
    CAL_COLUMNS = ['abs_550', 'AAE', 'eBC']

    # ---- QC thresholds ------------------------------------------------------
    MIN_BC = 0  # Minimum BC concentration (ng/m³)
    MAX_BC = 20000  # Maximum BC concentration (ng/m³)
    MIN_AAE = 0.7  # Minimum valid AAE (absolute value)
    MAX_AAE = 2.0  # Maximum valid AAE (absolute value)

    # ---- Status error codes (bitwise flags) ---------------------------------
    ERROR_STATES = [
        1,  # Power Failure
        2,  # Start up
        4,  # Tape advance
        16,  # Optical saturation
        32,  # Sample timing error
        128,  # Flow unstable
        256,  # Pump drive limit
        2048,  # System busy
        8192,  # Tape jam
        16384,  # Tape at end
        32768,  # Tape not ready
        65536,  # Tape transport not ready
        262144,  # Invalid date/time
        524288,  # Tape error
    ]

    def _raw_reader(self, file):
        """
        Load one MA350 CSV export into a numeric, time-indexed frame.

        Parameters
        ----------
        file : Path or str
            Path to the MA350 data file.

        Returns
        -------
        pandas.DataFrame
            Frame indexed by the parsed ``'Date / time local'`` column (axis
            renamed to ``Time``) holding the renamed BC, source-apportionment
            and ``Status`` columns, all coerced to numeric. Rows with a
            duplicated or missing timestamp are dropped.
        """
        time_column = 'Date / time local'
        raw = read_csv(file, parse_dates=[time_column], index_col=time_column)
        raw = raw.rename_axis("Time")

        # Instrument header -> internal column name.
        column_map = {
            'UV BCc': 'BC1',
            'Blue BCc': 'BC2',
            'Green BCc': 'BC3',
            'Red BCc': 'BC4',
            'IR BCc': 'BC5',
            'Biomass BCc  (ng/m^3)': 'BB mass',
            'Fossil fuel BCc  (ng/m^3)': 'FF mass',
            'Delta-C  (ng/m^3)': 'Delta-C',
            'AAE': 'AAE_ref',
            'BB (%)': 'BB',
        }
        renamed = raw.rename(columns=column_map)

        wanted = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5',
                  'BB mass', 'FF mass', 'Delta-C', 'AAE_ref', 'BB', 'Status']
        numeric = renamed[wanted].apply(to_numeric, errors='coerce')

        not_duplicated = ~numeric.index.duplicated()
        has_timestamp = numeric.index.notna()
        return numeric.loc[not_duplicated & has_timestamp]

    def _QC(self, _df):
        """
        Flag raw MA350 rows that fail the instrument-level checks.

        QC Rules Applied (raw data only)
        ---------------------------------
        1. Status Error  : Invalid instrument status codes
        2. Invalid BC    : BC concentration outside 0-20000 ng/m³
        3. Insufficient  : Less than 50% hourly data completeness

        Note: AAE validation is done in _process() after calculation.

        The rule summary is stored on ``self._qc_summary`` so that
        ``_process`` can log it together with the AAE check.
        """
        original_index = _df.index.copy()

        def status_error(df):
            return self.QC_control().filter_error_status(df, self.ERROR_STATES)

        def invalid_bc(df):
            # Values at or below MIN_BC, or above MAX_BC, in any BC channel.
            too_low = df[self.BC_COLUMNS] <= self.MIN_BC
            too_high = df[self.BC_COLUMNS] > self.MAX_BC
            return (too_low | too_high).any(axis=1)

        def insufficient(df):
            return self.QC_control().hourly_completeness_QC(
                df[self.BC_COLUMNS], freq=self.meta['freq']
            )

        rules = [
            QCRule(
                name='Status Error',
                condition=status_error,
                description='Invalid instrument status code detected'
            ),
            QCRule(
                name='Invalid BC',
                condition=invalid_bc,
                description=f'BC concentration outside valid range {self.MIN_BC}-{self.MAX_BC} ng/m³'
            ),
            QCRule(
                name='Insufficient',
                condition=insufficient,
                description='Less than 50% hourly data completeness'
            ),
        ]

        builder = QCFlagBuilder()
        builder.add_rules(rules)

        # Work on a copy so the caller's frame is left untouched.
        flagged = builder.apply(_df.copy())

        # Kept for the combined log output in _process().
        self._qc_summary = builder.get_summary(flagged)

        return flagged.reindex(original_index)

    def _process(self, _df):
        """
        Derive absorption parameters and validate the computed AAE.

        Processing Steps
        ----------------
        1. Calculate absorption coefficients at each wavelength
        2. Calculate AAE (Absorption Ångström Exponent)
        3. Calculate eBC (equivalent Black Carbon)
        4. Validate AAE range and update QC_Flag

        Returns
        -------
        pandas.DataFrame
            BC, absorption and calculated columns followed by ``QC_Flag``,
            reindexed to the index of the input frame.
        """
        original_index = _df.index.copy()

        # Steps 1-3 are delegated to the shared helper.
        derived = _absCoe(_df[self.BC_COLUMNS], instru=self.nam, specified_band=[550])

        # Carry Status and QC_Flag alongside the derived columns.
        df_out = concat([derived, _df[['Status', 'QC_Flag']]], axis=1)

        # The thresholds are absolute values, so the sign of AAE is flipped
        # before it is compared against them.
        flipped_aae = -df_out['AAE']
        invalid_aae = (flipped_aae < self.MIN_AAE) | (flipped_aae > self.MAX_AAE)
        df_out = self.update_qc_flag(df_out, invalid_aae, 'Invalid AAE')

        stored_summary = getattr(self, '_qc_summary', None)
        if stored_summary is not None:
            import pandas as pd

            row_count = len(df_out)
            n_invalid = invalid_aae.sum()
            aae_row = pd.DataFrame([{
                'Rule': 'Invalid AAE',
                'Count': n_invalid,
                'Percentage': f'{n_invalid / row_count * 100:.1f}%',
                'Description': f'AAE outside valid range {self.MIN_AAE}-{self.MAX_AAE}'
            }])

            # The last summary row is kept last; the AAE row goes just before it.
            head, tail = stored_summary.iloc[:-1], stored_summary.iloc[-1:]
            summary = pd.concat([head, aae_row, tail], ignore_index=True)

            self.logger.info(f"{self.nam} QC Summary:")
            for _, entry in summary.iterrows():
                self.logger.info(f"  {entry['Rule']}: {entry['Count']} ({entry['Percentage']})")

        # Fixed output column order.
        ordered_columns = self.BC_COLUMNS + self.ABS_COLUMNS + self.CAL_COLUMNS + ['QC_Flag']
        return df_out[ordered_columns].reindex(original_index)
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas
|
|
5
|
+
from pandas import DataFrame, read_excel
|
|
6
|
+
|
|
7
|
+
from AeroViz.rawDataReader.config.supported_instruments import meta
|
|
8
|
+
from AeroViz.rawDataReader.core import AbstractReader
|
|
9
|
+
|
|
10
|
+
# Opt in to pandas' "no silent downcasting" future behaviour. This is a
# process-wide option that takes effect as soon as this module is imported.
pandas.set_option("future.no_silent_downcasting", True)
|
|
11
|
+
|
|
12
|
+
# First column group handed to reorder_dataframe_columns in Reader._raw_reader.
# Reader._QC also runs these columns (except 'WD') through the rolling IQR filter.
desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC',
|
|
13
|
+
'CH4', 'PM10', 'PM2.5', 'WS', 'WD', 'AT', 'RH']
|
|
14
|
+
|
|
15
|
+
# Second column group handed to reorder_dataframe_columns in Reader._raw_reader.
desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene']
|
|
16
|
+
|
|
17
|
+
# Numeric sentinel that Reader._raw_reader writes in place of '0L' / '<number>L'
# markers, zeros (outside 'WD') and '_'; Reader._QC later masks it to NaN.
MDL_NUMBER = -999
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Reader(AbstractReader):
    """Minion data reader.

    Reads an Excel workbook whose first data row holds units, normalises the
    special markers to numeric sentinels, writes a ``Level1`` CSV next to the
    input, and applies MDL handling plus mass / ion-balance checks in ``_QC``.
    """
    nam = 'Minion'

    # Nanzi August data (Ministry of Environment) (air quality, heavy metals and
    # aerosol availability) -> Nanzi August data_level1 -> NZ_minion_XXXX
    def _raw_reader(self, file):
        """
        Read one Minion Excel file and write its Level1 CSV.

        Parameters
        ----------
        file : pathlib.Path
            Path to the workbook. ``file.parent`` and ``file.stem`` are used to
            build the output path ``<parent>/Level1/<stem>_Level1.csv``.

        Returns
        -------
        pandas.DataFrame
            Reordered frame with rows whose index is duplicated or missing
            removed. The units row is stored on ``self.units``.
        """
        df = read_excel(file, index_col=0, parse_dates=True)
        df.index.name = 'Time'

        # Strip surrounding whitespace from the column names.
        df = df.rename(columns=lambda x: x.strip())

        # The first row carries the units: keep it aside, then drop it.
        self.units = df.iloc[0].copy()
        df = df.iloc[1:]

        # Replace the special markers ('維護校正' is the maintenance/calibration
        # marker used in the source workbook).
        df = df.replace({'維護校正': '*', np.nan: '-', 'Nodata': '-', '0L': MDL_NUMBER})
        # Values written as '<number>L' are replaced by the MDL sentinel.
        df = df.replace(to_replace=r'\d*\.?\d*[L]\b', value=MDL_NUMBER, regex=True)

        # Zeros become the MDL sentinel in every column except 'WD'.
        for col in df.columns:
            if col == 'WD':
                continue
            df[col] = df[col].replace({0: MDL_NUMBER})

        # replace to numeric for estimating qc rate
        df = df.replace({'_': MDL_NUMBER})

        XRF_col = list(meta.get('XRF').get('MDL').keys())
        IGAC_col = list(meta.get('IGAC').get('MDL').keys())

        # Reorder the columns group by group.
        df = self.reorder_dataframe_columns(df, [desired_order1, desired_order2, XRF_col, IGAC_col])

        # save Level1 data
        output_folder = file.parent / 'Level1'
        output_folder.mkdir(parents=True, exist_ok=True)
        df.to_csv(output_folder / f'{file.stem}_Level1.csv')

        return df.loc[~df.index.duplicated() & df.index.notna()]

    def _QC(self, _df):
        """
        Apply MDL handling, outlier filtering and a mass-closure check.

        IGAC and XRF columns go through their dedicated QA/QC, remaining MDL
        sentinels are masked to NaN, the ``desired_order1`` columns (except
        'WD') are passed through the rolling IQR filter, and finally the IGAC
        and XRF values of a row are blanked unless ``2 * PM2.5`` exceeds the
        summed ion and element mass.

        Parameters
        ----------
        _df : pandas.DataFrame
            Frame produced by ``_raw_reader``.

        Returns
        -------
        pandas.DataFrame
            A new, quality-controlled frame; the input is not modified.
        """
        # Work on a copy so the caller's frame is not modified in place.
        _df = _df.copy()

        IGAC_col = list(meta.get('IGAC').get('MDL'))
        XRF_col = list(meta.get('XRF').get('MDL'))

        # IGAC MDL QC
        _df[IGAC_col] = self.IGAC_QAQC(_df[IGAC_col])

        # XRF MDL QC
        _df[XRF_col] = self.XRF_QAQC(_df[XRF_col])

        # Any sentinel still present is treated as missing.
        _df = _df.mask(_df == MDL_NUMBER, np.nan)

        col = [col for col in desired_order1 if col != 'WD']
        _df[col] = self.QC_control().time_aware_rolling_iqr(_df[col])

        # Mass closure: rows where the estimated mass is not below
        # 2 * PM2.5 (including rows with missing PM2.5) lose their ion and
        # element values.
        ions_mass = _df[['Na+', 'NH4+', 'K+', 'Mg2+', 'Ca2+', 'Cl-', 'NO3-', 'SO42-']].sum(axis=1)
        element_mass = _df[XRF_col].sum(axis=1)

        estimated_mass = ions_mass + element_mass

        valid_mask = 2 * _df['PM2.5'] > estimated_mass

        _df.loc[~valid_mask, IGAC_col + XRF_col] = np.nan

        return _df

    def mdlReplace_timeAware_qc(self, df: DataFrame, MDL: dict, MDL_replace) -> DataFrame:
        """
        Mark values below MDL, run the IQR filter, then fill the marked cells.

        Parameters
        ----------
        df : pd.DataFrame
            Numeric data; cells equal to ``MDL_NUMBER`` are treated as already
            marked below MDL.
        MDL : dict
            Column name -> detection limit. Columns absent from ``df`` are
            ignored; a limit of ``None`` means "no limit known".
        MDL_replace : {'nan', '0.5 * MDL'}
            ``'0.5 * MDL'`` fills marked cells with half the column limit (NaN
            when the limit is ``None``); any other value fills them with NaN.

        Returns
        -------
        pd.DataFrame
            The filtered frame with below-MDL cells filled as requested.
        """

        def _limit(column):
            # A missing or None limit never flags anything.
            threshold = MDL.get(column)
            return float('-inf') if threshold is None else threshold

        # Step 1: Track MDL positions and values below threshold
        mdl_mask = df.eq(MDL_NUMBER) | df.apply(lambda x: x < _limit(x.name))

        # Step 2: Convert all values below MDL to MDL_NUMBER (-999)
        df_mdl = df.mask(mdl_mask, MDL_NUMBER)

        # Step 3: Apply time_aware_IQR_QC (excluding MDL_NUMBER values)
        # NOTE(review): _QC in this class reaches the IQR filter through
        # self.QC_control().time_aware_rolling_iqr; confirm that the base class
        # still provides time_aware_IQR_QC under this name.
        df_qc = self.time_aware_IQR_QC(df_mdl.mask(df_mdl == MDL_NUMBER))

        # Step 4: Handle values below MDL according to specified method
        if MDL_replace == '0.5 * MDL':
            for column, threshold in MDL.items():
                if column not in df.columns:
                    # Nothing to fill; indexing the column would raise KeyError.
                    continue
                below = df_mdl[column] == MDL_NUMBER
                df_qc.loc[below, column] = np.nan if threshold is None else 0.5 * threshold
        else:  # 'nan'
            df_qc = df_qc.mask(df_mdl == MDL_NUMBER, np.nan)

        return df_qc

    def XRF_QAQC(self,
                 df: DataFrame,
                 MDL_replace: Literal['nan', '0.5 * MDL'] = '0.5 * MDL'
                 ) -> DataFrame:
        """
        Perform Quality Assurance and Quality Control for XRF data

        Parameters
        ----------
        df : pd.DataFrame
            Input dataframe with XRF data
        MDL_replace : {'nan', '0.5 * MDL'}, default='0.5 * MDL'
            Method to handle values below MDL:
            - 'nan': Replace with NaN
            - '0.5 * MDL': Replace with half of MDL value

        Returns
        -------
        pd.DataFrame
            Processed dataframe with QC applied and MDL values handled
        """
        MDL = meta.get('XRF').get('MDL')

        df = self.mdlReplace_timeAware_qc(df, MDL, MDL_replace)

        # Unit conversion ng/m3 -> ug/m3. The data are assumed to be in ng/m3
        # when both the Al and Fe maxima exceed 10.
        if df['Al'].max() > 10 and df['Fe'].max() > 10:
            columns_to_convert = [col for col in MDL if col in df.columns]
            df[columns_to_convert] = df[columns_to_convert].div(1000)

        self.logger.info("")
        self.logger.info(f"XRF QAQC: values below MDL -> {MDL_replace}")

        return df

    def IGAC_QAQC(self,
                  df: DataFrame,
                  MDL_replace: Literal['nan', '0.5 * MDL'] = '0.5 * MDL',
                  tolerance: float = 1
                  ) -> DataFrame:
        """
        Perform Quality Assurance and Quality Control for IGAC data

        Parameters
        ----------
        df : pd.DataFrame
            Input dataframe with IGAC data
        MDL_replace : {'nan', '0.5 * MDL'}, default='0.5 * MDL'
            Method to handle values below MDL:
            - 'nan': Replace with NaN
            - '0.5 * MDL': Replace with half of MDL value
        tolerance : float, default=1
            Allowed deviation of the cation/anion ratio from 1; rows whose
            ratio falls outside ``[1 - tolerance, 1 + tolerance]`` are blanked.

        Returns
        -------
        pd.DataFrame
            Processed dataframe with QC applied and MDL values handled
        """
        MDL = meta.get('IGAC').get('MDL')

        df = self.mdlReplace_timeAware_qc(df, MDL, MDL_replace)

        # Per-ion divisors used to turn each concentration into the quantity
        # that is summed for the balance (divalent ions use half their weight).
        cation_divisors = {'Na+': 23, 'NH4+': 18, 'K+': 39, 'Mg2+': (24 / 2), 'Ca2+': (40 / 2)}
        anion_divisors = {'Cl-': 35.5, 'NO2-': 46, 'NO3-': 62, 'SO42-': (96 / 2)}

        _df = df.copy()
        _df['+_mole'] = _df[list(cation_divisors)].div(list(cation_divisors.values())).sum(axis=1, skipna=True)
        _df['-_mole'] = _df[list(anion_divisors)].div(list(anion_divisors.values())).sum(axis=1, skipna=True)

        # Avoid division by zero
        _df['ratio'] = np.where(_df['-_mole'] != 0, _df['+_mole'] / _df['-_mole'], np.nan)

        # Calculate bounds
        lower_bound, upper_bound = 1 - tolerance, 1 + tolerance

        # Keep a row only when its ratio lies within the bounds.
        valid_mask = ((_df['ratio'] <= upper_bound) & (_df['ratio'] >= lower_bound) &
                      _df['+_mole'].notna() & _df['-_mole'].notna())

        # Rows failing the check are set to NaN in full.
        df.loc[~valid_mask] = np.nan

        # Share of rows that passed the ion-balance check.
        retained_percentage = (valid_mask.sum() / len(df)) * 100

        self.logger.info("")
        self.logger.info(f"Ions balance: {round(retained_percentage, 0)}% within tolerance (±{tolerance})")

        if retained_percentage < 70:
            self.logger.warning("Warning: retained data < 70%")

        return df
|