AeroViz 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AeroViz/__init__.py +13 -0
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/data/DEFAULT_DATA.csv +1417 -0
- AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
- AeroViz/data/hysplit_example_data.txt +101 -0
- AeroViz/dataProcess/Chemistry/__init__.py +149 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
- AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
- AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
- AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
- AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
- AeroViz/dataProcess/Optical/__init__.py +281 -0
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/_derived.py +518 -0
- AeroViz/dataProcess/Optical/_extinction.py +123 -0
- AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
- AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
- AeroViz/dataProcess/Optical/coefficient.py +72 -0
- AeroViz/dataProcess/Optical/fRH.pkl +0 -0
- AeroViz/dataProcess/Optical/mie_theory.py +260 -0
- AeroViz/dataProcess/README.md +271 -0
- AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
- AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
- AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
- AeroViz/dataProcess/SizeDistr/prop.py +62 -0
- AeroViz/dataProcess/VOC/__init__.py +14 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/VOC/_potential_par.py +108 -0
- AeroViz/dataProcess/VOC/support_voc.json +446 -0
- AeroViz/dataProcess/__init__.py +66 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__init__.py +272 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/mcp_server.py +352 -0
- AeroViz/plot/__init__.py +13 -0
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/bar.py +126 -0
- AeroViz/plot/box.py +69 -0
- AeroViz/plot/distribution/__init__.py +1 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/distribution.py +576 -0
- AeroViz/plot/meteorology/CBPF.py +295 -0
- AeroViz/plot/meteorology/__init__.py +3 -0
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/hysplit.py +93 -0
- AeroViz/plot/meteorology/wind_rose.py +77 -0
- AeroViz/plot/optical/__init__.py +1 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/optical/optical.py +388 -0
- AeroViz/plot/pie.py +210 -0
- AeroViz/plot/radar.py +184 -0
- AeroViz/plot/regression.py +200 -0
- AeroViz/plot/scatter.py +174 -0
- AeroViz/plot/templates/__init__.py +6 -0
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/contour.py +47 -0
- AeroViz/plot/templates/corr_matrix.py +267 -0
- AeroViz/plot/templates/diurnal_pattern.py +61 -0
- AeroViz/plot/templates/koschmieder.py +95 -0
- AeroViz/plot/templates/metal_heatmap.py +164 -0
- AeroViz/plot/timeseries/__init__.py +2 -0
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +446 -0
- AeroViz/plot/utils/__init__.py +4 -0
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/_color.py +71 -0
- AeroViz/plot/utils/_unit.py +55 -0
- AeroViz/plot/utils/fRH.json +390 -0
- AeroViz/plot/utils/plt_utils.py +92 -0
- AeroViz/plot/utils/sklearn_utils.py +49 -0
- AeroViz/plot/utils/units.json +89 -0
- AeroViz/plot/violin.py +80 -0
- AeroViz/rawDataReader/FLOW.md +138 -0
- AeroViz/rawDataReader/__init__.py +220 -0
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__init__.py +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/supported_instruments.py +135 -0
- AeroViz/rawDataReader/core/__init__.py +658 -0
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +171 -0
- AeroViz/rawDataReader/core/pre_process.py +308 -0
- AeroViz/rawDataReader/core/qc.py +961 -0
- AeroViz/rawDataReader/core/report.py +579 -0
- AeroViz/rawDataReader/script/AE33.py +173 -0
- AeroViz/rawDataReader/script/AE43.py +151 -0
- AeroViz/rawDataReader/script/APS.py +339 -0
- AeroViz/rawDataReader/script/Aurora.py +191 -0
- AeroViz/rawDataReader/script/BAM1020.py +90 -0
- AeroViz/rawDataReader/script/BC1054.py +161 -0
- AeroViz/rawDataReader/script/EPA.py +79 -0
- AeroViz/rawDataReader/script/GRIMM.py +68 -0
- AeroViz/rawDataReader/script/IGAC.py +140 -0
- AeroViz/rawDataReader/script/MA350.py +179 -0
- AeroViz/rawDataReader/script/Minion.py +218 -0
- AeroViz/rawDataReader/script/NEPH.py +199 -0
- AeroViz/rawDataReader/script/OCEC.py +173 -0
- AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
- AeroViz/rawDataReader/script/SMPS.py +389 -0
- AeroViz/rawDataReader/script/TEOM.py +181 -0
- AeroViz/rawDataReader/script/VOC.py +106 -0
- AeroViz/rawDataReader/script/Xact.py +244 -0
- AeroViz/rawDataReader/script/__init__.py +28 -0
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__init__.py +2 -0
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- AeroViz/tools/database.py +95 -0
- AeroViz/tools/dataclassifier.py +117 -0
- AeroViz/tools/dataprinter.py +58 -0
- aeroviz-0.1.21.dist-info/METADATA +294 -0
- aeroviz-0.1.21.dist-info/RECORD +180 -0
- aeroviz-0.1.21.dist-info/WHEEL +5 -0
- aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
- aeroviz-0.1.21.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from pandas import Series, concat
|
|
3
|
+
|
|
4
|
+
from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
|
|
5
|
+
from AeroViz.rawDataReader.core.pre_process import _scaCoe
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Reader(AbstractReader):
    """Aurora Integrating Nephelometer Data Reader

    A specialized reader for Aurora nephelometer data files, which measure aerosol light
    scattering properties at multiple wavelengths.

    See full documentation at docs/source/instruments/Aurora.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'Aurora'

    # =========================================================================
    # Column Definitions
    # =========================================================================
    SCAT_COLUMNS = ['B', 'G', 'R', 'BB', 'BG', 'BR']
    CAL_COLUMNS = ['sca_550', 'SAE']

    # =========================================================================
    # QC Thresholds
    # =========================================================================
    MIN_SCAT_VALUE = 0  # Minimum scattering coefficient (Mm^-1)
    MAX_SCAT_VALUE = 2000  # Maximum scattering coefficient (Mm^-1)

    # Status Flag
    STATUS_COLUMN = 'Status'  # Name the status column is stored under in the reader output
    STATUS_COLUMNS = ['Status', 'status', 'Error', 'error', 'Flag', 'flag']  # Common status column names to check
    STATUS_OK = 0  # Status code 0 means normal operation

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _raw_reader(self, file):
        """
        Read and parse raw Aurora nephelometer data files.

        Parameters
        ----------
        file : Path or str
            Path to the Aurora data file.

        Returns
        -------
        pandas.DataFrame
            Numeric scattering columns (``SCAT_COLUMNS``) indexed by time, plus a
            nullable-integer ``Status`` column when the file carries one of the
            names in ``STATUS_COLUMNS``. Rows with a duplicated or unparseable
            timestamp are dropped.
        """
        _df = pd.read_csv(file, low_memory=False, index_col=0)

        # Unparseable timestamps become NaT and are filtered out at the end
        _df.index = pd.to_datetime(_df.index, errors='coerce')
        _df.index.name = 'time'

        _df.columns = _df.keys().str.strip(' ')

        # Map the header names of the alternative csv layouts onto the standard names
        _df = _df.rename(columns={
            '0°σspB': 'B',
            '0°σspG': 'G',
            '0°σspR': 'R',
            '90°σspB': 'BB',
            '90°σspG': 'BG',
            '90°σspR': 'BR',
            'Blue': 'B',
            'Green': 'G',
            'Red': 'R',
            'B_Blue': 'BB',
            'B_Green': 'BG',
            'B_Red': 'BR',
        })

        # Check for status column (first match among the common names wins)
        status_col_name = next(
            (col_name for col_name in self.STATUS_COLUMNS if col_name in _df.columns),
            None,
        )

        # Use the class-level column definition so reader and QC cannot drift apart
        _df_out = _df[self.SCAT_COLUMNS].apply(pd.to_numeric, errors='coerce')

        # Include status column in the output (will be processed by core together)
        if status_col_name is not None:
            _df_out[self.STATUS_COLUMN] = pd.to_numeric(_df[status_col_name], errors='coerce').astype('Int64')

        _df_out = _df_out.loc[~_df_out.index.duplicated() & _df_out.index.notna()]

        return _df_out

    def _QC(self, _df):
        """
        Perform quality control on Aurora nephelometer raw data.

        QC Rules Applied (raw data only)
        ---------------------------------
        1. Status Error      : Non-zero status code indicates instrument error
        2. No Data           : All scattering columns are NaN
        3. Invalid Scat Value: Scattering coefficient outside 0-2000 Mm^-1
        4. Invalid Scat Rel. : Wavelength dependence violation (B < G < R)
        5. Insufficient      : Less than 50% hourly data completeness

        Note: SAE calculation is done in _process() after QC.

        Parameters
        ----------
        _df : pandas.DataFrame
            Output of ``_raw_reader``.

        Returns
        -------
        pandas.DataFrame
            The flagged frame produced by ``QCFlagBuilder.apply``, reindexed to
            the input index.
        """
        _index = _df.index.copy()
        df_qc = _df.copy()

        # Identify rows with all data missing (handled separately)
        all_missing_mask = df_qc[self.SCAT_COLUMNS].isna().all(axis=1)

        # Build QC rules declaratively
        qc = QCFlagBuilder()

        qc.add_rules([
            QCRule(
                name='Status Error',
                # Evaluate against the frame handed to the rule, like every other rule,
                # instead of closing over the outer ``_df``.
                # NOTE(review): behaviour when no status column exists depends on
                # filter_error_status - confirm it tolerates a missing column.
                condition=lambda df: self.QC_control().filter_error_status(
                    df, status_column=self.STATUS_COLUMN, status_type='numeric', ok_value=self.STATUS_OK
                ),
                description=f'Status code is not {self.STATUS_OK} (non-zero indicates error)'
            ),
            QCRule(
                name='No Data',
                condition=lambda df: Series(all_missing_mask, index=df.index),
                description='All scattering columns are NaN'
            ),
            QCRule(
                name='Invalid Scat Value',
                # The lower bound is inclusive: a value equal to MIN_SCAT_VALUE is flagged too
                condition=lambda df: ((df[self.SCAT_COLUMNS] <= self.MIN_SCAT_VALUE) |
                                      (df[self.SCAT_COLUMNS] > self.MAX_SCAT_VALUE)).any(axis=1),
                description=f'Scattering coefficient outside {self.MIN_SCAT_VALUE}-{self.MAX_SCAT_VALUE} Mm^-1'
            ),
            QCRule(
                name='Invalid Scat Rel',
                # Flags only the strictly increasing ordering B < G < R
                condition=lambda df: (df['B'] < df['G']) & (df['G'] < df['R']),
                description='Wavelength dependence violation (Blue < Green < Red)'
            ),
            QCRule(
                name='Insufficient',
                condition=lambda df: self.QC_control().hourly_completeness_QC(
                    df[self.SCAT_COLUMNS], freq=self.meta['freq']
                ),
                description='Less than 50% hourly data completeness'
            ),
        ])

        # Apply all QC rules and get flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Store QC summary for combined output in _process()
        self._qc_summary = qc.get_summary(df_qc)

        return df_qc.reindex(_index)

    def _process(self, _df):
        """
        Calculate scattering coefficients and SAE.

        Processing Steps
        ----------------
        1. Calculate scattering coefficient at 550nm
        2. Calculate SAE (Scattering Ångström Exponent)

        Parameters
        ----------
        _df : pd.DataFrame
            Quality-controlled DataFrame with scattering columns and QC_Flag

        Returns
        -------
        pd.DataFrame
            DataFrame with sca_550, SAE, and updated QC_Flag
        """
        _index = _df.index.copy()

        # Calculate SAE and scattering at 550nm
        _df_cal = _scaCoe(_df[self.SCAT_COLUMNS], instru=self.nam, specified_band=[550])

        # Combine with QC_Flag
        df_out = concat([_df_cal, _df[['QC_Flag']]], axis=1)

        # Log QC summary (only available when _QC ran on this instance beforehand)
        if hasattr(self, '_qc_summary') and self._qc_summary is not None:
            self.logger.info(f"{self.nam} QC Summary:")
            for _, row in self._qc_summary.iterrows():
                self.logger.info(f" {row['Rule']}: {row['Count']} ({row['Percentage']})")

        return df_out.reindex(_index)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from pandas import read_csv, to_numeric, NA
|
|
2
|
+
|
|
3
|
+
from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Reader(AbstractReader):
    """BAM1020 (Beta Attenuation Monitor) Data Reader

    A specialized reader for BAM1020 data files, which measure PM2.5 mass concentration
    using beta attenuation technology.

    See full documentation at docs/source/instruments/BAM1020.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'BAM1020'

    # =========================================================================
    # QC Thresholds
    # =========================================================================
    MIN_CONC = 0  # Minimum PM2.5 concentration (ug/m3)
    MAX_CONC = 500  # Maximum PM2.5 concentration (ug/m3)

    def _raw_reader(self, file):
        """
        Read and parse raw BAM1020 data files.

        Parameters
        ----------
        file : Path or str
            Path to the BAM1020 data file.

        Returns
        -------
        pandas.DataFrame
            Single-column frame (``Conc``, in ug/m3) with a datetime index.
            Rows with a duplicated or missing timestamp are dropped.
        """
        PM = 'Conc'

        _df = read_csv(file, parse_dates=True, index_col=0, usecols=range(0, 21))
        _df.rename(columns={'Conc (mg/m3)': PM}, inplace=True)

        # Coerce to numbers first so the sentinel test below also catches cells
        # that were read as text (e.g. "1"), which a pre-coercion replace misses.
        _df = _df[[PM]].apply(to_numeric, errors='coerce')

        # A raw reading of exactly 1 is treated as an invalid placeholder and blanked.
        # Zeros are kept here; the 'Invalid Conc' rule in _QC flags them (<= MIN_CONC).
        _df = _df.mask(_df == 1)

        # Transfer unit from mg/m3 to ug/m3
        _df = _df * 1000

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

    def _QC(self, _df):
        """
        Perform quality control on BAM1020 data.

        QC Rules Applied
        ----------------
        1. Invalid Conc : Concentration outside valid range (0-500 ug/m3)
        2. Spike        : Sudden value change (vectorized spike detection)

        Parameters
        ----------
        _df : pandas.DataFrame
            Output of ``_raw_reader``; must contain a ``Conc`` column.

        Returns
        -------
        pandas.DataFrame
            ``Conc`` and ``QC_Flag`` columns, reindexed to the input index.
        """
        _index = _df.index.copy()
        df_qc = _df.copy()

        # Build QC rules declaratively
        qc = QCFlagBuilder()
        qc.add_rules([
            QCRule(
                name='Invalid Conc',
                # The lower bound is inclusive: a value equal to MIN_CONC is flagged too
                condition=lambda df: (df['Conc'] <= self.MIN_CONC) | (df['Conc'] > self.MAX_CONC),
                description=f'Concentration outside valid range ({self.MIN_CONC}-{self.MAX_CONC} ug/m3)'
            ),
            QCRule(
                name='Spike',
                condition=lambda df: self.QC_control().spike_detection(
                    df[['Conc']], max_change_rate=3.0
                ),
                description='Sudden unreasonable value change detected'
            ),
        ])

        # Apply all QC rules and get flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Log QC summary
        summary = qc.get_summary(df_qc)
        self.logger.info(f"{self.nam} QC Summary:")
        for _, row in summary.iterrows():
            self.logger.info(f" {row['Rule']}: {row['Count']} ({row['Percentage']})")

        return df_qc[['Conc', 'QC_Flag']].reindex(_index)
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
from pandas import read_csv, to_numeric, concat
|
|
2
|
+
|
|
3
|
+
from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
|
|
4
|
+
from AeroViz.rawDataReader.core.pre_process import _absCoe
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Reader(AbstractReader):
    """BC1054 Black Carbon Monitor Data Reader

    A specialized reader for BC1054 data files, which measure black carbon
    concentrations using light absorption at 10 wavelengths.

    See full documentation at docs/source/instruments/BC1054.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'BC1054'

    # =========================================================================
    # Column Definitions
    # =========================================================================
    BC_COLUMNS = ['BC1', 'BC2', 'BC3', 'BC4', 'BC5', 'BC6', 'BC7', 'BC8', 'BC9', 'BC10']
    ABS_COLUMNS = ['abs_370', 'abs_430', 'abs_470', 'abs_525', 'abs_565',
                   'abs_590', 'abs_660', 'abs_700', 'abs_880', 'abs_950']
    CAL_COLUMNS = ['abs_550', 'AAE', 'eBC']

    # =========================================================================
    # QC Thresholds
    # =========================================================================
    MIN_BC = 0  # Minimum BC concentration (ng/m³)
    MAX_BC = 20000  # Maximum BC concentration (ng/m³)
    MIN_AAE = 0.7  # Minimum valid AAE (absolute value)
    MAX_AAE = 2.0  # Maximum valid AAE (absolute value)

    # =========================================================================
    # Status Error Codes (bitwise flags)
    # =========================================================================
    ERROR_STATES = [
        1,  # Power Failure
        2,  # Digital Sensor Link Failure
        4,  # Tape Move Failure
        8,  # Maintenance
        16,  # Flow Failure
        32,  # Automatic Tape Advance
        64,  # Detector Failure
        256,  # Sensor Range
        512,  # Nozzle Move Failure
        1024,  # SPI Link Failure
        2048,  # Calibration Audit
        65536,  # Tape Move
    ]

    def _raw_reader(self, file):
        """
        Read and parse raw BC1054 data files.

        Parameters
        ----------
        file : Path or str
            Path to the BC1054 data file.

        Returns
        -------
        pandas.DataFrame
            Numeric ``BC1``..``BC10`` and ``Status`` columns indexed by the first
            csv column. Rows with a duplicated or missing index value are dropped.
        """
        # Undecodable bytes are skipped rather than aborting the read
        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
            _df = read_csv(f, parse_dates=True, index_col=0)

        # Headers contain embedded spaces; remove them before renaming
        _df.columns = _df.columns.str.replace(' ', '')

        _df = _df.rename(columns={
            'BC1(ng/m3)': 'BC1', 'BC2(ng/m3)': 'BC2', 'BC3(ng/m3)': 'BC3',
            'BC4(ng/m3)': 'BC4', 'BC5(ng/m3)': 'BC5', 'BC6(ng/m3)': 'BC6',
            'BC7(ng/m3)': 'BC7', 'BC8(ng/m3)': 'BC8', 'BC9(ng/m3)': 'BC9',
            'BC10(ng/m3)': 'BC10'
        })

        _df = _df[self.BC_COLUMNS + ['Status']].apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

    def _QC(self, _df):
        """
        Perform quality control on BC1054 raw data.

        QC Rules Applied (raw data only)
        ---------------------------------
        1. Duplicate     : Rows identical to the row directly before or after
                           them are removed (every member of a repeated run)
        2. Status Error  : Invalid instrument status codes
        3. Invalid BC    : BC concentration outside 0-20000 ng/m³
        4. Insufficient  : Less than 50% hourly data completeness

        Note: AAE validation is done in _process() after calculation.

        Parameters
        ----------
        _df : pandas.DataFrame
            Output of ``_raw_reader``.

        Returns
        -------
        pandas.DataFrame
            The flagged frame reindexed to the input index, so timestamps of
            removed duplicate rows come back as all-NaN rows.
        """
        _index = _df.index.copy()

        # Remove consecutive duplicate rows (compared against both neighbours)
        same_as_previous = _df.eq(_df.shift()).all(axis=1)
        same_as_next = _df.eq(_df.shift(-1)).all(axis=1)
        duplicate_rows = same_as_previous | same_as_next
        df_qc = _df[~duplicate_rows].copy()

        # Build QC rules declaratively
        qc = QCFlagBuilder()
        qc.add_rules([
            QCRule(
                name='Status Error',
                condition=lambda df: self.QC_control().filter_error_status(df, self.ERROR_STATES),
                description='Invalid instrument status code detected'
            ),
            QCRule(
                name='Invalid BC',
                # The lower bound is inclusive: a value equal to MIN_BC is flagged too
                condition=lambda df: ((df[self.BC_COLUMNS] <= self.MIN_BC) |
                                      (df[self.BC_COLUMNS] > self.MAX_BC)).any(axis=1),
                description=f'BC concentration outside valid range {self.MIN_BC}-{self.MAX_BC} ng/m³'
            ),
            QCRule(
                name='Insufficient',
                condition=lambda df: self.QC_control().hourly_completeness_QC(
                    df[self.BC_COLUMNS], freq=self.meta['freq']
                ),
                description='Less than 50% hourly data completeness'
            ),
        ])

        # Apply all QC rules and get flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Store QC summary for combined output in _process()
        self._qc_summary = qc.get_summary(df_qc)

        return df_qc.reindex(_index)

    def _process(self, _df):
        """
        Calculate absorption coefficients and validate derived parameters.

        Processing Steps
        ----------------
        1. Calculate absorption coefficients at each wavelength
        2. Calculate AAE (Absorption Ångström Exponent)
        3. Calculate eBC (equivalent Black Carbon)
        4. Validate AAE range and update QC_Flag

        Parameters
        ----------
        _df : pandas.DataFrame
            Quality-controlled frame with the BC columns, ``Status`` and ``QC_Flag``.

        Returns
        -------
        pandas.DataFrame
            BC, absorption and calculated columns followed by ``QC_Flag``,
            reindexed to the input index.
        """
        _index = _df.index.copy()

        # Calculate absorption coefficients, AAE, and eBC
        _df_cal = _absCoe(_df[self.BC_COLUMNS], instru=self.nam, specified_band=[550])

        # Combine with Status and QC_Flag
        df_out = concat([_df_cal, _df[['Status', 'QC_Flag']]], axis=1)

        # Validate AAE and update QC_Flag; the sign is flipped before the range test,
        # and NaN AAE values fail both comparisons so they are not flagged here
        negated_aae = -df_out['AAE']
        invalid_aae = (negated_aae < self.MIN_AAE) | (negated_aae > self.MAX_AAE)
        df_out = self.update_qc_flag(df_out, invalid_aae, 'Invalid AAE')

        # Log combined QC summary with calculated info
        if hasattr(self, '_qc_summary') and self._qc_summary is not None:
            from pandas import DataFrame

            # Add Invalid AAE row before Valid row
            total = len(df_out)
            invalid_count = invalid_aae.sum()
            # Guard the percentage against an empty frame (no division by zero)
            invalid_pct = invalid_count / total * 100 if total else 0.0
            invalid_aae_row = DataFrame([{
                'Rule': 'Invalid AAE',
                'Count': invalid_count,
                'Percentage': f'{invalid_pct:.1f}%',
                'Description': f'AAE outside valid range {self.MIN_AAE}-{self.MAX_AAE}'
            }])
            # Insert before Valid row (last row)
            summary = concat(
                [self._qc_summary.iloc[:-1], invalid_aae_row, self._qc_summary.iloc[-1:]],
                ignore_index=True,
            )
            self.logger.info(f"{self.nam} QC Summary:")
            for _, row in summary.iterrows():
                self.logger.info(f" {row['Rule']}: {row['Count']} ({row['Percentage']})")

        # Reorder columns
        # NOTE(review): this selection assumes _absCoe returns the BC columns along
        # with the abs_*/AAE/eBC columns - confirm against pre_process._absCoe.
        all_data_cols = self.BC_COLUMNS + self.ABS_COLUMNS + self.CAL_COLUMNS
        return df_out[all_data_cols + ['QC_Flag']].reindex(_index)
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from pandas import read_csv, to_numeric
|
|
3
|
+
|
|
4
|
+
from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
|
|
5
|
+
|
|
6
|
+
# Preferred column order for the reader output; passed to
# reorder_dataframe_columns() in Reader._raw_reader.
desired_order1 = ['SO2', 'NO', 'NOx', 'NO2', 'CO', 'O3', 'THC', 'NMHC',
                  'CH4', 'PM10', 'PM2.5', 'PM1', 'WS', 'WD', 'AT', 'RH']

# Second column group (aromatic VOC species names).
# NOTE(review): not referenced anywhere in the visible part of this module -
# confirm whether it is still needed.
desired_order2 = ['Benzene', 'Toluene', 'EthylBenzene', 'm/p-Xylene', 'o-Xylene']
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Reader(AbstractReader):
    """EPA Environmental Data Reader

    A specialized reader for EPA air quality monitoring data files.

    See full documentation at docs/source/instruments/EPA.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'EPA'

    def _raw_reader(self, file):
        """
        Read and parse an EPA hourly-value csv export.

        Parameters
        ----------
        file : Path or str
            Path to the EPA data file (read as big5, undecodable bytes ignored).

        Returns
        -------
        pandas.DataFrame
            Numeric frame indexed by ``Time``, with columns reordered according
            to ``desired_order1``. Non-numeric cells become NaN.

        Raises
        ------
        ValueError
            If the station column (測站) holds more than one distinct station.
        """
        # Handles the hourly-value exports 查詢小時值(測項).csv and 查詢小時值(直式).csv,
        # exported with or without the "valid values only" option
        df = read_csv(file, encoding='big5', encoding_errors='ignore', index_col=0, parse_dates=True,
                      on_bad_lines='skip')

        # Check for the station column before touching it: grouping on a missing
        # column would raise KeyError for exports that do not include it.
        if '測站' in df.columns:
            if df['測站'].nunique() > 1:
                raise ValueError(f"Multiple stations found in the file: {df['測站'].unique()}")
            df.drop(columns=['測站'], inplace=True)

        # Long layout (one row per measured item): pivot items into columns
        if '測項' in df.columns:
            df = df.pivot(columns='測項', values='資料')

        df.rename(columns={'AMB_TEMP': 'AT', 'WIND_SPEED': 'WS', 'WIND_DIREC': 'WD'}, inplace=True)
        df.index.name = 'Time'

        # If the file was exported without removing invalid values, replace strings
        # containing '#' or 'L' with '#' or '_' (both are later coerced to NaN)
        df = df.replace(to_replace=r'\d*\.?\d*[#]\b', value='#', regex=True)
        df = df.replace(to_replace=r'\d*\.?\d*[L]\b', value='_', regex=True)

        # Column ordering
        return self.reorder_dataframe_columns(df, [desired_order1]).apply(to_numeric, errors='coerce')

    def _QC(self, _df):
        """
        Perform quality control on EPA data.

        QC Rules Applied
        ----------------
        1. Negative : Any measurement value < 0

        Parameters
        ----------
        _df : pandas.DataFrame
            Output of ``_raw_reader``.

        Returns
        -------
        pandas.DataFrame
            The flagged frame produced by ``QCFlagBuilder.apply``, reindexed to
            the input index.
        """
        _index = _df.index.copy()
        df_qc = _df.copy()

        # Get numeric columns for negative value check
        numeric_cols = df_qc.select_dtypes(include=[np.number]).columns.tolist()

        # Build QC rules declaratively
        qc = QCFlagBuilder()
        qc.add_rules([
            QCRule(
                name='Negative',
                # NOTE(review): with no numeric columns this yields the scalar False
                # rather than a Series - confirm QCFlagBuilder accepts that.
                condition=lambda df: (df[numeric_cols] < 0).any(axis=1) if numeric_cols else False,
                description='Measurement value is negative (< 0)'
            ),
        ])

        # Apply all QC rules and get flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Log QC summary
        summary = qc.get_summary(df_qc)
        self.logger.info(f"{self.nam} QC Summary:")
        for _, row in summary.iterrows():
            self.logger.info(f" {row['Rule']}: {row['Count']} ({row['Percentage']})")

        return df_qc.reindex(_index)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from pandas import to_datetime, read_csv
|
|
2
|
+
|
|
3
|
+
from AeroViz.rawDataReader.core import AbstractReader
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Reader(AbstractReader):
    """ GRIMM Aerosol Spectrometer Data Reader

    A specialized reader for GRIMM data files, which measure particle size distributions
    in the range of 0.25-32 μm.

    See full documentation at docs/source/instruments/GRIMM.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'GRIMM'

    def _raw_reader(self, file):
        """
        Read and parse raw GRIMM data files.

        Parameters
        ----------
        file : Path or str
            Path to the GRIMM data file.

        Returns
        -------
        pandas.DataFrame or None
            Processed GRIMM data with datetime index and size channels as columns.
            Returns None if the file is empty.
        """
        # Local import: lets a plain string path be handled the same as a Path
        from pathlib import Path

        # The tab-separated table header sits on line 233 (0-based) of the file
        _df = read_csv(file, header=233, delimiter='\t', index_col=0, parse_dates=[0], encoding='ISO-8859-1',
                       dayfirst=True).rename_axis("Time")
        _df.index = to_datetime(_df.index, format="%d/%m/%Y %H:%M:%S", dayfirst=True)

        # Drop the leading 11 columns plus a trailing group whose extent depends on
        # the file-name prefix
        leading_cols = _df.columns[0:11].tolist()
        if Path(file).name.startswith("A407ST"):
            trailing_cols = _df.columns[128:].tolist()
        else:
            trailing_cols = _df.columns[-5:].tolist()
        _df.drop(leading_cols + trailing_cols, axis=1, inplace=True)

        if _df.empty:
            # Report through the reader's logger, as the other readers do
            self.logger.info(f"{file} is empty")
            return None

        # NOTE(review): 0.035 is an undocumented scaling constant - confirm its
        # meaning and units before changing it.
        return _df / 0.035

    def _QC(self, _df):
        """
        Perform quality control on GRIMM data.

        Parameters
        ----------
        _df : pandas.DataFrame
            Raw GRIMM data with datetime index and size channels as columns.

        Returns
        -------
        pandas.DataFrame
            The input data unchanged.

        Notes
        -----
        No QC filters are currently applied. Future implementations could include:
        1. Value range checks for each size channel
        2. Total concentration consistency checks
        3. Time-based outlier detection
        """
        return _df
|