AeroViz 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AeroViz/__init__.py +13 -0
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/data/DEFAULT_DATA.csv +1417 -0
- AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
- AeroViz/data/hysplit_example_data.txt +101 -0
- AeroViz/dataProcess/Chemistry/__init__.py +149 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
- AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
- AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
- AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
- AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
- AeroViz/dataProcess/Optical/__init__.py +281 -0
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/_derived.py +518 -0
- AeroViz/dataProcess/Optical/_extinction.py +123 -0
- AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
- AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
- AeroViz/dataProcess/Optical/coefficient.py +72 -0
- AeroViz/dataProcess/Optical/fRH.pkl +0 -0
- AeroViz/dataProcess/Optical/mie_theory.py +260 -0
- AeroViz/dataProcess/README.md +271 -0
- AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
- AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
- AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
- AeroViz/dataProcess/SizeDistr/prop.py +62 -0
- AeroViz/dataProcess/VOC/__init__.py +14 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/VOC/_potential_par.py +108 -0
- AeroViz/dataProcess/VOC/support_voc.json +446 -0
- AeroViz/dataProcess/__init__.py +66 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__init__.py +272 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/mcp_server.py +352 -0
- AeroViz/plot/__init__.py +13 -0
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/bar.py +126 -0
- AeroViz/plot/box.py +69 -0
- AeroViz/plot/distribution/__init__.py +1 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/distribution.py +576 -0
- AeroViz/plot/meteorology/CBPF.py +295 -0
- AeroViz/plot/meteorology/__init__.py +3 -0
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/hysplit.py +93 -0
- AeroViz/plot/meteorology/wind_rose.py +77 -0
- AeroViz/plot/optical/__init__.py +1 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/optical/optical.py +388 -0
- AeroViz/plot/pie.py +210 -0
- AeroViz/plot/radar.py +184 -0
- AeroViz/plot/regression.py +200 -0
- AeroViz/plot/scatter.py +174 -0
- AeroViz/plot/templates/__init__.py +6 -0
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/contour.py +47 -0
- AeroViz/plot/templates/corr_matrix.py +267 -0
- AeroViz/plot/templates/diurnal_pattern.py +61 -0
- AeroViz/plot/templates/koschmieder.py +95 -0
- AeroViz/plot/templates/metal_heatmap.py +164 -0
- AeroViz/plot/timeseries/__init__.py +2 -0
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +446 -0
- AeroViz/plot/utils/__init__.py +4 -0
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/_color.py +71 -0
- AeroViz/plot/utils/_unit.py +55 -0
- AeroViz/plot/utils/fRH.json +390 -0
- AeroViz/plot/utils/plt_utils.py +92 -0
- AeroViz/plot/utils/sklearn_utils.py +49 -0
- AeroViz/plot/utils/units.json +89 -0
- AeroViz/plot/violin.py +80 -0
- AeroViz/rawDataReader/FLOW.md +138 -0
- AeroViz/rawDataReader/__init__.py +220 -0
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__init__.py +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/supported_instruments.py +135 -0
- AeroViz/rawDataReader/core/__init__.py +658 -0
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +171 -0
- AeroViz/rawDataReader/core/pre_process.py +308 -0
- AeroViz/rawDataReader/core/qc.py +961 -0
- AeroViz/rawDataReader/core/report.py +579 -0
- AeroViz/rawDataReader/script/AE33.py +173 -0
- AeroViz/rawDataReader/script/AE43.py +151 -0
- AeroViz/rawDataReader/script/APS.py +339 -0
- AeroViz/rawDataReader/script/Aurora.py +191 -0
- AeroViz/rawDataReader/script/BAM1020.py +90 -0
- AeroViz/rawDataReader/script/BC1054.py +161 -0
- AeroViz/rawDataReader/script/EPA.py +79 -0
- AeroViz/rawDataReader/script/GRIMM.py +68 -0
- AeroViz/rawDataReader/script/IGAC.py +140 -0
- AeroViz/rawDataReader/script/MA350.py +179 -0
- AeroViz/rawDataReader/script/Minion.py +218 -0
- AeroViz/rawDataReader/script/NEPH.py +199 -0
- AeroViz/rawDataReader/script/OCEC.py +173 -0
- AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
- AeroViz/rawDataReader/script/SMPS.py +389 -0
- AeroViz/rawDataReader/script/TEOM.py +181 -0
- AeroViz/rawDataReader/script/VOC.py +106 -0
- AeroViz/rawDataReader/script/Xact.py +244 -0
- AeroViz/rawDataReader/script/__init__.py +28 -0
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__init__.py +2 -0
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- AeroViz/tools/database.py +95 -0
- AeroViz/tools/dataclassifier.py +117 -0
- AeroViz/tools/dataprinter.py +58 -0
- aeroviz-0.1.21.dist-info/METADATA +294 -0
- aeroviz-0.1.21.dist-info/RECORD +180 -0
- aeroviz-0.1.21.dist-info/WHEEL +5 -0
- aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
- aeroviz-0.1.21.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from pandas import to_datetime, read_csv, to_numeric, Series
|
|
3
|
+
|
|
4
|
+
from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
|
|
5
|
+
from AeroViz.rawDataReader.core.pre_process import _scaCoe
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Reader(AbstractReader):
    """Nephelometer (NEPH) Data Reader

    A specialized reader for integrating nephelometer data files, which measure
    light scattering properties of aerosols at multiple wavelengths.

    See full documentation at docs/source/instruments/NEPH.md for detailed information
    on supported formats and QC procedures.
    """
    nam = 'NEPH'

    # =========================================================================
    # Column Definitions
    # =========================================================================
    SCAT_COLUMNS = ['B', 'G', 'R', 'BB', 'BG', 'BR']
    CAL_COLUMNS = ['sca_550', 'SAE']

    # =========================================================================
    # QC Thresholds
    # =========================================================================
    MIN_SCAT_VALUE = 0  # Minimum scattering coefficient (Mm^-1)
    MAX_SCAT_VALUE = 2000  # Maximum scattering coefficient (Mm^-1)

    # Status Flag
    STATUS_COLUMN = 'status'
    STATUS_OK = 0  # Status code 0 means normal operation

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _raw_reader(self, file):
        """
        Read and parse raw Nephelometer data files.

        The file is read as a header-less CSV with 11 positional columns; the
        first column holds a record-type code. 'T' records supply the
        timestamp fields, 'D' records the scattering data and 'Y' records the
        instrument state (RH in column 5, status in column 9).

        Parameters
        ----------
        file : Path or str
            Path to the Nephelometer data file.

        Returns
        -------
        pandas.DataFrame or None
            DataFrame indexed by time with the scattering columns, 'RH' and
            the status column; rows with duplicated or missing timestamps are
            dropped. ``None`` is returned (after logging a warning) when a
            ``ValueError`` is raised while assembling the data.
        """
        # Keep the raw frame under its own name so the error handler below can
        # always inspect the record-type column, whatever happened in the try.
        raw = read_csv(file, header=None, names=range(11))
        by_record = raw.groupby(0)

        # T : time -- six numeric fields, zero-padded and joined into one
        # string that matches '%Y%m%d%H%M%S'.
        _idx_tm = to_datetime(
            by_record.get_group('T')[[1, 2, 3, 4, 5, 6]]
            .map(lambda x: f"{int(x):02d}")
            .agg(''.join, axis=1),
            format='%Y%m%d%H%M%S'
        )

        try:
            # D : data
            # col : 3~8 B G R BB BG BR
            _df_dt = by_record.get_group('D')[[1, 2, 3, 4, 5, 6, 7, 8]].set_index(_idx_tm)

            # Prefer 'NBXX' data records and fall back to 'NTXX' when absent.
            by_mode = _df_dt.groupby(1)
            try:
                scat = by_mode.get_group('NBXX')
            except KeyError:
                scat = by_mode.get_group('NTXX')

            # Scale by 1e6 (presumably m^-1 -> Mm^-1 -- confirm against the
            # instrument manual) and align to the time records.
            _df_out = (scat[[3, 4, 5, 6, 7, 8]] * 1e6).reindex(_idx_tm)
            _df_out.columns = list(self.SCAT_COLUMNS)
            _df_out.index.name = 'Time'

            # Y : state
            # col : 5 RH, col : 9 status
            _df_st = by_record.get_group('Y')
            _df_out['RH'] = _df_st[5].values

            # to_numeric() returns a NumPy array for array input, and NumPy
            # does not implement the pandas nullable 'Int64' dtype. Going
            # through a Series keeps the cast inside pandas so unparsable
            # status codes become <NA>. `.array` keeps the later column
            # assignment positional, as it was with the raw values.
            status_values = (
                to_numeric(Series(_df_st[9].values), errors='coerce')
                .astype('Int64')
                .array
            )

            out = _df_out[self.SCAT_COLUMNS + ['RH']].apply(to_numeric, errors='coerce')
            # Include status as a column (will be processed by core together)
            out[self.STATUS_COLUMN] = status_values

            return out.loc[~out.index.duplicated() & out.index.notna()]

        except ValueError:
            # Report rows whose record-type code is not one of the known ones.
            invalid_indices = raw[~raw[0].isin({'B', 'G', 'R', 'D', 'T', 'Y', 'Z'})].index
            details = ', '.join(f'{i}:{raw.at[i, 0]}' for i in invalid_indices)
            # `file` may be a plain string, which has no `.name` attribute.
            file_name = getattr(file, 'name', file)
            self.logger.warning(
                f"\tInvalid values in {file_name}: {details}."
                f" Skipping file.")

            return None

    def _QC(self, _df):
        """
        Perform quality control on Nephelometer raw data.

        QC Rules Applied (raw data only)
        ---------------------------------
        1. Status Error      : Non-zero status code indicates instrument error
        2. No Data           : All scattering columns are NaN
        3. Invalid Scat Value: Scattering coefficient outside 0-2000 Mm^-1
        4. Invalid Scat Rel. : Wavelength dependence violation (B < G < R)
        5. Insufficient      : Less than 50% hourly data completeness

        Note: SAE calculation is done in _process() after QC.

        Parameters
        ----------
        _df : pd.DataFrame
            Raw data containing the scattering columns and the status column.

        Returns
        -------
        pd.DataFrame
            Result of ``QCFlagBuilder.apply`` reindexed to the input index.
            The rule summary is stored on ``self._qc_summary``.
        """
        _index = _df.index.copy()
        df_qc = _df.copy()

        # Identify rows with all data missing (handled separately)
        all_missing_mask = df_qc[self.SCAT_COLUMNS].isna().all(axis=1)

        # Build QC rules declaratively
        qc = QCFlagBuilder()

        qc.add_rules([
            QCRule(
                name='Status Error',
                # Evaluate the frame handed to the rule (like every other rule)
                # instead of closing over the outer `_df`.
                condition=lambda df: self.QC_control().filter_error_status(
                    df, status_column=self.STATUS_COLUMN, status_type='numeric', ok_value=self.STATUS_OK
                ),
                description=f'Status code is not {self.STATUS_OK} (non-zero indicates error)'
            ),
            QCRule(
                name='No Data',
                condition=lambda df: Series(all_missing_mask, index=df.index),
                description='All scattering columns are NaN'
            ),
            QCRule(
                name='Invalid Scat Value',
                condition=lambda df: ((df[self.SCAT_COLUMNS] <= self.MIN_SCAT_VALUE) |
                                      (df[self.SCAT_COLUMNS] > self.MAX_SCAT_VALUE)).any(axis=1),
                description=f'Scattering coefficient outside {self.MIN_SCAT_VALUE}-{self.MAX_SCAT_VALUE} Mm^-1'
            ),
            QCRule(
                name='Invalid Scat Rel',
                condition=lambda df: (df['B'] < df['G']) & (df['G'] < df['R']),
                description='Wavelength dependence violation (Blue < Green < Red)'
            ),
            QCRule(
                name='Insufficient',
                condition=lambda df: self.QC_control().hourly_completeness_QC(
                    df[self.SCAT_COLUMNS], freq=self.meta['freq']
                ),
                description='Less than 50% hourly data completeness'
            ),
        ])

        # Apply all QC rules and get flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Store QC summary for combined output in _process()
        self._qc_summary = qc.get_summary(df_qc)

        return df_qc.reindex(_index)

    def _process(self, _df):
        """
        Calculate scattering coefficients and SAE.

        Processing Steps
        ----------------
        1. Calculate scattering coefficient at 550nm
        2. Calculate SAE (Scattering Ångström Exponent)

        Parameters
        ----------
        _df : pd.DataFrame
            Quality-controlled DataFrame with scattering columns and QC_Flag

        Returns
        -------
        pd.DataFrame
            Output of ``_scaCoe`` joined column-wise with 'RH' (when present)
            and 'QC_Flag', reindexed to the input index.
        """
        _index = _df.index.copy()

        # Calculate SAE and scattering at 550nm
        _df_cal = _scaCoe(_df[self.SCAT_COLUMNS], instru=self.nam, specified_band=[550])

        # Combine with RH (only when the input carries it) and QC_Flag
        passthrough = ['RH', 'QC_Flag'] if 'RH' in _df.columns else ['QC_Flag']
        df_out = pd.concat([_df_cal, _df[passthrough]], axis=1)

        # Log QC summary (only set once _QC() has run on this instance)
        qc_summary = getattr(self, '_qc_summary', None)
        if qc_summary is not None:
            self.logger.info(f"{self.nam} QC Summary:")
            for _, row in qc_summary.iterrows():
                self.logger.info(f"  {row['Rule']}: {row['Count']} ({row['Percentage']})")

        return df_out.reindex(_index)
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from pandas import to_datetime, read_csv, to_numeric, Series
|
|
3
|
+
|
|
4
|
+
from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Reader(AbstractReader):
    """Reader for OC/EC (Organic Carbon / Elemental Carbon) analyzer files.

    Parses carbonaceous-aerosol measurements obtained with thermal and optical
    methods and applies range, detection-limit, spike and missing-value checks.

    Detailed format and QC documentation: docs/source/instruments/OCEC.md
    """
    nam = 'OCEC'

    # -------------------------------------------------------------------------
    # Columns returned by _QC (together with 'QC_Flag')
    # -------------------------------------------------------------------------
    OUTPUT_COLUMNS = ['Thermal_OC', 'Thermal_EC', 'Optical_OC', 'Optical_EC', 'TC',
                      'OC1', 'OC2', 'OC3', 'OC4', 'PC']

    # -------------------------------------------------------------------------
    # QC thresholds
    # -------------------------------------------------------------------------
    MIN_VALUE = -5  # lower bound of the accepted range (ugC/m3)
    MAX_VALUE = 100  # upper bound of the accepted range (ugC/m3)

    # Method detection limit (MDL) per carbon fraction
    MDL = {
        'Thermal_OC': 0.3,
        'Optical_OC': 0.3,
        'Thermal_EC': 0.015,
        'Optical_EC': 0.015
    }

    def _raw_reader(self, file):
        """
        Load one raw OC/EC file into a numeric, time-indexed DataFrame.

        Parameters
        ----------
        file : Path or str
            Location of the OC/EC data file.

        Returns
        -------
        pandas.DataFrame
            Carbon fractions plus 'Sample_Volume', indexed by the start time
            rounded to the hour; rows with duplicated or missing timestamps
            are removed.
        """
        # Undecodable bytes are dropped rather than aborting the read.
        with open(file, 'r', encoding='utf-8', errors='ignore') as handle:
            table = read_csv(handle, skiprows=3, on_bad_lines='skip')

        stamps = table['Start Date/Time'].str.strip()
        table['Start Date/Time'] = stamps

        # Try the 12-hour (AM/PM) layout first; use the 24-hour layout only if
        # not a single row could be parsed.
        parsed = to_datetime(stamps, format='%m/%d/%Y %I:%M:%S %p', errors='coerce')
        if parsed.isna().all():
            parsed = to_datetime(stamps, format='%m/%d/%Y %H:%M:%S', errors='coerce')
        table['time'] = parsed

        table = table.set_index('time')
        usable = ~table.index.duplicated() & table.index.notna()
        table = table.loc[usable]

        table.index = table.index.round('1h')

        # Two header spellings map to the same internal names.
        column_aliases = {
            'Thermal/Optical OC (ugC/LCm^3)': 'Thermal_OC',
            'Thermal/Optical EC (ugC/LCm^3)': 'Thermal_EC',
            'OC=TC-BC (ugC/LCm^3)': 'Optical_OC',
            'BC (ugC/LCm^3)': 'Optical_EC',
            'TC (ugC/LCm^3)': 'TC',

            'OC ugC/m^3 (Thermal/Optical)': 'Thermal_OC',
            'EC ugC/m^3 (Thermal/Optical)': 'Thermal_EC',
            'OC by diff ugC (TC-OptEC)': 'Optical_OC',
            'OptEC ugC/m^3': 'Optical_EC',
            'TC ugC/m^3': 'TC',

            'Sample Volume Local Condition Actual m^3': 'Sample_Volume',

            'OCPk1-ug C': 'OC1_raw',
            'OCPk2-ug C': 'OC2_raw',
            'OCPk3-ug C': 'OC3_raw',
            'OCPk4-ug C': 'OC4_raw',
            'Pyrolized C ug': 'PC_raw',

            'ECPk1-ug C': 'EC1_raw',
            'ECPk2-ug C': 'EC2_raw',
            'ECPk3-ug C': 'EC3_raw',
            'ECPk4-ug C': 'EC4_raw',
            'ECPk5-ug C': 'EC5_raw',
        }
        table = table.rename(columns=column_aliases)

        table = table.apply(to_numeric, errors='coerce')

        # Divide each OC peak by the sample volume, and subtract the results
        # from Thermal_OC one at a time (left to right) to obtain PC.
        # The EC peaks are renamed above but are not converted here.
        volume = table['Sample_Volume']
        pyrolyzed = table['Thermal_OC']
        for peak in ('OC1', 'OC2', 'OC3', 'OC4'):
            table[peak] = table[f'{peak}_raw'] / volume
        for peak in ('OC1', 'OC2', 'OC3', 'OC4'):
            pyrolyzed = pyrolyzed - table[peak]
        table['PC'] = pyrolyzed

        selected = ['Thermal_OC', 'Thermal_EC', 'Optical_OC', 'Optical_EC', 'TC', 'Sample_Volume',
                    'OC1', 'OC2', 'OC3', 'OC4', 'PC']
        table = table[selected]

        # Rounding to the hour can create new duplicates, so filter again.
        keep = ~table.index.duplicated() & table.index.notna()
        return table.loc[keep]

    def _QC(self, _df):
        """
        Flag OC/EC rows that fail the quality-control rules.

        QC Rules Applied
        ----------------
        1. Invalid Carbon : Carbon value outside valid range (-5-100 ugC/m3)
        2. Below MDL      : Value below method detection limit
        3. Spike          : Sudden value change (vectorized spike detection)
        4. Missing OC     : Thermal_OC or Optical_OC is missing

        Parameters
        ----------
        _df : pd.DataFrame
            Raw OC/EC data as produced by ``_raw_reader``.

        Returns
        -------
        pd.DataFrame
            ``OUTPUT_COLUMNS`` plus 'QC_Flag', reindexed to the input index.
        """
        _index = _df.index.copy()
        df_qc = _df.copy()

        # Pre-compute the detection-limit mask once, outside the rule.
        below_mdl = Series(False, index=df_qc.index)
        for fraction, limit in self.MDL.items():
            if fraction not in df_qc.columns:
                continue
            below_mdl = below_mdl | (df_qc[fraction] <= limit)

        # Rule predicates: each receives the frame under test.
        def invalid_carbon(df):
            values = df[self.OUTPUT_COLUMNS]
            too_low = values <= self.MIN_VALUE
            too_high = values > self.MAX_VALUE
            return (too_low | too_high).any(axis=1)

        def under_detection_limit(df):
            return below_mdl.reindex(df.index).fillna(False)

        def spike(df):
            return self.QC_control().spike_detection(
                df[['Thermal_OC', 'Thermal_EC', 'Optical_OC', 'Optical_EC']],
                max_change_rate=3.0
            )

        def missing_oc(df):
            return df['Thermal_OC'].isna() | df['Optical_OC'].isna()

        qc = QCFlagBuilder()
        qc.add_rules([
            QCRule(
                name='Invalid Carbon',
                condition=invalid_carbon,
                description=f'Carbon value outside valid range ({self.MIN_VALUE}-{self.MAX_VALUE} ugC/m3)'
            ),
            QCRule(
                name='Below MDL',
                condition=under_detection_limit,
                description='Value below method detection limit'
            ),
            QCRule(
                name='Spike',
                condition=spike,
                description='Sudden unreasonable value change detected'
            ),
            QCRule(
                name='Missing OC',
                condition=missing_oc,
                description='Missing Thermal_OC or Optical_OC'
            ),
        ])

        # Evaluate every rule and attach the flags.
        df_qc = qc.apply(df_qc)

        # Report how many rows each rule flagged.
        summary = qc.get_summary(df_qc)
        self.logger.info(f"{self.nam} QC Summary:")
        for _, row in summary.iterrows():
            self.logger.info(f"  {row['Rule']}: {row['Count']} ({row['Percentage']})")

        result_columns = self.OUTPUT_COLUMNS + ['QC_Flag']
        return df_qc[result_columns].reindex(_index)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from AeroViz.rawDataReader.core import AbstractReader
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Reader(AbstractReader):
    """Reader for Q-ACSM data files.

    This class only sets the instrument name; it defines no methods of its
    own and relies on ``AbstractReader`` for everything else.

    Detailed format and QC documentation: docs/source/instruments/Q-ACSM.md
    """
    nam = 'Q-ACSM'
|