AeroViz 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AeroViz/__init__.py +13 -0
- AeroViz/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/data/DEFAULT_DATA.csv +1417 -0
- AeroViz/data/DEFAULT_PNSD_DATA.csv +1417 -0
- AeroViz/data/hysplit_example_data.txt +101 -0
- AeroViz/dataProcess/Chemistry/__init__.py +149 -0
- AeroViz/dataProcess/Chemistry/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Chemistry/_calculate.py +557 -0
- AeroViz/dataProcess/Chemistry/_isoropia.py +150 -0
- AeroViz/dataProcess/Chemistry/_mass_volume.py +487 -0
- AeroViz/dataProcess/Chemistry/_ocec.py +172 -0
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Chemistry/isrpia2.exe +0 -0
- AeroViz/dataProcess/Optical/PyMieScatt_update.py +577 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +452 -0
- AeroViz/dataProcess/Optical/__init__.py +281 -0
- AeroViz/dataProcess/Optical/__pycache__/PyMieScatt_update.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/__pycache__/mie_theory.cpython-312.pyc +0 -0
- AeroViz/dataProcess/Optical/_derived.py +518 -0
- AeroViz/dataProcess/Optical/_extinction.py +123 -0
- AeroViz/dataProcess/Optical/_mie_sd.py +912 -0
- AeroViz/dataProcess/Optical/_retrieve_RI.py +243 -0
- AeroViz/dataProcess/Optical/coefficient.py +72 -0
- AeroViz/dataProcess/Optical/fRH.pkl +0 -0
- AeroViz/dataProcess/Optical/mie_theory.py +260 -0
- AeroViz/dataProcess/README.md +271 -0
- AeroViz/dataProcess/SizeDistr/__init__.py +245 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/__pycache__/_size_dist.cpython-312.pyc +0 -0
- AeroViz/dataProcess/SizeDistr/_size_dist.py +810 -0
- AeroViz/dataProcess/SizeDistr/merge/README.md +93 -0
- AeroViz/dataProcess/SizeDistr/merge/__init__.py +20 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0.py +251 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v0_1.py +246 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v1.py +255 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v2.py +244 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v3.py +518 -0
- AeroViz/dataProcess/SizeDistr/merge/_merge_v4.py +422 -0
- AeroViz/dataProcess/SizeDistr/prop.py +62 -0
- AeroViz/dataProcess/VOC/__init__.py +14 -0
- AeroViz/dataProcess/VOC/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/VOC/_potential_par.py +108 -0
- AeroViz/dataProcess/VOC/support_voc.json +446 -0
- AeroViz/dataProcess/__init__.py +66 -0
- AeroViz/dataProcess/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/dataProcess/core/__init__.py +272 -0
- AeroViz/dataProcess/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/mcp_server.py +352 -0
- AeroViz/plot/__init__.py +13 -0
- AeroViz/plot/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/bar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/box.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/pie.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/radar.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/regression.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/scatter.cpython-312.pyc +0 -0
- AeroViz/plot/__pycache__/violin.cpython-312.pyc +0 -0
- AeroViz/plot/bar.py +126 -0
- AeroViz/plot/box.py +69 -0
- AeroViz/plot/distribution/__init__.py +1 -0
- AeroViz/plot/distribution/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/__pycache__/distribution.cpython-312.pyc +0 -0
- AeroViz/plot/distribution/distribution.py +576 -0
- AeroViz/plot/meteorology/CBPF.py +295 -0
- AeroViz/plot/meteorology/__init__.py +3 -0
- AeroViz/plot/meteorology/__pycache__/CBPF.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/hysplit.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/__pycache__/wind_rose.cpython-312.pyc +0 -0
- AeroViz/plot/meteorology/hysplit.py +93 -0
- AeroViz/plot/meteorology/wind_rose.py +77 -0
- AeroViz/plot/optical/__init__.py +1 -0
- AeroViz/plot/optical/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/optical/__pycache__/optical.cpython-312.pyc +0 -0
- AeroViz/plot/optical/optical.py +388 -0
- AeroViz/plot/pie.py +210 -0
- AeroViz/plot/radar.py +184 -0
- AeroViz/plot/regression.py +200 -0
- AeroViz/plot/scatter.py +174 -0
- AeroViz/plot/templates/__init__.py +6 -0
- AeroViz/plot/templates/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/ammonium_rich.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/contour.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/corr_matrix.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/diurnal_pattern.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/koschmieder.cpython-312.pyc +0 -0
- AeroViz/plot/templates/__pycache__/metal_heatmap.cpython-312.pyc +0 -0
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/contour.py +47 -0
- AeroViz/plot/templates/corr_matrix.py +267 -0
- AeroViz/plot/templates/diurnal_pattern.py +61 -0
- AeroViz/plot/templates/koschmieder.py +95 -0
- AeroViz/plot/templates/metal_heatmap.py +164 -0
- AeroViz/plot/timeseries/__init__.py +2 -0
- AeroViz/plot/timeseries/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/template.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/__pycache__/timeseries.cpython-312.pyc +0 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +446 -0
- AeroViz/plot/utils/__init__.py +4 -0
- AeroViz/plot/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_color.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/_unit.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/plt_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/__pycache__/sklearn_utils.cpython-312.pyc +0 -0
- AeroViz/plot/utils/_color.py +71 -0
- AeroViz/plot/utils/_unit.py +55 -0
- AeroViz/plot/utils/fRH.json +390 -0
- AeroViz/plot/utils/plt_utils.py +92 -0
- AeroViz/plot/utils/sklearn_utils.py +49 -0
- AeroViz/plot/utils/units.json +89 -0
- AeroViz/plot/violin.py +80 -0
- AeroViz/rawDataReader/FLOW.md +138 -0
- AeroViz/rawDataReader/__init__.py +220 -0
- AeroViz/rawDataReader/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__init__.py +0 -0
- AeroViz/rawDataReader/config/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/__pycache__/supported_instruments.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/config/supported_instruments.py +135 -0
- AeroViz/rawDataReader/core/__init__.py +658 -0
- AeroViz/rawDataReader/core/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/logger.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/pre_process.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/qc.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/__pycache__/report.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/core/logger.py +171 -0
- AeroViz/rawDataReader/core/pre_process.py +308 -0
- AeroViz/rawDataReader/core/qc.py +961 -0
- AeroViz/rawDataReader/core/report.py +579 -0
- AeroViz/rawDataReader/script/AE33.py +173 -0
- AeroViz/rawDataReader/script/AE43.py +151 -0
- AeroViz/rawDataReader/script/APS.py +339 -0
- AeroViz/rawDataReader/script/Aurora.py +191 -0
- AeroViz/rawDataReader/script/BAM1020.py +90 -0
- AeroViz/rawDataReader/script/BC1054.py +161 -0
- AeroViz/rawDataReader/script/EPA.py +79 -0
- AeroViz/rawDataReader/script/GRIMM.py +68 -0
- AeroViz/rawDataReader/script/IGAC.py +140 -0
- AeroViz/rawDataReader/script/MA350.py +179 -0
- AeroViz/rawDataReader/script/Minion.py +218 -0
- AeroViz/rawDataReader/script/NEPH.py +199 -0
- AeroViz/rawDataReader/script/OCEC.py +173 -0
- AeroViz/rawDataReader/script/Q-ACSM.py +12 -0
- AeroViz/rawDataReader/script/SMPS.py +389 -0
- AeroViz/rawDataReader/script/TEOM.py +181 -0
- AeroViz/rawDataReader/script/VOC.py +106 -0
- AeroViz/rawDataReader/script/Xact.py +244 -0
- AeroViz/rawDataReader/script/__init__.py +28 -0
- AeroViz/rawDataReader/script/__pycache__/AE33.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/AE43.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/APS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Aurora.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BAM1020.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/BC1054.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/EPA.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/GRIMM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/IGAC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/MA350.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Minion.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/NEPH.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/OCEC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Q-ACSM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/SMPS.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/TEOM.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/VOC.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/Xact.cpython-312.pyc +0 -0
- AeroViz/rawDataReader/script/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__init__.py +2 -0
- AeroViz/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/database.cpython-312.pyc +0 -0
- AeroViz/tools/__pycache__/dataclassifier.cpython-312.pyc +0 -0
- AeroViz/tools/database.py +95 -0
- AeroViz/tools/dataclassifier.py +117 -0
- AeroViz/tools/dataprinter.py +58 -0
- aeroviz-0.1.21.dist-info/METADATA +294 -0
- aeroviz-0.1.21.dist-info/RECORD +180 -0
- aeroviz-0.1.21.dist-info/WHEEL +5 -0
- aeroviz-0.1.21.dist-info/licenses/LICENSE +21 -0
- aeroviz-0.1.21.dist-info/top_level.txt +1 -0
AeroViz/rawDataReader/script/Xact.py
@@ -0,0 +1,244 @@
from pandas import read_csv, to_datetime, to_numeric

from AeroViz.rawDataReader.core import AbstractReader, QCRule, QCFlagBuilder


class Reader(AbstractReader):
    """Xact 625i XRF Analyzer Data Reader

    A specialized reader for data files from the Xact 625i continuous XRF analyzer,
    an instrument that measures the elemental composition of particulate matter.
    """
    nam = 'Xact'

    # Element symbols; atomic numbers appear only in the raw column headers (e.g. "Mg 12 (ng/m3)")
    ELEMENTS = [
        'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe',
        'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo',
        'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Cs', 'Ba', 'La', 'Ce',
        'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu',
        'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Th', 'Pa', 'U'
    ]

    # Environmental/status columns to keep
    ENV_COLUMNS = [
        'AT', 'SAMPLE_T', 'BP', 'TAPE', 'FLOW_25', 'FLOW_ACT', 'FLOW_STD', 'VOLUME',
        'TUBE_T', 'ENCLOSURE_T', 'FILAMENT_V', 'SDD_T', 'DPP_T', 'RH',
        'WIND', 'WIND_DIR', 'SAMPLE_TIME', 'ALARM', 'SAMPLE_TYPE'
    ]

    # =========================================================================
    # Alarm Code Definitions
    # =========================================================================
    # Error codes (100-110): indicate instrument malfunction and invalidate data
    ERROR_CODES = {
        100: 'Xray Voltage Error',
        101: 'Xray Current Error',
        102: 'Tube Temperature Error',
        103: 'Enclosure Temperature Error',
        104: 'Tape Error',
        105: 'Pump Error',
        106: 'Filter Wheel Error',
        107: 'Dynamic Rod Error',
        108: 'Nozzle Error',
        109: 'Energy Calibration Error',
        110: 'Software Error',
    }

    # Warning codes (200-203): indicate upscale warnings
    WARNING_CODES = {
        200: 'Upscale Cr Warning',
        201: 'Upscale Pb Warning',
        202: 'Upscale Cd Warning',
        203: 'Upscale Nb Warning',
    }

    # =========================================================================
    # QC Thresholds
    # =========================================================================
    MIN_VALUE = 0
    MAX_VALUE = 100000  # ng/m3

    # Internal standard (Nb) QC parameters
    INTERNAL_STD_ELEMENT = 'Nb'
    INTERNAL_STD_TOLERANCE = 0.20  # ±20% from median

    def _raw_reader(self, file):
        """Read and parse raw Xact 625i XRF data files."""
        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
            f.readline()  # skip row 0 (element names)
            headers = f.readline().strip().split(',')
            headers.append('_extra_')  # data rows carry one extra trailing field
            _df = read_csv(f, names=headers, on_bad_lines='skip')

        # Parse time column
        _df['time'] = to_datetime(_df['TIME'], format='%m/%d/%Y %H:%M:%S', errors='coerce')
        _df = _df.set_index('time')
        _df = _df.loc[~_df.index.duplicated() & _df.index.notna()]

        # Filter out calibration samples BEFORE rounding to avoid losing valid 00:30 samples.
        # The Xact runs daily QA checks around midnight (00:00-00:30); Sample Type: 1 = normal, 2 = calibration.
        if 'Sample Type' in _df.columns:
            _df = _df[_df['Sample Type'] == 1]

        _df.index = _df.index.round('1h')

        # Rename environmental/status columns
        rename_map = {
            'AT (C)': 'AT',
            'SAMPLE (C)': 'SAMPLE_T',
            'BP (mmHg)': 'BP',
            'TAPE (mmHg)': 'TAPE',
            'FLOW 25 (slpm)': 'FLOW_25',
            'FLOW ACT (lpm)': 'FLOW_ACT',
            'FLOW STD (slpm)': 'FLOW_STD',
            'VOLUME (L)': 'VOLUME',
            'TUBE (C)': 'TUBE_T',
            'ENCLOSURE (C)': 'ENCLOSURE_T',
            'FILAMENT (V)': 'FILAMENT_V',
            'SDD (C)': 'SDD_T',
            'DPP (C)': 'DPP_T',
            'RH (%)': 'RH',
            'WIND (m/s)': 'WIND',
            'WIND DIR (deg)': 'WIND_DIR',
            'SAMPLE TIME (min)': 'SAMPLE_TIME',
            'ALARM': 'ALARM',
            'Sample Type': 'SAMPLE_TYPE'
        }

        # Build element column rename map
        for col in _df.columns:
            for elem in self.ELEMENTS:
                # Match patterns like "Mg 12 (ng/m3)" or " K 19 (ng/m3)" for concentration
                if f'{elem} ' in col and '(ng/m3)' in col and 'uncert' not in col.lower():
                    rename_map[col] = elem
                # Match patterns like "Al Uncert (ng/m3)" or "Mg uncert (ng/m3)" for uncertainty
                elif f'{elem} ' in col and 'uncert' in col.lower():
                    rename_map[col] = f'{elem}_uncert'

        _df = _df.rename(columns=rename_map)

        # Select columns to keep (elements + uncertainties + environmental)
        keep_cols = []
        for elem in self.ELEMENTS:
            if elem in _df.columns:
                keep_cols.append(elem)
            if f'{elem}_uncert' in _df.columns:
                keep_cols.append(f'{elem}_uncert')
        for env_col in self.ENV_COLUMNS:
            if env_col in _df.columns:
                keep_cols.append(env_col)

        _df = _df[[col for col in keep_cols if col in _df.columns]]
        _df = _df.apply(to_numeric, errors='coerce')

        return _df.loc[~_df.index.duplicated() & _df.index.notna()]

    def _QC(self, _df):
        """Perform quality control on Xact XRF data.

        QC Rules Applied
        ----------------
        1. Calibration Mode   : SAMPLE_TYPE != 1 indicates zero calibration
        2. Instrument Error   : ALARM codes 100-110 indicate an instrument error
        3. Upscale Warning    : ALARM codes 200-203 indicate an upscale warning
        4. Invalid Value      : element concentration outside the valid range (0-100000 ng/m3)
        5. Internal Std Drift : Nb internal standard deviates more than ±20% from its median
        """
        _index = _df.index.copy()
        df_qc = _df.copy()

        # Get element columns (exclude uncertainty and environmental columns)
        element_cols = [col for col in df_qc.columns if col in self.ELEMENTS]
        uncert_cols = [f'{elem}_uncert' for elem in element_cols if f'{elem}_uncert' in df_qc.columns]

        # Build QC rules declaratively
        qc = QCFlagBuilder()

        # Calibration Mode rule (SAMPLE_TYPE: 1 = normal sampling, 2 = zero calibration).
        # Most calibration samples are already filtered in _raw_reader; this catches any remainder.
        if 'SAMPLE_TYPE' in df_qc.columns:
            qc.add_rules([
                QCRule(
                    name='Calibration Mode',
                    condition=lambda df: (df['SAMPLE_TYPE'] != 1) & df['SAMPLE_TYPE'].notna(),
                    description='Instrument in calibration mode (SAMPLE_TYPE != 1)'
                ),
            ])

        # Instrument Error and Upscale Warning rules (ALARM codes)
        if 'ALARM' in df_qc.columns:
            qc.add_rules([
                QCRule(
                    name='Instrument Error',
                    condition=lambda df: df['ALARM'].isin(list(self.ERROR_CODES.keys())),
                    description='Instrument error detected (ALARM code 100-110)'
                ),
                QCRule(
                    name='Upscale Warning',
                    condition=lambda df: df['ALARM'].isin(list(self.WARNING_CODES.keys())),
                    description='Upscale warning detected (ALARM code 200-203)'
                ),
            ])

        # Invalid Value rule
        if element_cols:
            qc.add_rules([
                QCRule(
                    name='Invalid Value',
                    condition=lambda df, cols=element_cols: (
                        (df[cols] < self.MIN_VALUE) | (df[cols] > self.MAX_VALUE)
                    ).any(axis=1),
                    description=f'Concentration outside valid range ({self.MIN_VALUE}-{self.MAX_VALUE} ng/m3)'
                ),
            ])

        # Internal Standard Drift rule (Nb)
        if self.INTERNAL_STD_ELEMENT in df_qc.columns:
            nb_median = df_qc[self.INTERNAL_STD_ELEMENT].median()
            lower_bound = nb_median * (1 - self.INTERNAL_STD_TOLERANCE)
            upper_bound = nb_median * (1 + self.INTERNAL_STD_TOLERANCE)
            qc.add_rules([
                QCRule(
                    name='Internal Std Drift',
                    condition=lambda df, lb=lower_bound, ub=upper_bound: (
                        (df[self.INTERNAL_STD_ELEMENT] < lb) | (df[self.INTERNAL_STD_ELEMENT] > ub)
                    ),
                    description=f'{self.INTERNAL_STD_ELEMENT} internal standard outside '
                                f'±{int(self.INTERNAL_STD_TOLERANCE * 100)}% of median ({nb_median:.2f} ng/m³)'
                ),
            ])

        # Apply all QC rules to obtain the flagged DataFrame
        df_qc = qc.apply(df_qc)

        # Log QC summary
        summary = qc.get_summary(df_qc)
        self.logger.info(f"{self.nam} QC Summary:")
        for _, row in summary.iterrows():
            self.logger.info(f"  {row['Rule']}: {row['Count']} ({row['Percentage']})")

        # Output columns: elements + uncertainties + environmental + QC_Flag
        output_cols = element_cols + uncert_cols + [c for c in self.ENV_COLUMNS if c in df_qc.columns] + ['QC_Flag']
        return df_qc[[c for c in output_cols if c in df_qc.columns]].reindex(_index)

    def decode_alarm(self, alarm_code):
        """Decode an ALARM code into a human-readable message.

        Parameters
        ----------
        alarm_code : int
            The ALARM code from the Xact data

        Returns
        -------
        str
            Human-readable description of the alarm
        """
        if alarm_code == 0:
            return 'Normal'
        elif alarm_code in self.ERROR_CODES:
            return self.ERROR_CODES[alarm_code]
        elif alarm_code in self.WARNING_CODES:
            return self.WARNING_CODES[alarm_code]
        else:
            return f'Unknown Alarm ({alarm_code})'
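The "Internal Std Drift" rule is the only stateful check above: its bounds come from the median of the whole Nb series rather than a fixed threshold. A minimal, self-contained sketch of the same logic using only pandas (toy values, not package data; this snippet is illustrative and not part of the wheel):

import pandas as pd

# Toy hourly Nb internal-standard readings (ng/m3); values are illustrative
nb = pd.Series([10.1, 9.8, 10.3, 13.0, 9.9, 7.5], name='Nb')

median = nb.median()        # 10.0 for this toy series
tolerance = 0.20            # ±20%, matching INTERNAL_STD_TOLERANCE in Xact.py
lower, upper = median * (1 - tolerance), median * (1 + tolerance)

# True where the internal standard drifted outside the tolerance band
drift_flag = (nb < lower) | (nb > upper)
print(drift_flag.tolist())  # [False, False, False, True, False, True]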
AeroViz/rawDataReader/script/__init__.py
@@ -0,0 +1,28 @@
# Auto-import all instrument reader modules dynamically.
# A new instrument only needs to:
#   1. Add a new .py file in this directory
#   2. Add an entry to the supported_instruments.py meta dict

import importlib
from pathlib import Path

# Collect all .py files in this directory (excluding __init__.py and private modules)
_script_dir = Path(__file__).parent
_module_files = [
    f.stem for f in _script_dir.glob('*.py')
    if f.stem != '__init__' and not f.stem.startswith('_')
]

# Dynamically import each module
__all__ = []
for _module_name in _module_files:
    try:
        _module = importlib.import_module(f'.{_module_name}', package=__name__)
        globals()[_module_name] = _module
        __all__.append(_module_name)
    except ImportError:
        # Skip modules that fail to import (e.g., missing dependencies)
        pass

# Clean up temporary variables; pop() guards against names that were never
# bound (e.g., _module is undefined if no module imported successfully)
for _name in ('_script_dir', '_module_files', '_module_name', '_module'):
    globals().pop(_name, None)
del importlib, Path, _name
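Given this auto-import mechanism, adding an instrument reduces to dropping a module with a Reader class into this directory. A hypothetical skeleton following the Xact.py pattern above (the instrument name and method bodies are placeholders; the exact AbstractReader contract lives in AeroViz/rawDataReader/core and may require more than is shown here):

# AeroViz/rawDataReader/script/MyInstrument.py (hypothetical)
from AeroViz.rawDataReader.core import AbstractReader


class Reader(AbstractReader):
    """Reader skeleton for a hypothetical new instrument."""
    nam = 'MyInstrument'

    def _raw_reader(self, file):
        # Parse one raw data file into a time-indexed DataFrame
        ...

    def _QC(self, _df):
        # Flag or drop questionable rows, then return the DataFrame
        ...

As the module comments note, the new reader would still need a matching entry in supported_instruments.py before the package recognizes it.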
Binary files (22 entries, e.g. compiled __pycache__/*.pyc caches): contents not shown.
AeroViz/tools/database.py
@@ -0,0 +1,95 @@
from io import StringIO
from pathlib import Path
from typing import Literal

from pandas import read_csv, DataFrame


def load_default_chemical_data():
    # The following data come from the chemical composition of real atmospheric particles.
    #
    # The six main chemical components of PM2.5 are listed in the data.
    # Here, we test the radar charts to see whether the chemical components can be
    # clearly distinguished between the four pollutant scenarios:
    #
    # 1) Whole sampling period (Total)
    # 2) Clean period (Clean)
    # 3) Transition period (Transition)
    # 4) Event period (Event)

    data = {
        'Sulfate': [0.01, 0.34, 0.02, 0.71],
        'Nitrate': [0.88, 0.13, 0.34, 0.13],
        'OC': [0.07, 0.95, 0.04, 0.05],
        'EC': [0.20, 0.02, 0.85, 0.19],
        'Soil': [0.20, 0.10, 0.07, 0.01],
        'SS': [0.20, 0.10, 0.07, 0.01]
    }

    return DataFrame(data, index=['Total', 'Clean', 'Transition', 'Event'])


def load_dataset_by_url(dataset_name: Literal["Tunghai", "Taipei"] = "Tunghai") -> DataFrame:
    import requests

    # Only "Tunghai" currently has a remote URL; other names fall through to the ValueError below
    dataset_uris = {
        "Tunghai": "https://raw.githubusercontent.com/alex870521/DataPlot/main/DataPlot/config/default_data.csv"
    }

    # Ensure the dataset name is valid
    if dataset_name not in dataset_uris:
        raise ValueError(f"Dataset {dataset_name} is not supported.")

    url = dataset_uris[dataset_name]

    # Fetch the CSV over HTTP
    response = requests.get(url, timeout=30)

    if response.status_code == 200:
        return read_csv(StringIO(response.text), na_values=('E', 'F', '-', '_', '#', '*'), index_col=0,
                        parse_dates=True, low_memory=False)
    else:
        print(f"Failed to download file: {response.status_code}")
        print(response.text)  # Print the response body for debugging
        return DataFrame()  # Return an empty DataFrame on failure


def load_dataset_local(dataset_name: Literal["Tunghai", "Taipei", "PNSD"] = "Tunghai") -> DataFrame:
    base_dir = Path(__file__).resolve().parent.parent
    config_dir = base_dir / 'data'

    dataset_paths = {
        "Tunghai": config_dir / 'DEFAULT_DATA.csv',
        "Taipei": config_dir / 'DEFAULT_DATA.csv',
        "PNSD": config_dir / 'DEFAULT_PNSD_DATA.csv'
    }

    if dataset_name not in dataset_paths:
        raise ValueError(f"Dataset {dataset_name} is not supported.")

    file_path = dataset_paths[dataset_name]

    if not file_path.exists():
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0, parse_dates=True,
                    low_memory=False)


class DataBase:
    def __new__(cls, file_path: Path | str = None, load_data: bool = False, load_PSD: bool = False):
        print('Loading:\033[96m Default Data\033[0m')
        if file_path is not None:
            file_path = Path(file_path)
            if not file_path.exists():
                raise FileNotFoundError(f"The file {file_path} does not exist.")
            return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0, parse_dates=True,
                            low_memory=False)

        if load_data ^ load_PSD:
            return load_dataset_local("Tunghai") if load_data else load_dataset_local("PNSD")
        else:
            raise ValueError("Exactly one of 'load_data' or 'load_PSD' must be True.")


if __name__ == '__main__':
    df = DataBase(load_data=True)
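A short usage sketch of the loaders above (assuming the AeroViz package and its bundled CSVs are installed; the module path matches the file listing):

from AeroViz.tools.database import DataBase, load_default_chemical_data

chem = load_default_chemical_data()  # 4x6 PM2.5 composition table
df = DataBase(load_data=True)        # bundled DEFAULT_DATA.csv
pnsd = DataBase(load_PSD=True)       # bundled DEFAULT_PNSD_DATA.csv

Note that DataBase.__new__ returns a plain DataFrame rather than a DataBase instance, so the class acts purely as a loader facade.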
AeroViz/tools/dataclassifier.py
@@ -0,0 +1,117 @@
from datetime import datetime
from typing import Literal, Sequence

import numpy as np
import pandas as pd
from pandas import concat, DataFrame, Series


class Classifier:
    Seasons = {'2020-Summer': (datetime(2020, 9, 4), datetime(2020, 9, 21, 23)),
               '2020-Autumn': (datetime(2020, 9, 22), datetime(2020, 12, 29, 23)),
               '2020-Winter': (datetime(2020, 12, 30), datetime(2021, 3, 25, 23)),
               '2021-Spring': (datetime(2021, 3, 26), datetime(2021, 5, 6, 23))}

    # '2021-Summer': (datetime(2021, 5, 7), datetime(2021, 10, 16, 23))
    # '2021-Autumn': (datetime(2021, 10, 17), datetime(2021, 12, 31, 23))

    @classmethod
    def classify(cls, df) -> DataFrame:
        df = cls.classify_by_diurnal(df)
        df = cls.classify_by_state(df)
        df = cls.classify_by_season(df)
        df = cls.classify_by_season_state(df)

        return df

    @classmethod
    def classify_by_diurnal(cls, df):
        df['Hour'] = df.index.hour
        df['Diurnal'] = df['Hour'].apply(cls.map_diurnal)
        return df

    @classmethod
    def classify_by_state(cls, df):
        df['State'] = df.apply(cls.map_state, axis=1, clean_bound=df.Extinction.quantile(0.2),
                               event_bound=df.Extinction.quantile(0.8))
        return df

    @classmethod
    def classify_by_season(cls, df):
        for season, (season_start, season_end) in cls.Seasons.items():
            df.loc[season_start:season_end, 'Season'] = season
        return df

    @classmethod
    def classify_by_season_state(cls, df):
        # Assign the state per season, using each season's own quantile bounds
        for _grp, _df in df.groupby('Season'):
            df.loc[_df.index, 'Season_State'] = _df.apply(
                cls.map_state, axis=1,
                clean_bound=_df.Extinction.quantile(0.2),
                event_bound=_df.Extinction.quantile(0.8))
        return df

    @staticmethod
    def map_diurnal(hour):
        return 'Day' if 7 <= hour <= 18 else 'Night'

    @staticmethod
    def map_state(row, clean_bound, event_bound):
        if row['Extinction'] >= event_bound:
            return 'Event'
        return 'Clean' if row['Extinction'] < clean_bound else 'Transition'


class DataClassifier(Classifier):
    """
    Notes
    -----
    First group the data, then compute the selected statistics for each group.
    If 'by' is not a column of the DataFrame, provide a support DataFrame or
    Series from which the grouping labels can be derived.
    """

    def __new__(cls,
                df: DataFrame,
                by: Literal["Hour", "State", "Season", "Season_State"] | str,
                df_support: DataFrame | Series = None,
                cut_bins: Sequence = None,
                qcut: int = None,
                labels: list[str] = None
                ) -> tuple[DataFrame, DataFrame]:
        group = cls._group_data(df, by, df_support, cut_bins, qcut, labels)
        return cls._compute_statistics(df, group)

    @staticmethod
    def _group_data(df, by, df_support, cut_bins, qcut, labels):
        if by not in df.columns:
            if df_support is None:
                raise KeyError(f"Column '{by}' does not exist in the DataFrame. "
                               f"Please provide a support DataFrame or Series to help classify.")
            else:
                df = concat([df, Classifier.classify(df_support.copy())[by]], axis=1)

        if cut_bins is not None:
            # Default labels are the bin midpoints
            cut_bins = np.asarray(cut_bins)
            df[f'{by}_cut'] = pd.cut(df.loc[:, f'{by}'], cut_bins,
                                     labels=labels or (cut_bins + (cut_bins[1] - cut_bins[0]) / 2)[:-1])
            return df.groupby(f'{by}_cut', observed=False)

        elif qcut is not None:
            df[f'{by}_qcut'] = pd.qcut(df.loc[:, f'{by}'], q=qcut, labels=labels)
            return df.groupby(f'{by}_qcut', observed=False)

        else:
            if by == 'State':
                return df.groupby(by)
            elif by == 'Season':
                return df.groupby(pd.Categorical(df['Season'],
                                                 categories=['2020-Summer', '2020-Autumn',
                                                             '2020-Winter', '2021-Spring']),
                                  observed=False)
            else:
                return df.groupby(by, observed=False)

    @staticmethod
    def _compute_statistics(df, group):
        mean_df = group.mean(numeric_only=True)
        mean_df.loc['Total'] = df.mean(numeric_only=True)

        std_df = group.std(numeric_only=True)
        std_df.loc['Total'] = df.std(numeric_only=True)

        return mean_df, std_df
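For illustration, grouping a measurement DataFrame by pollution state might look like the sketch below. The 'Extinction' column is the one the classifier keys on; the toy data and the support-frame call pattern are assumptions drawn from the docstring above, not an official example:

import numpy as np
import pandas as pd
from AeroViz.tools.dataclassifier import DataClassifier

# Toy hourly data inside the 2020-Summer season window, with an 'Extinction' column
idx = pd.date_range('2020-09-04', periods=200, freq='h')
df = pd.DataFrame({'Extinction': np.random.default_rng(0).uniform(20, 300, 200)}, index=idx)

# 'State' is not a column of df, so it is derived from df itself via df_support
mean_df, std_df = DataClassifier(df, by='State', df_support=df)
print(mean_df)  # per-state means plus a 'Total' row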
AeroViz/tools/dataprinter.py
@@ -0,0 +1,58 @@
from datetime import datetime

from pandas import DataFrame, Timestamp
from tabulate import tabulate


def data_table(df: DataFrame,
               items: list[str] | str = None,
               times: list[datetime | Timestamp | str] = None,
               ):
    """
    Cut the DataFrame at the given time points and print the mean and standard deviation
    of the specified items for each resulting period.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to be processed. It should have a DateTime index.
    items : list[str] | str, optional
        The columns of the DataFrame to be processed, as a list of column names or a
        single column name. Defaults to ['NO', 'NO2', 'NOx'].
    times : list[datetime | Timestamp | str] | str, optional
        The time points at which to cut the DataFrame, as a list or a single value.
        Time strings should be in the format 'YYYY-MM-DD'. Defaults to
        ['2024-03-21', '2024-04-30'].

    Returns
    -------
    None
        This function doesn't return a value. It prints a table showing the mean and
        standard deviation of the specified items for each time period.
    """
    # Apply the documented defaults when arguments are omitted
    items = items if items is not None else ['NO', 'NO2', 'NOx']
    times = times if times is not None else ['2024-03-21', '2024-04-30']

    items = [items] if isinstance(items, str) else items
    times = [times] if isinstance(times, str) else times
    times = list(map(Timestamp, times))

    times.sort()

    results = []
    periods = []
    for i in range(len(times) + 1):
        if i == 0:
            df_period = df.loc[df.index <= times[i], items]
            period_label = f'Before {times[i].date()}'
        elif i == len(times):
            df_period = df.loc[df.index > times[i - 1], items]
            period_label = f'After {times[i - 1].date()}'
        else:
            df_period = df.loc[(df.index > times[i - 1]) & (df.index <= times[i]), items]
            period_label = f'{times[i - 1].date()} to {times[i].date()}'

        mean, std = df_period.mean().round(2).to_numpy(), df_period.std().round(2).to_numpy()

        results.append([f'{m} ± {s}' for m, s in zip(mean, std)])
        periods.append(period_label)

    result = DataFrame(results, columns=items, index=periods)

    print(tabulate(result, headers='keys', tablefmt='fancy_grid'))