AeroViz 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AeroViz might be problematic. Click here for more details.
- AeroViz/__init__.py +7 -5
- AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
- AeroViz/dataProcess/Chemistry/__init__.py +40 -40
- AeroViz/dataProcess/Chemistry/_calculate.py +15 -15
- AeroViz/dataProcess/Chemistry/_isoropia.py +72 -68
- AeroViz/dataProcess/Chemistry/_mass_volume.py +158 -161
- AeroViz/dataProcess/Chemistry/_ocec.py +109 -109
- AeroViz/dataProcess/Chemistry/_partition.py +19 -18
- AeroViz/dataProcess/Chemistry/_teom.py +9 -11
- AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
- AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
- AeroViz/dataProcess/Optical/_IMPROVE.py +40 -41
- AeroViz/dataProcess/Optical/__init__.py +29 -44
- AeroViz/dataProcess/Optical/_absorption.py +21 -47
- AeroViz/dataProcess/Optical/_extinction.py +31 -25
- AeroViz/dataProcess/Optical/_mie.py +5 -7
- AeroViz/dataProcess/Optical/_mie_sd.py +89 -90
- AeroViz/dataProcess/Optical/_scattering.py +19 -20
- AeroViz/dataProcess/SizeDistr/__init__.py +39 -39
- AeroViz/dataProcess/SizeDistr/__merge.py +159 -158
- AeroViz/dataProcess/SizeDistr/_merge.py +155 -154
- AeroViz/dataProcess/SizeDistr/_merge_v1.py +162 -161
- AeroViz/dataProcess/SizeDistr/_merge_v2.py +153 -152
- AeroViz/dataProcess/SizeDistr/_merge_v3.py +327 -327
- AeroViz/dataProcess/SizeDistr/_merge_v4.py +273 -275
- AeroViz/dataProcess/SizeDistr/_size_distr.py +51 -51
- AeroViz/dataProcess/VOC/__init__.py +9 -9
- AeroViz/dataProcess/VOC/_potential_par.py +53 -55
- AeroViz/dataProcess/__init__.py +28 -6
- AeroViz/dataProcess/core/__init__.py +59 -65
- AeroViz/plot/__init__.py +7 -2
- AeroViz/plot/bar.py +126 -0
- AeroViz/plot/box.py +69 -0
- AeroViz/plot/distribution/distribution.py +421 -427
- AeroViz/plot/meteorology/meteorology.py +240 -292
- AeroViz/plot/optical/__init__.py +0 -1
- AeroViz/plot/optical/optical.py +230 -230
- AeroViz/plot/pie.py +198 -0
- AeroViz/plot/regression.py +196 -0
- AeroViz/plot/scatter.py +165 -0
- AeroViz/plot/templates/__init__.py +2 -4
- AeroViz/plot/templates/ammonium_rich.py +34 -0
- AeroViz/plot/templates/contour.py +25 -25
- AeroViz/plot/templates/corr_matrix.py +86 -93
- AeroViz/plot/templates/diurnal_pattern.py +28 -26
- AeroViz/plot/templates/koschmieder.py +59 -123
- AeroViz/plot/templates/metal_heatmap.py +135 -37
- AeroViz/plot/timeseries/__init__.py +1 -0
- AeroViz/plot/timeseries/template.py +47 -0
- AeroViz/plot/timeseries/timeseries.py +324 -264
- AeroViz/plot/utils/__init__.py +2 -1
- AeroViz/plot/utils/_color.py +57 -57
- AeroViz/plot/utils/_unit.py +48 -48
- AeroViz/plot/utils/plt_utils.py +92 -0
- AeroViz/plot/utils/sklearn_utils.py +49 -0
- AeroViz/plot/utils/units.json +5 -0
- AeroViz/plot/violin.py +80 -0
- AeroViz/process/__init__.py +17 -17
- AeroViz/process/core/DataProc.py +9 -9
- AeroViz/process/core/SizeDist.py +81 -81
- AeroViz/process/method/PyMieScatt_update.py +488 -488
- AeroViz/process/method/mie_theory.py +231 -229
- AeroViz/process/method/prop.py +40 -40
- AeroViz/process/script/AbstractDistCalc.py +103 -103
- AeroViz/process/script/Chemical.py +168 -167
- AeroViz/process/script/IMPACT.py +40 -40
- AeroViz/process/script/IMPROVE.py +152 -152
- AeroViz/process/script/Others.py +45 -45
- AeroViz/process/script/PSD.py +26 -26
- AeroViz/process/script/PSD_dry.py +69 -70
- AeroViz/process/script/retrieve_RI.py +50 -51
- AeroViz/rawDataReader/__init__.py +53 -58
- AeroViz/rawDataReader/config/supported_instruments.py +155 -0
- AeroViz/rawDataReader/core/__init__.py +233 -356
- AeroViz/rawDataReader/script/AE33.py +17 -18
- AeroViz/rawDataReader/script/AE43.py +18 -21
- AeroViz/rawDataReader/script/APS_3321.py +30 -30
- AeroViz/rawDataReader/script/Aurora.py +23 -24
- AeroViz/rawDataReader/script/BC1054.py +36 -40
- AeroViz/rawDataReader/script/EPA_vertical.py +37 -9
- AeroViz/rawDataReader/script/GRIMM.py +16 -23
- AeroViz/rawDataReader/script/IGAC.py +90 -0
- AeroViz/rawDataReader/script/MA350.py +32 -39
- AeroViz/rawDataReader/script/Minion.py +103 -0
- AeroViz/rawDataReader/script/NEPH.py +69 -74
- AeroViz/rawDataReader/script/SMPS_TH.py +25 -25
- AeroViz/rawDataReader/script/SMPS_aim11.py +32 -32
- AeroViz/rawDataReader/script/SMPS_genr.py +31 -31
- AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
- AeroViz/rawDataReader/script/TEOM.py +30 -28
- AeroViz/rawDataReader/script/Table.py +13 -14
- AeroViz/rawDataReader/script/VOC.py +26 -0
- AeroViz/rawDataReader/script/__init__.py +18 -20
- AeroViz/tools/database.py +64 -66
- AeroViz/tools/dataclassifier.py +106 -106
- AeroViz/tools/dataprinter.py +51 -51
- AeroViz/tools/datareader.py +38 -38
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/METADATA +5 -4
- AeroViz-0.1.4.dist-info/RECORD +112 -0
- AeroViz/plot/improve/__init__.py +0 -1
- AeroViz/plot/improve/improve.py +0 -240
- AeroViz/plot/optical/aethalometer.py +0 -77
- AeroViz/plot/templates/event_evolution.py +0 -65
- AeroViz/plot/templates/regression.py +0 -256
- AeroViz/plot/templates/scatter.py +0 -130
- AeroViz/plot/templates/templates.py +0 -398
- AeroViz/plot/utils/_decorator.py +0 -74
- AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
- AeroViz/rawDataReader/script/IGAC_ZM.py +0 -90
- AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
- AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
- AeroViz/rawDataReader/script/VOC_TH.py +0 -30
- AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
- AeroViz/rawDataReader/utils/__init__.py +0 -0
- AeroViz/rawDataReader/utils/config.py +0 -169
- AeroViz-0.1.3.dist-info/RECORD +0 -111
- /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
- /AeroViz/{config → rawDataReader/config}/__init__.py +0 -0
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
- {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0
AeroViz/tools/database.py
CHANGED
|
@@ -6,92 +6,90 @@ from pandas import read_csv, DataFrame
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def load_default_chemical_data():
    """Return a small demo table of PM2.5 chemical composition.

    According to the original note, the values come from the chemical
    composition of real atmospheric particles and are used to check whether
    radar charts make the differences between pollution scenarios visible.

    The six columns are the main chemical components of PM2.5; the four rows
    are the scenarios being compared:

    1) Whole sampling period (Total)
    2) Clean period (Clean)
    3) Transition period (Transition)
    4) Event period (Event)

    Returns
    -------
    DataFrame
        Indexed by scenario, one column per chemical component.
    """
    scenarios = ['Total', 'Clean', 'Transition', 'Event']
    components = ['Sulfate', 'Nitrate', 'OC', 'EC', 'Soil', 'SS']

    # One row per scenario; values are ordered like ``components``.
    values = [
        [0.01, 0.88, 0.07, 0.20, 0.20, 0.20],
        [0.34, 0.13, 0.95, 0.02, 0.10, 0.10],
        [0.02, 0.34, 0.04, 0.85, 0.07, 0.07],
        [0.71, 0.13, 0.05, 0.19, 0.01, 0.01],
    ]

    return DataFrame(values, index=scenarios, columns=components)
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
def load_dataset_by_url(dataset_name: Literal["Tunghai", "Taipei"] = "Tunghai",
                        timeout: float = 30) -> DataFrame:
    """Download one of the example data sets and return it as a DataFrame.

    Parameters
    ----------
    dataset_name : str
        Name of the data set. Only "Tunghai" currently has a download URL;
        any other name (including "Taipei") raises ``ValueError``.
    timeout : float, default 30
        Seconds to wait for the server before ``requests`` gives up. Without a
        timeout a stalled connection would block the caller indefinitely.

    Returns
    -------
    DataFrame
        The parsed CSV (first column as a date-parsed index), or an empty
        DataFrame when the server answers with a status other than 200.

    Raises
    ------
    ValueError
        If ``dataset_name`` has no registered URL.

    Notes
    -----
    Connection errors and timeouts raised by ``requests`` are not caught here.
    """
    dataset_uris = {
        "Tunghai": "https://raw.githubusercontent.com/alex870521/DataPlot/main/DataPlot/config/default_data.csv"
    }

    # Ensure the dataset name is valid before touching the network (or even importing requests)
    if dataset_name not in dataset_uris:
        raise ValueError(f"Dataset {dataset_name} is not supported.")

    # Imported lazily so `requests` is only required when a download is actually attempted
    import requests

    url = dataset_uris[dataset_name]

    # Make a request to the URL; the timeout keeps a dead connection from hanging forever
    response = requests.get(url, timeout=timeout)

    if response.status_code == 200:
        return read_csv(StringIO(response.text), na_values=('E', 'F', '-', '_', '#', '*'), index_col=0,
                        parse_dates=True, low_memory=False)

    print(f"Failed to download file: {response.status_code}")
    print(response.text)  # Print the response text for debugging
    return DataFrame()  # Return an empty DataFrame in case of failure
|
|
53
54
|
|
|
54
55
|
|
|
55
56
|
def load_dataset_local(dataset_name: Literal["Tunghai", "Taipei", "PNSD"] = "Tunghai") -> DataFrame:
    """Read one of the CSV data sets shipped in the package's ``data`` directory.

    Parameters
    ----------
    dataset_name : str
        "Tunghai" and "Taipei" both map to ``DEFAULT_DATA.csv``; "PNSD" maps
        to ``DEFAULT_PNSD_DATA.csv``.

    Returns
    -------
    DataFrame
        The parsed CSV with its first column as a date-parsed index.

    Raises
    ------
    ValueError
        If ``dataset_name`` is not one of the known names.
    FileNotFoundError
        If the mapped CSV file is missing on disk.
    """
    # The data directory sits two levels above this module's own directory entry.
    data_dir = Path(__file__).resolve().parent.parent / 'data'

    file_names = {
        "Tunghai": 'DEFAULT_DATA.csv',
        "Taipei": 'DEFAULT_DATA.csv',
        "PNSD": 'DEFAULT_PNSD_DATA.csv',
    }

    file_name = file_names.get(dataset_name)
    if file_name is None:
        raise ValueError(f"Dataset {dataset_name} is not supported.")

    file_path = data_dir / file_name
    if not file_path.exists():
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    missing_markers = ('E', 'F', '-', '_', '#', '*')
    return read_csv(file_path, na_values=missing_markers, index_col=0, parse_dates=True, low_memory=False)
|
|
74
76
|
|
|
75
77
|
|
|
76
78
|
class DataBase:
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
79
|
+
def __new__(cls, file_path: Path | str = None, load_data: bool = False, load_PSD: bool = False):
|
|
80
|
+
print(f'Loading:\033[96m Default Data\033[0m')
|
|
81
|
+
if file_path is not None:
|
|
82
|
+
file_path = Path(file_path)
|
|
83
|
+
if file_path.exists():
|
|
84
|
+
return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0, parse_dates=True,
|
|
85
|
+
low_memory=False)
|
|
84
86
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
return load_dataset_local("Tunghai")
|
|
87
|
+
if load_data ^ load_PSD:
|
|
88
|
+
return load_dataset_local("Tunghai") if load_data else load_dataset_local("PNSD")
|
|
88
89
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
else:
|
|
93
|
-
raise ValueError("Exactly one of 'load_data' or 'load_PSD' must be True.")
|
|
90
|
+
else:
|
|
91
|
+
raise ValueError("Exactly one of 'load_data' or 'load_PSD' must be True.")
|
|
94
92
|
|
|
95
93
|
|
|
96
94
|
if __name__ == '__main__':
    # "Tunghai" is a dataset name, not a file path. Passed positionally it was taken as `file_path`,
    # no such file was found, and DataBase raised ValueError because neither flag was set.
    # load_data=True is the flag that selects the bundled Tunghai data set.
    df = DataBase(load_data=True)
|
AeroViz/tools/dataclassifier.py
CHANGED
|
@@ -6,112 +6,112 @@ from pandas import concat, DataFrame, Series
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class Classifier:
    """Add classification columns (diurnal, state, season, season state) to a DataFrame.

    All ``classify*`` methods modify the DataFrame they receive in place and also
    return it. They read an ``Extinction`` column and the hour of the index.
    """

    # Season name -> (first timestamp, last timestamp), both inclusive when used as a .loc slice
    Seasons = {'2020-Summer': (datetime(2020, 9, 4), datetime(2020, 9, 21, 23)),
               '2020-Autumn': (datetime(2020, 9, 22), datetime(2020, 12, 29, 23)),
               '2020-Winter': (datetime(2020, 12, 30), datetime(2021, 3, 25, 23)),
               '2021-Spring': (datetime(2021, 3, 26), datetime(2021, 5, 6, 23))}

    # '2021-Summer': (datetime(2021, 5, 7), datetime(2021, 10, 16, 23))
    # '2021-Autumn': (datetime(2021, 10, 17), datetime(2021, 12, 31, 23))

    @classmethod
    def classify(cls, df) -> DataFrame:
        """Run every classifier in turn; adds Hour, Diurnal, State, Season and Season_State."""
        df = cls.classify_by_diurnal(df)
        df = cls.classify_by_state(df)
        df = cls.classify_by_season(df)
        df = cls.classify_by_season_state(df)

        return df

    @classmethod
    def classify_by_diurnal(cls, df):
        """Add 'Hour' (hour of the index) and 'Diurnal' ('Day' or 'Night')."""
        df['Hour'] = df.index.hour
        df['Diurnal'] = df['Hour'].apply(cls.map_diurnal)
        return df

    @classmethod
    def classify_by_state(cls, df):
        """Add 'State', using the 20 % / 80 % quantiles of Extinction over the whole frame."""
        df['State'] = df.apply(cls.map_state, axis=1, clean_bound=df.Extinction.quantile(0.2),
                               event_bound=df.Extinction.quantile(0.8))
        return df

    @classmethod
    def classify_by_season(cls, df):
        """Add 'Season' for rows whose index falls inside one of ``Seasons``.

        Uses label slicing on the index, so this presumably needs a sorted
        DatetimeIndex -- confirm against callers.
        """
        for season, (season_start, season_end) in cls.Seasons.items():
            df.loc[season_start:season_end, 'Season'] = season
        return df

    @classmethod
    def classify_by_season_state(cls, df):
        """Add 'Season_State': like 'State', but with quantile bounds computed per season.

        Each season's rows are classified with that season's own 20 % / 80 %
        Extinction quantiles. Rows without a season are left as None.

        The previous version assigned the whole column on every loop pass, so
        all rows ended up classified with the bounds of the last season only.
        """
        df['Season_State'] = None  # object column; filled season by season below

        for season, season_df in df.groupby('Season'):
            season_states = season_df.apply(cls.map_state, axis=1,
                                            clean_bound=season_df.Extinction.quantile(0.2),
                                            event_bound=season_df.Extinction.quantile(0.8))

            # groupby keeps the original row order inside a group, so a positional
            # assignment through a boolean mask is safe even with duplicate index labels
            in_season = (df['Season'] == season).to_numpy()
            df.loc[in_season, 'Season_State'] = season_states.to_numpy()

        return df

    @staticmethod
    def map_diurnal(hour):
        """Return 'Day' for hours 7 to 18 inclusive, otherwise 'Night'."""
        return 'Day' if 7 <= hour <= 18 else 'Night'

    @staticmethod
    def map_state(row, clean_bound, event_bound):
        """Return 'Event' (>= event_bound), 'Clean' (< clean_bound) or 'Transition' for a row's Extinction."""
        return 'Event' if row['Extinction'] >= event_bound else 'Clean' if row[
                                                                               'Extinction'] < clean_bound else 'Transition'
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
class DataClassifier(Classifier):
    """
    Group a DataFrame by a classification column and return group statistics.

    Calling the class does not create an instance: ``__new__`` returns a
    ``(mean_df, std_df)`` tuple, each with one row per group plus a 'Total' row.

    Notes
    -----
    First, create group then return the selected statistic method.
    If the 'by' does not exist in DataFrame, import the default DataFrame to help to sign the different group.

    Parameters
    ----------
    df : DataFrame
        Data to summarise.
    by : str
        Column to group on. If it is missing from ``df`` it is taken from
        ``Classifier.classify(df_support)``.
    df_support : DataFrame | Series, optional
        Data used to build the ``by`` column when ``df`` does not have it.
    cut_bins : Sequence, optional
        Bin edges; when given, groups are the ``pd.cut`` bins of ``by``.
    qcut : int, optional
        Number of quantile bins; used only when ``cut_bins`` is None.
    labels : list[str], optional
        Bin labels for ``cut_bins`` / ``qcut``.

    Raises
    ------
    KeyError
        If ``by`` is not a column of ``df`` and no ``df_support`` is given.
    """

    def __new__(cls,
                df: DataFrame,
                by: Literal["Hour", "State", "Season", "Season_State"] | str,
                df_support: DataFrame | Series = None,
                cut_bins: Sequence = None,
                qcut: int = None,
                labels: list[str] = None
                ) -> tuple[DataFrame, DataFrame]:
        group = cls._group_data(df, by, df_support, cut_bins, qcut, labels)
        return cls._compute_statistics(df, group)

    @staticmethod
    def _group_data(df, by, df_support, cut_bins, qcut, labels):
        """Return the groupby object for ``by``, binned first when ``cut_bins`` or ``qcut`` is given."""
        if by not in df.columns:
            if df_support is None:
                raise KeyError(f"Column '{by}' does not exist in DataFrame. "
                               f"Please provide a support DataFrame or Series to help classify.")
            df = concat([df, Classifier.classify(df_support.copy())[by]], axis=1)

        if cut_bins is not None:
            if labels is None or len(labels) == 0:
                # Default labels: each bin's left edge plus half the width of the FIRST bin
                # (bin centres when the bins are evenly spaced). Built element by element so
                # that plain lists and tuples work as well as numpy arrays.
                half_width = (cut_bins[1] - cut_bins[0]) / 2
                labels = [left_edge + half_width for left_edge in cut_bins[:-1]]

            # Shallow copy: the helper column is added to the copy, not to the caller's frame
            df = df.copy(deep=False)
            df[f'{by}_cut'] = pd.cut(df.loc[:, f'{by}'], cut_bins, labels=labels)
            return df.groupby(f'{by}_cut', observed=False)

        if qcut is not None:
            df = df.copy(deep=False)  # same reason as above
            df[f'{by}_qcut'] = pd.qcut(df.loc[:, f'{by}'], q=qcut, labels=labels)
            return df.groupby(f'{by}_qcut', observed=False)

        if by == 'Season':
            # A categorical key with observed=False keeps every season as a group, in
            # definition order, even when a season has no rows
            return df.groupby(pd.Categorical(df['Season'], categories=list(Classifier.Seasons)), observed=False)

        return df.groupby(by, observed=False)

    @staticmethod
    def _compute_statistics(df, group):
        """Return (mean, std) per group, each with an extra 'Total' row computed over all of ``df``."""
        mean_df = group.mean(numeric_only=True)
        mean_df.loc['Total'] = df.mean(numeric_only=True)

        std_df = group.std(numeric_only=True)
        std_df.loc['Total'] = df.std(numeric_only=True)

        return mean_df, std_df
|
AeroViz/tools/dataprinter.py
CHANGED
|
@@ -5,54 +5,54 @@ from tabulate import tabulate
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
def data_table(df: DataFrame,
               items: list[str] | str = None,
               times: list[datetime | Timestamp | str] = None,
               ):
    """
    This function cuts the DataFrame based on the given time periods and calculates the mean and standard deviation
    of the specified items for each period.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to be processed. It should have a DateTime index.
    items : list[str] | str, optional
        The columns of the DataFrame to be processed. It can be a list of column names or a single column name.
        By default, it is ['NO', 'NO2', 'NOx'].
    times : list[datetime | Timestamp | str] | datetime | Timestamp | str, optional
        The time points at which the DataFrame is cut. It can be a list or a single value; every entry is
        converted with ``Timestamp``. By default, it is ['2024-03-21', '2024-04-30'].

    Returns
    -------
    None
        This function doesn't return any value. It prints out a table showing the mean and standard deviation
        of the specified items for each time period.

    Notes
    -----
    ``n`` cut points give ``n + 1`` periods: everything up to and including the first point, each interval
    (previous point, next point], and everything after the last point.
    """
    # Apply the documented defaults; passing None used to crash in map(Timestamp, None)
    if items is None:
        items = ['NO', 'NO2', 'NOx']
    elif isinstance(items, str):
        items = [items]

    if times is None:
        times = ['2024-03-21', '2024-04-30']
    elif isinstance(times, (str, datetime)):
        # A single cut point (Timestamp is a datetime subclass) is wrapped into a list
        times = [times]

    times = sorted(map(Timestamp, times))

    results = []
    periods = []
    for i in range(len(times) + 1):
        if i == 0:
            df_period = df.loc[df.index <= times[i], items]
            period_label = f'Before {times[i].date()}'
        elif i == len(times):
            df_period = df.loc[df.index > times[i - 1], items]
            period_label = f'After {times[i - 1].date()}'
        else:
            df_period = df.loc[(df.index > times[i - 1]) & (df.index <= times[i]), items]
            period_label = f'{times[i - 1].date()} to {times[i].date()}'

        mean, std = df_period.mean().round(2).to_numpy(), df_period.std().round(2).to_numpy()

        results.append([f'{m} ± {s}' for m, s in zip(mean, std)])
        periods.append(period_label)

    result = DataFrame(results, columns=items, index=periods)

    print(tabulate(result, headers='keys', tablefmt='fancy_grid'))
|
AeroViz/tools/datareader.py
CHANGED
|
@@ -5,62 +5,62 @@ from pandas import read_csv, read_json, read_excel, DataFrame
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class FileHandler(ABC):
    """Common interface of the format-specific readers (.csv, .json, .xls, .xlsx)."""

    @abstractmethod
    def read_data(self, file_path: Path) -> DataFrame:
        """Read ``file_path`` and return its content as a DataFrame (implemented by subclasses)."""
        ...
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class CsvFileHandler(FileHandler):
    """Reader for ``.csv`` files."""

    def read_data(self, file_path: Path) -> DataFrame:
        """Read a CSV file; the first column becomes a date-parsed index."""
        # Cell values that are read as missing data
        missing_markers = ('E', 'F', '-', '_', '#', '*')
        csv_options = dict(na_values=missing_markers, index_col=0, parse_dates=True, low_memory=False)
        return read_csv(file_path, **csv_options)
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class JsonFileHandler(FileHandler):
    """Reader for ``.json`` files."""

    def read_data(self, file_path: Path) -> DataFrame:
        """Read a JSON file with pandas' default settings."""
        frame = read_json(file_path)
        return frame
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class ExcelFileHandler(FileHandler):
    """Reader for ``.xls`` and ``.xlsx`` files."""

    def read_data(self, file_path: Path) -> DataFrame:
        """Read an Excel file; the first column becomes a date-parsed index."""
        excel_options = dict(index_col=0, parse_dates=True)
        return read_excel(file_path, **excel_options)
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
class DataReaderFactory:
    """Look up the reader that handles a given file extension."""

    # Extension (lower case, with leading dot) -> shared handler instance
    _handler_mapping = {
        '.csv': CsvFileHandler(),
        '.json': JsonFileHandler(),
        '.xls': ExcelFileHandler(),
        '.xlsx': ExcelFileHandler(),
    }

    @staticmethod
    def create_handler(file_extension: str) -> FileHandler:
        """Return the handler registered for ``file_extension``.

        Raises
        ------
        ValueError
            If no handler is registered for the extension.
        """
        handler = DataReaderFactory._handler_mapping.get(file_extension)
        if handler is not None:
            return handler
        raise ValueError(f"Unsupported file format: {file_extension}")
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
class DataReader:
    """
    Read a data file, choosing the reader from its extension (.csv, .json, .xls, .xlsx).

    Calling the class does not create an instance: ``__new__`` returns the loaded DataFrame.

    Parameters
    ----------
    file_path (Path | str): The path of the file to be read.

    Returns
    -------
    pandas.DataFrame: data

    Examples
    --------
    >>> psd = DataReader(Path(...))
    """

    def __new__(cls, file_path: Path | str) -> DataFrame:
        path = Path(file_path)
        # The suffix is lower-cased before the lookup, so '.CSV' and '.csv' pick the same reader
        extension = path.suffix.lower()
        handler = DataReaderFactory.create_handler(extension)
        return handler.read_data(path)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: AeroViz
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: Aerosol science
|
|
5
5
|
Home-page: https://github.com/Alex870521/AeroViz
|
|
6
6
|
Author: alex
|
|
@@ -43,9 +43,10 @@ Requires-Dist: tabulate
|
|
|
43
43
|
|
|
44
44
|
## <div align="center">Key Features</div>
|
|
45
45
|
|
|
46
|
-
*
|
|
47
|
-
*
|
|
48
|
-
|
|
46
|
+
* Data Reading: Supports reading multiple aerosol data formats.
|
|
47
|
+
* Data Visualization: Offers various charts and graphs, including time series plots, distribution plots, and correlation
|
|
48
|
+
matrices.
|
|
49
|
+
* Data Processing: Includes multiple data processing tools, such as linear regression and Mie theory calculations.
|
|
49
50
|
|
|
50
51
|
|
|
51
52
|
## <div align="center">Installation</div>
|