AeroViz 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of AeroViz has been flagged as potentially problematic.

Files changed (121)
  1. AeroViz/__init__.py +7 -5
  2. AeroViz/{config → data}/DEFAULT_DATA.csv +1 -1
  3. AeroViz/dataProcess/Chemistry/__init__.py +40 -40
  4. AeroViz/dataProcess/Chemistry/_calculate.py +15 -15
  5. AeroViz/dataProcess/Chemistry/_isoropia.py +72 -68
  6. AeroViz/dataProcess/Chemistry/_mass_volume.py +158 -161
  7. AeroViz/dataProcess/Chemistry/_ocec.py +109 -109
  8. AeroViz/dataProcess/Chemistry/_partition.py +19 -18
  9. AeroViz/dataProcess/Chemistry/_teom.py +9 -11
  10. AeroViz/dataProcess/Chemistry/isrpia.cnf +21 -0
  11. AeroViz/dataProcess/Optical/Angstrom_exponent.py +20 -0
  12. AeroViz/dataProcess/Optical/_IMPROVE.py +40 -41
  13. AeroViz/dataProcess/Optical/__init__.py +29 -44
  14. AeroViz/dataProcess/Optical/_absorption.py +21 -47
  15. AeroViz/dataProcess/Optical/_extinction.py +31 -25
  16. AeroViz/dataProcess/Optical/_mie.py +5 -7
  17. AeroViz/dataProcess/Optical/_mie_sd.py +89 -90
  18. AeroViz/dataProcess/Optical/_scattering.py +19 -20
  19. AeroViz/dataProcess/SizeDistr/__init__.py +39 -39
  20. AeroViz/dataProcess/SizeDistr/__merge.py +159 -158
  21. AeroViz/dataProcess/SizeDistr/_merge.py +155 -154
  22. AeroViz/dataProcess/SizeDistr/_merge_v1.py +162 -161
  23. AeroViz/dataProcess/SizeDistr/_merge_v2.py +153 -152
  24. AeroViz/dataProcess/SizeDistr/_merge_v3.py +327 -327
  25. AeroViz/dataProcess/SizeDistr/_merge_v4.py +273 -275
  26. AeroViz/dataProcess/SizeDistr/_size_distr.py +51 -51
  27. AeroViz/dataProcess/VOC/__init__.py +9 -9
  28. AeroViz/dataProcess/VOC/_potential_par.py +53 -55
  29. AeroViz/dataProcess/__init__.py +28 -6
  30. AeroViz/dataProcess/core/__init__.py +59 -65
  31. AeroViz/plot/__init__.py +7 -2
  32. AeroViz/plot/bar.py +126 -0
  33. AeroViz/plot/box.py +69 -0
  34. AeroViz/plot/distribution/distribution.py +421 -427
  35. AeroViz/plot/meteorology/meteorology.py +240 -292
  36. AeroViz/plot/optical/__init__.py +0 -1
  37. AeroViz/plot/optical/optical.py +230 -230
  38. AeroViz/plot/pie.py +198 -0
  39. AeroViz/plot/regression.py +196 -0
  40. AeroViz/plot/scatter.py +165 -0
  41. AeroViz/plot/templates/__init__.py +2 -4
  42. AeroViz/plot/templates/ammonium_rich.py +34 -0
  43. AeroViz/plot/templates/contour.py +25 -25
  44. AeroViz/plot/templates/corr_matrix.py +86 -93
  45. AeroViz/plot/templates/diurnal_pattern.py +28 -26
  46. AeroViz/plot/templates/koschmieder.py +59 -123
  47. AeroViz/plot/templates/metal_heatmap.py +135 -37
  48. AeroViz/plot/timeseries/__init__.py +1 -0
  49. AeroViz/plot/timeseries/template.py +47 -0
  50. AeroViz/plot/timeseries/timeseries.py +324 -264
  51. AeroViz/plot/utils/__init__.py +2 -1
  52. AeroViz/plot/utils/_color.py +57 -57
  53. AeroViz/plot/utils/_unit.py +48 -48
  54. AeroViz/plot/utils/plt_utils.py +92 -0
  55. AeroViz/plot/utils/sklearn_utils.py +49 -0
  56. AeroViz/plot/utils/units.json +5 -0
  57. AeroViz/plot/violin.py +80 -0
  58. AeroViz/process/__init__.py +17 -17
  59. AeroViz/process/core/DataProc.py +9 -9
  60. AeroViz/process/core/SizeDist.py +81 -81
  61. AeroViz/process/method/PyMieScatt_update.py +488 -488
  62. AeroViz/process/method/mie_theory.py +231 -229
  63. AeroViz/process/method/prop.py +40 -40
  64. AeroViz/process/script/AbstractDistCalc.py +103 -103
  65. AeroViz/process/script/Chemical.py +168 -167
  66. AeroViz/process/script/IMPACT.py +40 -40
  67. AeroViz/process/script/IMPROVE.py +152 -152
  68. AeroViz/process/script/Others.py +45 -45
  69. AeroViz/process/script/PSD.py +26 -26
  70. AeroViz/process/script/PSD_dry.py +69 -70
  71. AeroViz/process/script/retrieve_RI.py +50 -51
  72. AeroViz/rawDataReader/__init__.py +53 -58
  73. AeroViz/rawDataReader/config/supported_instruments.py +155 -0
  74. AeroViz/rawDataReader/core/__init__.py +233 -356
  75. AeroViz/rawDataReader/script/AE33.py +17 -18
  76. AeroViz/rawDataReader/script/AE43.py +18 -21
  77. AeroViz/rawDataReader/script/APS_3321.py +30 -30
  78. AeroViz/rawDataReader/script/Aurora.py +23 -24
  79. AeroViz/rawDataReader/script/BC1054.py +36 -40
  80. AeroViz/rawDataReader/script/EPA_vertical.py +37 -9
  81. AeroViz/rawDataReader/script/GRIMM.py +16 -23
  82. AeroViz/rawDataReader/script/IGAC.py +90 -0
  83. AeroViz/rawDataReader/script/MA350.py +32 -39
  84. AeroViz/rawDataReader/script/Minion.py +103 -0
  85. AeroViz/rawDataReader/script/NEPH.py +69 -74
  86. AeroViz/rawDataReader/script/SMPS_TH.py +25 -25
  87. AeroViz/rawDataReader/script/SMPS_aim11.py +32 -32
  88. AeroViz/rawDataReader/script/SMPS_genr.py +31 -31
  89. AeroViz/rawDataReader/script/Sunset_OCEC.py +60 -0
  90. AeroViz/rawDataReader/script/TEOM.py +30 -28
  91. AeroViz/rawDataReader/script/Table.py +13 -14
  92. AeroViz/rawDataReader/script/VOC.py +26 -0
  93. AeroViz/rawDataReader/script/__init__.py +18 -20
  94. AeroViz/tools/database.py +64 -66
  95. AeroViz/tools/dataclassifier.py +106 -106
  96. AeroViz/tools/dataprinter.py +51 -51
  97. AeroViz/tools/datareader.py +38 -38
  98. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/METADATA +5 -4
  99. AeroViz-0.1.4.dist-info/RECORD +112 -0
  100. AeroViz/plot/improve/__init__.py +0 -1
  101. AeroViz/plot/improve/improve.py +0 -240
  102. AeroViz/plot/optical/aethalometer.py +0 -77
  103. AeroViz/plot/templates/event_evolution.py +0 -65
  104. AeroViz/plot/templates/regression.py +0 -256
  105. AeroViz/plot/templates/scatter.py +0 -130
  106. AeroViz/plot/templates/templates.py +0 -398
  107. AeroViz/plot/utils/_decorator.py +0 -74
  108. AeroViz/rawDataReader/script/IGAC_TH.py +0 -104
  109. AeroViz/rawDataReader/script/IGAC_ZM.py +0 -90
  110. AeroViz/rawDataReader/script/OCEC_LCRES.py +0 -34
  111. AeroViz/rawDataReader/script/OCEC_RES.py +0 -28
  112. AeroViz/rawDataReader/script/VOC_TH.py +0 -30
  113. AeroViz/rawDataReader/script/VOC_ZM.py +0 -37
  114. AeroViz/rawDataReader/utils/__init__.py +0 -0
  115. AeroViz/rawDataReader/utils/config.py +0 -169
  116. AeroViz-0.1.3.dist-info/RECORD +0 -111
  117. /AeroViz/{config → data}/DEFAULT_PNSD_DATA.csv +0 -0
  118. /AeroViz/{config → rawDataReader/config}/__init__.py +0 -0
  119. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/LICENSE +0 -0
  120. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/WHEEL +0 -0
  121. {AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/top_level.txt +0 -0
AeroViz/tools/database.py CHANGED
@@ -6,92 +6,90 @@ from pandas import read_csv, DataFrame
 
 
 def load_default_chemical_data():
-    # The following data is from the chemical composition of real atmospheric particles.
-    #
-    # The six main chemical components that comprised PM2.5 are listed in the data.
-    # Here, we test the radar charts to see if we can clearly identify how the
-    # chemical components vary between the three pollutant scenarios:
-    #
-    # 1) Whole sampling period (Total)
-    # 2) Clean period (Clean)
-    # 3) Transition period (Transition)
-    # 4) Event period (Event)
-
-    data = {
-        'Sulfate': [0.01, 0.34, 0.02, 0.71],
-        'Nitrate': [0.88, 0.13, 0.34, 0.13],
-        'OC': [0.07, 0.95, 0.04, 0.05],
-        'EC': [0.20, 0.02, 0.85, 0.19],
-        'Soil': [0.20, 0.10, 0.07, 0.01],
-        'SS': [0.20, 0.10, 0.07, 0.01]
-    }
-
-    return DataFrame(data, index=['Total', 'Clean', 'Transition', 'Event'])
+    # The following data is from the chemical composition of real atmospheric particles.
+    #
+    # The six main chemical components that comprised PM2.5 are listed in the data.
+    # Here, we test the radar charts to see if we can clearly identify how the
+    # chemical components vary between the three pollutant scenarios:
+    #
+    # 1) Whole sampling period (Total)
+    # 2) Clean period (Clean)
+    # 3) Transition period (Transition)
+    # 4) Event period (Event)
+
+    data = {
+        'Sulfate': [0.01, 0.34, 0.02, 0.71],
+        'Nitrate': [0.88, 0.13, 0.34, 0.13],
+        'OC': [0.07, 0.95, 0.04, 0.05],
+        'EC': [0.20, 0.02, 0.85, 0.19],
+        'Soil': [0.20, 0.10, 0.07, 0.01],
+        'SS': [0.20, 0.10, 0.07, 0.01]
+    }
+
+    return DataFrame(data, index=['Total', 'Clean', 'Transition', 'Event'])
 
 
 def load_dataset_by_url(dataset_name: Literal["Tunghai", "Taipei"] = "Tunghai") -> DataFrame:
-    import requests
-    dataset_uris = {
-        "Tunghai": "https://raw.githubusercontent.com/alex870521/DataPlot/main/DataPlot/config/default_data.csv"
-    }
+    import requests
+    dataset_uris = {
+        "Tunghai": "https://raw.githubusercontent.com/alex870521/DataPlot/main/DataPlot/config/default_data.csv"
+    }
 
-    # Ensure the dataset name is valid
-    if dataset_name not in dataset_uris:
-        raise ValueError(f"Dataset {dataset_name} is not supported.")
+    # Ensure the dataset name is valid
+    if dataset_name not in dataset_uris:
+        raise ValueError(f"Dataset {dataset_name} is not supported.")
 
-    url = dataset_uris[dataset_name]
+    url = dataset_uris[dataset_name]
 
-    # Make a request to the URL
-    response = requests.get(url)
+    # Make a request to the URL
+    response = requests.get(url)
 
-    if response.status_code == 200:
-        return read_csv(StringIO(response.text), parse_dates=['Time'], index_col='Time')
-    else:
-        print(f"Failed to download file: {response.status_code}")
-        print(response.text)  # Print the response text for debugging
-        return DataFrame()  # Return an empty DataFrame in case of failure
+    if response.status_code == 200:
+        return read_csv(StringIO(response.text), na_values=('E', 'F', '-', '_', '#', '*'), index_col=0,
+                        parse_dates=True, low_memory=False)
+    else:
+        print(f"Failed to download file: {response.status_code}")
+        print(response.text)  # Print the response text for debugging
+        return DataFrame()  # Return an empty DataFrame in case of failure
 
 
 def load_dataset_local(dataset_name: Literal["Tunghai", "Taipei", "PNSD"] = "Tunghai") -> DataFrame:
-    base_dir = Path(__file__).resolve().parent.parent
-    config_dir = base_dir / 'config'
+    base_dir = Path(__file__).resolve().parent.parent
+    config_dir = base_dir / 'data'
 
-    dataset_paths = {
-        "Tunghai": config_dir / 'DEFAULT_DATA.csv',
-        "Taipei": config_dir / 'DEFAULT_DATA.csv',
-        "PNSD": config_dir / 'DEFAULT_PNSD_DATA.csv'
-    }
+    dataset_paths = {
+        "Tunghai": config_dir / 'DEFAULT_DATA.csv',
+        "Taipei": config_dir / 'DEFAULT_DATA.csv',
+        "PNSD": config_dir / 'DEFAULT_PNSD_DATA.csv'
+    }
 
-    if dataset_name not in dataset_paths:
-        raise ValueError(f"Dataset {dataset_name} is not supported.")
+    if dataset_name not in dataset_paths:
+        raise ValueError(f"Dataset {dataset_name} is not supported.")
 
-    file_path = dataset_paths[dataset_name]
+    file_path = dataset_paths[dataset_name]
 
-    if not file_path.exists():
-        raise FileNotFoundError(f"The file {file_path} does not exist.")
+    if not file_path.exists():
+        raise FileNotFoundError(f"The file {file_path} does not exist.")
 
-    return read_csv(file_path, parse_dates=['Time'], index_col='Time', na_values=('-', 'E', 'F'), low_memory=False)
+    return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0, parse_dates=True,
+                    low_memory=False)
 
 
 class DataBase:
-    def __new__(cls, file_path: Path | str = None, load_data: bool = False, load_PSD: bool = False):
-        print(f'\t\t \033[96m --- Loading Data --- \033[0m')
-        if file_path is not None:
-            file_path = Path(file_path)
-            if file_path.exists():
-                return read_csv(file_path, parse_dates=['Time'], index_col='Time', na_values=('-', 'E', 'F'),
-                                low_memory=False)
+    def __new__(cls, file_path: Path | str = None, load_data: bool = False, load_PSD: bool = False):
+        print(f'Loading:\033[96m Default Data\033[0m')
+        if file_path is not None:
+            file_path = Path(file_path)
+            if file_path.exists():
+                return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0, parse_dates=True,
+                                low_memory=False)
 
-        if load_data ^ load_PSD:
-            if load_data:
-                return load_dataset_local("Tunghai")
+        if load_data ^ load_PSD:
+            return load_dataset_local("Tunghai") if load_data else load_dataset_local("PNSD")
 
-            elif load_PSD:
-                return load_dataset_local("PNSD")
-
-        else:
-            raise ValueError("Exactly one of 'load_data' or 'load_PSD' must be True.")
+        else:
+            raise ValueError("Exactly one of 'load_data' or 'load_PSD' must be True.")
 
 
 if __name__ == '__main__':
-    df = DataBase("Tunghai")
+    df = DataBase("Tunghai")
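
The practical effect of this change is that every loader in database.py now shares one CSV-parsing convention: the first column becomes the index (index_col=0, parse_dates=True) instead of a hard-coded 'Time' column, the na_values set is widened to ('E', 'F', '-', '_', '#', '*'), and the bundled files move from AeroViz/config/ to AeroViz/data/. A minimal usage sketch, assuming AeroViz 0.1.4 is installed and the module is importable under the file path shown above; the flag behaviour follows the simplified XOR branch in the diff:

from AeroViz.tools.database import DataBase, load_dataset_local

# Bundled Tunghai dataset, now resolved under AeroViz/data/
df = load_dataset_local("Tunghai")

# With no file_path, DataBase requires exactly one of load_data / load_PSD;
# passing both or neither falls through to the ValueError branch.
chem = DataBase(load_data=True)  # -> DEFAULT_DATA.csv
pnsd = DataBase(load_PSD=True)   # -> DEFAULT_PNSD_DATA.csv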
AeroViz/tools/dataclassifier.py CHANGED
@@ -6,112 +6,112 @@ from pandas import concat, DataFrame, Series
 
 
 class Classifier:
-    Seasons = {'2020-Summer': (datetime(2020, 9, 4), datetime(2020, 9, 21, 23)),
-               '2020-Autumn': (datetime(2020, 9, 22), datetime(2020, 12, 29, 23)),
-               '2020-Winter': (datetime(2020, 12, 30), datetime(2021, 3, 25, 23)),
-               '2021-Spring': (datetime(2021, 3, 26), datetime(2021, 5, 6, 23))}
-
-    # '2021-Summer': (datetime(2021, 5, 7), datetime(2021, 10, 16, 23))
-    # '2021-Autumn': (datetime(2021, 10, 17), datetime(2021, 12, 31, 23))
-
-    @classmethod
-    def classify(cls, df) -> DataFrame:
-        df = cls.classify_by_diurnal(df)
-        df = cls.classify_by_state(df)
-        df = cls.classify_by_season(df)
-        df = cls.classify_by_season_state(df)
-
-        return df
-
-    @classmethod
-    def classify_by_diurnal(cls, df):
-        df['Hour'] = df.index.hour
-        df['Diurnal'] = df['Hour'].apply(cls.map_diurnal)
-        return df
-
-    @classmethod
-    def classify_by_state(cls, df):
-        df['State'] = df.apply(cls.map_state, axis=1, clean_bound=df.Extinction.quantile(0.2),
-                               event_bound=df.Extinction.quantile(0.8))
-        return df
-
-    @classmethod
-    def classify_by_season(cls, df):
-        for season, (season_start, season_end) in cls.Seasons.items():
-            df.loc[season_start:season_end, 'Season'] = season
-        return df
-
-    @classmethod
-    def classify_by_season_state(cls, df):
-        for _grp, _df in df.groupby('Season'):
-            df['Season_State'] = df.apply(cls.map_state, axis=1, clean_bound=_df.Extinction.quantile(0.2),
-                                          event_bound=_df.Extinction.quantile(0.8))
-        return df
-
-    @staticmethod
-    def map_diurnal(hour):
-        return 'Day' if 7 <= hour <= 18 else 'Night'
-
-    @staticmethod
-    def map_state(row, clean_bound, event_bound):
-        return 'Event' if row['Extinction'] >= event_bound else 'Clean' if row[
-            'Extinction'] < clean_bound else 'Transition'
+    Seasons = {'2020-Summer': (datetime(2020, 9, 4), datetime(2020, 9, 21, 23)),
+               '2020-Autumn': (datetime(2020, 9, 22), datetime(2020, 12, 29, 23)),
+               '2020-Winter': (datetime(2020, 12, 30), datetime(2021, 3, 25, 23)),
+               '2021-Spring': (datetime(2021, 3, 26), datetime(2021, 5, 6, 23))}
+
+    # '2021-Summer': (datetime(2021, 5, 7), datetime(2021, 10, 16, 23))
+    # '2021-Autumn': (datetime(2021, 10, 17), datetime(2021, 12, 31, 23))
+
+    @classmethod
+    def classify(cls, df) -> DataFrame:
+        df = cls.classify_by_diurnal(df)
+        df = cls.classify_by_state(df)
+        df = cls.classify_by_season(df)
+        df = cls.classify_by_season_state(df)
+
+        return df
+
+    @classmethod
+    def classify_by_diurnal(cls, df):
+        df['Hour'] = df.index.hour
+        df['Diurnal'] = df['Hour'].apply(cls.map_diurnal)
+        return df
+
+    @classmethod
+    def classify_by_state(cls, df):
+        df['State'] = df.apply(cls.map_state, axis=1, clean_bound=df.Extinction.quantile(0.2),
+                               event_bound=df.Extinction.quantile(0.8))
+        return df
+
+    @classmethod
+    def classify_by_season(cls, df):
+        for season, (season_start, season_end) in cls.Seasons.items():
+            df.loc[season_start:season_end, 'Season'] = season
+        return df
+
+    @classmethod
+    def classify_by_season_state(cls, df):
+        for _grp, _df in df.groupby('Season'):
+            df['Season_State'] = df.apply(cls.map_state, axis=1, clean_bound=_df.Extinction.quantile(0.2),
+                                          event_bound=_df.Extinction.quantile(0.8))
+        return df
+
+    @staticmethod
+    def map_diurnal(hour):
+        return 'Day' if 7 <= hour <= 18 else 'Night'
+
+    @staticmethod
+    def map_state(row, clean_bound, event_bound):
+        return 'Event' if row['Extinction'] >= event_bound else 'Clean' if row[
+            'Extinction'] < clean_bound else 'Transition'
 
 
 class DataClassifier(Classifier):
-    """
-    Notes
-    -----
-    First, create group then return the selected statistic method.
-    If the 'by' does not exist in DataFrame, import the default DataFrame to help to sign the different group.
-
-    """
-
-    def __new__(cls,
-                df: DataFrame,
-                by: Literal["Hour", "State", "Season", "Season_state"] | str,
-                df_support: DataFrame | Series = None,
-                cut_bins: Sequence = None,
-                qcut: int = None,
-                labels: list[str] = None
-                ) -> tuple[DataFrame, DataFrame]:
-        group = cls._group_data(df, by, df_support, cut_bins, qcut, labels)
-        return cls._compute_statistics(df, group)
-
-    @staticmethod
-    def _group_data(df, by, df_support, cut_bins, qcut, labels):
-        if by not in df.columns:
-            if df_support is None:
-                raise KeyError(f"Column '{by}' does not exist in DataFrame."
-                               f"Please provide a support DataFrame or Series to help classify.")
-            else:
-                df = concat([df, Classifier.classify(df_support.copy())[by]], axis=1)
-
-        if cut_bins is not None:
-            df[f'{by}_cut'] = pd.cut(df.loc[:, f'{by}'], cut_bins,
-                                     labels=labels or (cut_bins + (cut_bins[1] - cut_bins[0]) / 2)[:-1])
-            return df.groupby(f'{by}_cut', observed=False)
-
-        elif qcut is not None:
-            df[f'{by}_qcut'] = pd.qcut(df.loc[:, f'{by}'], q=qcut, labels=labels)
-            return df.groupby(f'{by}_qcut', observed=False)
-
-        else:
-            if by == 'State':
-                return df.groupby(by)
-
-            elif by == 'Season':
-                return df.groupby(pd.Categorical(df['Season'], categories=['2020-Summer', '2020-Autumn', '2020-Winter',
-                                                                           '2021-Spring']), observed=False)
-            else:
-                return df.groupby(by, observed=False)
-
-    @staticmethod
-    def _compute_statistics(df, group):
-        mean_df = group.mean(numeric_only=True)
-        mean_df.loc['Total'] = df.mean(numeric_only=True)
-
-        std_df = group.std(numeric_only=True)
-        std_df.loc['Total'] = df.std(numeric_only=True)
-
-        return mean_df, std_df
+    """
+    Notes
+    -----
+    First, create group then return the selected statistic method.
+    If the 'by' does not exist in DataFrame, import the default DataFrame to help to sign the different group.
+
+    """
+
+    def __new__(cls,
+                df: DataFrame,
+                by: Literal["Hour", "State", "Season", "Season_state"] | str,
+                df_support: DataFrame | Series = None,
+                cut_bins: Sequence = None,
+                qcut: int = None,
+                labels: list[str] = None
+                ) -> tuple[DataFrame, DataFrame]:
+        group = cls._group_data(df, by, df_support, cut_bins, qcut, labels)
+        return cls._compute_statistics(df, group)
+
+    @staticmethod
+    def _group_data(df, by, df_support, cut_bins, qcut, labels):
+        if by not in df.columns:
+            if df_support is None:
+                raise KeyError(f"Column '{by}' does not exist in DataFrame."
+                               f"Please provide a support DataFrame or Series to help classify.")
+            else:
+                df = concat([df, Classifier.classify(df_support.copy())[by]], axis=1)
+
+        if cut_bins is not None:
+            df[f'{by}_cut'] = pd.cut(df.loc[:, f'{by}'], cut_bins,
+                                     labels=labels or (cut_bins + (cut_bins[1] - cut_bins[0]) / 2)[:-1])
+            return df.groupby(f'{by}_cut', observed=False)
+
+        elif qcut is not None:
+            df[f'{by}_qcut'] = pd.qcut(df.loc[:, f'{by}'], q=qcut, labels=labels)
+            return df.groupby(f'{by}_qcut', observed=False)
+
+        else:
+            if by == 'State':
+                return df.groupby(by)
+
+            elif by == 'Season':
+                return df.groupby(pd.Categorical(df['Season'], categories=['2020-Summer', '2020-Autumn', '2020-Winter',
+                                                                           '2021-Spring']), observed=False)
+            else:
+                return df.groupby(by, observed=False)
+
+    @staticmethod
+    def _compute_statistics(df, group):
+        mean_df = group.mean(numeric_only=True)
+        mean_df.loc['Total'] = df.mean(numeric_only=True)
+
+        std_df = group.std(numeric_only=True)
+        std_df.loc['Total'] = df.std(numeric_only=True)
+
+        return mean_df, std_df
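
DataClassifier returns a (mean, std) pair of DataFrames for the requested grouping; when the grouping column is absent, a support frame containing 'Extinction' is classified on the fly via Classifier, using the 0.2/0.8 quantiles as the Clean/Event bounds. A sketch with hypothetical data; the DataFrame and its values are invented for illustration, while the column name and quantile behaviour come from the code above:

import numpy as np
import pandas as pd

from AeroViz.tools.dataclassifier import DataClassifier

# Hypothetical hourly record falling inside the 2020-Summer/Autumn windows
idx = pd.date_range('2020-09-04', periods=500, freq='h')
df = pd.DataFrame({'Extinction': np.random.lognormal(4, 0.5, 500),
                   'PM2.5': np.random.uniform(5, 80, 500)}, index=idx)

# 'State' is not a column of df, so df doubles as the support frame;
# rows are labelled Clean / Transition / Event from the Extinction quantiles.
mean_df, std_df = DataClassifier(df, by='State', df_support=df)
print(mean_df)  # per-state means plus a 'Total' row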
AeroViz/tools/dataprinter.py CHANGED
@@ -5,54 +5,54 @@ from tabulate import tabulate
 
 
 def data_table(df: DataFrame,
-               items: list[str] | str = None,
-               times: list[datetime | Timestamp | str] = None,
-               ):
-    """
-    This function cuts the DataFrame based on the given time periods and calculates the mean and standard deviation
-    of the specified items for each period.
-
-    Parameters
-    ----------
-    df : pd.DataFrame
-        The DataFrame to be processed. It should have a DateTime index.
-    items : list[str] | str, optional
-        The columns of the DataFrame to be processed. It can be a list of column names or a single column name.
-        By default, it is ['NO', 'NO2', 'NOx'].
-    times : list[str] | str, optional
-        The time periods to cut the DataFrame. It can be a list of time strings or a single time string.
-        Each time string should be in the format of 'YYYY-MM-DD'. By default, it is ['2024-03-21', '2024-04-30'].
-
-    Returns
-    -------
-    None
-        This function doesn't return any value. It prints out a table showing the mean and standard deviation
-        of the specified items for each time period.
-    """
-    items = [items] if isinstance(items, str) else items
-    times = [times] if isinstance(times, str) else times
-    times = list(map(Timestamp, times))
-
-    times.sort()
-
-    results = []
-    periods = []
-    for i in range(len(times) + 1):
-        if i == 0:
-            df_period = df.loc[df.index <= times[i], items]
-            period_label = f'Before {times[i].date()}'
-        elif i == len(times):
-            df_period = df.loc[df.index > times[i - 1], items]
-            period_label = f'After {times[i - 1].date()}'
-        else:
-            df_period = df.loc[(df.index > times[i - 1]) & (df.index <= times[i]), items]
-            period_label = f'{times[i - 1].date()} to {times[i].date()}'
-
-        mean, std = df_period.mean().round(2).to_numpy(), df_period.std().round(2).to_numpy()
-
-        results.append([f'{m} ± {s}' for m, s in zip(mean, std)])
-        periods.append(period_label)
-
-    result = DataFrame(results, columns=items, index=periods)
-
-    print(tabulate(result, headers='keys', tablefmt='fancy_grid'))
+               items: list[str] | str = None,
+               times: list[datetime | Timestamp | str] = None,
+               ):
+    """
+    This function cuts the DataFrame based on the given time periods and calculates the mean and standard deviation
+    of the specified items for each period.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        The DataFrame to be processed. It should have a DateTime index.
+    items : list[str] | str, optional
+        The columns of the DataFrame to be processed. It can be a list of column names or a single column name.
+        By default, it is ['NO', 'NO2', 'NOx'].
+    times : list[str] | str, optional
+        The time periods to cut the DataFrame. It can be a list of time strings or a single time string.
+        Each time string should be in the format of 'YYYY-MM-DD'. By default, it is ['2024-03-21', '2024-04-30'].
+
+    Returns
+    -------
+    None
+        This function doesn't return any value. It prints out a table showing the mean and standard deviation
+        of the specified items for each time period.
+    """
+    items = [items] if isinstance(items, str) else items
+    times = [times] if isinstance(times, str) else times
+    times = list(map(Timestamp, times))
+
+    times.sort()
+
+    results = []
+    periods = []
+    for i in range(len(times) + 1):
+        if i == 0:
+            df_period = df.loc[df.index <= times[i], items]
+            period_label = f'Before {times[i].date()}'
+        elif i == len(times):
+            df_period = df.loc[df.index > times[i - 1], items]
+            period_label = f'After {times[i - 1].date()}'
+        else:
+            df_period = df.loc[(df.index > times[i - 1]) & (df.index <= times[i]), items]
+            period_label = f'{times[i - 1].date()} to {times[i].date()}'
+
+        mean, std = df_period.mean().round(2).to_numpy(), df_period.std().round(2).to_numpy()
+
+        results.append([f'{m} ± {s}' for m, s in zip(mean, std)])
+        periods.append(period_label)
+
+    result = DataFrame(results, columns=items, index=periods)
+
+    print(tabulate(result, headers='keys', tablefmt='fancy_grid'))
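
data_table splits a DateTime-indexed frame at the supplied cut dates and prints a mean ± std summary per period via tabulate. A short sketch on hypothetical monitor data; note that despite the docstring's stated defaults, items and times default to None in the signature and must be supplied:

import numpy as np
import pandas as pd

from AeroViz.tools.dataprinter import data_table

# Hypothetical NO/NO2/NOx record spanning the two cut dates
idx = pd.date_range('2024-03-01', '2024-05-31', freq='h')
df = pd.DataFrame(np.random.uniform(0, 50, (len(idx), 3)),
                  index=idx, columns=['NO', 'NO2', 'NOx'])

# Prints three rows: before 2024-03-21, 2024-03-21 to 2024-04-30, after 2024-04-30
data_table(df, items=['NO', 'NO2', 'NOx'], times=['2024-03-21', '2024-04-30'])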
AeroViz/tools/datareader.py CHANGED
@@ -5,62 +5,62 @@ from pandas import read_csv, read_json, read_excel, DataFrame
 
 
 class FileHandler(ABC):
-    """ An abstract base class for reading data files with different extensions (.csv, .json, .xls, .xlsx). """
+    """ An abstract base class for reading data files with different extensions (.csv, .json, .xls, .xlsx). """
 
-    @abstractmethod
-    def read_data(self, file_path: Path) -> DataFrame:
-        pass
+    @abstractmethod
+    def read_data(self, file_path: Path) -> DataFrame:
+        pass
 
 
 class CsvFileHandler(FileHandler):
-    def read_data(self, file_path: Path) -> DataFrame:
-        return read_csv(file_path, na_values=('-', 'E', 'F', '#', '*'),
-                        parse_dates=['Time'], index_col='Time', low_memory=False)
+    def read_data(self, file_path: Path) -> DataFrame:
+        return read_csv(file_path, na_values=('E', 'F', '-', '_', '#', '*'), index_col=0, parse_dates=True,
+                        low_memory=False)
 
 
 class JsonFileHandler(FileHandler):
-    def read_data(self, file_path: Path) -> DataFrame:
-        return read_json(file_path)
+    def read_data(self, file_path: Path) -> DataFrame:
+        return read_json(file_path)
 
 
 class ExcelFileHandler(FileHandler):
-    def read_data(self, file_path: Path) -> DataFrame:
-        return read_excel(file_path, parse_dates=['Time'])
+    def read_data(self, file_path: Path) -> DataFrame:
+        return read_excel(file_path, index_col=0, parse_dates=True, )
 
 
 class DataReaderFactory:
-    _handler_mapping = {
-        '.csv': CsvFileHandler(),
-        '.json': JsonFileHandler(),
-        '.xls': ExcelFileHandler(),
-        '.xlsx': ExcelFileHandler(),
-    }
+    _handler_mapping = {
+        '.csv': CsvFileHandler(),
+        '.json': JsonFileHandler(),
+        '.xls': ExcelFileHandler(),
+        '.xlsx': ExcelFileHandler(),
+    }
 
-    @staticmethod
-    def create_handler(file_extension: str) -> FileHandler:
-        reader_class = DataReaderFactory._handler_mapping.get(file_extension)
-        if reader_class is None:
-            raise ValueError(f"Unsupported file format: {file_extension}")
-        return reader_class
+    @staticmethod
+    def create_handler(file_extension: str) -> FileHandler:
+        reader_class = DataReaderFactory._handler_mapping.get(file_extension)
+        if reader_class is None:
+            raise ValueError(f"Unsupported file format: {file_extension}")
+        return reader_class
 
 
 class DataReader:
-    """
-    A class for reading data files with different extensions (.csv, .json, .xls, .xlsx).
+    """
+    A class for reading data files with different extensions (.csv, .json, .xls, .xlsx).
 
-    Parameters
-    ----------
-    filename (Path | str): The name of the file to be read or the Path of the file.
+    Parameters
+    ----------
+    filename (Path | str): The name of the file to be read or the Path of the file.
 
-    Returns
-    -------
-    pandas.DataFrame: data
+    Returns
+    -------
+    pandas.DataFrame: data
 
-    Examples
-    --------
-    >>> psd = DataReader(Path(...))
-    """
+    Examples
+    --------
+    >>> psd = DataReader(Path(...))
+    """
 
-    def __new__(cls, file_path: Path | str) -> DataFrame:
-        file_path = Path(file_path)
-        return DataReaderFactory.create_handler(file_path.suffix.lower()).read_data(file_path)
+    def __new__(cls, file_path: Path | str) -> DataFrame:
+        file_path = Path(file_path)
+        return DataReaderFactory.create_handler(file_path.suffix.lower()).read_data(file_path)
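
DataReader resolves a handler from the file suffix and returns the parsed DataFrame directly from __new__; as of this release, CSV and Excel files are indexed by their first column rather than a mandatory 'Time' header. A sketch, with a hypothetical file path:

from pathlib import Path

from AeroViz.tools.datareader import DataReader, DataReaderFactory

df = DataReader(Path('station_2024.csv'))  # hypothetical file; .csv suffix selects CsvFileHandler

# Unsupported suffixes fail fast in the factory rather than guessing a parser
try:
    DataReaderFactory.create_handler('.txt')
except ValueError as err:
    print(err)  # Unsupported file format: .txt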
{AeroViz-0.1.3.dist-info → AeroViz-0.1.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: AeroViz
-Version: 0.1.3
+Version: 0.1.4
 Summary: Aerosol science
 Home-page: https://github.com/Alex870521/AeroViz
 Author: alex
@@ -43,9 +43,10 @@ Requires-Dist: tabulate
 
 ## <div align="center">Key Features</div>
 
-* #### Data Reading: Supports reading multiple aerosol data formats.
-* #### Data Visualization: Offers various charts and graphs, including time series plots, distribution plots, and correlation matrices.
-* #### Data Processing: Includes multiple data processing tools, such as linear regression and Mie theory calculations.
+* Data Reading: Supports reading multiple aerosol data formats.
+* Data Visualization: Offers various charts and graphs, including time series plots, distribution plots, and correlation
+  matrices.
+* Data Processing: Includes multiple data processing tools, such as linear regression and Mie theory calculations.
 
 
 ## <div align="center">Installation</div>