plotair 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plotair/__init__.py +1 -1
- plotair/config.toml +13 -12
- plotair/main.py +403 -371
- plotair-0.3.0.dist-info/METADATA +143 -0
- plotair-0.3.0.dist-info/RECORD +8 -0
- plotair-0.2.0.dist-info/METADATA +0 -97
- plotair-0.2.0.dist-info/RECORD +0 -8
- {plotair-0.2.0.dist-info → plotair-0.3.0.dist-info}/WHEEL +0 -0
- {plotair-0.2.0.dist-info → plotair-0.3.0.dist-info}/entry_points.txt +0 -0
- {plotair-0.2.0.dist-info → plotair-0.3.0.dist-info}/licenses/LICENSE +0 -0
plotair/main.py
CHANGED
|
@@ -53,8 +53,14 @@ def main():
|
|
|
53
53
|
help='sensor data file to process')
|
|
54
54
|
parser.add_argument('-a', '--all-dates', action='store_true',
|
|
55
55
|
help='plot all dates (otherwise only latest sequence)')
|
|
56
|
+
parser.add_argument('-b', '--boxplot', action='store_true',
|
|
57
|
+
help='generate boxplots along with text stats')
|
|
56
58
|
parser.add_argument('-m', '--merge', metavar='FIELD',
|
|
57
59
|
help='merge field from file1 to file2, and output to file3')
|
|
60
|
+
parser.add_argument('-M', '--filter-multiplier', type=float, default=1.5, metavar='MULTIPLIER',
|
|
61
|
+
help='multiplier for IQR outlier filtering (default: 1.5)')
|
|
62
|
+
parser.add_argument('-o', '--filter-outliers', action='store_true',
|
|
63
|
+
help='filter out outliers from the plots')
|
|
58
64
|
parser.add_argument('-r', '--reset-config', action='store_true',
|
|
59
65
|
help='reset configuration file to default')
|
|
60
66
|
parser.add_argument('-s', '--start-date', metavar='DATE',
|
|
@@ -63,7 +69,9 @@ def main():
|
|
|
63
69
|
help='date at which to stop the plot (YYYY-MM-DD)')
|
|
64
70
|
parser.add_argument('-t', '--title',
|
|
65
71
|
help='set the plot title')
|
|
66
|
-
parser.add_argument('-
|
|
72
|
+
parser.add_argument('-T', '--snapshots', action='store_true',
|
|
73
|
+
help='generate a snapshots table from all files')
|
|
74
|
+
parser.add_argument('-v', '--version', action='version',
|
|
67
75
|
version=f'%(prog)s {__version__}')
|
|
68
76
|
|
|
69
77
|
args = parser.parse_args()
|
|
@@ -71,66 +79,87 @@ def main():
|
|
|
71
79
|
try:
|
|
72
80
|
load_config(args.reset_config)
|
|
73
81
|
except FileNotFoundError as e:
|
|
74
|
-
|
|
82
|
+
print(f'Error: Failed to load configuration file: {e}')
|
|
75
83
|
return
|
|
76
84
|
|
|
77
|
-
|
|
78
|
-
field = args.merge
|
|
79
|
-
num_files = len(args.filenames)
|
|
80
|
-
|
|
81
|
-
if num_files != 3:
|
|
82
|
-
logger.error('argument -m/--merge requires three file arguments')
|
|
83
|
-
return
|
|
84
|
-
|
|
85
|
-
file_format, df1, num_valid_rows1, num_invalid_rows = read_data(args.filenames[0])
|
|
86
|
-
file_format, df2, num_valid_rows2, num_invalid_rows = read_data(args.filenames[1])
|
|
85
|
+
filenames = []
|
|
87
86
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
return
|
|
91
|
-
|
|
92
|
-
temp_df = df1[['co2']]
|
|
93
|
-
df2 = pd.concat([df2, temp_df]).sort_index()
|
|
94
|
-
df2.to_csv(args.filenames[2], index=True)
|
|
95
|
-
|
|
96
|
-
else:
|
|
97
|
-
# Create a list containing all files from all patterns like '*.csv',
|
|
98
|
-
# because under Windows the terminal doesn't expand wildcard arguments.
|
|
99
|
-
all_files = []
|
|
87
|
+
if sys.platform == "win32":
|
|
88
|
+
# On Windows, expand glob patterns (e.g. *.csv)
|
|
100
89
|
for pattern in args.filenames:
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
try:
|
|
106
|
-
file_format, df, num_valid_rows, num_invalid_rows = read_data(filename)
|
|
107
|
-
|
|
108
|
-
if num_valid_rows > 0:
|
|
109
|
-
logger.debug(f'{num_valid_rows} row(s) read')
|
|
110
|
-
else:
|
|
111
|
-
logger.error('Unsupported file format')
|
|
112
|
-
return
|
|
113
|
-
|
|
114
|
-
if num_invalid_rows > 0:
|
|
115
|
-
logger.info(f'{num_invalid_rows} invalid row(s) ignored')
|
|
90
|
+
filenames.extend(glob.glob(pattern))
|
|
91
|
+
else:
|
|
92
|
+
# On Linux, use filenames as-is (no glob expansion needed)
|
|
93
|
+
filenames = args.filenames
|
|
116
94
|
|
|
117
|
-
|
|
118
|
-
|
|
95
|
+
if args.merge:
|
|
96
|
+
# Merge field from file1 to file2, and output to file3
|
|
97
|
+
merge_field(args.merge, filenames)
|
|
119
98
|
|
|
120
|
-
|
|
99
|
+
elif args.snapshots:
|
|
100
|
+
# Generate a snapshots table from all files
|
|
101
|
+
generate_snapshots(filenames)
|
|
121
102
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
103
|
+
else:
|
|
104
|
+
# Generate plots for all files
|
|
105
|
+
process_files(filenames, args)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def process_files(filenames, args):
|
|
109
|
+
for filename in filenames:
|
|
110
|
+
print(f'Processing {filename}')
|
|
111
|
+
try:
|
|
112
|
+
file_format, df, num_valid_rows, num_invalid_rows = read_data(filename)
|
|
113
|
+
|
|
114
|
+
if num_valid_rows > 0:
|
|
115
|
+
logger.debug(f'{num_valid_rows} valid row(s) read')
|
|
116
|
+
else:
|
|
117
|
+
print('Error: Unsupported file format')
|
|
118
|
+
return
|
|
119
|
+
|
|
120
|
+
if num_invalid_rows > 0:
|
|
121
|
+
percent_ignored = round(num_invalid_rows / (num_valid_rows + num_invalid_rows) * 100)
|
|
122
|
+
print(f'{num_invalid_rows} invalid row(s) ignored ({percent_ignored}%)')
|
|
123
|
+
|
|
124
|
+
if not args.all_dates:
|
|
125
|
+
df = delete_old_data(df, args.start_date, args.stop_date)
|
|
126
|
+
|
|
127
|
+
generate_stats(df, filename, args.boxplot)
|
|
128
|
+
|
|
129
|
+
if file_format == 'plotair':
|
|
130
|
+
generate_plot(df, filename, args.title, suffix='cht',
|
|
131
|
+
series1='co2', series2='humidity', series3='temp',
|
|
132
|
+
filter_outliers=args.filter_outliers,
|
|
133
|
+
filter_multiplier=args.filter_multiplier)
|
|
134
|
+
elif file_format == 'visiblair_d':
|
|
135
|
+
generate_plot(df, filename, args.title, suffix='cht',
|
|
136
|
+
series1='co2', series2='humidity', series3='temp',
|
|
137
|
+
filter_outliers=args.filter_outliers,
|
|
138
|
+
filter_multiplier=args.filter_multiplier)
|
|
139
|
+
elif file_format == 'visiblair_e':
|
|
140
|
+
generate_plot(df, filename, args.title, suffix='cht',
|
|
141
|
+
series1='co2', series2='humidity', series3='temp',
|
|
142
|
+
filter_outliers=args.filter_outliers,
|
|
143
|
+
filter_multiplier=args.filter_multiplier)
|
|
144
|
+
generate_plot(df, filename, args.title, suffix='pm',
|
|
145
|
+
series1=None, series2='pm2.5', series3='pm10',
|
|
146
|
+
filter_outliers=args.filter_outliers,
|
|
147
|
+
filter_multiplier=args.filter_multiplier)
|
|
148
|
+
elif file_format == 'graywolf_ds':
|
|
149
|
+
generate_plot(df, filename, args.title, suffix='ht',
|
|
150
|
+
series1=None, series2='humidity', series3='temp',
|
|
151
|
+
filter_outliers=args.filter_outliers,
|
|
152
|
+
filter_multiplier=args.filter_multiplier)
|
|
153
|
+
generate_plot(df, filename, args.title, suffix='vf',
|
|
154
|
+
series1='tvoc', series2='form', series3=None,
|
|
155
|
+
filter_outliers=args.filter_outliers,
|
|
156
|
+
filter_multiplier=args.filter_multiplier)
|
|
157
|
+
generate_plot(df, filename, args.title, suffix='co',
|
|
158
|
+
series1=None, series2='co', series3=None,
|
|
159
|
+
filter_outliers=args.filter_outliers,
|
|
160
|
+
filter_multiplier=args.filter_multiplier)
|
|
161
|
+
except Exception as e:
|
|
162
|
+
print(f'Error: Unexpected error: {e}')
|
|
134
163
|
|
|
135
164
|
|
|
136
165
|
def detect_file_format(filename):
|
|
@@ -139,11 +168,14 @@ def detect_file_format(filename):
|
|
|
139
168
|
visiblair_e_num_col = (21, 21)
|
|
140
169
|
graywolf_ds_num_col = (7, 7)
|
|
141
170
|
|
|
142
|
-
with
|
|
171
|
+
# Some files begin with the '\ufeff' character (Byte Order Mark / BOM).
|
|
172
|
+
# This breaks the first field detection. Use 'utf-8-sig' instead of 'utf-8'
|
|
173
|
+
# to automatically handle BOM.
|
|
174
|
+
with open(filename, 'r', newline='', encoding='utf-8-sig') as file:
|
|
143
175
|
reader = csv.reader(file)
|
|
144
176
|
first_line = next(reader)
|
|
145
177
|
num_fields = len(first_line)
|
|
146
|
-
|
|
178
|
+
|
|
147
179
|
if first_line[0] == 'date':
|
|
148
180
|
file_format = 'plotair'
|
|
149
181
|
elif visiblair_d_num_col[0] <= num_fields <= visiblair_d_num_col[1]:
|
|
@@ -154,9 +186,9 @@ def detect_file_format(filename):
|
|
|
154
186
|
elif (graywolf_ds_num_col[0] <= num_fields <= graywolf_ds_num_col[1] and
|
|
155
187
|
first_line[0] == 'Date Time'):
|
|
156
188
|
file_format = 'graywolf_ds'
|
|
157
|
-
|
|
189
|
+
|
|
158
190
|
logger.debug(f'File format: {file_format}')
|
|
159
|
-
|
|
191
|
+
|
|
160
192
|
return file_format
|
|
161
193
|
|
|
162
194
|
|
|
@@ -175,6 +207,8 @@ def read_data(filename):
|
|
|
175
207
|
elif file_format == 'graywolf_ds':
|
|
176
208
|
df, num_valid_rows, num_invalid_rows = read_data_graywolf_ds(filename)
|
|
177
209
|
|
|
210
|
+
df = df.sort_index() # Sort in case some dates are not in order
|
|
211
|
+
|
|
178
212
|
return file_format, df, num_valid_rows, num_invalid_rows
|
|
179
213
|
|
|
180
214
|
|
|
@@ -189,7 +223,6 @@ def read_data_plotair(filename):
|
|
|
189
223
|
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
|
|
190
224
|
|
|
191
225
|
df = df.set_index('date')
|
|
192
|
-
df = df.sort_index() # Sort in case some dates are not in order
|
|
193
226
|
num_valid_rows = len(df)
|
|
194
227
|
|
|
195
228
|
return df, num_valid_rows, num_invalid_rows
|
|
@@ -207,34 +240,33 @@ def read_data_visiblair_d(filename):
|
|
|
207
240
|
for line in f:
|
|
208
241
|
line = line.strip()
|
|
209
242
|
fields = line.split(',')
|
|
210
|
-
|
|
243
|
+
|
|
211
244
|
if not (5 <= len(fields) <= 6):
|
|
212
245
|
# Skip lines with an invalid number of columns
|
|
213
|
-
logger.debug(f'Skipping line (number of columns): {line}')
|
|
246
|
+
#logger.debug(f'Skipping line (number of columns): {line}')
|
|
214
247
|
num_invalid_rows += 1
|
|
215
248
|
continue
|
|
216
|
-
|
|
249
|
+
|
|
217
250
|
try:
|
|
218
251
|
# Convert each field to its target data type
|
|
219
252
|
parsed_row = {
|
|
220
253
|
'date': pd.to_datetime(fields[0], format='%Y-%m-%d %H:%M:%S'),
|
|
221
|
-
'co2': np.uint16(fields[1]),
|
|
222
|
-
'
|
|
223
|
-
'humidity': np.uint8(fields[3])
|
|
254
|
+
'co2': np.uint16(fields[1]), # 0 to 10,000 ppm
|
|
255
|
+
'temp': np.float32(fields[2]), # -40 to 70 °C
|
|
256
|
+
'humidity': np.uint8(fields[3]) # 0 to 100% RH
|
|
224
257
|
}
|
|
225
258
|
# If conversion succeeds, add the parsed row to the list
|
|
226
259
|
valid_rows.append(parsed_row)
|
|
227
|
-
|
|
260
|
+
|
|
228
261
|
except (ValueError, TypeError) as e:
|
|
229
262
|
# Skip lines with conversion errors
|
|
230
|
-
logger.debug(f'Skipping line (conversion error): {line}')
|
|
263
|
+
#logger.debug(f'Skipping line (conversion error): {line}')
|
|
231
264
|
num_invalid_rows += 1
|
|
232
265
|
continue
|
|
233
266
|
|
|
234
267
|
# Create the DataFrame from the valid rows
|
|
235
268
|
df = pd.DataFrame(valid_rows)
|
|
236
269
|
df = df.set_index('date')
|
|
237
|
-
df = df.sort_index() # Sort in case some dates are not in order
|
|
238
270
|
num_valid_rows = len(df)
|
|
239
271
|
|
|
240
272
|
return df, num_valid_rows, num_invalid_rows
|
|
@@ -248,7 +280,7 @@ def read_data_visiblair_e(filename):
|
|
|
248
280
|
df = pd.read_csv(filename)
|
|
249
281
|
|
|
250
282
|
# Rename the columns
|
|
251
|
-
df.columns = ['uuid', 'date', 'co2', 'humidity', '
|
|
283
|
+
df.columns = ['uuid', 'date', 'co2', 'humidity', 'temp', 'pm0.1',
|
|
252
284
|
'pm0.3', 'pm0.5', 'pm1', 'pm2.5', 'pm5', 'pm10', 'pressure',
|
|
253
285
|
'voc_index', 'firmware', 'model', 'pcb', 'display_rate',
|
|
254
286
|
'is_charging', 'is_ac_in', 'batt_voltage']
|
|
@@ -257,7 +289,6 @@ def read_data_visiblair_e(filename):
|
|
|
257
289
|
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
|
|
258
290
|
|
|
259
291
|
df = df.set_index('date')
|
|
260
|
-
df = df.sort_index() # Sort in case some dates are not in order
|
|
261
292
|
num_valid_rows = len(df)
|
|
262
293
|
|
|
263
294
|
return df, num_valid_rows, num_invalid_rows
|
|
@@ -271,21 +302,40 @@ def read_data_graywolf_ds(filename):
|
|
|
271
302
|
df = pd.read_csv(filename)
|
|
272
303
|
|
|
273
304
|
# Rename the columns
|
|
274
|
-
df.columns = ['date', 'tvoc', 'co', 'form', 'humidity', '
|
|
305
|
+
df.columns = ['date', 'tvoc', 'co', 'form', 'humidity', 'temp', 'filename']
|
|
275
306
|
|
|
276
307
|
# Convert the 'date' column to pandas datetime objects
|
|
277
308
|
df['date'] = pd.to_datetime(df['date'], format='%d-%b-%y %I:%M:%S %p')
|
|
278
309
|
|
|
279
|
-
# Convert 'form' column to string, replace '< LOD' with '
|
|
280
|
-
|
|
310
|
+
# Convert 'form' column to string, replace '< LOD' with '10',
|
|
311
|
+
# and then convert to integer
|
|
312
|
+
df['form'] = df['form'].astype(str).str.replace('< LOD', '10').astype(int)
|
|
281
313
|
|
|
282
314
|
df = df.set_index('date')
|
|
283
|
-
df = df.sort_index() # Sort in case some dates are not in order
|
|
284
315
|
num_valid_rows = len(df)
|
|
285
316
|
|
|
286
317
|
return df, num_valid_rows, num_invalid_rows
|
|
287
318
|
|
|
288
319
|
|
|
320
|
+
def merge_field(field, filenames):
|
|
321
|
+
num_files = len(filenames)
|
|
322
|
+
|
|
323
|
+
if num_files != 3:
|
|
324
|
+
print('Error: Argument -m/--merge requires three file arguments')
|
|
325
|
+
return
|
|
326
|
+
|
|
327
|
+
file_format, df1, num_valid_rows1, num_invalid_rows = read_data(filenames[0])
|
|
328
|
+
file_format, df2, num_valid_rows2, num_invalid_rows = read_data(filenames[1])
|
|
329
|
+
|
|
330
|
+
if num_valid_rows1 <= 0 or num_valid_rows2 <= 0:
|
|
331
|
+
print('Error: At least one of the input files is unsupported')
|
|
332
|
+
return
|
|
333
|
+
|
|
334
|
+
temp_df = df1[[field]]
|
|
335
|
+
df2 = pd.concat([df2, temp_df]).sort_index()
|
|
336
|
+
df2.to_csv(filenames[2], index=True)
|
|
337
|
+
|
|
338
|
+
|
|
289
339
|
def delete_old_data(df, start_date = None, stop_date = None):
|
|
290
340
|
if not start_date and not stop_date:
|
|
291
341
|
# Iterate backwards through the samples to find the first time gap larger
|
|
@@ -319,12 +369,31 @@ def delete_old_data(df, start_date = None, stop_date = None):
|
|
|
319
369
|
df = df[df.index <= sd]
|
|
320
370
|
|
|
321
371
|
return df
|
|
322
|
-
|
|
323
372
|
|
|
324
|
-
|
|
373
|
+
|
|
374
|
+
class DataSeries:
|
|
375
|
+
def __init__(self, name=''):
|
|
376
|
+
# y_range could be replaced by y_min and y_max
|
|
377
|
+
self.name = name
|
|
378
|
+
self.label = CONFIG['labels'].get(self.name)
|
|
379
|
+
self.color = CONFIG['colors'].get(self.name)
|
|
380
|
+
self.y_range = CONFIG['axis_ranges'].get(self.name) # min/max tuple, e.g. (0, 100)
|
|
381
|
+
self.limit = CONFIG['limits'].get(self.name) # single value or min/max tuple
|
|
382
|
+
self.limit_label = CONFIG['labels'].get(self.name + '_limit')
|
|
383
|
+
self.linestyle = CONFIG['plot'].get(self.name + '_line_style')
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def generate_plot(df, filename, title, suffix='',
|
|
387
|
+
series1=None, series2=None, series3=None,
|
|
388
|
+
filter_outliers=False, filter_multiplier=None):
|
|
325
389
|
# The dates must be in a non-index column
|
|
326
390
|
df = df.reset_index()
|
|
327
391
|
|
|
392
|
+
# Get each series configuration
|
|
393
|
+
ds1 = DataSeries(name=series1) if series1 else None
|
|
394
|
+
ds2 = DataSeries(name=series2) if series2 else None
|
|
395
|
+
ds3 = DataSeries(name=series3) if series3 else None
|
|
396
|
+
|
|
328
397
|
# Set a theme and scale all fonts
|
|
329
398
|
sns.set_theme(style='whitegrid', font_scale=CONFIG['plot']['font_scale'])
|
|
330
399
|
|
|
@@ -335,96 +404,108 @@ def generate_plot_co2_hum_tmp(df, filename, title):
|
|
|
335
404
|
fig, ax1 = plt.subplots(figsize=CONFIG['plot']['size'])
|
|
336
405
|
ax2 = ax1.twinx() # Secondary y axis
|
|
337
406
|
|
|
338
|
-
#
|
|
339
|
-
sns.lineplot(data=df, x='date', y='co2', ax=ax1, color=CONFIG['colors']['co2'],
|
|
340
|
-
label=CONFIG['labels']['co2'], legend=False)
|
|
341
|
-
sns.lineplot(data=df, x='date', y='humidity', ax=ax2, color=CONFIG['colors']['humidity'],
|
|
342
|
-
label=CONFIG['labels']['humidity'], legend=False)
|
|
343
|
-
sns.lineplot(data=df, x='date', y='temperature', ax=ax2, color=CONFIG['colors']['temp'],
|
|
344
|
-
label=CONFIG['labels']['temp'], legend=False)
|
|
407
|
+
# TODO: add functions for repetitive code
|
|
345
408
|
|
|
346
|
-
#
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
# Add a grid for the x axis and the y axes
|
|
353
|
-
# This is already done if using the whitegrid theme
|
|
354
|
-
#ax1.grid(axis='x', alpha=CONFIG['plot']['grid_opacity'])
|
|
355
|
-
#ax1.grid(axis='y', alpha=CONFIG['plot']['grid_opacity'])
|
|
356
|
-
ax2.grid(axis='y', alpha=CONFIG['plot']['grid2_opacity'], linestyle=CONFIG['plot']['grid2_line_style'])
|
|
357
|
-
|
|
358
|
-
# Set the background color of the humidity comfort zone
|
|
359
|
-
hmin, hmax = CONFIG['limits']['humidity']
|
|
360
|
-
ax2.axhspan(ymin=hmin, ymax=hmax,
|
|
361
|
-
facecolor=CONFIG['colors']['humidity'], alpha=CONFIG['plot']['limit_zone_opacity'])
|
|
362
|
-
|
|
363
|
-
# Customize the plot title, labels and ticks
|
|
364
|
-
ax1.set_title(get_plot_title(title, filename))
|
|
365
|
-
ax1.tick_params(axis='x', rotation=CONFIG['plot']['date_rotation'])
|
|
366
|
-
ax1.tick_params(axis='y', labelcolor=CONFIG['colors']['co2'])
|
|
367
|
-
ax1.set_xlabel('')
|
|
368
|
-
ax1.set_ylabel(CONFIG['labels']['co2'], color=CONFIG['colors']['co2'])
|
|
369
|
-
ax2.set_ylabel('') # We will manually place the 2 parts in different colors
|
|
370
|
-
|
|
371
|
-
# Define the position for the center of the right y axis label
|
|
372
|
-
bottom_label = CONFIG['labels']['temp'] + ' '
|
|
373
|
-
top_label = ' ' + CONFIG['labels']['humidity']
|
|
374
|
-
x = 1.07 # Slightly to the right of the axis
|
|
375
|
-
y = get_label_center(bottom_label, top_label) # Vertically centered
|
|
409
|
+
# Plot series #1 main line (on left y-axis)
|
|
410
|
+
if ds1:
|
|
411
|
+
if ds1.linestyle:
|
|
412
|
+
linestyle = ds1.linestyle
|
|
413
|
+
else:
|
|
414
|
+
linestyle = CONFIG['plot']['default_line_style']
|
|
376
415
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
416
|
+
if filter_outliers:
|
|
417
|
+
df1 = remove_outliers_iqr(df, ds1.name, multiplier=filter_multiplier)
|
|
418
|
+
else:
|
|
419
|
+
df1 = df[df[ds1.name] != 0] # Only filter out zero values
|
|
420
|
+
|
|
421
|
+
sns.lineplot(data=df1, x='date', y=ds1.name, ax=ax1, color=ds1.color,
|
|
422
|
+
label=ds1.label, legend=False, linestyle=linestyle)
|
|
423
|
+
|
|
424
|
+
# Display series #1 limit line or zone
|
|
425
|
+
if ds1.limit and not isinstance(ds1.limit, list):
|
|
426
|
+
# Plot the limit line
|
|
427
|
+
line = ax1.axhline(y=ds1.limit, color=ds1.color, label=ds1.limit_label,
|
|
428
|
+
linestyle=CONFIG['plot']['limit_line_style'])
|
|
429
|
+
line.set_alpha(CONFIG['plot']['limit_line_opacity'])
|
|
430
|
+
|
|
431
|
+
if ds1.limit and isinstance(ds1.limit, list):
|
|
432
|
+
# Set the background color of the limit zone
|
|
433
|
+
hmin, hmax = ds1.limit
|
|
434
|
+
ax1.axhspan(ymin=hmin, ymax=hmax, facecolor=ds1.color,
|
|
435
|
+
alpha=CONFIG['plot']['limit_zone_opacity'])
|
|
436
|
+
|
|
437
|
+
# Plot series #2 main line (on right y-axis)
|
|
438
|
+
if ds2.linestyle:
|
|
439
|
+
linestyle = ds2.linestyle
|
|
440
|
+
else:
|
|
441
|
+
linestyle = CONFIG['plot']['default_line_style']
|
|
381
442
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
443
|
+
if filter_outliers:
|
|
444
|
+
df2 = remove_outliers_iqr(df, ds2.name, multiplier=filter_multiplier)
|
|
445
|
+
else:
|
|
446
|
+
df2 = df[df[ds2.name] != 0] # Only filter out zero values
|
|
447
|
+
|
|
448
|
+
sns.lineplot(data=df2, x='date', y=ds2.name, ax=ax2, color=ds2.color,
|
|
449
|
+
label=ds2.label, legend=False, linestyle=linestyle)
|
|
450
|
+
|
|
451
|
+
# Display series #2 limit line or zone
|
|
452
|
+
if ds2.limit and not isinstance(ds2.limit, list):
|
|
453
|
+
# Plot the limit line
|
|
454
|
+
line = ax2.axhline(y=ds2.limit, color=ds2.color, label=ds2.limit_label,
|
|
455
|
+
linestyle=CONFIG['plot']['limit_line_style'])
|
|
456
|
+
line.set_alpha(CONFIG['plot']['limit_line_opacity'])
|
|
457
|
+
|
|
458
|
+
if ds2.limit and isinstance(ds2.limit, list):
|
|
459
|
+
# Set the background color of the limit zone
|
|
460
|
+
hmin, hmax = ds2.limit
|
|
461
|
+
ax2.axhspan(ymin=hmin, ymax=hmax, facecolor=ds2.color,
|
|
462
|
+
alpha=CONFIG['plot']['limit_zone_opacity'])
|
|
463
|
+
|
|
464
|
+
# Plot series #3 main line (on right y-axis)
|
|
465
|
+
if ds3:
|
|
466
|
+
if ds3.linestyle:
|
|
467
|
+
linestyle = ds3.linestyle
|
|
468
|
+
else:
|
|
469
|
+
linestyle = CONFIG['plot']['default_line_style']
|
|
386
470
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
loc=CONFIG['plot']['legend_location'])
|
|
471
|
+
if filter_outliers:
|
|
472
|
+
df3 = remove_outliers_iqr(df, ds3.name, multiplier=filter_multiplier)
|
|
473
|
+
else:
|
|
474
|
+
df3 = df[df[ds3.name] != 0] # Only filter out zero values
|
|
392
475
|
|
|
393
|
-
|
|
394
|
-
|
|
476
|
+
sns.lineplot(data=df3, x='date', y=ds3.name, ax=ax2, color=ds3.color,
|
|
477
|
+
label=ds3.label, legend=False, linestyle=linestyle)
|
|
395
478
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
479
|
+
# Plot series #3 limit line
|
|
480
|
+
if ds3.limit and not isinstance(ds3.limit, list):
|
|
481
|
+
# Plot the limit line
|
|
482
|
+
line = ax2.axhline(y=ds3.limit, color=ds3.color, label=ds3.limit_label,
|
|
483
|
+
linestyle=CONFIG['plot']['limit_line_style'])
|
|
484
|
+
line.set_alpha(CONFIG['plot']['limit_line_opacity'])
|
|
399
485
|
|
|
486
|
+
if ds3.limit and isinstance(ds3.limit, list):
|
|
487
|
+
# Set the background color of the limit zone
|
|
488
|
+
hmin, hmax = ds3.limit
|
|
489
|
+
ax2.axhspan(ymin=hmin, ymax=hmax, facecolor=ds3.color,
|
|
490
|
+
alpha=CONFIG['plot']['limit_zone_opacity'])
|
|
400
491
|
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
492
|
+
# Set the ranges for both y axes
|
|
493
|
+
if ds1:
|
|
494
|
+
y1min, y1max = ds1.y_range
|
|
495
|
+
ax1.set_ylim(y1min, y1max)
|
|
404
496
|
|
|
405
|
-
|
|
406
|
-
sns.set_theme(style='whitegrid', font_scale=CONFIG['plot']['font_scale'])
|
|
497
|
+
y2min, y2max = ds2.y_range
|
|
407
498
|
|
|
408
|
-
|
|
409
|
-
|
|
499
|
+
if ds3:
|
|
500
|
+
y3min, y3max = ds3.y_range
|
|
410
501
|
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
ax2 = ax1.twinx() # Secondary y axis
|
|
502
|
+
if y2min != y3min or y2max != y3max:
|
|
503
|
+
print(f'Warning: Axis ranges differ for {series2} and {series3}, using largest range')
|
|
414
504
|
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
# label=CONFIG['labels']['co2'], legend=False)
|
|
418
|
-
sns.lineplot(data=df, x='date', y='humidity', ax=ax2, color=CONFIG['colors']['humidity'],
|
|
419
|
-
label=CONFIG['labels']['humidity'], legend=False)
|
|
420
|
-
sns.lineplot(data=df, x='date', y='temperature', ax=ax2, color=CONFIG['colors']['temp'],
|
|
421
|
-
label=CONFIG['labels']['temp'], legend=False)
|
|
505
|
+
y2min = min(y2min, y3min)
|
|
506
|
+
y2max = max(y2max, y3max)
|
|
422
507
|
|
|
423
|
-
|
|
424
|
-
cmin, cmax = CONFIG['axis_ranges']['co2']
|
|
425
|
-
tmin, tmax = CONFIG['axis_ranges']['temp_h']
|
|
426
|
-
ax1.set_ylim(cmin, cmax) # df['co2'].max() * 1.05
|
|
427
|
-
ax2.set_ylim(tmin, tmax)
|
|
508
|
+
ax2.set_ylim(y2min, y2max)
|
|
428
509
|
|
|
429
510
|
# Add a grid for the x axis and the y axes
|
|
430
511
|
# This is already done if using the whitegrid theme
|
|
@@ -432,271 +513,215 @@ def generate_plot_hum_tmp(df, filename, title):
|
|
|
432
513
|
#ax1.grid(axis='y', alpha=CONFIG['plot']['grid_opacity'])
|
|
433
514
|
ax2.grid(axis='y', alpha=CONFIG['plot']['grid2_opacity'], linestyle=CONFIG['plot']['grid2_line_style'])
|
|
434
515
|
|
|
435
|
-
# Set the background color of the humidity comfort zone
|
|
436
|
-
hmin, hmax = CONFIG['limits']['humidity']
|
|
437
|
-
ax2.axhspan(ymin=hmin, ymax=hmax,
|
|
438
|
-
facecolor=CONFIG['colors']['humidity'], alpha=CONFIG['plot']['limit_zone_opacity'])
|
|
439
|
-
|
|
440
516
|
# Customize the plot title, labels and ticks
|
|
441
517
|
ax1.set_title(get_plot_title(title, filename))
|
|
442
518
|
ax1.tick_params(axis='x', rotation=CONFIG['plot']['date_rotation'])
|
|
443
|
-
|
|
519
|
+
if ds1:
|
|
520
|
+
ax1.tick_params(axis='y', labelcolor=ds1.color)
|
|
521
|
+
ax1.set_ylabel(ds1.label, color=ds1.color)
|
|
444
522
|
ax1.set_xlabel('')
|
|
445
|
-
#ax1.set_ylabel(CONFIG['labels']['co2'], color=CONFIG['colors']['co2'])
|
|
446
|
-
ax2.set_ylabel('') # We will manually place the 2 parts in different colors
|
|
447
523
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
top_label = ' ' + CONFIG['labels']['humidity']
|
|
451
|
-
x = 1.07 # Slightly to the right of the axis
|
|
452
|
-
y = get_label_center(bottom_label, top_label) # Vertically centered
|
|
524
|
+
if ds3:
|
|
525
|
+
ax2.set_ylabel('') # We will manually place the 2 parts in different colors
|
|
453
526
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
527
|
+
# Define the position for the center of the right y axis label
|
|
528
|
+
bottom_label = ds3.label + ' '
|
|
529
|
+
top_label = ' ' + ds2.label
|
|
530
|
+
x = 1.07 # Slightly to the right of the axis
|
|
531
|
+
y = get_label_center(bottom_label, top_label) # Vertically centered
|
|
458
532
|
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
533
|
+
# Place the first (bottom) part of the label
|
|
534
|
+
ax2.text(x, y, bottom_label, transform=ax2.transAxes,
|
|
535
|
+
color=ds3.color, rotation='vertical',
|
|
536
|
+
ha='center', va='top')
|
|
537
|
+
|
|
538
|
+
# Place the second (top) part of the label
|
|
539
|
+
ax2.text(x, y, top_label, transform=ax2.transAxes,
|
|
540
|
+
color=ds2.color, rotation='vertical',
|
|
541
|
+
ha='center', va='bottom')
|
|
542
|
+
else:
|
|
543
|
+
ax2.tick_params(axis='y', labelcolor=ds2.color)
|
|
544
|
+
ax2.set_ylabel(ds2.label, color=ds2.color)
|
|
463
545
|
|
|
464
546
|
# Create a combined legend
|
|
465
547
|
lines1, labels1 = ax1.get_legend_handles_labels()
|
|
466
548
|
lines2, labels2 = ax2.get_legend_handles_labels()
|
|
549
|
+
labels1 = remove_units_from_labels(labels1)
|
|
550
|
+
labels2 = remove_units_from_labels(labels2)
|
|
467
551
|
ax1.legend(lines1 + lines2, labels1 + labels2,
|
|
468
552
|
loc=CONFIG['plot']['legend_location'])
|
|
469
553
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
554
|
+
if not ds1:
|
|
555
|
+
# Remove the left y-axis elements from ax1
|
|
556
|
+
ax1.grid(axis='y', visible=False)
|
|
557
|
+
ax1.spines['left'].set_visible(False)
|
|
558
|
+
ax1.tick_params(axis='y', left=False, labelleft=False)
|
|
474
559
|
|
|
475
560
|
# Adjust the plot margins to make room for the labels
|
|
476
561
|
plt.tight_layout()
|
|
477
562
|
|
|
478
563
|
# Save the plot as a PNG image
|
|
479
|
-
|
|
564
|
+
# TODO: auto build the plot suffix from the 1st char of each series?
|
|
565
|
+
plt.savefig(get_plot_filename(filename, f'-{suffix}'))
|
|
480
566
|
plt.close()
|
|
481
567
|
|
|
482
568
|
|
|
483
|
-
def
|
|
484
|
-
|
|
485
|
-
df =
|
|
486
|
-
|
|
487
|
-
# Set a theme and scale all fonts
|
|
488
|
-
sns.set_theme(style='whitegrid', font_scale=CONFIG['plot']['font_scale'])
|
|
489
|
-
|
|
490
|
-
ff = CONFIG['plot']['font_family']
|
|
491
|
-
if ff != '': plt.rcParams['font.family'] = ff
|
|
492
|
-
|
|
493
|
-
# Set up the matplotlib figure and axes
|
|
494
|
-
fig, ax1 = plt.subplots(figsize=CONFIG['plot']['size'])
|
|
495
|
-
ax2 = ax1.twinx() # Secondary y axis
|
|
496
|
-
|
|
497
|
-
# Plot the TVOC main line
|
|
498
|
-
sns.lineplot(data=df, x='date', y='tvoc', ax=ax1, legend=False,
|
|
499
|
-
color=CONFIG['colors']['tvoc'], label=CONFIG['labels']['tvoc'])
|
|
500
|
-
|
|
501
|
-
# Plot the TVOC limit line
|
|
502
|
-
line = ax1.axhline(y=CONFIG['limits']['tvoc'], color=CONFIG['colors']['tvoc'],
|
|
503
|
-
linestyle=CONFIG['plot']['limit_line_style'], linewidth=CONFIG['plot']['limit_line_width'],
|
|
504
|
-
label=CONFIG['labels']['tvoc_limit'])
|
|
505
|
-
line.set_alpha(CONFIG['plot']['limit_line_opacity'])
|
|
506
|
-
|
|
507
|
-
# Plot the formaldehyde main line
|
|
508
|
-
df_filtered = df[df['form'] != 0] # Filter out rows where 'form' is zero
|
|
509
|
-
sns.lineplot(data=df_filtered, x='date', y='form', ax=ax2, legend=False,
|
|
510
|
-
color=CONFIG['colors']['form'], label=CONFIG['labels']['form'])
|
|
511
|
-
|
|
512
|
-
# Plot the formaldehyde limit line
|
|
513
|
-
line = ax2.axhline(y=CONFIG['limits']['form'], color=CONFIG['colors']['form'],
|
|
514
|
-
linestyle=CONFIG['plot']['limit_line_style'], linewidth=CONFIG['plot']['limit_line_width'],
|
|
515
|
-
label=CONFIG['labels']['form_limit'])
|
|
516
|
-
line.set_alpha(CONFIG['plot']['limit_line_opacity'])
|
|
517
|
-
|
|
518
|
-
# Plot the CO main line
|
|
519
|
-
co_scale = 10
|
|
520
|
-
df['co_scaled'] = df['co'] * co_scale
|
|
521
|
-
sns.lineplot(data=df, x='date', y='co_scaled', ax=ax2, legend=False,
|
|
522
|
-
color=CONFIG['colors']['co'], label=CONFIG['labels']['co'])
|
|
523
|
-
|
|
524
|
-
# Plot the CO limit line
|
|
525
|
-
line = ax2.axhline(y=CONFIG['limits']['co'] * co_scale, color=CONFIG['colors']['co'],
|
|
526
|
-
linestyle=CONFIG['plot']['limit_line_style'], linewidth=CONFIG['plot']['limit_line_width'],
|
|
527
|
-
label=CONFIG['labels']['co_limit'])
|
|
528
|
-
line.set_alpha(CONFIG['plot']['limit_line_opacity'])
|
|
529
|
-
|
|
530
|
-
# Set the ranges for both y axes
|
|
531
|
-
tmin, tmax = CONFIG['axis_ranges']['tvoc']
|
|
532
|
-
cmin, cmax = CONFIG['axis_ranges']['co_form']
|
|
533
|
-
ax1.set_ylim(tmin, tmax)
|
|
534
|
-
ax2.set_ylim(cmin, cmax)
|
|
535
|
-
|
|
536
|
-
# Add a grid for the x axis and the y axes
|
|
537
|
-
# This is already done if using the whitegrid theme
|
|
538
|
-
#ax1.grid(axis='x', alpha=CONFIG['plot']['grid_opacity'])
|
|
539
|
-
#ax1.grid(axis='y', alpha=CONFIG['plot']['grid_opacity'])
|
|
540
|
-
ax2.grid(axis='y', alpha=CONFIG['plot']['grid2_opacity'], linestyle=CONFIG['plot']['grid2_line_style'])
|
|
541
|
-
|
|
542
|
-
# Customize the plot title, labels and ticks
|
|
543
|
-
ax1.set_title(get_plot_title(title, filename))
|
|
544
|
-
ax1.tick_params(axis='x', rotation=CONFIG['plot']['date_rotation'])
|
|
545
|
-
ax1.tick_params(axis='y', labelcolor=CONFIG['colors']['tvoc'])
|
|
546
|
-
ax1.set_xlabel('')
|
|
547
|
-
ax1.set_ylabel(CONFIG['labels']['tvoc'], color=CONFIG['colors']['tvoc'])
|
|
548
|
-
ax2.set_ylabel('') # We will manually place the 2 parts in different colors
|
|
549
|
-
|
|
550
|
-
# Define the position for the center of the right y axis label
|
|
551
|
-
bottom_label = CONFIG['labels']['co'] + ' '
|
|
552
|
-
top_label = ' ' + CONFIG['labels']['form']
|
|
553
|
-
x = 1.07 # Slightly to the right of the axis
|
|
554
|
-
y = get_label_center(bottom_label, top_label) # Vertically centered
|
|
555
|
-
|
|
556
|
-
# Place the first (bottom) part of the label
|
|
557
|
-
ax2.text(x, y, bottom_label, transform=ax2.transAxes,
|
|
558
|
-
color=CONFIG['colors']['co'], rotation='vertical',
|
|
559
|
-
ha='center', va='top')
|
|
569
|
+
def generate_snapshots(filenames):
    """
    Build a one-row-per-file summary table and save it as PNG and text.

    Only the first data row of each file is read. The table image and
    the comma-separated text file are both written next to the first
    file in `filenames`, using the stem 'snapshots'.

    Args:
        filenames: Paths of the sensor data files to summarize.

    Raises:
        ValueError: If `filenames` is empty.
    """
    if not filenames:
        raise ValueError('generate_snapshots() requires at least one filename')

    columns = ['date', 'tvoc', 'co', 'form', 'humidity', 'temp', 'filename']
    frames = []

    for filename in filenames:
        print(f'Reading {filename}')

        # Auto-detect field separator, skip the header row,
        # and only read one data row
        df_new = pd.read_csv(filename, sep=None, engine='python',
                             names=columns, skiprows=1, nrows=1)

        # Update the filename field with the actual filename
        df_new['filename'] = Path(filename).stem
        frames.append(df_new)

    # A single concat avoids the warning raised when concatenating
    # onto an empty DataFrame
    df = pd.concat(frames, ignore_index=True)

    # Convert 'form' column to string, and replace '< LOD' with '<10'
    df['form'] = df['form'].astype(str).str.replace('< LOD', '<10', regex=False)

    # Drop the 'date' column, and move last column (filename) to first
    df = df.drop(columns=['date'])
    df = df[[df.columns[-1]] + df.columns[:-1].tolist()]

    # Capitalize only the first character of the filenames
    df['filename'] = df['filename'].str.capitalize()

    # Rename the columns before creating the table
    # TODO: use config file values instead
    df = df.rename(columns={
        'filename': 'Pièce',
        'tvoc': 'COVT (ppb)',
        'co': 'Monoxyde de\ncarbone (ppm)',
        'form': 'Formaldéhyde\n(ppb)',
        'humidity': 'Humidité\nrelative (%)',
        'temp': 'Température (°C)',
    })

    n_rows = len(df) + 1  # +1 for header row
    n_cols = len(df.columns)

    # Create table
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.axis('off')
    table = ax.table(cellText=df.values,
                     colLabels=df.columns,
                     cellLoc='center',
                     loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(2, 2)  # column width, row height

    # Change grid color and set alternating row colors
    for i in range(n_rows):
        for j in range(n_cols):
            cell = table[(i, j)]
            cell.set_edgecolor('#bbbbbb')  # Medium light gray

            if i % 2 == 0:
                cell.set_facecolor('#f4f4f4')  # Very light gray
            else:
                cell.set_facecolor('#ffffff')  # White

    # Header row: increase height, make text bold, and add background color
    for j in range(n_cols):
        cell = table[(0, j)]
        cell.set_height(0.15)
        cell.set_text_props(weight='bold')
        cell.set_facecolor('#dddddd')  # Light gray

    # First column: change alignment to left, except for the header
    for i in range(1, n_rows):
        table[(i, 0)].set_text_props(ha='left')

    # Use the first file for the output location (not the leaked loop
    # variable) so the image lands next to the text file written below
    plt.savefig(get_plot_filename(filenames[0], stem='snapshots'),
                bbox_inches='tight', dpi=300)
    plt.close()

    # Write a csv file to paste easily in a spreadsheet
    df.columns = df.columns.str.replace('\n', ' ')
    df.to_csv(get_filename(filenames[0], stem='snapshots', extension='txt'), index=False)
|
|
655
|
+
|
|
579
656
|
|
|
580
|
-
def
|
|
581
|
-
|
|
582
|
-
df = df.reset_index()
|
|
583
|
-
|
|
584
|
-
# Set a theme and scale all fonts
|
|
585
|
-
sns.set_theme(style='whitegrid', font_scale=CONFIG['plot']['font_scale'])
|
|
586
|
-
|
|
587
|
-
ff = CONFIG['plot']['font_family']
|
|
588
|
-
if ff != '': plt.rcParams['font.family'] = ff
|
|
657
|
+
def remove_units_from_labels(labels):
    """
    Return a new list of labels without their parenthesized parts.

    Every ' (...)' group (a space followed by a parenthesized run with
    no closing parenthesis inside) is deleted from each label.
    """
    units_pattern = re.compile(r' \([^)]*\)')
    cleaned = []

    for text in labels:
        cleaned.append(units_pattern.sub('', text))

    return cleaned
|
|
589
659
|
|
|
590
|
-
# Set up the matplotlib figure and axes
|
|
591
|
-
fig, ax1 = plt.subplots(figsize=CONFIG['plot']['size'])
|
|
592
|
-
ax2 = ax1.twinx() # Secondary y axis
|
|
593
660
|
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
# Plot the PM2.5 limit line
|
|
605
|
-
line = ax2.axhline(y=CONFIG['limits']['pm2.5'],
|
|
606
|
-
color=CONFIG['colors']['pm2.5'],
|
|
607
|
-
label=CONFIG['labels']['pm2.5_limit'],
|
|
608
|
-
linewidth=CONFIG['plot']['limit_line_width'],
|
|
609
|
-
linestyle=CONFIG['plot']['limit_line_style'])
|
|
610
|
-
line.set_alpha(CONFIG['plot']['limit_line_opacity'])
|
|
611
|
-
|
|
612
|
-
# Plot the PM10 main line
|
|
613
|
-
sns.lineplot(data=df, x='date', y='pm10', ax=ax2, legend=False,
|
|
614
|
-
color=CONFIG['colors']['pm10'],
|
|
615
|
-
label=CONFIG['labels']['pm10'],
|
|
616
|
-
linewidth=CONFIG['plot']['pm10_line_width'],
|
|
617
|
-
linestyle=CONFIG['plot']['pm10_line_style'])
|
|
618
|
-
|
|
619
|
-
# Plot the PM10 limit line
|
|
620
|
-
line = ax2.axhline(y=CONFIG['limits']['pm10'],
|
|
621
|
-
color=CONFIG['colors']['pm10'],
|
|
622
|
-
label=CONFIG['labels']['pm10_limit'],
|
|
623
|
-
linewidth=CONFIG['plot']['limit_line_width'],
|
|
624
|
-
linestyle=CONFIG['plot']['limit_line_style'])
|
|
625
|
-
line.set_alpha(CONFIG['plot']['limit_line_opacity'])
|
|
661
|
+
def remove_outliers_iqr(df, column, multiplier=None):
    """
    Remove outliers using Interquartile Range (IQR) method

    Rows are kept when their `column` value lies within
    [Q1 - multiplier * IQR, Q3 + multiplier * IQR], bounds included.

    multiplier = 1.0: Tight bounds, more outliers removed
    multiplier = 1.5: Standard bounds, moderate outliers removed
    multiplier = 2.0: Wide bounds, fewer outliers removed

    Args:
        df: DataFrame to filter.
        column: Name of the column used to detect outliers.
        multiplier: IQR multiplier; None selects the default of 1.5.

    Returns:
        The rows of `df` whose `column` value is within the bounds.
    """
    # Identity test: None is a singleton, and `==` could be overloaded
    if multiplier is None:
        multiplier = 1.5  # Default value

    q1 = df[column].quantile(0.25)
    q3 = df[column].quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - multiplier * iqr
    upper_bound = q3 + multiplier * iqr

    return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
|
|
638
678
|
|
|
639
|
-
# Customize the plot title, labels and ticks
|
|
640
|
-
ax1.set_title(get_plot_title(title, filename))
|
|
641
|
-
ax1.tick_params(axis='x', rotation=CONFIG['plot']['date_rotation'])
|
|
642
|
-
#ax1.tick_params(axis='y', labelcolor=CONFIG['colors']['pm0.1'])
|
|
643
|
-
ax1.set_xlabel('')
|
|
644
|
-
#ax1.set_ylabel(CONFIG['labels']['pm0.1'], color=CONFIG['colors']['pm0.1'])
|
|
645
|
-
ax2.set_ylabel('') # We will manually place the 2 parts in different colors
|
|
646
679
|
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
680
|
+
# WARNING: Untested function
|
|
681
|
+
def remove_outliers_zscore(df, column, threshold=3):
    """
    Remove outliers whose absolute z-score reaches the threshold.

    The z-score is computed with pandas only (no scipy dependency),
    using the population standard deviation (ddof=0).

    Args:
        df: DataFrame to filter.
        column: Name of the column used to detect outliers.
        threshold: Rows are kept when abs(z-score) < threshold.

    Returns:
        The rows of `df` whose z-score is strictly below `threshold`.
        Rows with an undefined z-score (missing value, or a column with
        zero spread) are dropped, since NaN never compares as smaller.
    """
    values = df[column]
    z_scores = ((values - values.mean()) / values.std(ddof=0)).abs()
    return df[z_scores < threshold]
|
|
652
685
|
|
|
653
|
-
# Place the first (bottom) part of the label
|
|
654
|
-
ax2.text(x, y, bottom_label, transform=ax2.transAxes,
|
|
655
|
-
color=CONFIG['colors']['pm2.5'], rotation='vertical',
|
|
656
|
-
ha='center', va='top')
|
|
657
686
|
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
687
|
+
# WARNING: Untested function
|
|
688
|
+
def remove_outliers_std(df, column, n_std=2):
    """
    Keep the rows whose `column` value lies within `n_std` standard
    deviations of the column mean (bounds included).
    """
    values = df[column]
    center = values.mean()
    spread = n_std * values.std()

    # Series.between() includes both bounds by default
    within_bounds = values.between(center - spread, center + spread)
    return df[within_bounds]
|
|
668
695
|
|
|
669
|
-
# Remove the left y-axis elements from ax1
|
|
670
|
-
ax1.grid(axis='y', visible=False)
|
|
671
|
-
ax1.spines['left'].set_visible(False)
|
|
672
|
-
ax1.tick_params(axis='y', left=False, labelleft=False)
|
|
673
696
|
|
|
674
|
-
|
|
675
|
-
|
|
697
|
+
# WARNING: Untested function
|
|
698
|
+
def remove_outliers_percentile(df, column, lower_percentile=5, upper_percentile=95):
    """
    Keep the rows whose `column` value lies between the given lower and
    upper percentiles of that column (bounds included).
    """
    values = df[column]

    # Percentiles are given on a 0-100 scale; quantile() expects 0-1
    low = values.quantile(lower_percentile/100)
    high = values.quantile(upper_percentile/100)

    keep = (values >= low) & (values <= high)
    return df[keep]
|
|
680
703
|
|
|
681
704
|
|
|
682
705
|
def get_label_center(bottom_label, top_label):
    """
    Estimate where to center a two-part vertical axis label.

    Returns 0.5 shifted by the difference between the two label lengths
    divided by an empirical divider derived from the configured font
    scale.
    """
    font_scale = CONFIG['plot']['font_scale']

    # Divider optimized for 11x8.5 plot size, but not as good for 15x10
    # Tested for font scales between 0.8 and 2
    divider = 72 * font_scale**2 - 316 * font_scale + 414

    length_delta = len(bottom_label) - len(top_label)
    return 0.5 + (length_delta / divider)
|
|
688
712
|
|
|
689
713
|
|
|
690
|
-
def generate_stats(df, filename):
|
|
714
|
+
def generate_stats(df, filename, boxplot=False):
    """
    Write the describe() summary of `df` to the stats file for
    `filename` and, when `boxplot` is true, save one boxplot image for
    each column of that summary.
    """
    summary = df.describe()

    with open(get_stats_filename(filename), 'w') as stats_file:
        stats_file.write(summary.to_string())

    if not boxplot:
        return

    # One figure per summarized column, closed right after saving
    for column in summary.columns.tolist():
        sns.boxplot(data=df, y=column)
        plt.savefig(get_boxplot_filename(filename, f'-{column}'))
        plt.close()
|
|
700
725
|
|
|
701
726
|
|
|
702
727
|
def load_config(reset_config = False):
|
|
@@ -736,10 +761,10 @@ def get_config_dir(app_name):
|
|
|
736
761
|
config_dir = Path(config_home) / app_name
|
|
737
762
|
else:
|
|
738
763
|
config_dir = Path.home() / ".config" / app_name
|
|
739
|
-
|
|
764
|
+
|
|
740
765
|
# Create the directory if it doesn't exist
|
|
741
766
|
config_dir.mkdir(parents=True, exist_ok=True)
|
|
742
|
-
|
|
767
|
+
|
|
743
768
|
return config_dir
|
|
744
769
|
|
|
745
770
|
|
|
@@ -752,19 +777,26 @@ def get_plot_title(title, filename):
|
|
|
752
777
|
plot_title = match.group(2) if match else stem
|
|
753
778
|
|
|
754
779
|
# Capitalize only the first character
|
|
755
|
-
if plot_title: plot_title = plot_title
|
|
780
|
+
if plot_title: plot_title = plot_title.capitalize()
|
|
756
781
|
|
|
757
782
|
return plot_title
|
|
758
783
|
|
|
759
784
|
|
|
760
|
-
def
|
|
785
|
+
def get_filename(filename, stem = '', suffix = '', extension = ''):
    """
    Build an output path in the same directory as `filename`.

    Args:
        filename: Path whose parent directory (and stem, by default)
            is reused.
        stem: Replacement for the file stem; empty keeps the original.
        suffix: Text appended right after the stem.
        extension: File extension without the leading dot; when empty,
            no dot is appended.

    Returns:
        The path as a string: '<parent>/<stem><suffix>[.<extension>]'.
    """
    p = Path(filename)
    s = stem if stem != '' else p.stem

    # Only emit the dot when there is an extension, to avoid 'name.'
    ext = f'.{extension}' if extension != '' else ''
    return f'{p.parent}/{s}{suffix}{ext}'
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
def get_plot_filename(filename, suffix = '', stem = ''):
    """
    Return the PNG path located next to `filename`.

    The name is the given `stem` (or the stem of `filename` when `stem`
    is empty) followed by `suffix` and '.png'.
    """
    source = Path(filename)
    base = source.stem if stem == '' else stem
    return f'{source.parent}/{base}{suffix}.png'
|
|
763
795
|
|
|
764
796
|
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
797
|
+
def get_boxplot_filename(filename, suffix = ''):
    """
    Return the boxplot PNG path located next to `filename`, named
    '<stem>-boxplot<suffix>.png'.
    """
    source = Path(filename)
    directory, base = source.parent, source.stem
    return f'{directory}/{base}-boxplot{suffix}.png'
|
|
768
800
|
|
|
769
801
|
|
|
770
802
|
def get_stats_filename(filename):
|
|
@@ -786,7 +818,7 @@ if __name__ == '__main__':
|
|
|
786
818
|
# Configure the root logger
|
|
787
819
|
logging.basicConfig(level=logging.WARNING,
|
|
788
820
|
format='%(levelname)s - %(message)s')
|
|
789
|
-
|
|
821
|
+
|
|
790
822
|
# Configure this script's logger
|
|
791
823
|
logger.setLevel(logging.DEBUG)
|
|
792
824
|
|