PyPI - plotair - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

plotair 0.2.0 (py3-none-any.whl) → 0.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

plotair/__init__.py +1 -1
plotair/config.toml +13 -12
plotair/main.py +403 -371
plotair-0.3.0.dist-info/METADATA +143 -0
plotair-0.3.0.dist-info/RECORD +8 -0
plotair-0.2.0.dist-info/METADATA +0 -97
plotair-0.2.0.dist-info/RECORD +0 -8
{plotair-0.2.0.dist-info → plotair-0.3.0.dist-info}/WHEEL +0 -0
{plotair-0.2.0.dist-info → plotair-0.3.0.dist-info}/entry_points.txt +0 -0
{plotair-0.2.0.dist-info → plotair-0.3.0.dist-info}/licenses/LICENSE +0 -0

plotair/main.py (changed)

@@ -53,8 +53,14 @@ def main():
                         help='sensor data file to process')
     parser.add_argument('-a', '--all-dates', action='store_true',
                         help='plot all dates (otherwise only latest sequence)')
+    parser.add_argument('-b', '--boxplot', action='store_true',
+                        help='generate boxplots along with text stats')
     parser.add_argument('-m', '--merge', metavar='FIELD',
                         help='merge field from file1 to file2, and output to file3')
+    parser.add_argument('-M', '--filter-multiplier', type=float, default=1.5, metavar='MULTIPLIER',
+                        help='multiplier for IQR outlier filtering (default: 1.5)')
+    parser.add_argument('-o', '--filter-outliers', action='store_true',
+                        help='filter out outliers from the plots')
     parser.add_argument('-r', '--reset-config', action='store_true',
                         help='reset configuration file to default')
     parser.add_argument('-s', '--start-date', metavar='DATE',
@@ -63,7 +69,9 @@ def main():
                         help='date at which to stop the plot (YYYY-MM-DD)')
     parser.add_argument('-t', '--title',
                         help='set the plot title')
-    parser.add_argument('-v', '--version', action='version',
+    parser.add_argument('-T', '--snapshots', action='store_true',
+                        help='generate a snapshots table from all files')
+    parser.add_argument('-v', '--version', action='version',
                         version=f'%(prog)s {__version__}')
     args = parser.parse_args()
@@ -71,66 +79,87 @@ def main():
     try:
         load_config(args.reset_config)
     except FileNotFoundError as e:
-        logger.error(f'Failed to load config: {e}')
+        print(f'Error: Failed to load configuration file: {e}')
         return
-    if args.merge:
-        field = args.merge
-        num_files = len(args.filenames)
-        if num_files != 3:
-            logger.error('argument -m/--merge requires three file arguments')
-            return
-        file_format, df1, num_valid_rows1, num_invalid_rows = read_data(args.filenames[0])
-        file_format, df2, num_valid_rows2, num_invalid_rows = read_data(args.filenames[1])
+    filenames = []
-        if num_valid_rows1 <= 0 or num_valid_rows2 <= 0:
-            logger.error('At least one of the input files is unsupported')
-            return
-        temp_df = df1[['co2']]
-        df2 = pd.concat([df2, temp_df]).sort_index()
-        df2.to_csv(args.filenames[2], index=True)
-    else:
-        # Create a list containing all files from all patterns like '*.csv',
-        # because under Windows the terminal doesn't expand wildcard arguments.
-        all_files = []
+    if sys.platform == "win32":
+        # On Windows, expand glob patterns (e.g. *.csv)
         for pattern in args.filenames:
-            all_files.extend(glob.glob(pattern))
-        for filename in all_files:
-            logger.info(f'Processing {filename}')
-            try:
-                file_format, df, num_valid_rows, num_invalid_rows = read_data(filename)
-                if num_valid_rows > 0:
-                    logger.debug(f'{num_valid_rows} row(s) read')
-                else:
-                    logger.error('Unsupported file format')
-                    return
-                if num_invalid_rows > 0:
-                    logger.info(f'{num_invalid_rows} invalid row(s) ignored')
+            filenames.extend(glob.glob(pattern))
+    else:
+        # On Linux, use filenames as-is (no glob expansion needed)
+        filenames = args.filenames
-                if not args.all_dates:
-                    df = delete_old_data(df, args.start_date, args.stop_date)
+    if args.merge:
+        # Merge field from file1 to file2, and output to file3
+        merge_field(args.merge, filenames)
-                generate_stats(df, filename)
+    elif args.snapshots:
+        # Generate a snapshots table from all files
+        generate_snapshots(filenames)
-                if file_format == 'plotair':
-                    generate_plot_co2_hum_tmp(df, filename, args.title)
-                elif file_format == 'visiblair_d':
-                    generate_plot_co2_hum_tmp(df, filename, args.title)
-                elif file_format == 'visiblair_e':
-                    generate_plot_co2_hum_tmp(df, filename, args.title)
-                    generate_plot_pm(df, filename, args.title)
-                elif file_format == 'graywolf_ds':
-                    generate_plot_hum_tmp(df, filename, args.title)
-                    generate_plot_voc_co_form(df, filename, args.title)
-            except Exception as e:
-                logger.exception(f'Unexpected error: {e}')
+    else:
+        # Generate plots for all files
+        process_files(filenames, args)
+def process_files(filenames, args):
+    for filename in filenames:
+        print(f'Processing {filename}')
+        try:
+            file_format, df, num_valid_rows, num_invalid_rows = read_data(filename)
+            if num_valid_rows > 0:
+                logger.debug(f'{num_valid_rows} valid row(s) read')
+            else:
+                print('Error: Unsupported file format')
+                return
+            if num_invalid_rows > 0:
+                percent_ignored = round(num_invalid_rows / (num_valid_rows + num_invalid_rows) * 100)
+                print(f'{num_invalid_rows} invalid row(s) ignored ({percent_ignored}%)')
+            if not args.all_dates:
+                df = delete_old_data(df, args.start_date, args.stop_date)
+            generate_stats(df, filename, args.boxplot)
+            if file_format == 'plotair':
+                generate_plot(df, filename, args.title, suffix='cht',
+                         series1='co2', series2='humidity', series3='temp',
+                         filter_outliers=args.filter_outliers,
+                         filter_multiplier=args.filter_multiplier)
+            elif file_format == 'visiblair_d':
+                generate_plot(df, filename, args.title, suffix='cht',
+                         series1='co2', series2='humidity', series3='temp',
+                         filter_outliers=args.filter_outliers,
+                         filter_multiplier=args.filter_multiplier)
+            elif file_format == 'visiblair_e':
+                generate_plot(df, filename, args.title, suffix='cht',
+                         series1='co2', series2='humidity', series3='temp',
+                         filter_outliers=args.filter_outliers,
+                         filter_multiplier=args.filter_multiplier)
+                generate_plot(df, filename, args.title, suffix='pm',
+                         series1=None, series2='pm2.5', series3='pm10',
+                         filter_outliers=args.filter_outliers,
+                         filter_multiplier=args.filter_multiplier)
+            elif file_format == 'graywolf_ds':
+                generate_plot(df, filename, args.title, suffix='ht',
+                         series1=None, series2='humidity', series3='temp',
+                         filter_outliers=args.filter_outliers,
+                         filter_multiplier=args.filter_multiplier)
+                generate_plot(df, filename, args.title, suffix='vf',
+                         series1='tvoc', series2='form', series3=None,
+                         filter_outliers=args.filter_outliers,
+                         filter_multiplier=args.filter_multiplier)
+                generate_plot(df, filename, args.title, suffix='co',
+                         series1=None, series2='co', series3=None,
+                         filter_outliers=args.filter_outliers,
+                         filter_multiplier=args.filter_multiplier)
+        except Exception as e:
+            print(f'Error: Unexpected error: {e}')
 def detect_file_format(filename):
@@ -139,11 +168,14 @@ def detect_file_format(filename):
     visiblair_e_num_col = (21, 21)
     graywolf_ds_num_col = (7, 7)
-    with open(filename, 'r', newline='', encoding='utf-8') as file:
+    # Some files begin with the '\ufeff' character (Byte Order Mark / BOM).
+    # This breaks the first field detection. Use 'utf-8-sig' instead of 'utf-8'
+    # to automatically handle BOM.
+    with open(filename, 'r', newline='', encoding='utf-8-sig') as file:
         reader = csv.reader(file)
         first_line = next(reader)
         num_fields = len(first_line)
         if first_line[0] == 'date':
             file_format = 'plotair'
         elif visiblair_d_num_col[0] <= num_fields <= visiblair_d_num_col[1]:
@@ -154,9 +186,9 @@ def detect_file_format(filename):
         elif (graywolf_ds_num_col[0] <= num_fields <= graywolf_ds_num_col[1] and
               first_line[0] == 'Date Time'):
             file_format = 'graywolf_ds'
     logger.debug(f'File format: {file_format}')
     return file_format
@@ -175,6 +207,8 @@ def read_data(filename):
     elif file_format == 'graywolf_ds':
         df, num_valid_rows, num_invalid_rows = read_data_graywolf_ds(filename)
+    df = df.sort_index()  # Sort in case some dates are not in order
     return file_format, df, num_valid_rows, num_invalid_rows
@@ -189,7 +223,6 @@ def read_data_plotair(filename):
     df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
     df = df.set_index('date')
-    df = df.sort_index()  # Sort in case some dates are not in order
     num_valid_rows = len(df)
     return df, num_valid_rows, num_invalid_rows
@@ -207,34 +240,33 @@ def read_data_visiblair_d(filename):
         for line in f:
             line = line.strip()
             fields = line.split(',')
             if not (5 <= len(fields) <= 6):
                 # Skip lines with an invalid number of columns
-                logger.debug(f'Skipping line (number of columns): {line}')
+                #logger.debug(f'Skipping line (number of columns): {line}')
                 num_invalid_rows += 1
                 continue
             try:
                 # Convert each field to its target data type
                 parsed_row = {
                     'date': pd.to_datetime(fields[0], format='%Y-%m-%d %H:%M:%S'),
-                    'co2': np.uint16(fields[1]),           # 0 to 10,000 ppm
-                    'temperature': np.float32(fields[2]),  # -40 to 70 °C
-                    'humidity': np.uint8(fields[3])        # 0 to 100% RH
+                    'co2': np.uint16(fields[1]),     # 0 to 10,000 ppm
+                    'temp': np.float32(fields[2]),   # -40 to 70 °C
+                    'humidity': np.uint8(fields[3])  # 0 to 100% RH
                 }
                 # If conversion succeeds, add the parsed row to the list
                 valid_rows.append(parsed_row)
             except (ValueError, TypeError) as e:
                 # Skip lines with conversion errors
-                logger.debug(f'Skipping line (conversion error): {line}')
+                #logger.debug(f'Skipping line (conversion error): {line}')
                 num_invalid_rows += 1
                 continue
         # Create the DataFrame from the valid rows
         df = pd.DataFrame(valid_rows)
         df = df.set_index('date')
-        df = df.sort_index()  # Sort in case some dates are not in order
         num_valid_rows = len(df)
     return df, num_valid_rows, num_invalid_rows
@@ -248,7 +280,7 @@ def read_data_visiblair_e(filename):
     df = pd.read_csv(filename)
     # Rename the columns
-    df.columns = ['uuid', 'date', 'co2', 'humidity', 'temperature', 'pm0.1',
+    df.columns = ['uuid', 'date', 'co2', 'humidity', 'temp', 'pm0.1',
                   'pm0.3', 'pm0.5', 'pm1', 'pm2.5', 'pm5', 'pm10', 'pressure',
                   'voc_index', 'firmware', 'model', 'pcb', 'display_rate',
                   'is_charging', 'is_ac_in', 'batt_voltage']
@@ -257,7 +289,6 @@ def read_data_visiblair_e(filename):
     df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
     df = df.set_index('date')
-    df = df.sort_index()  # Sort in case some dates are not in order
     num_valid_rows = len(df)
     return df, num_valid_rows, num_invalid_rows
@@ -271,21 +302,40 @@ def read_data_graywolf_ds(filename):
     df = pd.read_csv(filename)
     # Rename the columns
-    df.columns = ['date', 'tvoc', 'co', 'form', 'humidity', 'temperature', 'filename']
+    df.columns = ['date', 'tvoc', 'co', 'form', 'humidity', 'temp', 'filename']
     # Convert the 'date' column to pandas datetime objects
     df['date'] = pd.to_datetime(df['date'], format='%d-%b-%y %I:%M:%S %p')
-    # Convert 'form' column to string, replace '< LOD' with '0', and then convert to integer
-    df['form'] = df['form'].astype(str).str.replace('< LOD', '0').astype(int)
+    # Convert 'form' column to string, replace '< LOD' with '10',
+    # and then convert to integer
+    df['form'] = df['form'].astype(str).str.replace('< LOD', '10').astype(int)
     df = df.set_index('date')
-    df = df.sort_index()  # Sort in case some dates are not in order
     num_valid_rows = len(df)
     return df, num_valid_rows, num_invalid_rows
+def merge_field(field, filenames):
+    num_files = len(filenames)
+    if num_files != 3:
+        print('Error: Argument -m/--merge requires three file arguments')
+        return
+    file_format, df1, num_valid_rows1, num_invalid_rows = read_data(filenames[0])
+    file_format, df2, num_valid_rows2, num_invalid_rows = read_data(filenames[1])
+    if num_valid_rows1 <= 0 or num_valid_rows2 <= 0:
+        print('Error: At least one of the input files is unsupported')
+        return
+    temp_df = df1[[field]]
+    df2 = pd.concat([df2, temp_df]).sort_index()
+    df2.to_csv(filenames[2], index=True)
 def delete_old_data(df, start_date = None, stop_date = None):
     if not start_date and not stop_date:
         # Iterate backwards through the samples to find the first time gap larger
@@ -319,12 +369,31 @@ def delete_old_data(df, start_date = None, stop_date = None):
             df = df[df.index <= sd]
     return df
-def generate_plot_co2_hum_tmp(df, filename, title):
+class DataSeries:
+    def __init__(self, name=''):
+        # y_range could be replaced by y_min and y_max
+        self.name = name
+        self.label = CONFIG['labels'].get(self.name)
+        self.color = CONFIG['colors'].get(self.name)
+        self.y_range = CONFIG['axis_ranges'].get(self.name)  # min/max tuple, e.g. (0, 100)
+        self.limit = CONFIG['limits'].get(self.name)  # single value or min/max tuple
+        self.limit_label = CONFIG['labels'].get(self.name + '_limit')
+        self.linestyle = CONFIG['plot'].get(self.name + '_line_style')
+def generate_plot(df, filename, title, suffix='',
+                  series1=None, series2=None, series3=None,
+                  filter_outliers=False, filter_multiplier=None):
     # The dates must be in a non-index column
     df = df.reset_index()
+    # Get each series configuration
+    ds1 = DataSeries(name=series1) if series1 else None
+    ds2 = DataSeries(name=series2) if series2 else None
+    ds3 = DataSeries(name=series3) if series3 else None
     # Set a theme and scale all fonts
     sns.set_theme(style='whitegrid', font_scale=CONFIG['plot']['font_scale'])
@@ -335,96 +404,108 @@ def generate_plot_co2_hum_tmp(df, filename, title):
     fig, ax1 = plt.subplots(figsize=CONFIG['plot']['size'])
     ax2 = ax1.twinx()  # Secondary y axis
-    # Plot the data series
-    sns.lineplot(data=df, x='date', y='co2', ax=ax1, color=CONFIG['colors']['co2'],
-                 label=CONFIG['labels']['co2'], legend=False)
-    sns.lineplot(data=df, x='date', y='humidity', ax=ax2, color=CONFIG['colors']['humidity'],
-                 label=CONFIG['labels']['humidity'], legend=False)
-    sns.lineplot(data=df, x='date', y='temperature', ax=ax2, color=CONFIG['colors']['temp'],
-                 label=CONFIG['labels']['temp'], legend=False)
+    # TODO: add functions for repetitive code
-    # Set the ranges for both y axes
-    cmin, cmax = CONFIG['axis_ranges']['co2']
-    tmin, tmax = CONFIG['axis_ranges']['temp_h']
-    ax1.set_ylim(cmin, cmax)  # df['co2'].max() * 1.05
-    ax2.set_ylim(tmin, tmax)
-    # Add a grid for the x axis and the y axes
-    # This is already done if using the whitegrid theme
-    #ax1.grid(axis='x', alpha=CONFIG['plot']['grid_opacity'])
-    #ax1.grid(axis='y', alpha=CONFIG['plot']['grid_opacity'])
-    ax2.grid(axis='y', alpha=CONFIG['plot']['grid2_opacity'], linestyle=CONFIG['plot']['grid2_line_style'])
-    # Set the background color of the humidity comfort zone
-    hmin, hmax = CONFIG['limits']['humidity']
-    ax2.axhspan(ymin=hmin, ymax=hmax,
-                facecolor=CONFIG['colors']['humidity'], alpha=CONFIG['plot']['limit_zone_opacity'])
-    # Customize the plot title, labels and ticks
-    ax1.set_title(get_plot_title(title, filename))
-    ax1.tick_params(axis='x', rotation=CONFIG['plot']['date_rotation'])
-    ax1.tick_params(axis='y', labelcolor=CONFIG['colors']['co2'])
-    ax1.set_xlabel('')
-    ax1.set_ylabel(CONFIG['labels']['co2'], color=CONFIG['colors']['co2'])
-    ax2.set_ylabel('')  # We will manually place the 2 parts in different colors
-    # Define the position for the center of the right y axis label
-    bottom_label = CONFIG['labels']['temp'] + '  '
-    top_label = '  ' + CONFIG['labels']['humidity']
-    x = 1.07  # Slightly to the right of the axis
-    y = get_label_center(bottom_label, top_label)   # Vertically centered
+    # Plot series #1 main line (on left y-axis)
+    if ds1:
+        if ds1.linestyle:
+            linestyle = ds1.linestyle
+        else:
+            linestyle = CONFIG['plot']['default_line_style']
-    # Place the first (bottom) part of the label
-    ax2.text(x, y, bottom_label, transform=ax2.transAxes,
-             color=CONFIG['colors']['temp'], rotation='vertical',
-             ha='center', va='top')
+        if filter_outliers:
+            df1 = remove_outliers_iqr(df, ds1.name, multiplier=filter_multiplier)
+        else:
+            df1 = df[df[ds1.name] != 0]  # Only filter out zero values
+        sns.lineplot(data=df1, x='date', y=ds1.name, ax=ax1, color=ds1.color,
+                     label=ds1.label, legend=False, linestyle=linestyle)
+        # Display series #1 limit line or zone
+        if ds1.limit and not isinstance(ds1.limit, list):
+            # Plot the limit line
+            line = ax1.axhline(y=ds1.limit, color=ds1.color, label=ds1.limit_label,
+                               linestyle=CONFIG['plot']['limit_line_style'])
+            line.set_alpha(CONFIG['plot']['limit_line_opacity'])
+        if ds1.limit and isinstance(ds1.limit, list):
+            # Set the background color of the limit zone
+            hmin, hmax = ds1.limit
+            ax1.axhspan(ymin=hmin, ymax=hmax, facecolor=ds1.color,
+                        alpha=CONFIG['plot']['limit_zone_opacity'])
+    # Plot series #2 main line (on right y-axis)
+    if ds2.linestyle:
+        linestyle = ds2.linestyle
+    else:
+        linestyle = CONFIG['plot']['default_line_style']
-    # Place the second (top) part of the label
-    ax2.text(x, y, top_label, transform=ax2.transAxes,
-            color=CONFIG['colors']['humidity'], rotation='vertical',
-            ha='center', va='bottom')
+    if filter_outliers:
+        df2 = remove_outliers_iqr(df, ds2.name, multiplier=filter_multiplier)
+    else:
+        df2 = df[df[ds2.name] != 0]  # Only filter out zero values
+    sns.lineplot(data=df2, x='date', y=ds2.name, ax=ax2, color=ds2.color,
+                 label=ds2.label, legend=False, linestyle=linestyle)
+    # Display series #2 limit line or zone
+    if ds2.limit and not isinstance(ds2.limit, list):
+        # Plot the limit line
+        line = ax2.axhline(y=ds2.limit, color=ds2.color, label=ds2.limit_label,
+                           linestyle=CONFIG['plot']['limit_line_style'])
+        line.set_alpha(CONFIG['plot']['limit_line_opacity'])
+    if ds2.limit and isinstance(ds2.limit, list):
+        # Set the background color of the limit zone
+        hmin, hmax = ds2.limit
+        ax2.axhspan(ymin=hmin, ymax=hmax, facecolor=ds2.color,
+                    alpha=CONFIG['plot']['limit_zone_opacity'])
+    # Plot series #3 main line (on right y-axis)
+    if ds3:
+        if ds3.linestyle:
+            linestyle = ds3.linestyle
+        else:
+            linestyle = CONFIG['plot']['default_line_style']
-    # Create a combined legend
-    lines1, labels1 = ax1.get_legend_handles_labels()
-    lines2, labels2 = ax2.get_legend_handles_labels()
-    ax1.legend(lines1 + lines2, labels1 + labels2,
-               loc=CONFIG['plot']['legend_location'])
+        if filter_outliers:
+            df3 = remove_outliers_iqr(df, ds3.name, multiplier=filter_multiplier)
+        else:
+            df3 = df[df[ds3.name] != 0]  # Only filter out zero values
-    # Adjust the plot margins to make room for the labels
-    plt.tight_layout()
+        sns.lineplot(data=df3, x='date', y=ds3.name, ax=ax2, color=ds3.color,
+                     label=ds3.label, legend=False, linestyle=linestyle)
-    # Save the plot as a PNG image
-    plt.savefig(get_plot_filename(filename, '-cht'))
-    plt.close()
+        # Plot series #3 limit line
+        if ds3.limit and not isinstance(ds3.limit, list):
+            # Plot the limit line
+            line = ax2.axhline(y=ds3.limit, color=ds3.color, label=ds3.limit_label,
+                               linestyle=CONFIG['plot']['limit_line_style'])
+            line.set_alpha(CONFIG['plot']['limit_line_opacity'])
+        if ds3.limit and isinstance(ds3.limit, list):
+            # Set the background color of the limit zone
+            hmin, hmax = ds3.limit
+            ax2.axhspan(ymin=hmin, ymax=hmax, facecolor=ds3.color,
+                        alpha=CONFIG['plot']['limit_zone_opacity'])
-def generate_plot_hum_tmp(df, filename, title):
-    # The dates must be in a non-index column
-    df = df.reset_index()
+    # Set the ranges for both y axes
+    if ds1:
+        y1min, y1max = ds1.y_range
+        ax1.set_ylim(y1min, y1max)
-    # Set a theme and scale all fonts
-    sns.set_theme(style='whitegrid', font_scale=CONFIG['plot']['font_scale'])
+    y2min, y2max = ds2.y_range
-    ff = CONFIG['plot']['font_family']
-    if ff != '': plt.rcParams['font.family'] = ff
+    if ds3:
+        y3min, y3max = ds3.y_range
-    # Set up the matplotlib figure and axes
-    fig, ax1 = plt.subplots(figsize=CONFIG['plot']['size'])
-    ax2 = ax1.twinx()  # Secondary y axis
+        if y2min != y3min or y2max != y3max:
+            print(f'Warning: Axis ranges differ for {series2} and {series3}, using largest range')
-    # Plot the data series
-    #sns.lineplot(data=df, x='date', y='co2', ax=ax1, color=CONFIG['colors']['co2'],
-    #             label=CONFIG['labels']['co2'], legend=False)
-    sns.lineplot(data=df, x='date', y='humidity', ax=ax2, color=CONFIG['colors']['humidity'],
-                 label=CONFIG['labels']['humidity'], legend=False)
-    sns.lineplot(data=df, x='date', y='temperature', ax=ax2, color=CONFIG['colors']['temp'],
-                 label=CONFIG['labels']['temp'], legend=False)
+        y2min = min(y2min, y3min)
+        y2max = max(y2max, y3max)
-    # Set the ranges for both y axes
-    cmin, cmax = CONFIG['axis_ranges']['co2']
-    tmin, tmax = CONFIG['axis_ranges']['temp_h']
-    ax1.set_ylim(cmin, cmax)  # df['co2'].max() * 1.05
-    ax2.set_ylim(tmin, tmax)
+    ax2.set_ylim(y2min, y2max)
     # Add a grid for the x axis and the y axes
     # This is already done if using the whitegrid theme
@@ -432,271 +513,215 @@ def generate_plot_hum_tmp(df, filename, title):
     #ax1.grid(axis='y', alpha=CONFIG['plot']['grid_opacity'])
     ax2.grid(axis='y', alpha=CONFIG['plot']['grid2_opacity'], linestyle=CONFIG['plot']['grid2_line_style'])
-    # Set the background color of the humidity comfort zone
-    hmin, hmax = CONFIG['limits']['humidity']
-    ax2.axhspan(ymin=hmin, ymax=hmax,
-                facecolor=CONFIG['colors']['humidity'], alpha=CONFIG['plot']['limit_zone_opacity'])
     # Customize the plot title, labels and ticks
     ax1.set_title(get_plot_title(title, filename))
     ax1.tick_params(axis='x', rotation=CONFIG['plot']['date_rotation'])
-    #ax1.tick_params(axis='y', labelcolor=CONFIG['colors']['co2'])
+    if ds1:
+        ax1.tick_params(axis='y', labelcolor=ds1.color)
+        ax1.set_ylabel(ds1.label, color=ds1.color)
     ax1.set_xlabel('')
-    #ax1.set_ylabel(CONFIG['labels']['co2'], color=CONFIG['colors']['co2'])
-    ax2.set_ylabel('')  # We will manually place the 2 parts in different colors
-    # Define the position for the center of the right y axis label
-    bottom_label = CONFIG['labels']['temp'] + '  '
-    top_label = '  ' + CONFIG['labels']['humidity']
-    x = 1.07  # Slightly to the right of the axis
-    y = get_label_center(bottom_label, top_label)   # Vertically centered
+    if ds3:
+        ax2.set_ylabel('')  # We will manually place the 2 parts in different colors
-    # Place the first (bottom) part of the label
-    ax2.text(x, y, bottom_label, transform=ax2.transAxes,
-             color=CONFIG['colors']['temp'], rotation='vertical',
-             ha='center', va='top')
+        # Define the position for the center of the right y axis label
+        bottom_label = ds3.label + '  '
+        top_label = '  ' + ds2.label
+        x = 1.07  # Slightly to the right of the axis
+        y = get_label_center(bottom_label, top_label)   # Vertically centered
-    # Place the second (top) part of the label
-    ax2.text(x, y, top_label, transform=ax2.transAxes,
-            color=CONFIG['colors']['humidity'], rotation='vertical',
-            ha='center', va='bottom')
+        # Place the first (bottom) part of the label
+        ax2.text(x, y, bottom_label, transform=ax2.transAxes,
+                 color=ds3.color, rotation='vertical',
+                 ha='center', va='top')
+        # Place the second (top) part of the label
+        ax2.text(x, y, top_label, transform=ax2.transAxes,
+                color=ds2.color, rotation='vertical',
+                ha='center', va='bottom')
+    else:
+        ax2.tick_params(axis='y', labelcolor=ds2.color)
+        ax2.set_ylabel(ds2.label, color=ds2.color)
     # Create a combined legend
     lines1, labels1 = ax1.get_legend_handles_labels()
     lines2, labels2 = ax2.get_legend_handles_labels()
+    labels1 = remove_units_from_labels(labels1)
+    labels2 = remove_units_from_labels(labels2)
     ax1.legend(lines1 + lines2, labels1 + labels2,
                loc=CONFIG['plot']['legend_location'])
-    # Remove the left y-axis elements from ax1
-    ax1.grid(axis='y', visible=False)
-    ax1.spines['left'].set_visible(False)
-    ax1.tick_params(axis='y', left=False, labelleft=False)
+    if not ds1:
+        # Remove the left y-axis elements from ax1
+        ax1.grid(axis='y', visible=False)
+        ax1.spines['left'].set_visible(False)
+        ax1.tick_params(axis='y', left=False, labelleft=False)
     # Adjust the plot margins to make room for the labels
     plt.tight_layout()
     # Save the plot as a PNG image
-    plt.savefig(get_plot_filename(filename, '-ht'))
+    # TODO: auto build the plot suffix from the 1st char of each series?
+    plt.savefig(get_plot_filename(filename, f'-{suffix}'))
     plt.close()
-def generate_plot_voc_co_form(df, filename, title):
-    # The dates must be in a non-index column
-    df = df.reset_index()
-    # Set a theme and scale all fonts
-    sns.set_theme(style='whitegrid', font_scale=CONFIG['plot']['font_scale'])
-    ff = CONFIG['plot']['font_family']
-    if ff != '': plt.rcParams['font.family'] = ff
-    # Set up the matplotlib figure and axes
-    fig, ax1 = plt.subplots(figsize=CONFIG['plot']['size'])
-    ax2 = ax1.twinx()  # Secondary y axis
-    # Plot the TVOC main line
-    sns.lineplot(data=df, x='date', y='tvoc', ax=ax1, legend=False,
-                 color=CONFIG['colors']['tvoc'], label=CONFIG['labels']['tvoc'])
-    # Plot the TVOC limit line
-    line = ax1.axhline(y=CONFIG['limits']['tvoc'], color=CONFIG['colors']['tvoc'],
-                linestyle=CONFIG['plot']['limit_line_style'], linewidth=CONFIG['plot']['limit_line_width'],
-                label=CONFIG['labels']['tvoc_limit'])
-    line.set_alpha(CONFIG['plot']['limit_line_opacity'])
-    # Plot the formaldehyde main line
-    df_filtered = df[df['form'] != 0]  # Filter out rows where 'form' is zero
-    sns.lineplot(data=df_filtered, x='date', y='form', ax=ax2, legend=False,
-                 color=CONFIG['colors']['form'], label=CONFIG['labels']['form'])
-    # Plot the formaldehyde limit line
-    line = ax2.axhline(y=CONFIG['limits']['form'], color=CONFIG['colors']['form'],
-                linestyle=CONFIG['plot']['limit_line_style'], linewidth=CONFIG['plot']['limit_line_width'],
-                label=CONFIG['labels']['form_limit'])
-    line.set_alpha(CONFIG['plot']['limit_line_opacity'])
-    # Plot the CO main line
-    co_scale = 10
-    df['co_scaled'] = df['co'] * co_scale
-    sns.lineplot(data=df, x='date', y='co_scaled', ax=ax2, legend=False,
-                 color=CONFIG['colors']['co'], label=CONFIG['labels']['co'])
-    # Plot the CO limit line
-    line = ax2.axhline(y=CONFIG['limits']['co'] * co_scale, color=CONFIG['colors']['co'],
-                linestyle=CONFIG['plot']['limit_line_style'], linewidth=CONFIG['plot']['limit_line_width'],
-                label=CONFIG['labels']['co_limit'])
-    line.set_alpha(CONFIG['plot']['limit_line_opacity'])
-    # Set the ranges for both y axes
-    tmin, tmax = CONFIG['axis_ranges']['tvoc']
-    cmin, cmax = CONFIG['axis_ranges']['co_form']
-    ax1.set_ylim(tmin, tmax)
-    ax2.set_ylim(cmin, cmax)
-    # Add a grid for the x axis and the y axes
-    # This is already done if using the whitegrid theme
-    #ax1.grid(axis='x', alpha=CONFIG['plot']['grid_opacity'])
-    #ax1.grid(axis='y', alpha=CONFIG['plot']['grid_opacity'])
-    ax2.grid(axis='y', alpha=CONFIG['plot']['grid2_opacity'], linestyle=CONFIG['plot']['grid2_line_style'])
-    # Customize the plot title, labels and ticks
-    ax1.set_title(get_plot_title(title, filename))
-    ax1.tick_params(axis='x', rotation=CONFIG['plot']['date_rotation'])
-    ax1.tick_params(axis='y', labelcolor=CONFIG['colors']['tvoc'])
-    ax1.set_xlabel('')
-    ax1.set_ylabel(CONFIG['labels']['tvoc'], color=CONFIG['colors']['tvoc'])
-    ax2.set_ylabel('')  # We will manually place the 2 parts in different colors
-    # Define the position for the center of the right y axis label
-    bottom_label = CONFIG['labels']['co'] + '  '
-    top_label = '  ' + CONFIG['labels']['form']
-    x = 1.07  # Slightly to the right of the axis
-    y = get_label_center(bottom_label, top_label)   # Vertically centered
-    # Place the first (bottom) part of the label
-    ax2.text(x, y, bottom_label, transform=ax2.transAxes,
-             color=CONFIG['colors']['co'], rotation='vertical',
-             ha='center', va='top')
+def generate_snapshots(filenames):
+    """Render a one-row-per-file summary table as a PNG image and a text file.
+    The first data row of each file in `filenames` is read and the rows are
+    combined into a single table: the file stem comes first and the 'date'
+    column is dropped. The table image is saved under the name returned by
+    get_plot_filename(..., stem='snapshots') and the comma-separated text
+    under get_filename(..., stem='snapshots', extension='txt').
+    Raises ValueError if `filenames` is empty.
+    """
+    if not filenames:
+        # Fail early with a clear message instead of a KeyError further down
+        raise ValueError('No files to generate snapshots from')
+    columns = ['date', 'tvoc', 'co', 'form', 'humidity', 'temp', 'filename']
+    frames = []
-    # Place the second (top) part of the label
-    ax2.text(x, y, top_label, transform=ax2.transAxes,
-             color=CONFIG['colors']['form'], rotation='vertical',
-             ha='center', va='bottom')
+    for filename in filenames:
+        print(f'Reading {filename}')
-    # Create a combined legend
-    lines1, labels1 = ax1.get_legend_handles_labels()
-    lines2, labels2 = ax2.get_legend_handles_labels()
-    ax1.legend(lines1 + lines2, labels1 + labels2,
-               loc=CONFIG['plot']['legend_location'])
+        # Auto-detect field separator, skip the header row,
+        # and only read one data row
+        df_new = pd.read_csv(filename, sep=None, engine='python',
+                             names=columns, skiprows=1, nrows=1)
-    # Adjust the plot margins to make room for the labels
-    plt.tight_layout()
+        # Update the filename field with the actual filename
+        df_new['filename'] = Path(filename).stem
-    # Save the plot as a PNG image
-    plt.savefig(get_plot_filename(filename, '-vcf'))
+        frames.append(df_new)
+    # Concatenate once: no empty starting frame, so no special first case
+    df = pd.concat(frames, ignore_index=True)
+    # Convert 'form' column to string, and replace '< LOD' with '<10'
+    # (regex=False: the pattern is a literal on every pandas version)
+    df['form'] = df['form'].astype(str).str.replace('< LOD', '<10', regex=False)
+    # Drop the 'date' column, and move last column (filename) to first
+    df = df.drop(columns=['date'])
+    df = df[[df.columns[-1]] + df.columns[:-1].tolist()]
+    # str.capitalize() upper-cases the first character and lower-cases the rest
+    df['filename'] = df['filename'].str.capitalize()
+    # Rename the columns before creating the table
+    # TODO: use config file values instead
+    df = df.rename(columns={
+        'filename': 'Pièce',
+        'tvoc': 'COVT (ppb)',
+        'co': 'Monoxyde de\ncarbone (ppm)',
+        'form': 'Formaldéhyde\n(ppb)',
+        'humidity': 'Humidité\nrelative (%)',
+        'temp': 'Température (°C)',
+    })
+    # Create table
+    fig, ax = plt.subplots(figsize=(7, 4))
+    ax.axis('off')
+    table = ax.table(cellText=df.values,
+                     colLabels=df.columns,
+                     cellLoc='center',
+                     loc='center')
+    table.auto_set_font_size(False)
+    table.set_fontsize(10)
+    table.scale(2, 2)  # column width, row height
+    n_rows = len(df) + 1  # +1 for header row
+    n_cols = len(df.columns)
+    # Change grid color and set alternating row colors
+    for i in range(n_rows):
+        for j in range(n_cols):
+            cell = table[(i, j)]
+            cell.set_edgecolor('#bbbbbb')      # Medium light gray
+            if i % 2 == 0:
+                cell.set_facecolor('#f4f4f4')  # Very light gray
+            else:
+                cell.set_facecolor('#ffffff')  # White
+    # Header row: increase height, make text bold, and add background color
+    for j in range(n_cols):
+        cell = table[(0, j)]
+        cell.set_height(0.15)
+        cell.set_text_props(weight='bold')
+        cell.set_facecolor('#dddddd')          # Light gray
+    # First column: change alignment to left, except for the header
+    for i in range(1, n_rows):
+        table[(i, 0)].set_text_props(ha='left')
+    # NOTE(review): the image is named after the LAST input file while the
+    # text file below is named after the FIRST one; confirm this is intended
+    plt.savefig(get_plot_filename(filenames[-1], stem='snapshots'),
+                bbox_inches='tight', dpi=300)
     plt.close()
+    # Write a csv file to paste easily in a spreadsheet
+    df.columns = df.columns.str.replace('\n', ' ')
+    df.to_csv(get_filename(filenames[0], stem='snapshots', extension='txt'), index=False)
-def generate_plot_pm(df, filename, title):
-    # The dates must be in a non-index column
-    df = df.reset_index()
-    # Set a theme and scale all fonts
-    sns.set_theme(style='whitegrid', font_scale=CONFIG['plot']['font_scale'])
-    ff = CONFIG['plot']['font_family']
-    if ff != '': plt.rcParams['font.family'] = ff
+def remove_units_from_labels(labels):
+    """Return a new list of labels with every ' (...)' part removed."""
+    # A space followed by a parenthesized group, e.g. ' (ppb)'
+    unit_pattern = re.compile(r' \([^)]*\)')
+    stripped = []
+    for label in labels:
+        stripped.append(unit_pattern.sub('', label))
+    return stripped
-    # Set up the matplotlib figure and axes
-    fig, ax1 = plt.subplots(figsize=CONFIG['plot']['size'])
-    ax2 = ax1.twinx()  # Secondary y axis
-    #sns.lineplot(data=df, x='date', y='pm0.1', ax=ax1, color=CONFIG['colors']['pm0.1'],
-    #             label=CONFIG['labels']['pm0.1'], legend=False)
-    # Plot the PM2.5 main line
-    sns.lineplot(data=df, x='date', y='pm2.5', ax=ax2, legend=False,
-                 color=CONFIG['colors']['pm2.5'],
-                 label=CONFIG['labels']['pm2.5'],
-                 linewidth=CONFIG['plot']['pm2.5_line_width'],
-                 linestyle=CONFIG['plot']['pm2.5_line_style'])
-    # Plot the PM2.5 limit line
-    line = ax2.axhline(y=CONFIG['limits']['pm2.5'],
-                 color=CONFIG['colors']['pm2.5'],
-                 label=CONFIG['labels']['pm2.5_limit'],
-                 linewidth=CONFIG['plot']['limit_line_width'],
-                 linestyle=CONFIG['plot']['limit_line_style'])
-    line.set_alpha(CONFIG['plot']['limit_line_opacity'])
-    # Plot the PM10 main line
-    sns.lineplot(data=df, x='date', y='pm10', ax=ax2, legend=False,
-                 color=CONFIG['colors']['pm10'],
-                 label=CONFIG['labels']['pm10'],
-                 linewidth=CONFIG['plot']['pm10_line_width'],
-                 linestyle=CONFIG['plot']['pm10_line_style'])
-    # Plot the PM10 limit line
-    line = ax2.axhline(y=CONFIG['limits']['pm10'],
-                 color=CONFIG['colors']['pm10'],
-                 label=CONFIG['labels']['pm10_limit'],
-                 linewidth=CONFIG['plot']['limit_line_width'],
-                 linestyle=CONFIG['plot']['limit_line_style'])
-    line.set_alpha(CONFIG['plot']['limit_line_opacity'])
+def remove_outliers_iqr(df, column, multiplier=None):
+    """
+    Remove outliers using Interquartile Range (IQR) method
+    multiplier = 1.0: Tight bounds, more outliers removed
+    multiplier = 1.5: Standard bounds, moderate outliers removed
+    multiplier = 2.0: Wide bounds, fewer outliers removed
+    Returns the rows of `df` whose `column` value lies within
+    [Q1 - multiplier * IQR, Q3 + multiplier * IQR], bounds included.
+    A `multiplier` of None means 1.5.
+    """
+    # Identity test: `== None` would be evaluated element-wise for
+    # array-like arguments
+    if multiplier is None:
+        multiplier = 1.5  # Default value
-    # Set the ranges for both y axes
-    #min1, max1 = CONFIG['axis_ranges']['pm0.1']
-    min2, max2 = CONFIG['axis_ranges']['pm2.5_10']
-    #ax1.set_ylim(min1, max1)  # df['co2'].max() * 1.05
-    ax2.set_ylim(min2, max2)
+    q1 = df[column].quantile(0.25)
+    q3 = df[column].quantile(0.75)
+    iqr = q3 - q1
+    lower_bound = q1 - multiplier * iqr
+    upper_bound = q3 + multiplier * iqr
-    # Add a grid for the x axis and the y axes
-    # This is already done if using the whitegrid theme
-    #ax1.grid(axis='x', alpha=CONFIG['plot']['grid_opacity'])
-    #ax1.grid(axis='y', alpha=CONFIG['plot']['grid_opacity'])
-    ax2.grid(axis='y', alpha=CONFIG['plot']['grid2_opacity'], linestyle=CONFIG['plot']['grid1_line_style'])
+    return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
-    # Customize the plot title, labels and ticks
-    ax1.set_title(get_plot_title(title, filename))
-    ax1.tick_params(axis='x', rotation=CONFIG['plot']['date_rotation'])
-    #ax1.tick_params(axis='y', labelcolor=CONFIG['colors']['pm0.1'])
-    ax1.set_xlabel('')
-    #ax1.set_ylabel(CONFIG['labels']['pm0.1'], color=CONFIG['colors']['pm0.1'])
-    ax2.set_ylabel('')  # We will manually place the 2 parts in different colors
-    # Define the position for the center of the right y axis label
-    bottom_label = CONFIG['labels']['pm2.5'] + '  '
-    top_label = '  ' + CONFIG['labels']['pm10']
-    x = 1.07  # Slightly to the right of the axis
-    y = get_label_center(bottom_label, top_label)   # Vertically centered
+# WARNING: Untested function
+def remove_outliers_zscore(df, column, threshold=3):
+    """Keep the rows whose absolute z-score in `column` is below `threshold`.
+    The z-score uses the population standard deviation (ddof=0) and is
+    computed with pandas only, so no extra import is needed. Missing values
+    are ignored when computing the mean and deviation, and rows with a
+    missing value in `column` are dropped from the result.
+    """
+    values = df[column]
+    std = values.std(ddof=0)
+    if not std > 0:
+        # Zero (constant column) or NaN (no data) deviation: nothing can be
+        # an outlier, so only drop the rows that have no value at all
+        return df[values.notna()]
+    z_scores = ((values - values.mean()) / std).abs()
+    return df[z_scores < threshold]
-    # Place the first (bottom) part of the label
-    ax2.text(x, y, bottom_label, transform=ax2.transAxes,
-             color=CONFIG['colors']['pm2.5'], rotation='vertical',
-             ha='center', va='top')
-    # Place the second (top) part of the label
-    ax2.text(x, y, top_label, transform=ax2.transAxes,
-            color=CONFIG['colors']['pm10'], rotation='vertical',
-            ha='center', va='bottom')
+# WARNING: Untested function
+def remove_outliers_std(df, column, n_std=2):
+    """Keep the rows whose `column` value is within `n_std` standard
+    deviations of the column mean, bounds included."""
+    values = df[column]
+    center = values.mean()
+    spread = n_std * values.std()
-    # Create a combined legend
-    lines1, labels1 = ax1.get_legend_handles_labels()
-    lines2, labels2 = ax2.get_legend_handles_labels()
-    ax1.legend(lines1 + lines2, labels1 + labels2,
-               loc=CONFIG['plot']['legend_location'])
+    # between() includes both ends by default
+    return df[values.between(center - spread, center + spread)]
-    # Remove the left y-axis elements from ax1
-    ax1.grid(axis='y', visible=False)
-    ax1.spines['left'].set_visible(False)
-    ax1.tick_params(axis='y', left=False, labelleft=False)
-    # Adjust the plot margins to make room for the labels
-    plt.tight_layout()
+# WARNING: Untested function
+def remove_outliers_percentile(df, column, lower_percentile=5, upper_percentile=95):
+    """Keep the rows whose `column` value lies between the two given
+    percentiles of that column, bounds included."""
+    values = df[column]
+    # Percentiles are given on a 0-100 scale; quantile() expects 0-1
+    low_cut = values.quantile(lower_percentile/100)
+    high_cut = values.quantile(upper_percentile/100)
-    # Save the plot as a PNG image
-    plt.savefig(get_plot_filename(filename, '-pm'))
-    plt.close()
+    in_range = values.ge(low_cut) & values.le(high_cut)
+    return df[in_range]
 def get_label_center(bottom_label, top_label):
+    """Estimate the position at which a two-part axis label is centered.
+    The result is 0.5 shifted by the difference in character count between
+    `bottom_label` and `top_label`, divided by a value derived from
+    CONFIG['plot']['font_scale'].
+    """
     # Return a value between 0 and 1 to estimate where to center the label
+    # Divider optimized for 11x8.5 plot size, but not as good for 15x10
     fs = CONFIG['plot']['font_scale']
+    # Quadratic in the font scale; it has no real root, so it is never zero
     divider = 72 * fs**2 - 316 * fs + 414  # Tested for fs between 0.8 and 2
+    # A longer bottom label moves the center above 0.5, a longer top label below
     center = 0.5 + ((len(bottom_label) - len(top_label)) / divider)
     return center
-def generate_stats(df, filename):
+def generate_stats(df, filename, boxplot=False):
+    """Write the describe() summary of `df` to the stats file for `filename`.
+    When `boxplot` is true, also save one boxplot image per summarized
+    column, named with get_boxplot_filename(filename, '-<column>').
+    """
     summary = df.describe()
     with open(get_stats_filename(filename), 'w') as file:
         file.write(summary.to_string())
-    #for column in summary.columns.tolist():
-    #    box = sns.boxplot(data=df, y=column)
-    #    plt.savefig(get_boxplot_filename(filename, f'-{column}'))
-    #    plt.close()
+    if boxplot:
+        # Only the columns that describe() summarized are plotted
+        for column in summary.columns:
+            sns.boxplot(data=df, y=column)
+            plt.savefig(get_boxplot_filename(filename, f'-{column}'))
+            # Close the figure so the next boxplot starts from a clean one
+            plt.close()
 def load_config(reset_config = False):
@@ -736,10 +761,10 @@ def get_config_dir(app_name):
             config_dir = Path(config_home) / app_name
         else:
             config_dir = Path.home() / ".config" / app_name
     # Create the directory if it doesn't exist
     config_dir.mkdir(parents=True, exist_ok=True)
     return config_dir
@@ -752,19 +777,26 @@ def get_plot_title(title, filename):
         plot_title = match.group(2) if match else stem
     # Capitalize only the first character
-    if plot_title: plot_title = plot_title[0].upper() + plot_title[1:]
+    if plot_title: plot_title = plot_title.capitalize()
     return plot_title
-def get_plot_filename(filename, suffix = ''):
+def get_filename(filename, stem = '', suffix = '', extension = ''):
+    """Build an output file name located next to `filename`.
+    filename:  path whose parent directory (and stem, by default) is reused
+    stem:      replacement for the stem of `filename`; '' keeps the original
+    suffix:    text appended right after the stem
+    extension: file extension, with or without a leading dot; '' means none
+    Returns the name as a string: '<parent>/<stem><suffix>[.<extension>]'.
+    """
+    p = Path(filename)
+    s = stem if stem != '' else p.stem
+    # Accept both 'txt' and '.txt', and do not leave a dangling '.' behind
+    # when no extension is requested
+    ext = extension.lstrip('.')
+    dot_ext = f'.{ext}' if ext else ''
+    return f'{p.parent}/{s}{suffix}{dot_ext}'
+def get_plot_filename(filename, suffix = '', stem = ''):
+    """Return '<parent>/<stem><suffix>.png' for the plot saved next to
+    `filename`; an empty `stem` means the stem of `filename` itself."""
     p = Path(filename)
-    return f'{p.parent}/{p.stem}{suffix}.png'
+    if stem == '':
+        base = p.stem
+    else:
+        base = stem
+    return '{}/{}{}.png'.format(p.parent, base, suffix)
-#def get_boxplot_filename(filename, suffix = ''):
-#    p = Path(filename)
-#    return f'{p.parent}/{p.stem}-boxplot{suffix}.png'
+def get_boxplot_filename(filename, suffix = ''):
+    """Return '<parent>/<stem>-boxplot<suffix>.png' for the boxplot image
+    saved next to `filename`."""
+    source = Path(filename)
+    return '{}/{}-boxplot{}.png'.format(source.parent, source.stem, suffix)
 def get_stats_filename(filename):
@@ -786,7 +818,7 @@ if __name__ == '__main__':
     # Configure the root logger
     logging.basicConfig(level=logging.WARNING,
                         format='%(levelname)s - %(message)s')
     # Configure this script's logger
     logger.setLevel(logging.DEBUG)

plotair 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

plotair 0.2.0py3-none-any.whl → 0.3.0py3-none-any.whl