accelerometry-annotator 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- accelerometry_annotator-3.2.0.dist-info/METADATA +218 -0
- accelerometry_annotator-3.2.0.dist-info/RECORD +21 -0
- accelerometry_annotator-3.2.0.dist-info/WHEEL +5 -0
- accelerometry_annotator-3.2.0.dist-info/licenses/LICENSE +22 -0
- accelerometry_annotator-3.2.0.dist-info/top_level.txt +1 -0
- visualize_accelerometry/__init__.py +1 -0
- visualize_accelerometry/app.py +1193 -0
- visualize_accelerometry/callbacks.py +631 -0
- visualize_accelerometry/config.py +79 -0
- visualize_accelerometry/data_loading.py +243 -0
- visualize_accelerometry/js/download.js +32 -0
- visualize_accelerometry/plotting.py +239 -0
- visualize_accelerometry/state.py +164 -0
- visualize_accelerometry/static/favicon.ico +0 -0
- visualize_accelerometry/static/favicon.svg +21 -0
- visualize_accelerometry/static/logo-dark.svg +41 -0
- visualize_accelerometry/static/logo.jpg +0 -0
- visualize_accelerometry/static/logo.svg +41 -0
- visualize_accelerometry/templates/index.html +26 -0
- visualize_accelerometry/templates/login.html +159 -0
- visualize_accelerometry/templates/logout.html +124 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Application-wide configuration constants.
|
|
3
|
+
|
|
4
|
+
Centralizes paths, color palettes, user lists, and annotation schema
|
|
5
|
+
so that changes propagate consistently across all modules.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
# ---------------------------------------------------------------------------
|
|
11
|
+
# UChicago brand color palette
|
|
12
|
+
# ---------------------------------------------------------------------------
|
|
13
|
+
UCHICAGO_MAROON = "#800000"
|
|
14
|
+
UCHICAGO_GRAY = "#58595b"
|
|
15
|
+
UCHICAGO_TEAL = "#7EBEC5"
|
|
16
|
+
|
|
17
|
+
# Signal line colors for the x, y, z accelerometry axes
|
|
18
|
+
LST_COLORS = [UCHICAGO_MAROON, UCHICAGO_TEAL, UCHICAGO_GRAY]
|
|
19
|
+
|
|
20
|
+
# Fill colors for annotation overlay quads (one per activity type)
|
|
21
|
+
ARTIFACT_COLORS = {
|
|
22
|
+
"chair_stand": "cyan",
|
|
23
|
+
"3m_walk": "magenta",
|
|
24
|
+
"6min_walk": "green",
|
|
25
|
+
"tug": "yellow",
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# Filesystem paths
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
DATA_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
|
|
32
|
+
READINGS_FOLDER = os.path.join(DATA_FOLDER, "readings")
|
|
33
|
+
OUTPUT_FOLDER = os.path.join(DATA_FOLDER, "output")
|
|
34
|
+
# Glob pattern for per-user annotation Excel files (the * is replaced by username)
|
|
35
|
+
ANNOTATIONS_GLOB = os.path.join(OUTPUT_FOLDER, "annotations_*.xlsx")
|
|
36
|
+
# Path to the JSON credentials file used by the admin panel to add/remove users.
|
|
37
|
+
# Overridden by demo/config_overrides.py for demo deployments.
|
|
38
|
+
CREDENTIALS_FILE = os.path.join(
|
|
39
|
+
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
|
|
40
|
+
"credentials.json",
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# User lists
|
|
45
|
+
# These are module-level mutable lists so the admin panel can add/remove
|
|
46
|
+
# users at runtime without a server restart. Because they are shared across
|
|
47
|
+
# sessions, admin changes take effect for all sessions immediately.
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
ADMIN_USERS = ["megan", "kristen", "manu"]
|
|
50
|
+
|
|
51
|
+
ANNOTATOR_USERS = sorted([
|
|
52
|
+
"ideyah", "evelyn", "junny", "amritap1", "ldepablo1", "ar277",
|
|
53
|
+
"megan", "kristen", "fran", "alan", "anita", "liberto",
|
|
54
|
+
])
|
|
55
|
+
|
|
56
|
+
KNOWN_USERS = sorted(set(ADMIN_USERS + ANNOTATOR_USERS))
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# Defaults and formats
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
DEFAULT_WINDOW_SIZE = 3600 # seconds of signal data shown at once
|
|
62
|
+
|
|
63
|
+
# Timestamp format used for anchor time display and HDF5 queries.
|
|
64
|
+
# Must match the format produced by pandas dt.strftime.
|
|
65
|
+
TIME_FMT = "%b %d %Y %I:%M %p"
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# Annotation DataFrame schema
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
ANNOTATION_COLUMNS = [
|
|
71
|
+
"fname", "artifact", "segment", "scoring", "review",
|
|
72
|
+
"start_epoch", "end_epoch", "start_time", "end_time",
|
|
73
|
+
"annotated_at", "user", "notes",
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
DISPLAYED_ANNOTATION_COLUMNS = [
|
|
77
|
+
"artifact", "segment", "scoring", "review",
|
|
78
|
+
"start_time", "end_time", "annotated_at", "user", "notes",
|
|
79
|
+
]
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data loading and persistence for accelerometry signals and annotations.
|
|
3
|
+
|
|
4
|
+
Handles HDF5 signal file discovery, time-windowed data loading, annotation
|
|
5
|
+
file I/O (Excel-based), and DataFrame normalization.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import glob
|
|
9
|
+
import os
|
|
10
|
+
from itertools import cycle
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
from . import config as _config
|
|
16
|
+
from .config import (
|
|
17
|
+
ANNOTATION_COLUMNS,
|
|
18
|
+
ANNOTATOR_USERS,
|
|
19
|
+
TIME_FMT,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_filenames():
    """Discover HDF5 files and assign each to an annotator deterministically.

    Returns
    -------
    list of str
        Sorted list of ``"username--filename"`` strings. The assignment
        uses a fixed random seed so every server restart produces the
        same mapping, distributing files evenly across annotators.
    """
    # Use a dedicated RandomState seeded identically to the previous
    # np.random.seed(2020) call: it yields the exact same shuffle order
    # but does NOT mutate NumPy's global RNG state, which other code in
    # the process may depend on.
    rng = np.random.RandomState(2020)
    users_to_assign = list(ANNOTATOR_USERS)
    rng.shuffle(users_to_assign)
    users_cycle = cycle(users_to_assign)
    lst_files = sorted(
        next(users_cycle) + "--" + os.path.splitext(f)[0]
        for f in os.listdir(_config.READINGS_FOLDER)
        if os.path.splitext(f)[1].lower() == ".h5"
    )
    return lst_files
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_filedata(fname, anchor_timestamp, windowsize):
    """Load a time window of accelerometry data from an HDF5 file.

    Parameters
    ----------
    fname : str
        Path to the HDF5 file (without ``.h5`` extension).
    anchor_timestamp : str or None
        Center of the time window in ``TIME_FMT``. If None, the window
        starts at the beginning of the file.
    windowsize : float
        Total window duration in seconds.

    Returns
    -------
    tuple of (str, str or None, str or None, DataFrame)
        ``(anchor_timestamp, file_start, file_end, pdf)`` where
        ``file_start`` and ``file_end`` are only set on the first load
        (when anchor_timestamp was None).
    """
    from datetime import datetime, timedelta

    file_path = fname + ".h5"

    file_start = None
    file_end = None
    if anchor_timestamp is None:
        # First load: peek at the first and last rows to learn file bounds
        head = pd.read_hdf(file_path, "readings", start=0, stop=1)
        tail = pd.read_hdf(file_path, "readings", start=-1)
        file_start = head["timestamp"].dt.strftime(TIME_FMT).values[0]
        file_end = tail["timestamp"].dt.strftime(TIME_FMT).values[0]
        anchor_timestamp = file_start
    # Subsequent loads: bounds already known by the caller, so leave None

    center = datetime.strptime(anchor_timestamp, TIME_FMT)
    half = timedelta(seconds=int(windowsize / 2))
    lo_str = (center - half).strftime(TIME_FMT)
    hi_str = (center + half).strftime(TIME_FMT)

    # HDF5 where-clause pushes filtering to the storage layer for speed
    where_clause = (
        f"(timestamp >= Timestamp('{lo_str}')) "
        f"& (timestamp <= Timestamp('{hi_str}'))"
    )
    pdf = pd.read_hdf(file_path, "readings", where=where_clause)

    return anchor_timestamp, file_start, file_end, pdf
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def clamp_anchor(anchor_timestamp, file_start, file_end, windowsize):
    """Clamp anchor_timestamp so the window stays within file bounds.

    Parameters
    ----------
    anchor_timestamp : str
        Current anchor in ``TIME_FMT``.
    file_start, file_end : str
        File bounds in ``TIME_FMT``.
    windowsize : float
        Window duration in seconds.

    Returns
    -------
    str
        Clamped anchor in ``TIME_FMT``.
    """
    from datetime import datetime, timedelta

    half = timedelta(seconds=int(windowsize / 2))
    anchor = datetime.strptime(anchor_timestamp, TIME_FMT)
    lower = datetime.strptime(file_start, TIME_FMT)
    upper = datetime.strptime(file_end, TIME_FMT)

    # Keep the window inside the file; the lower-bound clamp runs last
    # so it wins when the window is wider than the file itself.
    if anchor >= upper:
        anchor = upper - half
    if anchor <= lower:
        anchor = lower + half

    return anchor.strftime(TIME_FMT)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def get_annotations_from_files(pattern=None):
    """Load all per-user annotation Excel files and concatenate them.

    Parameters
    ----------
    pattern : str, optional
        Glob pattern for annotation files. Defaults to ``ANNOTATIONS_GLOB``.

    Returns
    -------
    DataFrame
        Combined annotations (unsorted, not yet cleaned).
    """
    glob_pattern = _config.ANNOTATIONS_GLOB if pattern is None else pattern
    paths = [p for p in glob.glob(glob_pattern) if os.path.isfile(p)]
    if not paths:
        # No annotation files yet: empty frame with the canonical schema
        return pd.DataFrame(columns=ANNOTATION_COLUMNS)
    frames = [pd.read_excel(p, engine="openpyxl") for p in paths]
    return pd.concat(frames)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def cleanup_annotations(pdf):
    """Sort and normalize an annotation DataFrame.

    Ensures consistent types for datetime, numeric, and string columns
    so that downstream code (Bokeh serialization, DataFrame filtering)
    doesn't encounter NaN or mixed-type surprises.

    Parameters
    ----------
    pdf : DataFrame
        Raw or partially-processed annotations.

    Returns
    -------
    DataFrame
        Cleaned copy.
    """
    sort_keys = ["user", "fname", "artifact", "segment", "scoring", "review", "annotated_at"]
    pdf = pdf.sort_values(by=sort_keys, ascending=False)
    if len(pdf) == 0:
        # Nothing to normalize; return the (empty) sorted frame as-is
        return pdf

    if "notes" not in pdf.columns:
        pdf = pdf.assign(notes="")
    pdf = pdf.assign(
        start_time=pd.to_datetime(pdf["start_time"], errors="coerce"),
        end_time=pd.to_datetime(pdf["end_time"], errors="coerce"),
        notes=pdf["notes"].fillna(""),
    )
    # Fill NaN in numeric columns to prevent Bokeh JSON serialization
    # errors (Bokeh's PayloadEncoder has allow_nan=False)
    for numeric_col in ("segment", "scoring", "review", "start_epoch", "end_epoch"):
        if numeric_col in pdf.columns:
            pdf[numeric_col] = pdf[numeric_col].fillna(0)
    return pdf.assign(notes=pdf["notes"].astype(str))
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def save_annotations(pdf_annotations, uname, fname):
    """Persist the current user's annotations for one file to disk.

    Merges the in-memory annotations with any existing data from other
    files in the user's Excel file, then writes the result.

    Parameters
    ----------
    pdf_annotations : DataFrame
        Full in-memory annotation set (all users, all files).
    uname : str
        Current user whose annotations should be saved.
    fname : str
        Current file path (basename is extracted internally).

    Returns
    -------
    DataFrame
        Freshly-reloaded annotations from *all* users' files on disk.
    """
    # One Excel file per user: substitute the username into the glob
    annotations_file = _config.ANNOTATIONS_GLOB.replace("*", uname)

    pdf_old = pd.DataFrame(columns=ANNOTATION_COLUMNS)
    if os.path.exists(annotations_file):
        pdf_old = pd.read_excel(annotations_file, engine="openpyxl")
        pdf_old = pdf_old.assign(
            annotated_at=pd.to_datetime(pdf_old["annotated_at"], errors="coerce")
        )

    basename = os.path.basename(fname)
    in_scope = (pdf_annotations["user"] == uname) & (pdf_annotations["fname"] == basename)
    pdf_current = pdf_annotations.loc[in_scope]

    if len(pdf_old) > 0:
        # Replace only the current user+file slice, keep everything else
        keep = ~((pdf_old["user"] == uname) & (pdf_old["fname"] == basename))
        pdf_all = pd.concat(
            [pdf_old.loc[keep], pdf_current],
            ignore_index=True,
        ).reset_index(drop=True)
    else:
        pdf_all = pdf_current

    pdf_all = cleanup_annotations(pdf_all)
    pdf_all.to_excel(annotations_file, index=False)

    # Reload from disk so all sessions see a consistent snapshot
    return get_annotations_from_files()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
function table_to_csv(source) {
|
|
2
|
+
const columns = Object.keys(source.data)
|
|
3
|
+
const nrows = source.get_length()
|
|
4
|
+
const lines = [columns.join(',')]
|
|
5
|
+
|
|
6
|
+
for (let i = 0; i < nrows; i++) {
|
|
7
|
+
let row = [];
|
|
8
|
+
for (let j = 0; j < columns.length; j++) {
|
|
9
|
+
const column = columns[j]
|
|
10
|
+
row.push(source.data[column][i].toString())
|
|
11
|
+
}
|
|
12
|
+
lines.push(row.join(','))
|
|
13
|
+
}
|
|
14
|
+
return lines.join('\n').concat('\n')
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
// Bokeh CustomJS callback body: `source` (a ColumnDataSource) is
// injected into scope by the Python side.
const filename = 'annotations.csv'
const filetext = table_to_csv(source)
const blob = new Blob([filetext], { type: 'text/csv;charset=utf-8;' })

//addresses IE
if (navigator.msSaveBlob) {
    navigator.msSaveBlob(blob, filename)
} else {
    const link = document.createElement('a')
    link.href = URL.createObjectURL(blob)
    link.download = filename
    link.target = '_blank'
    link.style.visibility = 'hidden'
    link.dispatchEvent(new MouseEvent('click'))
    // Revoke the object URL once the click has been handled; without
    // this, every download leaked a blob for the lifetime of the page.
    // Deferred via setTimeout so the browser can start the download first.
    setTimeout(function () { URL.revokeObjectURL(link.href) }, 0)
}
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Plotting module — native Bokeh figures with LTTB downsampling.
|
|
3
|
+
|
|
4
|
+
Creates a main signal plot (with annotation overlays and box-select)
|
|
5
|
+
and a range selector (minimap) for navigating large time series.
|
|
6
|
+
LTTB downsampling keeps the browser responsive by limiting the number
|
|
7
|
+
of points sent over the websocket while preserving visual fidelity.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from bokeh.models import (
|
|
12
|
+
BoxSelectTool, ColumnDataSource, DatetimeTickFormatter,
|
|
13
|
+
Range1d, RangeTool,
|
|
14
|
+
)
|
|
15
|
+
from bokeh.plotting import figure
|
|
16
|
+
|
|
17
|
+
from .config import ARTIFACT_COLORS, LST_COLORS, UCHICAGO_MAROON
|
|
18
|
+
|
|
19
|
+
# Maximum number of points sent to the browser per signal axis.
# 10000 preserves high visual fidelity while keeping the canvas
# rendering backend responsive (WebGL is not used).
MAX_POINTS = 10000
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _downsample(timestamps, values, n_out):
|
|
26
|
+
"""Downsample a time series using LTTB (Largest Triangle Three Buckets).
|
|
27
|
+
|
|
28
|
+
LTTB selects representative points that preserve the visual shape
|
|
29
|
+
of the signal. Falls back to uniform strided sampling if the
|
|
30
|
+
``lttbc`` C extension is not installed.
|
|
31
|
+
|
|
32
|
+
Parameters
|
|
33
|
+
----------
|
|
34
|
+
timestamps : ndarray
|
|
35
|
+
Datetime64 array of timestamps.
|
|
36
|
+
values : ndarray
|
|
37
|
+
Signal values corresponding to *timestamps*.
|
|
38
|
+
n_out : int
|
|
39
|
+
Target number of output points.
|
|
40
|
+
|
|
41
|
+
Returns
|
|
42
|
+
-------
|
|
43
|
+
tuple of (ndarray, ndarray)
|
|
44
|
+
Downsampled ``(timestamps, values)``.
|
|
45
|
+
"""
|
|
46
|
+
if len(timestamps) <= n_out:
|
|
47
|
+
return timestamps, values
|
|
48
|
+
try:
|
|
49
|
+
import lttbc
|
|
50
|
+
# lttbc operates on float64 arrays
|
|
51
|
+
ts_float = timestamps.astype(np.float64)
|
|
52
|
+
vals_float = values.astype(np.float64)
|
|
53
|
+
ds_ts, ds_vals = lttbc.downsample(ts_float, vals_float, n_out)
|
|
54
|
+
return ds_ts.astype(timestamps.dtype), ds_vals
|
|
55
|
+
except Exception:
|
|
56
|
+
# Graceful fallback: take every Nth sample
|
|
57
|
+
step = max(1, len(timestamps) // n_out)
|
|
58
|
+
return timestamps[::step], values[::step]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def make_plot(pdf, annotation_cds):
    """Create the main signal plot and range selector.

    Parameters
    ----------
    pdf : DataFrame or None
        Signal data with columns ``timestamp``, ``x``, ``y``, ``z``.
        If None or empty, returns empty placeholder plots.
    annotation_cds : dict[str, ColumnDataSource]
        Persistent Bokeh ColumnDataSources keyed by annotation type
        (``"chair_stand"``, ``"segment"``, etc.). Their ``.data`` is
        updated externally; the plot just references them so overlays
        refresh without rebuilding the figure.

    Returns
    -------
    tuple of (Panel.pane.Bokeh, Panel.pane.Bokeh, Figure, ColumnDataSource)
        ``(main_pane, range_pane, main_fig, signal_cds)`` where
        ``signal_cds`` is the downsampled signal data source (needed
        for wiring box-select callbacks).
    """
    import panel as pn

    if pdf is None or len(pdf) == 0:
        # Use two DISTINCT placeholder figures: a Bokeh model may only
        # belong to a single Document, so sharing one figure between two
        # panes can raise "already in a doc" errors when both render.
        empty_main = figure(height=300, sizing_mode="stretch_width")
        empty_range = figure(height=300, sizing_mode="stretch_width")
        empty_cds = ColumnDataSource(data=dict(timestamp=[], x=[], y=[], z=[]))
        return (
            pn.pane.Bokeh(empty_main, sizing_mode="stretch_width"),
            pn.pane.Bokeh(empty_range, sizing_mode="stretch_width"),
            empty_main,
            empty_cds,
        )

    ts_raw = pdf["timestamp"].values

    # --- Downsample each axis independently via LTTB ---
    # Each axis may pick slightly different representative timestamps,
    # but we reuse the first axis's timestamps for all three. This is
    # a minor approximation that keeps the code simple without visible
    # impact on the plot.
    ds_data = {"timestamp": None}
    for col in ["x", "y", "z"]:
        ds_ts, ds_vals = _downsample(ts_raw, pdf[col].values, MAX_POINTS)
        if ds_data["timestamp"] is None:
            ds_data["timestamp"] = ds_ts
        ds_data[col] = ds_vals

    colsource = ColumnDataSource(data=ds_data)

    full_start = ts_raw[0]
    # Show ~10% of the file initially so the user sees detail
    initial_end_idx = min(len(ts_raw) - 1, int(len(ts_raw) * 0.1))
    initial_end = ts_raw[initial_end_idx]

    # Explicit y_range computed from signal data. Using Range1d (not
    # DataRange1d) is critical because DataRange1d would auto-expand to
    # include annotation quad bounds, squashing the signal to a thin line.
    y_min = min(ds_data["x"].min(), ds_data["y"].min(), ds_data["z"].min())
    y_max = max(ds_data["x"].max(), ds_data["y"].max(), ds_data["z"].max())
    # Guard against a perfectly flat signal: a zero pad would make
    # Range1d start == end, which renders a degenerate (invisible) plot.
    y_pad = (y_max - y_min) * 0.05 or 1.0
    y_range = Range1d(start=y_min - y_pad, end=y_max + y_pad)

    # --- Main signal plot ---
    main_fig = figure(
        height=300,
        x_axis_type="datetime",
        x_axis_location="above",
        background_fill_color="#e8e8e8",
        x_range=Range1d(start=full_start, end=initial_end),
        y_range=y_range,
        sizing_mode="stretch_width",
        toolbar_location=None,
    )
    main_fig.yaxis.visible = False

    for color, col in zip(LST_COLORS, ["x", "y", "z"]):
        main_fig.line(
            "timestamp", col, color=color, source=colsource,
            alpha=0.95, line_width=1.5,
            # Dim unselected data so the box-selected region stands out
            nonselection_alpha=0.2, selection_alpha=1,
        )
        # Invisible scatter points on top of lines so that BoxSelectTool
        # can select data indices. Line glyphs alone don't support
        # index-based hit testing.
        main_fig.scatter(
            "timestamp", col, color=None, source=colsource,
            size=0, alpha=0, nonselection_alpha=0, selection_alpha=0,
        )

    main_fig.xaxis.formatter = DatetimeTickFormatter(
        days="%Y/%m/%d",
        months="%Y/%m/%d %H:%M",
        hours="%Y/%m/%d %H:%M",
        minutes="%H:%M",
        seconds="%H:%M:%S",
        milliseconds="%Ss:%3Nms",
    )

    # Width-only box select for time-range annotation
    box_select = BoxSelectTool(dimensions="width")
    main_fig.add_tools(box_select)
    main_fig.toolbar.active_drag = box_select

    # --- Annotation overlay quads ---
    # Quads span the full y_range so they are visible behind the signal.
    # Using level="overlay" prevents them from affecting auto-range.
    q_top = y_max + y_pad
    q_bot = y_min - y_pad

    # Activity type overlays (semi-transparent colored fills)
    for key, color in ARTIFACT_COLORS.items():
        main_fig.quad(
            left="start_time", right="end_time", top=q_top, bottom=q_bot,
            fill_color=color, fill_alpha=0.2, line_alpha=0,
            source=annotation_cds[key], level="overlay",
        )

    # Flag overlays (hatch patterns with no fill, matching the original app)
    flag_hatches = {
        "segment": "cross",
        "scoring": "dot",
        "review": "spiral",
    }
    for key, hatch in flag_hatches.items():
        main_fig.quad(
            left="start_time", right="end_time", top=q_top, bottom=q_bot,
            fill_color=None, fill_alpha=0, line_alpha=0,
            hatch_pattern=hatch, hatch_color="black",
            hatch_weight=0.5, hatch_alpha=0.1,
            source=annotation_cds[key], level="overlay",
        )

    # --- Range selector (minimap) ---
    # Uses fewer points than the main plot since it's smaller
    range_data = {"timestamp": None}
    for col in ["x", "y", "z"]:
        r_ts, r_vals = _downsample(ts_raw, pdf[col].values, 2000)
        if range_data["timestamp"] is None:
            range_data["timestamp"] = r_ts
        range_data[col] = r_vals
    range_source = ColumnDataSource(data=range_data)

    range_fig = figure(
        height=130,
        y_range=main_fig.y_range,
        x_axis_type="datetime",
        y_axis_type=None,
        tools="",
        toolbar_location=None,
        background_fill_color="#e8e8e8",
        sizing_mode="stretch_width",
    )

    for color, col in zip(LST_COLORS, ["x", "y", "z"]):
        range_fig.line(
            "timestamp", col, color=color, source=range_source,
            alpha=0.8, line_width=1.2,
        )

    range_fig.xaxis.formatter = DatetimeTickFormatter(
        days="%m/%d %H:%M",
        months="%m/%d %H:%M",
        hours="%m/%d %H:%M",
        minutes="%m/%d %H:%M",
        seconds="%m/%d %H:%M:%S",
    )

    # RangeTool links the minimap's draggable overlay to main_fig.x_range
    range_tool = RangeTool(x_range=main_fig.x_range)
    range_tool.overlay.fill_color = UCHICAGO_MAROON
    range_tool.overlay.fill_alpha = 0.15
    range_fig.add_tools(range_tool)
    range_fig.toolbar.active_multi = "auto"

    main_pane = pn.pane.Bokeh(main_fig, sizing_mode="stretch_width")
    range_pane = pn.pane.Bokeh(range_fig, sizing_mode="stretch_width")

    return main_pane, range_pane, main_fig, colsource
|