pywib 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pywib/__init__.py ADDED
@@ -0,0 +1,63 @@
"""
pywib: analysis of web interaction tracking data (mouse and keyboard events).

The package root re-exports the constants, the DataFrame utilities and the
core metric functions listed in ``__all__`` below.
"""

__version__ = "0.1.4"
__author__ = "Guillermo Dylan Carvajal Aza"
__email__ = "carvajalguillermo@uniovi.es"

# The star import places every public name of ``constants`` on the package
# root; only some of them are advertised through ``__all__``.
from .constants import *
from .utils import (
    validate_dataframe,
    validate_dataframe_keyboard,
    extract_traces_by_session,
    visualize_trace,
    compute_space_time_diff,
    video_from_trace,
    validate_duplicate_timestamps,
)
from .core import (
    velocity,
    acceleration,
    jerkiness,
    path,
    auc_ratio,
    execution_time,
    movement_time,
    pauses_metrics,
    velocity_metrics,
    acceleration_metrics,
    jerkiness_metrics,
    number_of_clicks,
    click_slip,
    num_pauses,
    deviations,
    auc_ratio_metrics,
    typing_speed_metrics,
    typing_speed,
    backspace_usage,
)

# NOTE: every entry of ``__all__`` must be bound in this module, otherwise
# ``from pywib import *`` raises AttributeError. The former "extract_trace"
# entry was dropped because no such name is imported here.
__all__ = [
    # Version info
    "__version__",
    "__author__",
    "__email__",

    # Constants
    "EventTypes",
    "ComponentTypes",

    # Utility functions
    "validate_dataframe",
    "validate_dataframe_keyboard",
    "visualize_trace",
    "compute_space_time_diff",
    "extract_traces_by_session",
    "video_from_trace",
    "validate_duplicate_timestamps",

    # Movement functions
    "velocity",
    "acceleration",
    "auc_ratio",
    "jerkiness",
    "path",
    "velocity_metrics",
    "acceleration_metrics",
    "jerkiness_metrics",
    "deviations",
    "auc_ratio_metrics",

    # Mouse functions
    "number_of_clicks",
    "click_slip",

    # Keyboard functions
    "typing_speed",
    "typing_speed_metrics",
    "backspace_usage",

    # Timing
    "pauses_metrics",
    "execution_time",
    "movement_time",
    "num_pauses",
]
File without changes
@@ -0,0 +1,39 @@
1
+ import pandas as pd
2
+ from pywib.utils import validate_dataframe, extract_mouse_click_traces_by_session_with_intial_pause
3
+ from pywib.core import auc_ratio
4
+
5
def obtain_straight_patterns(df: pd.DataFrame = None, traces: dict[str, list[pd.DataFrame]] = None, per_traces=False, threshold=100) -> dict[str, list[pd.DataFrame]]:
    """
    Select the traces that follow a "Straight Pattern".

    First described in 'Investigating the Differences in Web Browsing Behaviour
    of Chinese and European Users Using Mouse Tracking' (Lee & Chen, 2007), the
    Straight Pattern is a direct movement towards a target: a pause, followed
    by a straight movement to the target without significant pauses between
    the start of the movement and the target acquisition.

    A trace is kept when the 'auc_perp' value reported for it by ``auc_ratio``
    is less than or equal to ``threshold``.

    TODO : initial pause

    Parameters:
        df (pd.DataFrame): DataFrame containing 'sessionId', 'sceneId', 'eventType', 'timeStamp', 'x', and 'y' columns.
            Only used (and validated) when ``traces`` is not supplied.
        traces (dict[str, list[pd.DataFrame]]): Optional pre-extracted traces, keyed by session id.
            Each trace is a pd.DataFrame.
        per_traces (bool): Must be True. The default (False) selects a whole-DataFrame
            mode that is not implemented yet and raises NotImplementedError.
        threshold (float): Threshold (in px) for the AUC perpendicular distance to consider a movement
            as a straight pattern. Default is 100.
    Returns:
        dict[str, list[pd.DataFrame]]: For every session id reported by ``auc_ratio``, the list of
            traces classified as straight patterns (possibly empty).
    Raises:
        NotImplementedError: If ``per_traces`` is falsy.
    """
    if not per_traces:
        # The message used to blame 'per_traces' itself; it is the mode
        # *without* per-trace processing that is missing.
        raise NotImplementedError("Processing without 'per_traces' (per_traces=False) is not yet implemented.")

    if traces is None:
        validate_dataframe(df)
        # These are always point-and-click traces.
        traces = extract_mouse_click_traces_by_session_with_intial_pause(df)

    auc_values = auc_ratio(None, traces=traces, per_traces=True)

    # NOTE(review): assumes auc_ratio returns, per session, one result per trace
    # in the same order as traces[session_id] - confirm against auc_ratio.
    # TODO: use auc_ratio with a percentage of the threshold?
    return {
        session_id: [
            traces[session_id][index]
            for index, auc in enumerate(session_aucs)
            if auc['auc_perp'] <= threshold
        ]
        for session_id, session_aucs in auc_values.items()
    }
pywib/constants.py ADDED
@@ -0,0 +1,84 @@
"""
Shared constants for pywib: library metadata, event and component type codes,
standard DataFrame column names and key codes.
"""

# Library identification.
LIBRARY_NAME: str = "pywib"
LIBRARY_VERSION: str = "0.1.4"
8
+
9
+ # Event types for interaction tracking
10
class EventTypes:
    """Integer codes identifying the kind of interaction event in a tracking record."""

    # --- Mouse ---------------------------------------------------------
    # Mouse move.
    EVENT_ON_MOUSE_MOVE = 0
    # Mouse click.
    EVENT_ON_CLICK = 1
    # Mouse double click.
    EVENT_ON_DOUBLE_CLICK = 2
    # Mouse button pressed.
    EVENT_ON_MOUSE_DOWN = 3
    # Mouse button released.
    EVENT_ON_MOUSE_UP = 4
    # Mouse wheel event, specifically for wheel clicks.
    EVENT_ON_WHEEL = 5
    # Context menu.
    EVENT_CONTEXT_MENU = 6

    # --- Touch ---------------------------------------------------------
    # Touch move, specific to mobile and tablet devices.
    EVENT_ON_TOUCH_MOVE = 7

    # --- Window --------------------------------------------------------
    # Window scroll.
    EVENT_WINDOW_SCROLL = 11
    # Window resize.
    EVENT_WINDOW_RESIZE = 12

    # --- Keyboard ------------------------------------------------------
    # Key down.
    EVENT_KEY_DOWN = 13
    # Key press.
    EVENT_KEY_PRESS = 14
    # Key up.
    EVENT_KEY_UP = 15

    # --- Focus ---------------------------------------------------------
    # Focus gained.
    EVENT_FOCUS = 16
    # Focus lost (blur).
    EVENT_BLUR = 17

    # --- Selection objects ---------------------------------------------
    # Change of selection.
    EVENT_ON_CHANGE_SELECTION_OBJECT = 18
    # Click on a selection.
    EVENT_ON_CLICK_SELECTION_OBJECT = 19

    # --- Tracking lifecycle (custom events) ----------------------------
    # Tracking initialised.
    EVENT_INIT_TRACKING = 100
    # Tracking ended.
    EVENT_TRACKING_END = 200
50
+
51
class ComponentTypes:
    """Integer codes identifying the kind of UI element an event refers to."""

    # Text input field.
    COMPONENT_TEXT_FIELD = 1
    # Combo box.
    COMPONENT_COMBOBOX = 2
    # Option entry.
    COMPONENT_OPTION = 3
    # Radio button.
    COMPONENT_RADIO_BUTTON = 4
    # Check box.
    COMPONENT_CHECK_BOX = 5
58
+
59
class ColumnNames:
    """Names of the DataFrame columns used throughout the library."""

    # Identification of the recorded event.
    SESSION_ID = "sessionId"
    SCENE_ID = "sceneId"
    EVENT_TYPE = "eventType"
    ELEMENT_ID = "elementId"

    # Time stamp and pointer position of the event.
    TIME_STAMP = "timeStamp"
    X = "x"
    Y = "y"

    # Keyboard payload.
    KEY_VALUE_EVENT = "keyValueEvent"
    KEY_CODE_EVENT = "keyCodeEvent"

    SOURCE_SESSION_ID = "sourceSessionId"

    # Derived columns: time / position differences and movement metrics.
    DT = "dt"
    DX = "dx"
    DY = "dy"
    VELOCITY = "velocity"
    ACCELERATION = "acceleration"
    JERKINESS = "jerkiness"
    AUC_RATIO = "auc_ratio"
78
+
79
class KeyCodeEvents:
    """Key codes of the keys that keyboard analysis treats specially."""

    # Backspace key.
    KEY_CODE_BACKSPACE = 8
    # Delete key.
    KEY_CODE_DELETE = 46
pywib/core/__init__.py ADDED
@@ -0,0 +1,34 @@
"""
Core metric functions for PyWib.

Re-exports the timing, movement, mouse and keyboard metrics implemented in
the sibling modules of this package.
"""
from .timing import execution_time, movement_time, num_pauses, pauses_metrics
from .movement import (
    velocity,
    acceleration,
    jerkiness,
    path,
    auc_optimal,
    auc_ratio,
    auc,
    auc_ratio_metrics,
    velocity_metrics,
    acceleration_metrics,
    jerkiness_metrics,
    deviations,
)
from .mouse import click_slip, number_of_clicks
from .keyboard import typing_speed, typing_speed_metrics, backspace_usage

# Keep a trailing comma on every entry: adjacent string literals are
# concatenated, so a missing comma silently merges two names into one
# non-existent export (previously "deviations" + "typing_speed").
__all__ = [
    "execution_time",
    "movement_time",
    "num_pauses",
    "pauses_metrics",
    "velocity",
    "acceleration",
    "jerkiness",
    "path",
    "auc_optimal",
    "auc_ratio",
    "auc_ratio_metrics",
    "auc",
    "velocity_metrics",
    "acceleration_metrics",
    "jerkiness_metrics",
    "click_slip",
    "number_of_clicks",
    "deviations",
    "typing_speed",
    "typing_speed_metrics",
    "backspace_usage",
]
pywib/core/keyboard.py ADDED
@@ -0,0 +1,114 @@
1
+ """
2
+ Analysis module for HCI Web Interaction Analyzer
3
+
4
+ This module provides methods to analyze interaction data from DataFrames.
5
+ """
6
+
7
+ import pandas as pd
8
+ import numpy as np
9
+ from pywib.utils.segmentation import extract_keystroke_traces_by_session
10
+ from pywib.utils.validation import validate_dataframe_keyboard
11
+ from pywib.constants import EventTypes, ColumnNames, KeyCodeEvents
12
+ from pywib.utils.keyboard import (backspace_usage_df, backspace_usage_traces, typing_durations_df, typing_durations_traces, typing_speed_df, typing_speed_traces)
13
+
14
def typing_durations(df: pd.DataFrame = None, traces: dict[str, list[pd.DataFrame]] = None, per_traces: bool = True) -> list:
    """
    Compute the durations of individual keystrokes.

    Dispatches to one of the helpers in ``pywib.utils.keyboard``:

    * ``per_traces`` falsy: ``typing_durations_df(df)`` on the whole DataFrame.
    * ``per_traces`` truthy and ``traces`` given: ``typing_durations_traces(traces)``.
    * ``per_traces`` truthy and ``traces`` is None: the keystroke traces are first
      extracted from ``df`` and passed on as ``typing_durations_traces(traces, False)``.

    Parameters:
        df (pd.DataFrame): Interaction data; required unless ``traces`` is supplied
            with ``per_traces`` enabled. The required columns are those expected by
            the helpers (not checked here).
        traces (dict[str, list[pd.DataFrame]]): optional Pre-extracted keystroke traces by session.
        per_traces (bool): optional Whether to calculate durations per trace. Default is True.
    Returns:
        Whatever the selected helper returns; documented by the author as
        keystroke durations in milliseconds.
    """
    if not per_traces:
        return typing_durations_df(df)

    if traces is not None:
        return typing_durations_traces(traces)

    # NOTE(review): the meaning of the second positional argument (False) is
    # defined by typing_durations_traces - confirm there.
    extracted = extract_keystroke_traces_by_session(df)
    return typing_durations_traces(extracted, False)
33
+
34
def typing_speed(df: pd.DataFrame = None, traces: dict[str, list[pd.DataFrame]] = None, per_traces : bool = True) -> dict[list[float]] | float:
    """
    Compute the typing speed, documented by the author as characters per minute (CPM).

    Dispatches to one of the helpers in ``pywib.utils.keyboard``:

    * ``per_traces`` falsy: ``typing_speed_df(df)`` on the whole DataFrame. In this
      mode the DataFrame must contain typing data only for the CPM to be correct.
    * ``per_traces`` truthy and ``traces`` given: ``typing_speed_traces(traces)``.
    * ``per_traces`` truthy and ``traces`` is None: the keystroke traces are first
      extracted from ``df`` and passed on as ``typing_speed_traces(traces, False)``.

    Parameters:
        df (pd.DataFrame): Interaction data; required unless ``traces`` is supplied
            with ``per_traces`` enabled.
        traces (dict[str, list[pd.DataFrame]]): optional Pre-extracted keystroke traces by session.
        per_traces (bool): optional Whether to calculate speed per trace. Default is True.

    Returns:
        dict | float: Per the author's documentation, a dictionary mapping session
        IDs to lists of typing speeds (one per trace), or a single float when
        ``per_traces`` is False.
    """
    if not per_traces:
        return typing_speed_df(df)

    if traces is not None:
        return typing_speed_traces(traces)

    # NOTE(review): the meaning of the second positional argument (False) is
    # defined by typing_speed_traces - confirm there.
    extracted = extract_keystroke_traces_by_session(df)
    return typing_speed_traces(extracted, False)
55
+
56
def typing_speed_metrics(df: pd.DataFrame = None, traces: dict[str, list[pd.DataFrame]] = None) -> dict:
    """
    Calculate per-session typing metrics.

    For every session for which ``typing_speed`` reports at least one speed, the
    result contains:

    * ``average_typing_speed``: mean of the per-trace typing speeds (CPM).
    * ``total_characters``: number of key-up events over all traces of the session.
    * ``total_time_seconds``: sum over the traces of the accumulated time-stamp
      differences, divided by 1000 (time stamps are taken to be milliseconds).
    * ``avg_keydown_to_keyup_duration``: mean over the traces of the mean
      (key-up time - key-down time), in time-stamp units (milliseconds).
      Key-down and key-up events are paired by order of occurrence; when a
      trace holds a different number of each, only the leading pairs are used
      instead of failing on mismatched array lengths. Traces without any pair
      are ignored; the value is NaN when no trace has a pair.

    Parameters:
        df (pd.DataFrame): Interaction data with at least the 'eventType' and
            'timeStamp' columns. Only used when ``traces`` is not supplied.
        traces (dict[str, list[pd.DataFrame]]): optional Pre-extracted keystroke traces by session.
    Returns:
        dict: A dictionary with session IDs as keys and their corresponding typing speed metrics as values.
            Sessions without any typing speed are omitted.
    """
    if traces is None:
        traces = extract_keystroke_traces_by_session(df)

    session_speeds = typing_speed(None, traces=traces, per_traces=True)

    metrics_by_session = {}
    for session_id, speeds in session_speeds.items():
        if not speeds:
            continue

        total_chars = 0
        total_time = 0.0
        keystroke_means = []
        for trace in traces[session_id]:
            event_types = trace[ColumnNames.EVENT_TYPE]
            time_stamps = trace[ColumnNames.TIME_STAMP]
            key_up_times = time_stamps[event_types == EventTypes.EVENT_KEY_UP].to_numpy()
            key_down_times = time_stamps[event_types == EventTypes.EVENT_KEY_DOWN].to_numpy()

            total_chars += len(key_up_times)
            total_time += time_stamps.diff().fillna(0).sum() / 1000.0

            # Pair events positionally and ignore the unmatched tail so that
            # an unbalanced trace cannot raise (or silently broadcast).
            pairs = min(len(key_up_times), len(key_down_times))
            if pairs:
                keystroke_means.append((key_up_times[:pairs] - key_down_times[:pairs]).mean())

        metrics_by_session[session_id] = {
            "average_typing_speed": np.mean(speeds),
            "total_characters": total_chars,
            "total_time_seconds": total_time,
            # TODO Review: positional pairing does not check that a key-up
            # belongs to the same key as the key-down it is matched with.
            "avg_keydown_to_keyup_duration": np.mean(keystroke_means) if keystroke_means else np.nan,
        }
    return metrics_by_session
93
+
94
+
95
def backspace_usage(df: pd.DataFrame = None, traces: dict[str, list[pd.DataFrame]] = None, per_trace: bool = True) -> dict:
    """
    Compute the backspace usage for each session.

    Dispatches to one of the helpers in ``pywib.utils.keyboard``:

    * ``per_trace`` truthy and ``traces`` given: ``backspace_usage_traces(traces)``;
      ``df`` is neither validated nor used.
    * ``per_trace`` truthy and ``traces`` is None: ``df`` is validated, the keystroke
      traces are extracted and passed on as ``backspace_usage_traces(traces, False)``.
    * ``per_trace`` falsy: ``df`` is validated and ``backspace_usage_df(df)`` is returned.

    Parameters:
        df (pd.DataFrame): Interaction data; must pass ``validate_dataframe_keyboard``
            whenever it is used.
        traces (dict[str, list[pd.DataFrame]]): optional Pre-extracted keystroke traces by session.
        per_trace (bool): optional Whether to calculate usage per trace. Default is True.
    Returns:
        dict: Keyed by session ID. NOTE(review): the author's documentation describes
        the values both as a rate (backspaces per 100 characters typed) and as
        backspace counts - confirm against the helpers.
    """
    if per_trace and traces is not None:
        return backspace_usage_traces(traces)

    validate_dataframe_keyboard(df)
    if not per_trace:
        return backspace_usage_df(df)

    # NOTE(review): the meaning of the second positional argument (False) is
    # defined by backspace_usage_traces - confirm there.
    extracted = extract_keystroke_traces_by_session(df)
    return backspace_usage_traces(extracted, False)
pywib/core/mouse.py ADDED
@@ -0,0 +1,109 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from ..utils.validation import validate_dataframe
4
+ from ..utils.utils import compute_space_time_diff
5
+ from ..utils.segmentation import extract_traces_by_session
6
+ from ..constants import ColumnNames, EventTypes
7
+
8
def number_of_clicks(df: pd.DataFrame) -> dict:
    """
    Count the click events of every session.

    Parameters:
        df (pd.DataFrame): DataFrame containing mouse event data; it is checked
            with ``validate_dataframe`` first.
    Returns:
        dict: Session IDs mapped to the number of rows whose event type is
        ``EventTypes.EVENT_ON_CLICK`` (0 for sessions without clicks).
    """
    validate_dataframe(df)

    return {
        session_id: len(events[events[ColumnNames.EVENT_TYPE] == EventTypes.EVENT_ON_CLICK])
        for session_id, events in df.groupby(ColumnNames.SESSION_ID)
    }
23
+
24
def _click_segments(events: pd.DataFrame) -> list:
    """Return one ``(distance, duration)`` tuple per completed mouse-down/mouse-up segment.

    ``events`` must already be ordered by time stamp. The distance is the path
    length from the mouse-down position through every mouse-move event to the
    mouse-up position; the duration is the mouse-up time stamp minus the
    mouse-down time stamp.
    """
    segments = []
    pressed = False
    last_x = last_y = down_time = None
    travelled = 0.0

    rows = zip(
        events[ColumnNames.EVENT_TYPE],
        events[ColumnNames.X],
        events[ColumnNames.Y],
        events[ColumnNames.TIME_STAMP],
    )
    for event_type, x, y, time_stamp in rows:
        if event_type == EventTypes.EVENT_ON_MOUSE_DOWN:
            # A new press (re)starts the segment, even if the previous one
            # never received its mouse-up.
            pressed = True
            last_x, last_y = x, y
            travelled = 0.0
            down_time = time_stamp
        elif not pressed:
            # Moves and releases outside a press do not belong to a click.
            continue
        elif event_type == EventTypes.EVENT_ON_MOUSE_MOVE:
            travelled += np.hypot(x - last_x, y - last_y)
            last_x, last_y = x, y
        elif event_type == EventTypes.EVENT_ON_MOUSE_UP:
            travelled += np.hypot(x - last_x, y - last_y)
            segments.append((travelled, time_stamp - down_time))
            pressed = False
    return segments


def click_slip(df: pd.DataFrame, threshold: float = 5.0) -> dict:
    """
    Calculate click-slip metrics per session.

    A click is the span between a mouse-down event and the following mouse-up
    event. The pointer path travelled during that span (mouse-down position,
    every mouse-move, mouse-up position) is accumulated, and the click counts
    as a slip when that distance is greater than or equal to ``threshold``,
    i.e. when the pointer moved noticeably while the button was held.

    Parameters:
        df (pd.DataFrame): DataFrame containing mouse event data; it is checked
            with ``validate_dataframe`` first.
        threshold (float): Minimum travelled distance (in the units of the
            'x'/'y' columns) for a click to be counted as a slip.

    Returns:
        dict: Session IDs mapped to a dictionary with:
            - "click_slips": number of slips.
            - "longest_click_slip" / "shortest_click_slip" /
              "average_click_slip_distance": max / min / mean travelled distance
              over the slips (0 when there are none).
            - "average_click_slip": slips divided by the number of completed
              clicks (0 when there are none). Previously this divided the slip
              count by itself and was therefore always 1.
            - "average_click_duration" / "max_click_duration" /
              "min_click_duration": statistics of the mouse-down to mouse-up
              duration over all completed clicks, in time-stamp units.
    """
    validate_dataframe(df)

    metrics_per_session = {}
    for session_id, group in df.groupby(ColumnNames.SESSION_ID):
        # A stable sort keeps the recorded order of events sharing a time stamp.
        ordered = group.sort_values(by=ColumnNames.TIME_STAMP, kind="stable")
        segments = _click_segments(ordered)

        distances = [distance for distance, _ in segments if distance >= threshold]
        durations = [duration for _, duration in segments]
        slips = len(distances)

        metrics_per_session[session_id] = {
            "click_slips": slips,
            "longest_click_slip": max(distances) if distances else 0,
            "shortest_click_slip": min(distances) if distances else 0,
            "average_click_slip": slips / len(durations) if durations else 0,
            "average_click_slip_distance": np.mean(distances) if distances else 0,
            "average_click_duration": np.mean(durations) if durations else 0,
            "max_click_duration": max(durations) if durations else 0,
            "min_click_duration": min(durations) if durations else 0,
        }
    return metrics_per_session