sentiero 1.0.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +7 -0
  3. data/README.md +679 -0
  4. data/lib/sentiero/analytics/analyzer.rb +91 -0
  5. data/lib/sentiero/analytics/bounded.rb +29 -0
  6. data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
  7. data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
  8. data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
  9. data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
  10. data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
  11. data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
  12. data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
  13. data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
  14. data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
  15. data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
  16. data/lib/sentiero/analytics/entry_attribution.rb +71 -0
  17. data/lib/sentiero/analytics/error_discovery.rb +118 -0
  18. data/lib/sentiero/analytics/events.rb +21 -0
  19. data/lib/sentiero/analytics/exporter.rb +242 -0
  20. data/lib/sentiero/analytics/form_analyzer.rb +153 -0
  21. data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
  22. data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
  23. data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
  24. data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
  25. data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
  26. data/lib/sentiero/analytics/problem_detail.rb +97 -0
  27. data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
  28. data/lib/sentiero/analytics/segmenter.rb +133 -0
  29. data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
  30. data/lib/sentiero/analytics/stats.rb +30 -0
  31. data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
  32. data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
  33. data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
  34. data/lib/sentiero/configuration.rb +184 -0
  35. data/lib/sentiero/erasure.rb +48 -0
  36. data/lib/sentiero/fingerprint.rb +34 -0
  37. data/lib/sentiero/ip_anonymizer.rb +29 -0
  38. data/lib/sentiero/redaction/config.rb +61 -0
  39. data/lib/sentiero/redaction.rb +207 -0
  40. data/lib/sentiero/reporter/configuration.rb +50 -0
  41. data/lib/sentiero/reporter/context.rb +31 -0
  42. data/lib/sentiero/reporter/dispatcher.rb +91 -0
  43. data/lib/sentiero/reporter/http_transport.rb +57 -0
  44. data/lib/sentiero/reporter/log_transport.rb +26 -0
  45. data/lib/sentiero/reporter/middleware.rb +62 -0
  46. data/lib/sentiero/reporter/normalizer.rb +14 -0
  47. data/lib/sentiero/reporter/null_transport.rb +18 -0
  48. data/lib/sentiero/reporter/report_context.rb +29 -0
  49. data/lib/sentiero/reporter/scrubber.rb +47 -0
  50. data/lib/sentiero/reporter/test_helper.rb +32 -0
  51. data/lib/sentiero/reporter/test_transport.rb +28 -0
  52. data/lib/sentiero/reporter.rb +214 -0
  53. data/lib/sentiero/roda.rb +47 -0
  54. data/lib/sentiero/store/error_store.rb +220 -0
  55. data/lib/sentiero/store/limits.rb +31 -0
  56. data/lib/sentiero/store/session_store.rb +118 -0
  57. data/lib/sentiero/store.rb +72 -0
  58. data/lib/sentiero/stores/file.rb +566 -0
  59. data/lib/sentiero/stores/memory.rb +362 -0
  60. data/lib/sentiero/stores/redis/keys.rb +59 -0
  61. data/lib/sentiero/stores/redis/lua.rb +119 -0
  62. data/lib/sentiero/stores/redis.rb +665 -0
  63. data/lib/sentiero/stores/sqlite/schema.rb +79 -0
  64. data/lib/sentiero/stores/sqlite.rb +626 -0
  65. data/lib/sentiero/user_agent.rb +32 -0
  66. data/lib/sentiero/version.rb +5 -0
  67. data/lib/sentiero/web/analytics_app.rb +538 -0
  68. data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
  69. data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
  70. data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
  71. data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
  72. data/lib/sentiero/web/assets/manifest.json +11 -0
  73. data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
  74. data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
  75. data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
  76. data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
  77. data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
  78. data/lib/sentiero/web/assets_app.rb +42 -0
  79. data/lib/sentiero/web/base_app.rb +319 -0
  80. data/lib/sentiero/web/basic_auth.rb +27 -0
  81. data/lib/sentiero/web/basic_auth_check.rb +41 -0
  82. data/lib/sentiero/web/body_reader.rb +44 -0
  83. data/lib/sentiero/web/csv_writer.rb +45 -0
  84. data/lib/sentiero/web/dashboard_app.rb +236 -0
  85. data/lib/sentiero/web/errors_app.rb +97 -0
  86. data/lib/sentiero/web/escaping.rb +37 -0
  87. data/lib/sentiero/web/events_app.rb +196 -0
  88. data/lib/sentiero/web/formatting.rb +43 -0
  89. data/lib/sentiero/web/ingest_app.rb +92 -0
  90. data/lib/sentiero/web/manifest.rb +43 -0
  91. data/lib/sentiero/web/monitoring_app.rb +316 -0
  92. data/lib/sentiero/web/script_tag.rb +57 -0
  93. data/lib/sentiero/web/shareable_replay.rb +88 -0
  94. data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
  95. data/lib/sentiero/web/templates/_brand.html.erb +18 -0
  96. data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
  97. data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
  98. data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
  99. data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
  100. data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
  101. data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
  102. data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
  103. data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
  104. data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
  105. data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
  106. data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
  107. data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
  108. data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
  109. data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
  110. data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
  111. data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
  112. data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
  113. data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
  114. data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
  115. data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
  116. data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
  117. data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
  118. data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
  119. data/lib/sentiero/web/templates/event_show.html.erb +52 -0
  120. data/lib/sentiero/web/templates/events_index.html.erb +177 -0
  121. data/lib/sentiero/web/templates/export_index.html.erb +69 -0
  122. data/lib/sentiero/web/templates/forms.html.erb +105 -0
  123. data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
  124. data/lib/sentiero/web/templates/import.html.erb +39 -0
  125. data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
  126. data/lib/sentiero/web/templates/segments.html.erb +114 -0
  127. data/lib/sentiero/web/templates/session_show.html.erb +195 -0
  128. data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
  129. data/lib/sentiero/web/track_app.rb +57 -0
  130. data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
  131. data/lib/sentiero/web/views/analyzer_view.rb +27 -0
  132. data/lib/sentiero/web/views/base_view.rb +76 -0
  133. data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
  134. data/lib/sentiero/web/views/conversions_view.rb +41 -0
  135. data/lib/sentiero/web/views/engagement_view.rb +67 -0
  136. data/lib/sentiero/web/views/errors_index_view.rb +37 -0
  137. data/lib/sentiero/web/views/event_show_view.rb +20 -0
  138. data/lib/sentiero/web/views/events_index_view.rb +56 -0
  139. data/lib/sentiero/web/views/export_view.rb +23 -0
  140. data/lib/sentiero/web/views/forms_view.rb +28 -0
  141. data/lib/sentiero/web/views/frustration_view.rb +15 -0
  142. data/lib/sentiero/web/views/funnel_view.rb +36 -0
  143. data/lib/sentiero/web/views/heatmap_view.rb +34 -0
  144. data/lib/sentiero/web/views/import_view.rb +13 -0
  145. data/lib/sentiero/web/views/page_report_view.rb +43 -0
  146. data/lib/sentiero/web/views/problem_show_view.rb +46 -0
  147. data/lib/sentiero/web/views/scroll_view.rb +23 -0
  148. data/lib/sentiero/web/views/segments_view.rb +28 -0
  149. data/lib/sentiero/web/views/session_show_view.rb +105 -0
  150. data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
  151. data/lib/sentiero/web/views/vitals_view.rb +45 -0
  152. data/lib/sentiero/web/views.rb +24 -0
  153. data/lib/sentiero/window_ref.rb +6 -0
  154. data/lib/sentiero.rb +69 -0
  155. metadata +232 -0
@@ -0,0 +1,242 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+ require_relative "stats_aggregator"
5
+ require_relative "error_discovery"
6
+ require_relative "heatmap_analyzer"
7
+ require_relative "scroll_depth_analyzer"
8
+ require_relative "form_analyzer"
9
+ require_relative "web_vitals_analyzer"
10
+ require_relative "../user_agent"
11
+
12
+ module Sentiero
13
+ module Analytics
14
+ # Builds tabular datasets for CSV/JSON export. Each dataset is a
15
+ # {headers:, rows:} table the web layer serializes without re-deriving.
16
+ class Exporter < Analyzer
17
+ DATASETS = {
18
+ "sessions" => "Session list",
19
+ "errors" => "Error list",
20
+ "browser_events" => "Browser Events (rrweb)",
21
+ "problems" => "Problems",
22
+ "server_events" => "Server Events",
23
+ "stats" => "Aggregate stats",
24
+ "heatmap" => "Heatmap data",
25
+ "scroll" => "Scroll-depth data",
26
+ "forms" => "Form analytics",
27
+ "web_vitals" => "Web Vitals"
28
+ }.freeze
29
+
30
# True when +name+ is one of the exportable dataset keys listed in DATASETS.
# Keys are strings, so a Symbol name is reported as unknown.
def dataset?(name)
  DATASETS.include?(name)
end
33
+
34
# Builds the {headers:, rows:} table for one export dataset.
#
# name       - dataset key (String or Symbol); must be a key of DATASETS.
# since      - optional lower time bound handed to the dataset builder.
# until_time - optional upper time bound handed to the dataset builder.
#
# Raises ArgumentError for a name that is not in DATASETS. The name is
# interpolated into a method name and dispatched with `send`, so it is
# checked against the whitelist first: only the known build_* methods can
# ever be reached, whatever text the caller passes in.
def table(name, since: nil, until_time: nil)
  key = name.to_s
  raise ArgumentError, "unknown dataset: #{name.inspect}" unless DATASETS.key?(key)

  @since = since
  @until_time = until_time
  send("build_#{key}")
end
39
+
40
+ private
41
+
42
+ attr_reader :since, :until_time
43
+
44
# Maximum number of sessions a dataset scan may touch, read from the
# store's limits.
def scan_cap
  limits = store.limits
  limits.analytics_max_scan_sessions
end
47
+
48
# Session list: one row per session summary returned by the store, with the
# browser and device derived from the recorded user agent string.
def build_sessions
  summaries = store.list_sessions(limit: scan_cap, since: since, until_time: until_time)

  rows = summaries.map do |summary|
    meta = summary[:metadata] || {}
    agent = meta["userAgent"]

    timing = [
      summary[:session_id],
      summary[:created_at],
      summary[:first_event_at],
      summary[:last_event_at],
      duration_ms(summary),
      summary[:event_count]
    ]
    origin = [meta["url"], meta["referrer"]]
    client = [UserAgent.browser(agent), UserAgent.device(agent)]

    # has_errors is exported as a strict boolean: only a literal true counts.
    timing + origin + client + [meta["has_errors"] == true]
  end

  {
    headers: %w[session_id created_at first_event_at last_event_at
      duration_ms event_count url referrer browser device has_errors],
    rows: rows
  }
end
72
+
73
# Error list: one row per occurrence, each repeating the columns of the
# group it belongs to (message, source, line, count, last_seen_at).
def build_errors
  discovery = ErrorDiscovery.new(store)
  groups = discovery.grouped_errors(since: since, until_time: until_time)[:groups]

  rows = groups.flat_map do |group|
    group[:occurrences].map do |occurrence|
      group_columns = [group[:message], group[:source], group[:line], group[:count], group[:last_seen_at]]
      group_columns + [occurrence[:session_id], occurrence[:window_id], occurrence[:offset_ms]]
    end
  end

  {headers: %w[message source line count last_seen_at session_id window_id offset_ms], rows: rows}
end
93
+
94
# Browser events: one row per CUSTOM-typed event found in the scanned
# windows, carrying the tag read from the event's data (nil when absent).
def build_browser_events
  rows = []

  store.each_session_events(limit: scan_cap, since: since, until_time: until_time) do |summary, window_id, events|
    custom_events = events.select { |event| event["type"] == CUSTOM }
    custom_events.each do |event|
      rows.push([summary[:session_id], window_id, event["timestamp"], event.dig("data", "tag")])
    end
  end

  {headers: %w[session_id window_id timestamp tag], rows: rows}
end
108
+
109
# Problems: one row per problem across all projects (project: nil).
def build_problems
  problems = store.list_problems(project: nil, limit: scan_cap, since: since, until_time: until_time)
  detail_columns = %i[project exception_class title count status first_seen last_seen]

  rows = problems.map do |problem|
    # A problem's id is its fingerprint; stores don't expose a separate key,
    # so the same value fills both leading columns.
    fingerprint = problem[:id]
    [fingerprint, fingerprint] + detail_columns.map { |column| problem[column] }
  end

  {headers: %w[id fingerprint] + detail_columns.map(&:to_s), rows: rows}
end
129
+
130
# Server events: one row per event across all projects (project: nil).
#
# list_server_events' `after` is an exclusive cursor; the range is filtered
# here instead so `since`/`until_time` are both inclusive, like every other
# dataset. The filter runs on the rows already fetched (at most scan_cap).
#
# Structured payloads (Hash or Array) are written as JSON so the exported
# cell can be parsed back; any other payload falls back to #to_s (nil
# becomes an empty string).
def build_server_events
  headers = %w[id project name level session_id timestamp payload]

  events = store.list_server_events(project: nil, limit: scan_cap)
  if since || until_time
    events = events.select do |event|
      at = event["timestamp"].to_f
      (!since || at >= since) && (!until_time || at <= until_time)
    end
  end

  rows = events.map do |event|
    payload = event["payload"]
    structured = payload.is_a?(Hash) || payload.is_a?(Array)

    [
      event["id"],
      event["project"],
      event["name"],
      event["level"],
      event["session_id"],
      event["timestamp"],
      structured ? payload.to_json : payload.to_s
    ]
  end

  {headers: headers, rows: rows}
end
153
+
154
# Aggregate stats flattened to metric/value pairs: the three totals first,
# then one "prefix:label" row per browser, device and custom event tag.
def build_stats
  stats = StatsAggregator.new(store).aggregate(since: since, until_time: until_time)

  totals = %i[total_sessions total_events avg_duration_ms].map { |key| [key.to_s, stats[key]] }

  breakdowns = [
    ["browser", stats[:browser_distribution]],
    ["device", stats[:device_distribution]],
    ["custom_event", stats[:custom_event_tags]]
  ]
  distribution_rows = breakdowns.flat_map do |prefix, counts|
    counts.map { |label, count| ["#{prefix}:#{label}", count] }
  end

  {headers: %w[metric value], rows: totals + distribution_rows}
end
169
+
170
# Heatmap data: one row per (url, element) pair from the heatmap table.
def build_heatmap
  heatmap = HeatmapAnalyzer.new(store).build_heatmap_table(since: since, until_time: until_time)

  rows = []
  heatmap.each do |url, elements|
    elements.each { |element| rows << [url, element[:selector], element[:count]] }
  end

  {headers: %w[url selector count], rows: rows}
end
178
+
179
# Scroll-depth data: one row per page URL.
#
# Percentages are absolute depth vs. estimated page height (deepest
# scroll + viewport across sessions).
def build_scroll
  pages = ScrollDepthAnalyzer.new(store).analyze(since: since, until_time: until_time)[:pages]

  rows = pages.map do |url, page|
    fold_lines = page[:fold_lines]
    percentiles = %i[p50 p75 p90].map { |key| fold_lines[key] }

    [url, page[:session_count], page[:avg_depth_px], page[:avg_depth_pct], page[:page_height_px], *percentiles]
  end

  {
    headers: %w[url session_count avg_depth_px avg_depth_pct page_height_px p50_pct p75_pct p90_pct],
    rows: rows
  }
end
200
+
201
# Web Vitals: one row per (url, metric), pages ordered by URL. Rating
# buckets missing from a metric are exported as 0.
def build_web_vitals
  pages = WebVitalsAnalyzer.new(store).analyze(since: since, until_time: until_time)[:pages]

  rows = []
  pages.sort_by { |url, _page| url }.each do |url, page|
    page[:metrics].each do |metric, summary|
      ratings = summary[:ratings]
      rating_counts = %w[good needs-improvement poor].map { |rating| ratings.fetch(rating, 0) }

      rows << [url, metric, summary[:p50], summary[:p75], summary[:p90], summary[:samples], *rating_counts]
    end
  end

  {
    headers: %w[url metric p50 p75 p90 samples good_count needs_improvement_count poor_count],
    rows: rows
  }
end
224
+
225
# Form analytics: one row per field reported by FormAnalyzer. The column
# list doubles as the header row and as the keys read from each field.
def build_forms
  columns = %i[field_id sessions completion_rate avg_time_to_fill_ms total_refills]
  fields = FormAnalyzer.new(store).analyze(since: since, until_time: until_time)[:fields]

  rows = fields.map do |field|
    columns.map { |column| field[column] }
  end

  {headers: columns.map(&:to_s), rows: rows}
end
240
+ end
241
+ end
242
+ end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+ require_relative "collectors/form_collector"
5
+
6
+ module Sentiero
7
+ module Analytics
8
+ # Cross-session form analysis built from interaction patterns: rrweb input
9
+ # events (incremental, type 3 / source 5) carry the touched node's id and
10
+ # timing but never values, so this works with maskAllInputs enabled.
11
+ #
12
+ # Attribution is per page: each session's windows are merged, ordered by
13
+ # time, and split into page segments on Meta-href boundaries (the shared
14
+ # Analyzer#each_page_segment mechanism). A (session, page) unit "starts" a
15
+ # form when the segment contains input events, and is "completed" only
16
+ # when a __form_submit custom event — emitted by the recorder's
17
+ # capture-phase document submit listener (config.track_forms) — lands in
18
+ # the same segment at or after the first input. A session counts as
19
+ # completed when EVERY page it interacted on was submitted, so a genuine
20
+ # submit elsewhere (a later todo add) can no longer mask an abandonment
21
+ # (the signup form it walked away from).
22
+ #
23
+ # CAPTURE-VERSION NOTE: submits are counted ONLY from __form_submit
24
+ # events. Windows recorded before that capture existed (or with
25
+ # track_forms off) carry none and intentionally report ZERO submits —
26
+ # falling back to counting bare Meta/navigation events would resurrect
27
+ # the "navigating away counts as submitting" defect (product review
28
+ # P1.4/D4: 100% shown where the funnel proved 50%).
29
+ #
30
+ # Fields are keyed per (page URL, node id) so the same rrweb node id on
31
+ # two different pages (ids reset every full-page load) no longer
32
+ # conflates two unrelated fields. Per field it reports touch rate
33
+ # (fraction of interacting sessions), aggregate time-to-fill, and re-fill
34
+ # frequency, plus a drop-off table (the last field touched in each
35
+ # abandoned page segment).
36
+ #
37
+ # Compute-on-read: a full scan of Store#each_session_events up to the
38
+ # store's limits.analytics_max_scan_sessions — no fact-extraction tables.
39
+ #
40
+ # Per-segment math (input recognition, field accumulation, drop-off,
41
+ # submit detection, and session-level started/completed semantics) lives in
42
+ # FormCollector so PageReportAnalyzer can share it without duplication.
43
+ class FormAnalyzer < Analyzer
44
+ # rrweb EventType.FullSnapshot and NodeType.Element — used to read field
45
+ # identity (name/id/type) from the DOM snapshot for human field labels.
46
+ FULL_SNAPSHOT = 2
47
+ ELEMENT_NODE = 2
48
+ FORM_CONTROL_TAGS = %w[input select textarea].freeze
49
+
50
# Runs the cross-session form analysis and returns a summary hash:
# sessions that interacted with a form, sessions completed, the form-level
# completion rate, the raw submit count, per-field stats, the drop-off
# table, and whether the scan hit its session cap.
#
# limit      - optional scan cap; defaults to the store's
#              limits.analytics_max_scan_sessions.
# since      - optional lower bound (epoch seconds), applied by the store.
# until_time - optional upper bound (epoch seconds), applied by the store.
def analyze(limit: nil, since: nil, until_time: nil)
  scan_cap = limit || store.limits.analytics_max_scan_sessions
  collector = FormCollector.new # unbounded: no per-URL field cap here

  sessions = merge_windows(scan_cap, since, until_time)
  sessions.each_pair do |session_id, session|
    analyze_session(collector, session_id, session)
  end

  started = collector.started_count
  summary = {sessions_with_form_interaction: started}
  summary[:sessions_completed] = collector.completed_count
  summary[:completion_rate] = ratio(collector.completed_count, started)
  summary[:total_submits] = collector.total_submits
  summary[:fields] = collector.summarize_fields(started, include_labels: true)
  summary[:drop_off_fields] = collector.summarize_drop_off(include_labels: true)
  summary[:was_truncated] = sessions.size >= scan_cap
  summary
end
72
+
73
+ private
74
+
75
# each_session_events yields once per window, and a session may span
# several of them. Gather every window's events under its session id so
# each session is analyzed once, with its cross-window form interactions
# seen together. Returns {session_id => {events: [...]}}.
def merge_windows(scan_cap, since, until_time)
  merged = Hash.new { |hash, session_id| hash[session_id] = {events: []} }

  store.each_session_events(limit: scan_cap, since: since, until_time: until_time) do |summary, _window_id, window_events|
    bucket = merged[summary[:session_id]]
    bucket[:events].concat(window_events)
  end

  merged
end
87
+
88
# Feeds one session to the shared collector, one page segment at a time.
# The events are put in time order first; labels are rebuilt for every
# segment from that segment's own DOM snapshots, so field identities
# (name/id/type) stay correct across same-URL reloads.
def analyze_session(collector, session_id, session)
  ordered = order_by_time(session[:events])

  each_page_segment(ordered) do |url, segment, _anchor|
    labels = field_labels(segment)
    collector.collect(session_id, url, segment, labels: labels)
  end
end
97
+
98
# Concatenated windows interleave in time, so order by timestamp to keep
# segmentation and first-input/submit ordering correct. Events without a
# numeric timestamp sort to the front so they never count as "after" input.
#
# sort_by alone is not guaranteed to be stable, so the original position is
# used as a tie-breaker: events that share a timestamp (or have none) keep
# the relative order they arrived in. Returns a new Array; the input is not
# modified.
def order_by_time(events)
  events
    .each_with_index
    .sort_by do |event, index|
      stamp = event["timestamp"]
      [stamp.is_a?(Numeric) ? stamp : -Float::INFINITY, index]
    end
    .map { |event, _index| event }
end
104
+
105
# Builds {node id => human field label} from the full DOM snapshots (rrweb
# type 2) found in this page segment. rrweb input events carry only the
# node id; the snapshot is the only place the field's name/id/type live,
# and node ids are scoped to a page load, so the map is rebuilt for every
# segment. Attributes only — never values. A segment without a snapshot
# yields {} (incremental-only windows fall back to nil labels in the output).
def field_labels(segment)
  labels = {}

  segment.each do |event|
    next unless event["type"] == FULL_SNAPSHOT

    root = event.dig("data", "node")
    collect_field_labels(root, labels)
  end

  labels
end
117
+
118
# Depth-first walk of a snapshot node tree. Every form control element
# (input/select/textarea) with an Integer node id gets its label recorded
# in +labels+; non-Hash nodes are ignored.
def collect_field_labels(node, labels)
  return unless node.is_a?(Hash)

  tag = node["tagName"]
  node_id = node["id"]
  form_control = node["type"] == ELEMENT_NODE && FORM_CONTROL_TAGS.include?(tag)
  labels[node_id] = field_label(node["attributes"] || {}, tag) if form_control && node_id.is_a?(Integer)

  children = node["childNodes"]
  return unless children.is_a?(Array)

  children.each { |child| collect_field_labels(child, labels) }
end
127
+
128
# Human label for a form control, built from attributes only — never values.
# The field's name wins, then its DOM id. A labelled <input> that also has
# a type gets the type appended for context ("email (email)"). Without a
# name or id, a <select> is labelled "select" and anything else falls back
# to its type, then to its tag name.
def field_label(attrs, tag)
  type = present(attrs["type"])
  base = present(attrs["name"]) || present(attrs["id"])

  unless base
    return "select" if tag == "select"
    return type || tag
  end

  return base unless tag == "input" && type

  "#{base} (#{type})"
end
142
+
143
# Returns +value+ when it is a non-empty String, nil for anything else.
def present(value)
  return nil unless value.is_a?(String)

  value.empty? ? nil : value
end
146
+
147
# numerator / denominator as a Float; a zero denominator yields 0 instead
# of dividing.
def ratio(numerator, denominator)
  denominator.zero? ? 0 : numerator.to_f / denominator
end
151
+ end
152
+ end
153
+ end
@@ -0,0 +1,158 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../events"
4
+
5
+ module Sentiero
6
+ module Analytics
7
+ module Frustration
8
+ # Pure, self-contained frustration detectors — Ruby ports of the JS
9
+ # detectors (frontend/src/dashboard/frustration.js), pinned by ported
10
+ # tests so the two can't drift. FrustrationAnalyzer layers cross-session
11
+ # aggregation and de-noising on top of this raw detector output.
12
+ module Detectors
13
+ # Detector thresholds — verbatim from frontend/src/dashboard/frustration.js.
14
+ RAGE_WINDOW_MS = 500 # max span of a rage cluster
15
+ RAGE_COORD_TOLERANCE_PX = 10 # max px from the first click
16
+ RAGE_MIN_CLICKS = 3 # min clicks to count as rage
17
+ DEAD_WINDOW_MS = 500 # response deadline for a dead click
18
+
19
+ MOUSE_CLICK = 2 # rrweb MouseInteraction click subtype
20
+ SOURCE_MUTATION = 0 # rrweb IncrementalSource.Mutation
21
+
22
+ module_function
23
+
24
# Finds rage-click clusters: runs of at least RAGE_MIN_CLICKS clicks where
# each click follows the previous one within RAGE_WINDOW_MS, and stays
# within RAGE_WINDOW_MS and RAGE_COORD_TOLERANCE_PX (per axis) of the run's
# first click. Returns one hash per cluster, anchored on that first click;
# [] when +events+ is not an Array or holds too few clicks.
def detect_rage_clicks(events)
  return [] unless events.is_a?(Array)

  clicks = events.select { |event| click?(event) }
  return [] if clicks.size < RAGE_MIN_CLICKS

  # Split the clicks into consecutive runs; a click that fails any of the
  # limits closes the current run and opens the next one.
  runs = [[clicks.first]]
  clicks.drop(1).each do |click|
    run = runs.last
    anchor = run.first
    extends_run =
      click["timestamp"] - run.last["timestamp"] <= RAGE_WINDOW_MS &&
      click["timestamp"] - anchor["timestamp"] <= RAGE_WINDOW_MS &&
      (click["data"]["x"] - anchor["data"]["x"]).abs <= RAGE_COORD_TOLERANCE_PX &&
      (click["data"]["y"] - anchor["data"]["y"]).abs <= RAGE_COORD_TOLERANCE_PX

    if extends_run
      run << click
    else
      runs << [click]
    end
  end

  rage_runs = runs.select { |run| run.size >= RAGE_MIN_CLICKS }
  rage_runs.map do |run|
    anchor = run.first
    {
      subtype: "rage_click",
      timestamp: anchor["timestamp"],
      count: run.size,
      x: anchor["data"]["x"],
      y: anchor["data"]["y"],
      member_timestamps: run.map { |member| member["timestamp"] },
      event: anchor
    }
  end
end
60
+
61
# Dead clicks: clicks that no later event answers within DEAD_WINDOW_MS.
# An answer is a later event with a numeric timestamp strictly after the
# click, no later than the deadline, for which response? is true. The
# forward scan stops at the first event past the deadline. Returns one
# hash per dead click; [] when +events+ is not an Array.
def detect_dead_clicks(events)
  return [] unless events.is_a?(Array)

  dead = []
  events.each_with_index do |event, index|
    next unless click?(event)

    clicked_at = event["timestamp"]
    deadline = clicked_at + DEAD_WINDOW_MS

    answered = false
    index.succ.upto(events.size - 1) do |later_index|
      later = events[later_index]
      later_ts = later.is_a?(Hash) ? later["timestamp"] : nil
      next unless later_ts.is_a?(Numeric)
      break if later_ts > deadline
      next unless later_ts > clicked_at && response?(later)

      answered = true
      break
    end
    next if answered

    dead << {
      subtype: "dead_click",
      timestamp: clicked_at,
      x: event["data"]["x"],
      y: event["data"]["y"],
      elapsed: DEAD_WINDOW_MS,
      event: event
    }
  end
  dead
end
95
+
96
# Runs both detectors and merges their output: a dead click whose
# timestamp belongs to a rage cluster is dropped, so a click is never
# reported twice. Entries come back ordered by offset from the window's
# first event, ties keeping rage-before-dead detection order. Returns []
# for an empty or non-Array input, or when the first event has no numeric
# timestamp.
def detect_frustration_events(events)
  return [] unless events.is_a?(Array) && !events.empty?

  opening = events.first
  return [] unless opening.is_a?(Hash) && opening["timestamp"].is_a?(Numeric)

  origin = opening["timestamp"]
  rage = detect_rage_clicks(events)
  dead = detect_dead_clicks(events)

  # Hash lookup (not Array#include?) keeps key matching strict, as before.
  absorbed = rage.each_with_object({}) do |cluster, seen|
    members = cluster[:member_timestamps] || [cluster[:timestamp]]
    members.each { |stamp| seen[stamp] = true }
  end
  unabsorbed_dead = dead.reject { |click| absorbed[click[:timestamp]] }

  entries = (rage + unabsorbed_dead).map do |entry|
    {
      category: "frustration",
      subtype: entry[:subtype],
      timestamp: entry[:timestamp],
      offset: entry[:timestamp] - origin,
      count: entry[:count],
      elapsed: entry[:elapsed],
      x: entry[:x],
      y: entry[:y],
      event: entry[:event]
    }
  end

  # The index tie-breaker keeps equal offsets in detection order.
  entries.sort_by.with_index { |entry, position| [entry[:offset], position] }
end
131
+
132
# True for an rrweb left-click: an incremental mouse-interaction event of
# the click subtype with a numeric timestamp and numeric x/y coordinates
# (mirrors JS isClick).
def click?(event)
  return false unless event.is_a?(Hash)

  envelope_ok = event["type"] == Events::INCREMENTAL && event["timestamp"].is_a?(Numeric)
  return false unless envelope_ok

  data = event["data"]
  return false unless data.is_a?(Hash)
  return false unless data["source"] == Events::SOURCE_MOUSE_INTERACTION
  return false unless data["type"] == MOUSE_CLICK

  data["x"].is_a?(Numeric) && data["y"].is_a?(Numeric)
end
144
+
145
# True when +event+ shows the page reacting to a click: any meta/navigation
# event, or an incremental event whose source is a DOM mutation or an input
# change (mirrors JS isResponse). Events without data never count.
def response?(event)
  return false unless event.is_a?(Hash)

  data = event["data"]
  return false unless data

  if event["type"] == Events::META
    true
  elsif event["type"] == Events::INCREMENTAL && data.is_a?(Hash)
    source = data["source"]
    source == SOURCE_MUTATION || source == Events::SOURCE_INPUT
  else
    false
  end
end
155
+ end
156
+ end
157
+ end
158
+ end