sentiero 1.0.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +7 -0
  3. data/README.md +679 -0
  4. data/lib/sentiero/analytics/analyzer.rb +91 -0
  5. data/lib/sentiero/analytics/bounded.rb +29 -0
  6. data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
  7. data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
  8. data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
  9. data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
  10. data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
  11. data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
  12. data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
  13. data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
  14. data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
  15. data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
  16. data/lib/sentiero/analytics/entry_attribution.rb +71 -0
  17. data/lib/sentiero/analytics/error_discovery.rb +118 -0
  18. data/lib/sentiero/analytics/events.rb +21 -0
  19. data/lib/sentiero/analytics/exporter.rb +242 -0
  20. data/lib/sentiero/analytics/form_analyzer.rb +153 -0
  21. data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
  22. data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
  23. data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
  24. data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
  25. data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
  26. data/lib/sentiero/analytics/problem_detail.rb +97 -0
  27. data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
  28. data/lib/sentiero/analytics/segmenter.rb +133 -0
  29. data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
  30. data/lib/sentiero/analytics/stats.rb +30 -0
  31. data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
  32. data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
  33. data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
  34. data/lib/sentiero/configuration.rb +184 -0
  35. data/lib/sentiero/erasure.rb +48 -0
  36. data/lib/sentiero/fingerprint.rb +34 -0
  37. data/lib/sentiero/ip_anonymizer.rb +29 -0
  38. data/lib/sentiero/redaction/config.rb +61 -0
  39. data/lib/sentiero/redaction.rb +207 -0
  40. data/lib/sentiero/reporter/configuration.rb +50 -0
  41. data/lib/sentiero/reporter/context.rb +31 -0
  42. data/lib/sentiero/reporter/dispatcher.rb +91 -0
  43. data/lib/sentiero/reporter/http_transport.rb +57 -0
  44. data/lib/sentiero/reporter/log_transport.rb +26 -0
  45. data/lib/sentiero/reporter/middleware.rb +62 -0
  46. data/lib/sentiero/reporter/normalizer.rb +14 -0
  47. data/lib/sentiero/reporter/null_transport.rb +18 -0
  48. data/lib/sentiero/reporter/report_context.rb +29 -0
  49. data/lib/sentiero/reporter/scrubber.rb +47 -0
  50. data/lib/sentiero/reporter/test_helper.rb +32 -0
  51. data/lib/sentiero/reporter/test_transport.rb +28 -0
  52. data/lib/sentiero/reporter.rb +214 -0
  53. data/lib/sentiero/roda.rb +47 -0
  54. data/lib/sentiero/store/error_store.rb +220 -0
  55. data/lib/sentiero/store/limits.rb +31 -0
  56. data/lib/sentiero/store/session_store.rb +118 -0
  57. data/lib/sentiero/store.rb +72 -0
  58. data/lib/sentiero/stores/file.rb +566 -0
  59. data/lib/sentiero/stores/memory.rb +362 -0
  60. data/lib/sentiero/stores/redis/keys.rb +59 -0
  61. data/lib/sentiero/stores/redis/lua.rb +119 -0
  62. data/lib/sentiero/stores/redis.rb +665 -0
  63. data/lib/sentiero/stores/sqlite/schema.rb +79 -0
  64. data/lib/sentiero/stores/sqlite.rb +626 -0
  65. data/lib/sentiero/user_agent.rb +32 -0
  66. data/lib/sentiero/version.rb +5 -0
  67. data/lib/sentiero/web/analytics_app.rb +538 -0
  68. data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
  69. data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
  70. data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
  71. data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
  72. data/lib/sentiero/web/assets/manifest.json +11 -0
  73. data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
  74. data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
  75. data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
  76. data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
  77. data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
  78. data/lib/sentiero/web/assets_app.rb +42 -0
  79. data/lib/sentiero/web/base_app.rb +319 -0
  80. data/lib/sentiero/web/basic_auth.rb +27 -0
  81. data/lib/sentiero/web/basic_auth_check.rb +41 -0
  82. data/lib/sentiero/web/body_reader.rb +44 -0
  83. data/lib/sentiero/web/csv_writer.rb +45 -0
  84. data/lib/sentiero/web/dashboard_app.rb +236 -0
  85. data/lib/sentiero/web/errors_app.rb +97 -0
  86. data/lib/sentiero/web/escaping.rb +37 -0
  87. data/lib/sentiero/web/events_app.rb +196 -0
  88. data/lib/sentiero/web/formatting.rb +43 -0
  89. data/lib/sentiero/web/ingest_app.rb +92 -0
  90. data/lib/sentiero/web/manifest.rb +43 -0
  91. data/lib/sentiero/web/monitoring_app.rb +316 -0
  92. data/lib/sentiero/web/script_tag.rb +57 -0
  93. data/lib/sentiero/web/shareable_replay.rb +88 -0
  94. data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
  95. data/lib/sentiero/web/templates/_brand.html.erb +18 -0
  96. data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
  97. data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
  98. data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
  99. data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
  100. data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
  101. data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
  102. data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
  103. data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
  104. data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
  105. data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
  106. data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
  107. data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
  108. data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
  109. data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
  110. data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
  111. data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
  112. data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
  113. data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
  114. data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
  115. data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
  116. data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
  117. data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
  118. data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
  119. data/lib/sentiero/web/templates/event_show.html.erb +52 -0
  120. data/lib/sentiero/web/templates/events_index.html.erb +177 -0
  121. data/lib/sentiero/web/templates/export_index.html.erb +69 -0
  122. data/lib/sentiero/web/templates/forms.html.erb +105 -0
  123. data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
  124. data/lib/sentiero/web/templates/import.html.erb +39 -0
  125. data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
  126. data/lib/sentiero/web/templates/segments.html.erb +114 -0
  127. data/lib/sentiero/web/templates/session_show.html.erb +195 -0
  128. data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
  129. data/lib/sentiero/web/track_app.rb +57 -0
  130. data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
  131. data/lib/sentiero/web/views/analyzer_view.rb +27 -0
  132. data/lib/sentiero/web/views/base_view.rb +76 -0
  133. data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
  134. data/lib/sentiero/web/views/conversions_view.rb +41 -0
  135. data/lib/sentiero/web/views/engagement_view.rb +67 -0
  136. data/lib/sentiero/web/views/errors_index_view.rb +37 -0
  137. data/lib/sentiero/web/views/event_show_view.rb +20 -0
  138. data/lib/sentiero/web/views/events_index_view.rb +56 -0
  139. data/lib/sentiero/web/views/export_view.rb +23 -0
  140. data/lib/sentiero/web/views/forms_view.rb +28 -0
  141. data/lib/sentiero/web/views/frustration_view.rb +15 -0
  142. data/lib/sentiero/web/views/funnel_view.rb +36 -0
  143. data/lib/sentiero/web/views/heatmap_view.rb +34 -0
  144. data/lib/sentiero/web/views/import_view.rb +13 -0
  145. data/lib/sentiero/web/views/page_report_view.rb +43 -0
  146. data/lib/sentiero/web/views/problem_show_view.rb +46 -0
  147. data/lib/sentiero/web/views/scroll_view.rb +23 -0
  148. data/lib/sentiero/web/views/segments_view.rb +28 -0
  149. data/lib/sentiero/web/views/session_show_view.rb +105 -0
  150. data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
  151. data/lib/sentiero/web/views/vitals_view.rb +45 -0
  152. data/lib/sentiero/web/views.rb +24 -0
  153. data/lib/sentiero/window_ref.rb +6 -0
  154. data/lib/sentiero.rb +69 -0
  155. metadata +232 -0
@@ -0,0 +1,331 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+ require_relative "frustration_analyzer"
5
+
6
+ module Sentiero
7
+ module Analytics
8
+ # Scores each session 0–100 on a STRUGGLE score (higher = more friction):
9
+ # a weighted blend of eight signals, each saturating to 0.0..1.0 so one
10
+ # pathological session can't dominate. Signals: rage_clicks, dead_clicks,
11
+ # nav_churn, idle_ratio, thrashing_scroll, quick_bounce, form_refills,
12
+ # error_abandonment.
13
+ class EngagementAnalyzer < Analyzer
14
+ # Signal weights; MUST sum to 1.00.
15
+ WEIGHTS = {
16
+ rage_clicks: 0.20,
17
+ dead_clicks: 0.15,
18
+ nav_churn: 0.15,
19
+ idle_ratio: 0.10,
20
+ thrashing_scroll: 0.10,
21
+ quick_bounce: 0.10,
22
+ form_refills: 0.10,
23
+ error_abandonment: 0.10
24
+ }.freeze
25
+
26
+ RAGE_SATURATION = 3 # 3+ rage clusters → full rage sub-score
27
+ DEAD_SATURATION = 3 # 3+ dead clicks → full dead sub-score
28
+ NAV_CHURN_SATURATION = 3 # 3+ revisits to already-seen URLs → full sub-score
29
+ THRASH_SATURATION = 3 # 3+ scroll reversals → full thrash sub-score
30
+ REFILL_SATURATION = 2 # 2+ field re-fills → full refill sub-score
31
+ IDLE_GAP_MS = 10_000 # consecutive events farther apart than this are "idle"
32
+ THRASH_MIN_DELTA_PX = 100 # a scroll delta below this is too small to be thrashing
33
+ THRASH_WINDOW_MS = 1_000 # both deltas of a reversal must fall within this span
34
+ QUICK_BOUNCE_MS = 5_000 # single-page sessions shorter than this bounced
35
+ ERROR_ABANDON_MS = 8_000 # a JS error within this of the session end = abandonment
36
+ MAX_SESSIONS = 500 # display cap on returned rows (does NOT set was_truncated)
37
+ DISTRIBUTION_BINS = %w[0-20 20-40 40-60 60-80 80-100].freeze
38
+
39
+ ERROR_TAG = "error"
40
+ NAVIGATION_TAG = "navigation"
41
+
42
# Maps a score to its DISTRIBUTION_BINS label. Integer division pins the
# boundaries: 19→"0-20", 20→"20-40", 100→"80-100".
#
# The bin index is clamped at BOTH ends. Without the lower clamp a negative
# score produced a negative array index, which Ruby resolves from the end of
# the array and so reported the top bin for a below-range score.
def self.bin_for(score)
  DISTRIBUTION_BINS[(score / 20).clamp(0, DISTRIBUTION_BINS.size - 1)]
end
46
+
47
# Scores every scanned session and returns
# { sessions:, distribution:, scanned:, was_truncated: }.
#
# Rows are ranked by descending score, ties broken by session_id, and only
# the first MAX_SESSIONS are returned. That row cap is a DISPLAY bound and
# does NOT set was_truncated; only the scan cap reported by scan_sessions
# does. The distribution is built from all scored rows, before the cap.
def analyze(limit: nil, since: nil, until_time: nil)
  per_session = {}

  scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    # One accumulator per session, created from the first window seen for it.
    state = per_session.fetch(summary[:session_id]) do |id|
      per_session[id] = new_accumulator(summary, window_id)
    end
    accumulate_window(state, events)
  end

  scored = per_session.each_value.map { |state| score_session(state) }
  histogram = build_distribution(scored)
  ranked = scored.sort_by { |row| [-row[:score], row[:session_id]] }

  {
    sessions: ranked.first(MAX_SESSIONS),
    distribution: histogram,
    scanned: scanned,
    was_truncated: hit_cap
  }
end
69
+
70
+ private
71
+
72
# Builds the empty per-session state that accumulate_window fills in.
# session_id comes from the summary; window_id is the window passed in.
def new_accumulator(summary, window_id)
  identity = { session_id: summary[:session_id], window_id: window_id }
  entry_and_bounds = { entry_url: nil, entry_anchor: nil, first_ts: nil, last_ts: nil }
  counters = { rage_count: 0, dead_count: 0, idle_gap_sum: 0, reversals: 0 }
  collections = {
    visits: [],
    input_counts: Hash.new(0), # node id => number of input events
    distinct_urls: {},
    error_timestamps: []
  }

  identity.merge(entry_and_bounds, counters, collections)
end
90
+
91
# Folds one window's events into the session accumulator: time bounds,
# entry URL, frustration counts, idle gaps, scroll reversals, visits,
# input tallies, distinct URLs and error timestamps.
def accumulate_window(acc, events)
  # Events without a numeric timestamp sort to the front.
  chronological = events.sort_by do |event|
    stamp = event["timestamp"]
    stamp.is_a?(Numeric) ? stamp : -Float::INFINITY
  end

  track_bounds(acc, chronological)
  track_entry_url(acc, events)

  # RAW detector output (pre-refinement); may exceed the de-noised per-URL
  # counts on the frustration page — the composite score wants raw friction.
  subtypes = FrustrationAnalyzer.detect_frustration_events(events).map { |entry| entry[:subtype] }
  acc[:rage_count] += subtypes.count("rage_click")
  acc[:dead_count] += subtypes.count("dead_click")

  acc[:idle_gap_sum] += idle_gap_sum(chronological)
  acc[:reversals] += scroll_reversals(chronological)

  collect_visits(acc, events)
  tally_inputs(acc, events)
  tally_distinct_urls(acc, events)
  collect_errors(acc, events)
end
110
+
111
# Widens the session's first_ts/last_ts to cover this window. `sorted` must
# already be in timestamp order: the first and last numeric timestamps are
# taken as the window's bounds. Windows with no numeric timestamp are ignored.
def track_bounds(acc, sorted)
  stamps = sorted.map { |event| event["timestamp"] }.grep(Numeric)
  return if stamps.empty?

  window_start, window_end = stamps.first, stamps.last
  acc[:first_ts] = window_start if acc[:first_ts].nil? || window_start < acc[:first_ts]
  acc[:last_ts] = window_end if acc[:last_ts].nil? || window_end > acc[:last_ts]
end
120
+
121
# Keeps the href of the earliest-timestamped Meta seen so far for the
# session. Windows are yielded in no promised order, so every Meta in every
# window is considered — unlike EntryAttribution#update_entry_candidate,
# which only looks at each window's first Meta.
#
# nil_anchor_is_earlier: true is required because the first Meta accepted
# here may carry a nil anchor (its event had a missing or non-numeric
# timestamp); a later, properly-timed Meta must still be able to displace it.
def track_entry_url(acc, events)
  events.each do |event|
    page = meta_href(event)
    next if !page

    stamp = event["timestamp"]
    displaces = acc[:entry_url].nil? || earlier?(stamp, acc[:entry_anchor], nil_anchor_is_earlier: true)
    acc.update(entry_url: page, entry_anchor: stamp) if displaces
  end
end
140
+
141
# Sums the gaps between consecutive numerically-timestamped events that are
# strictly longer than IDLE_GAP_MS. Events without a numeric timestamp are
# skipped. `sorted` is expected to be in timestamp order.
def idle_gap_sum(sorted)
  stamps = sorted.map { |event| event["timestamp"] }.grep(Numeric)

  stamps.each_cons(2).reduce(0) do |total, (earlier, later)|
    pause = later - earlier
    pause > IDLE_GAP_MS ? total + pause : total
  end
end
155
+
156
# Counts scroll reversals over consecutive triples of scroll samples.
# A triple is a reversal when the two Δy values differ in sign, each is
# larger than THRASH_MIN_DELTA_PX in magnitude, and the time between the
# LAST TWO samples is at most THRASH_WINDOW_MS (the duration of the first
# delta is not checked). Fewer than three samples yields 0.
def scroll_reversals(sorted)
  samples = sorted.filter_map { |event| scroll_point(event) }

  samples.each_cons(3).count do |(_t0, y0), (t1, y1), (t2, y2)|
    before = y1 - y0
    after = y2 - y1

    before.positive? != after.positive? &&
      before.abs > THRASH_MIN_DELTA_PX &&
      after.abs > THRASH_MIN_DELTA_PX &&
      (t2 - t1) <= THRASH_WINDOW_MS
  end
end
182
+
183
# Returns [timestamp, y] for an incremental scroll event whose timestamp and
# y are both numeric; nil for anything else.
def scroll_point(event)
  payload = event["data"]
  is_scroll = event["type"] == INCREMENTAL && payload.is_a?(Hash) && payload["source"] == SOURCE_SCROLL
  return nil unless is_scroll

  stamp = event["timestamp"]
  offset = payload["y"]
  [stamp, offset] if stamp.is_a?(Numeric) && offset.is_a?(Numeric)
end
191
+
192
# Appends a [timestamp, url] visit for every numerically-timestamped event
# that is either a Meta with an href or a "navigation" custom event with a
# url. The Meta href takes precedence when an event yields both.
def collect_visits(acc, events)
  events.each do |event|
    stamp = event["timestamp"]
    next unless stamp.is_a?(Numeric)

    location = meta_href(event) || navigation_url(event)
    acc[:visits] << [stamp, location] if location
  end
end
207
+
208
# Returns the non-empty String url carried by a "navigation" custom event,
# or nil for any other event.
#
# The payload comes from recorded client data, so its shape is checked
# before it is read: Hash#dig raises TypeError when "payload" is a String,
# Array or number, which would abort the whole analysis for one malformed
# event. Non-Hash payloads are treated as "no url".
def navigation_url(event)
  return nil unless event["type"] == CUSTOM

  data = event["data"]
  return nil unless data.is_a?(Hash) && data["tag"] == NAVIGATION_TAG

  payload = data["payload"]
  return nil unless payload.is_a?(Hash)

  url = payload["url"]
  (url.is_a?(String) && !url.empty?) ? url : nil
end
215
+
216
# Counts input events per node id. Masking + input:"last" make text-shrink
# undetectable, so a re-fill is proxied as a node touched more than once.
# Only Integer ids are counted.
def tally_inputs(acc, events)
  counts = acc[:input_counts]

  events.each do |event|
    payload = event["data"]
    next unless event["type"] == INCREMENTAL && payload.is_a?(Hash) && payload["source"] == SOURCE_INPUT

    node = payload["id"]
    counts[node] += 1 if node.is_a?(Integer)
  end
end
228
+
229
# Records every Meta href seen in this window as a key of
# acc[:distinct_urls] (used as a set).
def tally_distinct_urls(acc, events)
  seen = acc[:distinct_urls]

  events.each do |event|
    page = meta_href(event)
    seen[page] = true if page
  end
end
235
+
236
# Appends the numeric timestamp of every custom event tagged ERROR_TAG to
# acc[:error_timestamps].
def collect_errors(acc, events)
  stamps = acc[:error_timestamps]

  events.each do |event|
    payload = event["data"]
    next unless event["type"] == CUSTOM && payload.is_a?(Hash) && payload["tag"] == ERROR_TAG

    stamp = event["timestamp"]
    stamps << stamp if stamp.is_a?(Numeric)
  end
end
246
+
247
# Turns a finished accumulator into a result row. Each raw signal is mapped
# to a sub-score by sub_score, weighted by WEIGHTS, summed, scaled to 0–100,
# rounded and clamped. The raw (unsaturated) signals are returned alongside.
def score_session(acc)
  elapsed = session_duration(acc)
  signals = {
    rage_clicks: acc[:rage_count],
    dead_clicks: acc[:dead_count],
    nav_churn: nav_churn_revisits(acc),
    idle_ratio: idle_ratio(acc, elapsed),
    thrashing_scroll: acc[:reversals],
    quick_bounce: quick_bounce?(acc, elapsed),
    form_refills: form_refills(acc),
    error_abandonment: error_abandonment?(acc)
  }

  weighted = WEIGHTS.sum { |name, weight| weight * sub_score(name, signals[name]) }

  {
    session_id: acc[:session_id],
    window_id: acc[:window_id],
    score: (weighted * 100).round.clamp(0, 100),
    url: acc[:entry_url],
    duration_ms: elapsed.to_i,
    signals: signals
  }
end
271
+
272
# Maps one raw signal to its sub-score. Count signals are divided by their
# saturation constant and capped at 1.0; idle_ratio is passed through;
# the boolean signals become 1.0 or 0.0. Unknown keys yield nil.
def sub_score(key, value)
  saturation = {
    rage_clicks: RAGE_SATURATION,
    dead_clicks: DEAD_SATURATION,
    nav_churn: NAV_CHURN_SATURATION,
    thrashing_scroll: THRASH_SATURATION,
    form_refills: REFILL_SATURATION
  }[key]
  return [value / saturation.to_f, 1.0].min if saturation

  case key
  when :idle_ratio then value
  when :quick_bounce, :error_abandonment then value ? 1.0 : 0.0
  end
end
283
+
284
# Span between the session's first and last timestamps; 0 when either
# bound is missing.
def session_duration(acc)
  started, ended = acc.values_at(:first_ts, :last_ts)
  started && ended ? ended - started : 0
end
288
+
289
# Number of visits that landed on a URL the session had already visited.
#
# Every visit after the first to a given URL counts as one revisit, so the
# total is simply (visits − distinct URLs) and does not depend on visit
# order. The timestamp sort the count used to perform was therefore wasted
# work and has been dropped.
def nav_churn_revisits(acc)
  urls = acc[:visits].map { |_ts, url| url }
  urls.size - urls.uniq.size
end
301
+
302
# Share of the session duration spent in idle gaps, capped at 1.0.
# Returns 0.0 when the duration is missing or not positive.
def idle_ratio(acc, duration)
  measurable = duration && duration > 0
  return 0.0 unless measurable

  share = acc[:idle_gap_sum].to_f / duration
  [share, 1.0].min
end
306
+
307
# True when the session touched at most one distinct page (zero Metas
# counts as one page) and lasted less than QUICK_BOUNCE_MS.
def quick_bounce?(acc, duration)
  acc[:distinct_urls].size <= 1 && duration < QUICK_BOUNCE_MS
end
312
+
313
# Total re-fills across all input nodes: every touch of a node beyond its
# first counts as one re-fill.
def form_refills(acc)
  acc[:input_counts].each_value.sum { |touches| touches > 1 ? touches - 1 : 0 }
end
316
+
317
# True when any recorded error timestamp falls within ERROR_ABANDON_MS of
# the session's last timestamp. False when the session has no last_ts.
def error_abandonment?(acc)
  session_end = acc[:last_ts]
  return false if !session_end

  cutoff = session_end - ERROR_ABANDON_MS
  acc[:error_timestamps].any? { |stamp| stamp >= cutoff }
end
322
+
323
# Histogram of scores over ALL scored sessions — built before the
# MAX_SESSIONS row cap. Every DISTRIBUTION_BINS label is present, starting
# at 0, in the constant's order.
def build_distribution(rows)
  empty = DISTRIBUTION_BINS.each_with_object({}) { |label, histogram| histogram[label] = 0 }

  rows.each_with_object(empty) do |row, histogram|
    histogram[self.class.bin_for(row[:score])] += 1
  end
end
329
+ end
330
+ end
331
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+
5
+ module Sentiero
6
+ module Analytics
7
+ # Shared "earliest entry page" attribution, mixed into Analyzer so every
8
+ # analyzer gets it (StatsAggregator/ConversionAnalyzer use it directly;
9
+ # EngagementAnalyzer reuses only #earlier?, see its own track_entry_url —
10
+ # it scans every Meta in a window for the globally-earliest one instead of
11
+ # deferring to the window's first Meta, so it isn't update_entry_candidate).
12
+ #
13
+ # entry_url precedence: an explicit entry_url from session metadata is
14
+ # authoritative (callers anchor it at -Infinity so no Meta can displace
15
+ # it); otherwise the first Meta href of the earliest-starting window wins.
16
+ module EntryAttribution
17
# Is anchor `a` earlier than anchor `b`? A non-numeric `a` is never earlier.
#
# nil_anchor_is_earlier: true lets a well-timed candidate displace one whose
# anchor is nil (an accepted Meta whose own event lacked a numeric
# timestamp). EngagementAnalyzer's track_entry_url needs this because it can
# accept a nil-anchor candidate on the first match it sees. StatsAggregator
# and ConversionAnalyzer never reach that state: their nil-anchor candidate
# is only ever the FIRST one (accepted unconditionally via the
# `entry_url.nil?` guard before earlier? is consulted), so the default
# (false) is the correct, stricter behavior for them.
def earlier?(a, b, nil_anchor_is_earlier: false)
  return false unless a.is_a?(Numeric)

  case b
  when nil then nil_anchor_is_earlier ? true : false
  when Numeric then a < b
  else false
  end
end
32
+
33
# Returns the href of the first event for which meta_href yields one, or
# nil when no event does. Stops scanning at the first match.
def first_meta_href(events)
  events.lazy.filter_map { |event| meta_href(event) }.first
end
40
+
41
# Deferred entry-page candidate for one session, updated once per window.
# The candidate is the window's first Meta href, anchored by the timestamp
# of the WINDOW's first event (not the Meta's own timestamp) so windows
# compare by when they started, not by their first navigation's instant.
# A window without any Meta href leaves the state untouched.
def update_entry_candidate(state, events)
  page = first_meta_href(events)
  return unless page

  opening_event = events.first
  stamp = opening_event&.fetch("timestamp", nil)
  replaces = state[:entry_url].nil? || earlier?(stamp, state[:entry_anchor])
  return unless replaces

  state[:entry_url] = page
  state[:entry_anchor] = stamp
end
55
+
56
# Self-referral check: true when both URLs share scheme, host and port.
# Unparseable or host-less values return false (kept — not provably
# internal), as do non-String arguments.
#
# Hosts are compared case-insensitively: host names are case-insensitive,
# but URI.parse keeps the host's original casing, so a referrer written as
# "Example.COM" would otherwise be treated as an external origin.
def same_origin?(referrer, entry_url)
  return false unless referrer.is_a?(String) && entry_url.is_a?(String)

  ref = URI.parse(referrer)
  entry = URI.parse(entry_url)
  return false unless ref.host && entry.host

  ref.scheme == entry.scheme &&
    ref.host.downcase == entry.host.downcase &&
    ref.port == entry.port
rescue URI::InvalidURIError
  false
end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+ require_relative "analyzer"
5
+ require_relative "collectors/error_collector"
6
+ require_relative "../user_agent"
7
+
8
+ module Sentiero
9
+ module Analytics
10
+ # Groups captured JS errors (custom events tagged "error") by a normalized
11
+ # message pattern so the same error collapses into one row. Each occurrence
12
+ # carries offset_ms from its window's first event for the player's ?t= deep
13
+ # link. Pure transforms are shared with PageReportAnalyzer via ErrorCollector.
14
+ class ErrorDiscovery < Analyzer
15
+ MAX_OCCURRENCES_PER_GROUP = 50
16
+
17
+ MAX_FACET_VALUES = 50
18
+
19
# Scans sessions in the given time range, groups their error events, and
# returns { groups:, was_truncated: }. Groups are ordered by sort_groups
# according to sort_by; was_truncated is the cap flag from scan_sessions.
def grouped_errors(sort_by: "count", since: nil, until_time: nil)
  by_key = {}

  _scanned, truncated = scan_sessions(since: since, until_time: until_time) do |summary, window_id, events|
    collect_window(by_key, summary, window_id, events)
  end

  { groups: sort_groups(by_key.values, sort_by), was_truncated: truncated }
end
31
+
32
+ private
33
+
34
# Adds every error event of one window to its group. The window's first
# event timestamp is the anchor passed along for each occurrence's offset.
def collect_window(groups, summary, window_id, events)
  window_anchor = events.first&.fetch("timestamp", nil)

  events.each do |event|
    add_occurrence(groups, summary, window_id, window_anchor, event) if ErrorCollector.error_event?(event)
  end
end
43
+
44
# Records one error event in the group for its message, creating the group
# on first sight. Always bumps the group's count and facet tallies; the
# occurrence itself is stored only while the group holds fewer than
# MAX_OCCURRENCES_PER_GROUP of them.
#
# last_seen_at advances only on NUMERIC timestamps. Taking the max of the
# raw values raised ArgumentError when a recorded event carried a
# non-numeric timestamp next to a numeric one, and could store a
# non-numeric value that sort_groups later fails to negate.
def add_occurrence(groups, summary, window_id, anchor, event)
  payload = error_payload(event)
  message = ErrorCollector.extract_message(event)
  timestamp = event["timestamp"]

  key = ErrorCollector.group_key(message)
  group = groups[key] ||= new_group(key, message, payload)
  group[:count] += 1
  if timestamp.is_a?(Numeric) && (group[:last_seen_at].nil? || timestamp > group[:last_seen_at])
    group[:last_seen_at] = timestamp
  end
  tally_facets(group, summary[:metadata] || {})
  return if group[:occurrences].size >= MAX_OCCURRENCES_PER_GROUP

  group[:occurrences] << {
    session_id: summary[:session_id],
    window_id: window_id,
    timestamp: timestamp,
    offset_ms: offset_ms(anchor, timestamp)
  }
end
63
+
64
# Returns the event's data.payload when it is a Hash, otherwise {}.
#
# "data" is checked before "payload" is read: Hash#dig raises TypeError
# when an intermediate value is a String, Array or number, so a malformed
# event would otherwise raise instead of yielding an empty payload.
def error_payload(event)
  data = event["data"]
  payload = data.is_a?(Hash) ? data["payload"] : nil
  payload.is_a?(Hash) ? payload : {}
end
68
+
69
# Builds an empty group for one normalized message key. The id is the SHA1
# hex digest of the key; source and line are taken from the payload of the
# event that created the group. Each facet gets its own zero-default tally.
def new_group(key, message, payload)
  facets = %i[browsers devices pages].to_h { |facet| [facet, Hash.new(0)] }

  {
    id: Digest::SHA1.hexdigest(key),
    message: message,
    source: source_of(payload),
    line: line_of(payload),
    count: 0,
    last_seen_at: nil
  }.merge(facets).merge(occurrences: [])
end
83
+
84
# Tallies the session's browser, device and page url into the group's
# bounded facet counts. Browser and device are derived from the metadata's
# "userAgent" via UserAgent; the page is the metadata's "url".
def tally_facets(group, metadata)
  agent = metadata["userAgent"]
  browser = UserAgent.browser(agent)
  device = UserAgent.device(agent)

  bounded_tally(group[:browsers], browser)
  bounded_tally(group[:devices], device)
  bounded_tally(group[:pages], metadata["url"])
end
90
+
91
# Increments counts[value] for a non-empty String value. Once the tally
# holds MAX_FACET_VALUES distinct values, new values are ignored while
# already-tracked values keep counting.
def bounded_tally(counts, value)
  return if !value.is_a?(String) || value.empty?

  tracked = counts.key?(value)
  return unless tracked || counts.size < MAX_FACET_VALUES

  counts[value] += 1
end
97
+
98
# The payload's "source" when it is a non-empty String, otherwise nil.
def source_of(payload)
  origin = payload["source"]
  origin if origin.is_a?(String) && !origin.empty?
end
102
+
103
# The payload's "lineno" when it is an Integer, otherwise nil.
def line_of(payload)
  number = payload["lineno"]
  number if number.is_a?(Integer)
end
107
+
108
# Orders groups for display. "recency" sorts by most recent last_seen_at;
# any other value sorts by highest count, then most recent last_seen_at.
# A missing last_seen_at is treated as 0.
def sort_groups(groups, sort_by)
  recency = ->(group) { -(group[:last_seen_at] || 0) }

  if sort_by == "recency"
    groups.sort_by(&recency)
  else
    groups.sort_by { |group| [-group[:count], recency.call(group)] }
  end
end
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Sentiero
4
+ module Analytics
5
# Numeric codes from the rrweb recording protocol, shared by the analyzers.
module Events
  # rrweb EventType values (an event's "type").
  INCREMENTAL = 3 # EventType.IncrementalSnapshot
  META = 4        # EventType.Meta
  CUSTOM = 5      # EventType.Custom

  # rrweb IncrementalSource values (the "source" inside an incremental
  # snapshot's data).
  SOURCE_MOUSE_INTERACTION = 2 # IncrementalSource.MouseInteraction
  SOURCE_SCROLL = 3            # IncrementalSource.Scroll
  SOURCE_INPUT = 5             # IncrementalSource.Input
end
20
+ end
21
+ end