sentiero 1.0.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +7 -0
- data/README.md +679 -0
- data/lib/sentiero/analytics/analyzer.rb +91 -0
- data/lib/sentiero/analytics/bounded.rb +29 -0
- data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
- data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
- data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
- data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
- data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
- data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
- data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
- data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
- data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
- data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
- data/lib/sentiero/analytics/entry_attribution.rb +71 -0
- data/lib/sentiero/analytics/error_discovery.rb +118 -0
- data/lib/sentiero/analytics/events.rb +21 -0
- data/lib/sentiero/analytics/exporter.rb +242 -0
- data/lib/sentiero/analytics/form_analyzer.rb +153 -0
- data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
- data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
- data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
- data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
- data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
- data/lib/sentiero/analytics/problem_detail.rb +97 -0
- data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
- data/lib/sentiero/analytics/segmenter.rb +133 -0
- data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
- data/lib/sentiero/analytics/stats.rb +30 -0
- data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
- data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
- data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
- data/lib/sentiero/configuration.rb +184 -0
- data/lib/sentiero/erasure.rb +48 -0
- data/lib/sentiero/fingerprint.rb +34 -0
- data/lib/sentiero/ip_anonymizer.rb +29 -0
- data/lib/sentiero/redaction/config.rb +61 -0
- data/lib/sentiero/redaction.rb +207 -0
- data/lib/sentiero/reporter/configuration.rb +50 -0
- data/lib/sentiero/reporter/context.rb +31 -0
- data/lib/sentiero/reporter/dispatcher.rb +91 -0
- data/lib/sentiero/reporter/http_transport.rb +57 -0
- data/lib/sentiero/reporter/log_transport.rb +26 -0
- data/lib/sentiero/reporter/middleware.rb +62 -0
- data/lib/sentiero/reporter/normalizer.rb +14 -0
- data/lib/sentiero/reporter/null_transport.rb +18 -0
- data/lib/sentiero/reporter/report_context.rb +29 -0
- data/lib/sentiero/reporter/scrubber.rb +47 -0
- data/lib/sentiero/reporter/test_helper.rb +32 -0
- data/lib/sentiero/reporter/test_transport.rb +28 -0
- data/lib/sentiero/reporter.rb +214 -0
- data/lib/sentiero/roda.rb +47 -0
- data/lib/sentiero/store/error_store.rb +220 -0
- data/lib/sentiero/store/limits.rb +31 -0
- data/lib/sentiero/store/session_store.rb +118 -0
- data/lib/sentiero/store.rb +72 -0
- data/lib/sentiero/stores/file.rb +566 -0
- data/lib/sentiero/stores/memory.rb +362 -0
- data/lib/sentiero/stores/redis/keys.rb +59 -0
- data/lib/sentiero/stores/redis/lua.rb +119 -0
- data/lib/sentiero/stores/redis.rb +665 -0
- data/lib/sentiero/stores/sqlite/schema.rb +79 -0
- data/lib/sentiero/stores/sqlite.rb +626 -0
- data/lib/sentiero/user_agent.rb +32 -0
- data/lib/sentiero/version.rb +5 -0
- data/lib/sentiero/web/analytics_app.rb +538 -0
- data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
- data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
- data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
- data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
- data/lib/sentiero/web/assets/manifest.json +11 -0
- data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
- data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
- data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
- data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
- data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
- data/lib/sentiero/web/assets_app.rb +42 -0
- data/lib/sentiero/web/base_app.rb +319 -0
- data/lib/sentiero/web/basic_auth.rb +27 -0
- data/lib/sentiero/web/basic_auth_check.rb +41 -0
- data/lib/sentiero/web/body_reader.rb +44 -0
- data/lib/sentiero/web/csv_writer.rb +45 -0
- data/lib/sentiero/web/dashboard_app.rb +236 -0
- data/lib/sentiero/web/errors_app.rb +97 -0
- data/lib/sentiero/web/escaping.rb +37 -0
- data/lib/sentiero/web/events_app.rb +196 -0
- data/lib/sentiero/web/formatting.rb +43 -0
- data/lib/sentiero/web/ingest_app.rb +92 -0
- data/lib/sentiero/web/manifest.rb +43 -0
- data/lib/sentiero/web/monitoring_app.rb +316 -0
- data/lib/sentiero/web/script_tag.rb +57 -0
- data/lib/sentiero/web/shareable_replay.rb +88 -0
- data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
- data/lib/sentiero/web/templates/_brand.html.erb +18 -0
- data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
- data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
- data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
- data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
- data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
- data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
- data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
- data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
- data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
- data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
- data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
- data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
- data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
- data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
- data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
- data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
- data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
- data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
- data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
- data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
- data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
- data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
- data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
- data/lib/sentiero/web/templates/event_show.html.erb +52 -0
- data/lib/sentiero/web/templates/events_index.html.erb +177 -0
- data/lib/sentiero/web/templates/export_index.html.erb +69 -0
- data/lib/sentiero/web/templates/forms.html.erb +105 -0
- data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
- data/lib/sentiero/web/templates/import.html.erb +39 -0
- data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
- data/lib/sentiero/web/templates/segments.html.erb +114 -0
- data/lib/sentiero/web/templates/session_show.html.erb +195 -0
- data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
- data/lib/sentiero/web/track_app.rb +57 -0
- data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
- data/lib/sentiero/web/views/analyzer_view.rb +27 -0
- data/lib/sentiero/web/views/base_view.rb +76 -0
- data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
- data/lib/sentiero/web/views/conversions_view.rb +41 -0
- data/lib/sentiero/web/views/engagement_view.rb +67 -0
- data/lib/sentiero/web/views/errors_index_view.rb +37 -0
- data/lib/sentiero/web/views/event_show_view.rb +20 -0
- data/lib/sentiero/web/views/events_index_view.rb +56 -0
- data/lib/sentiero/web/views/export_view.rb +23 -0
- data/lib/sentiero/web/views/forms_view.rb +28 -0
- data/lib/sentiero/web/views/frustration_view.rb +15 -0
- data/lib/sentiero/web/views/funnel_view.rb +36 -0
- data/lib/sentiero/web/views/heatmap_view.rb +34 -0
- data/lib/sentiero/web/views/import_view.rb +13 -0
- data/lib/sentiero/web/views/page_report_view.rb +43 -0
- data/lib/sentiero/web/views/problem_show_view.rb +46 -0
- data/lib/sentiero/web/views/scroll_view.rb +23 -0
- data/lib/sentiero/web/views/segments_view.rb +28 -0
- data/lib/sentiero/web/views/session_show_view.rb +105 -0
- data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
- data/lib/sentiero/web/views/vitals_view.rb +45 -0
- data/lib/sentiero/web/views.rb +24 -0
- data/lib/sentiero/window_ref.rb +6 -0
- data/lib/sentiero.rb +69 -0
- metadata +232 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "analyzer"
|
|
4
|
+
require_relative "frustration_analyzer"
|
|
5
|
+
|
|
6
|
+
module Sentiero
|
|
7
|
+
module Analytics
|
|
8
|
+
# Scores each session 0–100 on a STRUGGLE score (higher = more friction):
|
|
9
|
+
# a weighted blend of eight signals, each saturating to 0.0..1.0 so one
|
|
10
|
+
# pathological session can't dominate. Signals: rage_clicks, dead_clicks,
|
|
11
|
+
# nav_churn, idle_ratio, thrashing_scroll, quick_bounce, form_refills,
|
|
12
|
+
# error_abandonment.
|
|
13
|
+
class EngagementAnalyzer < Analyzer
|
|
14
|
+
# Per-signal weights for the composite struggle score. They MUST sum to 1.00
# so that a session saturating every signal scores exactly 100.
WEIGHTS = {
  rage_clicks: 0.20,
  dead_clicks: 0.15,
  nav_churn: 0.15,
  idle_ratio: 0.10,
  thrashing_scroll: 0.10,
  quick_bounce: 0.10,
  form_refills: 0.10,
  error_abandonment: 0.10
}.freeze

# Saturation points: the count at which a signal's sub-score reaches 1.0.
# 3+ rage clusters → full rage sub-score.
RAGE_SATURATION = 3
# 3+ dead clicks → full dead sub-score.
DEAD_SATURATION = 3
# 3+ revisits to already-seen URLs → full sub-score.
NAV_CHURN_SATURATION = 3
# 3+ scroll reversals → full thrash sub-score.
THRASH_SATURATION = 3
# 2+ field re-fills → full refill sub-score.
REFILL_SATURATION = 2

# Consecutive events farther apart than this (ms) are "idle".
IDLE_GAP_MS = 10_000
# A scroll delta (px) at or below this is too small to be thrashing.
THRASH_MIN_DELTA_PX = 100
# Time window (ms) applied when deciding whether a scroll reversal counts.
THRASH_WINDOW_MS = 1_000
# Single-page sessions shorter than this (ms) bounced.
QUICK_BOUNCE_MS = 5_000
# A JS error within this (ms) of the session end = abandonment.
ERROR_ABANDON_MS = 8_000
# Display cap on returned rows (does NOT set was_truncated).
MAX_SESSIONS = 500
# Histogram bucket labels, lowest to highest.
DISTRIBUTION_BINS = %w[0-20 20-40 40-60 60-80 80-100].freeze

# Custom-event tags read from event["data"]["tag"].
ERROR_TAG = "error"
NAVIGATION_TAG = "navigation"
|
|
41
|
+
|
|
42
|
+
# Returns the DISTRIBUTION_BINS label for a score. Buckets are 20 points
# wide and chosen by integer division, so 19 → "0-20", 20 → "20-40"; the
# index is capped at the last bucket so 100 → "80-100".
def self.bin_for(score)
  bucket = score / 20
  bucket = 4 if bucket > 4
  DISTRIBUTION_BINS[bucket]
end
|
|
46
|
+
|
|
47
|
+
# The MAX_SESSIONS row cap is a DISPLAY bound (keeps highest scores, does
|
|
48
|
+
# NOT set was_truncated); only the scan cap sets was_truncated.
|
|
49
|
+
# Scans sessions, folds every window into one accumulator per session,
# scores each session and returns the ranked rows plus a score histogram.
#
# Returns a Hash with:
#   :sessions      - rows sorted by score (descending), then session_id,
#                    cut to MAX_SESSIONS
#   :distribution  - histogram over ALL scored rows (built before the cut)
#   :scanned       - first value returned by scan_sessions
#   :was_truncated - second value returned by scan_sessions (the scan cap)
def analyze(limit: nil, since: nil, until_time: nil)
  per_session = {}

  scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    key = summary[:session_id]
    # The first window seen for a session creates its accumulator.
    per_session[key] = new_accumulator(summary, window_id) unless per_session[key]
    accumulate_window(per_session[key], events)
  end

  scored = per_session.each_value.map { |acc| score_session(acc) }
  histogram = build_distribution(scored)
  ranked = scored.sort_by { |row| [-row[:score], row[:session_id]] }

  {
    sessions: ranked.take(MAX_SESSIONS),
    distribution: histogram,
    scanned: scanned,
    was_truncated: hit_cap
  }
end
|
|
69
|
+
|
|
70
|
+
private
|
|
71
|
+
|
|
72
|
+
# Builds the empty per-session accumulator that accumulate_window fills in
# and score_session reads. Every call returns fresh collection objects.
def new_accumulator(summary, window_id)
  identity = { session_id: summary[:session_id], window_id: window_id }
  entry = { entry_url: nil, entry_anchor: nil }
  bounds = { first_ts: nil, last_ts: nil }
  counters = { rage_count: 0, dead_count: 0, idle_gap_sum: 0, reversals: 0 }
  collections = {
    visits: [],
    input_counts: Hash.new(0), # node id => number of input events
    distinct_urls: {},
    error_timestamps: []
  }

  identity.merge(entry, bounds, counters, collections)
end
|
|
90
|
+
|
|
91
|
+
# Folds one window's events into the session accumulator: time bounds,
# entry URL, frustration counts, idle time, scroll reversals, visits,
# input touches, distinct URLs and error timestamps.
def accumulate_window(acc, events)
  # Events without a numeric timestamp sort first; the time-based helpers
  # skip them.
  by_time = events.sort_by do |event|
    stamp = event["timestamp"]
    stamp.is_a?(Numeric) ? stamp : -Float::INFINITY
  end

  track_bounds(acc, by_time)
  track_entry_url(acc, events)

  # RAW detector output (pre-refinement); may exceed the de-noised per-URL
  # counts on the frustration page — the composite score wants raw friction.
  subtypes = FrustrationAnalyzer.detect_frustration_events(events).map { |entry| entry[:subtype] }
  acc[:rage_count] += subtypes.count("rage_click")
  acc[:dead_count] += subtypes.count("dead_click")

  acc[:idle_gap_sum] += idle_gap_sum(by_time)
  acc[:reversals] += scroll_reversals(by_time)
  collect_visits(acc, events)
  tally_inputs(acc, events)
  tally_distinct_urls(acc, events)
  collect_errors(acc, events)
end
|
|
110
|
+
|
|
111
|
+
# Widens the session's first_ts/last_ts using this window's numeric
# timestamps. Expects `sorted` in ascending timestamp order: the first and
# last numeric values are taken as the window's bounds.
def track_bounds(acc, sorted)
  stamps = sorted.map { |event| event["timestamp"] }.grep(Numeric)
  return if stamps.empty?

  earliest = stamps.first
  latest = stamps.last
  acc[:first_ts] = earliest if acc[:first_ts].nil? || earliest < acc[:first_ts]
  acc[:last_ts] = latest if acc[:last_ts].nil? || latest > acc[:last_ts]
end
|
|
120
|
+
|
|
121
|
+
# Records the href of the earliest-timestamped Meta event seen so far for
# the session. Windows are yielded in no promised order, so every Meta in
# every window is considered (unlike EntryAttribution#update_entry_candidate,
# which only looks at each window's first Meta).
#
# The first Meta is accepted unconditionally, even without a usable
# timestamp. A missing OR non-numeric timestamp is stored as a nil anchor,
# so that a later, properly-timed Meta can still displace it through
# earlier?(..., nil_anchor_is_earlier: true). Storing the raw non-numeric
# value instead would pin that candidate forever, because earlier? only
# treats nil as displaceable.
def track_entry_url(acc, events)
  events.each do |event|
    href = meta_href(event)
    next unless href

    raw = event["timestamp"]
    anchor = raw.is_a?(Numeric) ? raw : nil
    next unless acc[:entry_url].nil? || earlier?(anchor, acc[:entry_anchor], nil_anchor_is_earlier: true)

    acc[:entry_url] = href
    acc[:entry_anchor] = anchor
  end
end
|
|
140
|
+
|
|
141
|
+
# Sums the gaps between consecutive numeric timestamps that exceed
# IDLE_GAP_MS. Events without a numeric timestamp are ignored; `sorted` is
# expected in ascending timestamp order.
def idle_gap_sum(sorted)
  stamps = sorted.map { |event| event["timestamp"] }.grep(Numeric)

  stamps.each_cons(2).reduce(0) do |total, (earlier, later)|
    gap = later - earlier
    gap > IDLE_GAP_MS ? total + gap : total
  end
end
|
|
155
|
+
|
|
156
|
+
# Counts scroll reversals in a window. A reversal is two consecutive scroll
# steps whose Δy signs differ, where both |Δy| exceed THRASH_MIN_DELTA_PX
# and the LATER step's two scroll points are at most THRASH_WINDOW_MS apart
# (the earlier step's duration is not checked). Needs at least three scroll
# points; `sorted` is expected in ascending timestamp order.
def scroll_reversals(sorted)
  points = sorted.filter_map { |event| scroll_point(event) }
  return 0 if points.size < 3

  # Each step is [Δy, elapsed ms] between neighbouring scroll points.
  steps = points.each_cons(2).map do |(from_ts, from_y), (to_ts, to_y)|
    [to_y - from_y, to_ts - from_ts]
  end

  steps.each_cons(2).count do |(before, _), (after, elapsed)|
    before.positive? != after.positive? &&
      before.abs > THRASH_MIN_DELTA_PX &&
      after.abs > THRASH_MIN_DELTA_PX &&
      elapsed <= THRASH_WINDOW_MS
  end
end
|
|
182
|
+
|
|
183
|
+
# Returns [timestamp, y] for an incremental scroll event whose timestamp
# and y are both numeric; nil for anything else.
def scroll_point(event)
  payload = event["data"]
  return unless event["type"] == INCREMENTAL && payload.is_a?(Hash) && payload["source"] == SOURCE_SCROLL

  stamp = event["timestamp"]
  offset = payload["y"]
  [stamp, offset] if stamp.is_a?(Numeric) && offset.is_a?(Numeric)
end
|
|
191
|
+
|
|
192
|
+
# Appends a [timestamp, url] pair to acc[:visits] for every numerically
# timestamped event that yields a URL: the Meta href when there is one,
# otherwise the navigation custom event's URL.
def collect_visits(acc, events)
  events.each do |event|
    stamp = event["timestamp"]
    next unless stamp.is_a?(Numeric)

    target = meta_href(event) || navigation_url(event)
    acc[:visits] << [stamp, target] if target
  end
end
|
|
207
|
+
|
|
208
|
+
# Returns the URL carried by a custom event tagged NAVIGATION_TAG, or nil
# when the event is not a navigation event or has no non-empty String URL.
#
# The payload comes from client-recorded data, so its shape is checked
# before reading "url": calling dig through a String, Array or Integer
# payload would raise TypeError and abort the whole analysis.
def navigation_url(event)
  return nil unless event["type"] == CUSTOM

  data = event["data"]
  return nil unless data.is_a?(Hash) && data["tag"] == NAVIGATION_TAG

  payload = data["payload"]
  return nil unless payload.is_a?(Hash)

  url = payload["url"]
  url.is_a?(String) && !url.empty? ? url : nil
end
|
|
215
|
+
|
|
216
|
+
# Masking + input:"last" make text-shrink undetectable, so a re-fill is
|
|
217
|
+
# proxied as a node touched more than once.
|
|
218
|
+
# Counts input events per node id into acc[:input_counts]. Only incremental
# events whose source is SOURCE_INPUT and whose id is an Integer count.
def tally_inputs(acc, events)
  touches = acc[:input_counts]

  events.each do |event|
    data = event["data"]
    next unless event["type"] == INCREMENTAL && data.is_a?(Hash) && data["source"] == SOURCE_INPUT

    node_id = data["id"]
    touches[node_id] += 1 if node_id.is_a?(Integer)
  end
end
|
|
228
|
+
|
|
229
|
+
# Marks every Meta href found in the window as seen in acc[:distinct_urls]
# (a Hash used as a set).
def tally_distinct_urls(acc, events)
  events
    .filter_map { |event| meta_href(event) }
    .each { |href| acc[:distinct_urls][href] = true }
end
|
|
235
|
+
|
|
236
|
+
# Appends the numeric timestamp of every custom event tagged ERROR_TAG to
# acc[:error_timestamps].
def collect_errors(acc, events)
  events.each do |event|
    data = event["data"]
    next unless event["type"] == CUSTOM && data.is_a?(Hash) && data["tag"] == ERROR_TAG

    stamp = event["timestamp"]
    acc[:error_timestamps] << stamp if stamp.is_a?(Numeric)
  end
end
|
|
246
|
+
|
|
247
|
+
# Turns a finished accumulator into a result row: the raw signal values, the
# 0–100 composite score (weighted sum of sub-scores, rounded and clamped),
# the entry URL and the session duration in whole milliseconds.
def score_session(acc)
  duration = session_duration(acc)

  signals = {
    rage_clicks: acc[:rage_count],
    dead_clicks: acc[:dead_count],
    nav_churn: nav_churn_revisits(acc),
    idle_ratio: idle_ratio(acc, duration),
    thrashing_scroll: acc[:reversals],
    quick_bounce: quick_bounce?(acc, duration),
    form_refills: form_refills(acc),
    error_abandonment: error_abandonment?(acc)
  }

  weighted = WEIGHTS.sum { |name, weight| weight * sub_score(name, signals[name]) }
  percent = (weighted * 100).round

  {
    session_id: acc[:session_id],
    window_id: acc[:window_id],
    score: percent.clamp(0, 100),
    url: acc[:entry_url],
    duration_ms: duration.to_i,
    signals: signals
  }
end
|
|
271
|
+
|
|
272
|
+
# Normalizes one raw signal to 0.0..1.0. Count signals saturate at their
# *_SATURATION constant, idle_ratio is passed through unchanged and the two
# boolean signals map to 1.0 / 0.0. Unknown keys yield nil.
def sub_score(key, value)
  case key
  when :idle_ratio
    value
  when :quick_bounce, :error_abandonment
    value ? 1.0 : 0.0
  else
    ceiling = {
      rage_clicks: RAGE_SATURATION,
      dead_clicks: DEAD_SATURATION,
      nav_churn: NAV_CHURN_SATURATION,
      thrashing_scroll: THRASH_SATURATION,
      form_refills: REFILL_SATURATION
    }[key]
    ceiling && [value / ceiling.to_f, 1.0].min
  end
end
|
|
283
|
+
|
|
284
|
+
# Span between the earliest and latest numeric timestamp seen for the
# session; 0 when either bound is missing.
def session_duration(acc)
  started, ended = acc.values_at(:first_ts, :last_ts)
  started && ended ? ended - started : 0
end
|
|
288
|
+
|
|
289
|
+
# Number of visits that landed on a URL already visited earlier in the
# session. Every visit after the first to a given URL is a revisit, so the
# count equals total visits minus distinct URLs. That value does not depend
# on visit order, so no timestamp sort is needed.
def nav_churn_revisits(acc)
  urls = acc[:visits].map { |_ts, url| url }
  urls.size - urls.uniq.size
end
|
|
301
|
+
|
|
302
|
+
# Share of the session spent idle, capped at 1.0. Returns 0.0 when the
# duration is missing or not positive.
def idle_ratio(acc, duration)
  if duration && duration > 0
    share = acc[:idle_gap_sum].to_f / duration
    [share, 1.0].min
  else
    0.0
  end
end
|
|
306
|
+
|
|
307
|
+
# Single distinct page (zero Metas counts as one) left within QUICK_BOUNCE_MS.
|
|
308
|
+
# True when the session touched at most one distinct URL (none recorded is
# treated as one page) and lasted less than QUICK_BOUNCE_MS.
def quick_bounce?(acc, duration)
  pages = acc[:distinct_urls].size
  pages <= 1 && duration < QUICK_BOUNCE_MS
end
|
|
312
|
+
|
|
313
|
+
# Total number of repeat touches across input nodes: each node contributes
# its input-event count beyond the first.
def form_refills(acc)
  acc[:input_counts].each_value.sum { |touches| touches > 1 ? touches - 1 : 0 }
end
|
|
316
|
+
|
|
317
|
+
# True when any recorded error timestamp falls within ERROR_ABANDON_MS of
# the session's last timestamp. False when the session has no last_ts.
def error_abandonment?(acc)
  session_end = acc[:last_ts]
  return false unless session_end

  cutoff = session_end - ERROR_ABANDON_MS
  acc[:error_timestamps].any? { |stamp| stamp >= cutoff }
end
|
|
322
|
+
|
|
323
|
+
# Over ALL scored sessions — built before the MAX_SESSIONS row cap.
|
|
324
|
+
# Histogram of scores: every DISTRIBUTION_BINS label mapped to the number of
# rows whose score falls in that bucket (empty buckets stay at 0).
def build_distribution(rows)
  empty = DISTRIBUTION_BINS.to_h { |label| [label, 0] }

  rows.each_with_object(empty) do |row, tally|
    tally[self.class.bin_for(row[:score])] += 1
  end
end
|
|
329
|
+
end
|
|
330
|
+
end
|
|
331
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
module Sentiero
|
|
6
|
+
module Analytics
|
|
7
|
+
# Shared "earliest entry page" attribution, mixed into Analyzer so every
|
|
8
|
+
# analyzer gets it (StatsAggregator/ConversionAnalyzer use it directly;
|
|
9
|
+
# EngagementAnalyzer reuses only #earlier?, see its own track_entry_url —
|
|
10
|
+
# it scans every Meta in a window for the globally-earliest one instead of
|
|
11
|
+
# deferring to the window's first Meta, so it isn't update_entry_candidate).
|
|
12
|
+
#
|
|
13
|
+
# entry_url precedence: an explicit entry_url from session metadata is
|
|
14
|
+
# authoritative (callers anchor it at -Infinity so no Meta can displace
|
|
15
|
+
# it); otherwise the first Meta href of the earliest-starting window wins.
|
|
16
|
+
module EntryAttribution
|
|
17
|
+
# a "earlier than" b. nil_anchor_is_earlier: true lets a later,
|
|
18
|
+
# well-timed candidate displace one whose anchor came back nil (an
|
|
19
|
+
# accepted Meta whose own event lacked a numeric timestamp) —
|
|
20
|
+
# EngagementAnalyzer's track_entry_url needs this because it can accept
|
|
21
|
+
# a nil-anchor candidate on the first match it sees. StatsAggregator and
|
|
22
|
+
# ConversionAnalyzer never reach that state: their nil-anchor candidate
|
|
23
|
+
# is only ever the FIRST one (accepted unconditionally via the
|
|
24
|
+
# `entry_url.nil?` guard before earlier? is consulted), so the default
|
|
25
|
+
# (false) is the correct, stricter behavior for them.
|
|
26
|
+
# True when anchor `a` is strictly earlier than anchor `b`. A non-numeric
# `a` is never earlier. A nil `b` counts as later than any numeric `a` only
# when nil_anchor_is_earlier is set; any other non-numeric `b` yields false.
def earlier?(a, b, nil_anchor_is_earlier: false)
  return false unless a.is_a?(Numeric)

  case b
  when nil then nil_anchor_is_earlier ? true : false
  when Numeric then a < b
  else false
  end
end
|
|
32
|
+
|
|
33
|
+
# Returns the first truthy meta_href among the events, in iteration order,
# or nil when no event has one. Stops scanning at the first match.
def first_meta_href(events)
  events.lazy.filter_map { |event| meta_href(event) }.first
end
|
|
40
|
+
|
|
41
|
+
# Deferred candidate for one session, updated once per window: the
|
|
42
|
+
# window's first Meta href, anchored by the WINDOW's first event
|
|
43
|
+
# timestamp (not the Meta's own timestamp) so windows compare by when
|
|
44
|
+
# they started, not by their first navigation's exact instant.
|
|
45
|
+
# Offers one window's first Meta href as the session's entry URL. The
# candidate is anchored by the timestamp of the window's FIRST event (not the
# Meta's own) and replaces the current one when none is set yet or when its
# anchor is strictly earlier. Windows without a Meta href are ignored.
def update_entry_candidate(state, events)
  href = first_meta_href(events)
  return unless href

  opening_event = events.first
  anchor = opening_event&.fetch("timestamp", nil)

  if state[:entry_url].nil? || earlier?(anchor, state[:entry_anchor])
    state[:entry_url] = href
    state[:entry_anchor] = anchor
  end
end
|
|
55
|
+
|
|
56
|
+
# Self-referral: same scheme://host:port. Unparseable or host-less values
|
|
57
|
+
# return false (kept — not provably internal).
|
|
58
|
+
# True when referrer and entry_url share scheme, host and port.
#
# Host names are case-insensitive (RFC 3986 §3.2.2), but URI.parse keeps the
# host's original case, so hosts are compared downcased. Non-String,
# unparseable or host-less values return false.
def same_origin?(referrer, entry_url)
  return false unless referrer.is_a?(String) && entry_url.is_a?(String)

  ref = URI.parse(referrer)
  entry = URI.parse(entry_url)
  return false unless ref.host && entry.host

  ref.scheme == entry.scheme &&
    ref.host.downcase == entry.host.downcase &&
    ref.port == entry.port
rescue URI::InvalidURIError
  false
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
require_relative "analyzer"
|
|
5
|
+
require_relative "collectors/error_collector"
|
|
6
|
+
require_relative "../user_agent"
|
|
7
|
+
|
|
8
|
+
module Sentiero
|
|
9
|
+
module Analytics
|
|
10
|
+
# Groups captured JS errors (custom events tagged "error") by a normalized
|
|
11
|
+
# message pattern so the same error collapses into one row. Each occurrence
|
|
12
|
+
# carries offset_ms from its window's first event for the player's ?t= deep
|
|
13
|
+
# link. Pure transforms are shared with PageReportAnalyzer via ErrorCollector.
|
|
14
|
+
class ErrorDiscovery < Analyzer
|
|
15
|
+
MAX_OCCURRENCES_PER_GROUP = 50
|
|
16
|
+
|
|
17
|
+
MAX_FACET_VALUES = 50
|
|
18
|
+
|
|
19
|
+
# Scans sessions in the given time range and returns the error groups found.
#
# Returns a Hash with :groups (sorted via sort_groups according to sort_by)
# and :was_truncated (the second value returned by scan_sessions).
def grouped_errors(sort_by: "count", since: nil, until_time: nil)
  by_key = {}

  _, truncated = scan_sessions(since: since, until_time: until_time) do |summary, window_id, events|
    collect_window(by_key, summary, window_id, events)
  end

  { groups: sort_groups(by_key.values, sort_by), was_truncated: truncated }
end
|
|
31
|
+
|
|
32
|
+
private
|
|
33
|
+
|
|
34
|
+
# Adds an occurrence for every error event in the window. The window's first
# event timestamp is passed along as the anchor for each occurrence.
def collect_window(groups, summary, window_id, events)
  anchor = events.first&.fetch("timestamp", nil)

  events.each do |event|
    add_occurrence(groups, summary, window_id, anchor, event) if ErrorCollector.error_event?(event)
  end
end
|
|
43
|
+
|
|
44
|
+
# Records one error event: finds or creates its group, bumps the group's
# count, last_seen_at and facet tallies, and stores the occurrence unless the
# group already holds MAX_OCCURRENCES_PER_GROUP of them.
#
# last_seen_at only ever takes numeric timestamps. A non-numeric timestamp
# from a malformed event would otherwise be stored and make the next numeric
# comparison (here or when sorting groups) raise ArgumentError.
def add_occurrence(groups, summary, window_id, anchor, event)
  payload = error_payload(event)
  message = ErrorCollector.extract_message(event)
  timestamp = event["timestamp"]

  key = ErrorCollector.group_key(message)
  group = groups[key] ||= new_group(key, message, payload)
  group[:count] += 1
  group[:last_seen_at] = [group[:last_seen_at], timestamp].grep(Numeric).max
  tally_facets(group, summary[:metadata] || {})
  # The count keeps growing past the cap; only the stored samples are bounded.
  return if group[:occurrences].size >= MAX_OCCURRENCES_PER_GROUP

  group[:occurrences] << {
    session_id: summary[:session_id],
    window_id: window_id,
    timestamp: timestamp,
    offset_ms: offset_ms(anchor, timestamp)
  }
end
|
|
63
|
+
|
|
64
|
+
# Returns the event's data.payload when it is a Hash, otherwise an empty
# Hash. Each level is checked explicitly so that a non-Hash "data" value
# falls back to {} instead of raising TypeError from dig.
def error_payload(event)
  data = event["data"]
  payload = data.is_a?(Hash) ? data["payload"] : nil
  payload.is_a?(Hash) ? payload : {}
end
|
|
68
|
+
|
|
69
|
+
# Builds an empty error group. The id is the SHA1 hex digest of the group
# key; source and line come from the first occurrence's payload. Facet
# tallies default missing keys to 0.
def new_group(key, message, payload)
  summary = {
    id: Digest::SHA1.hexdigest(key),
    message: message,
    source: source_of(payload),
    line: line_of(payload),
    count: 0,
    last_seen_at: nil
  }
  facets = { browsers: Hash.new(0), devices: Hash.new(0), pages: Hash.new(0) }

  summary.merge(facets, { occurrences: [] })
end
|
|
83
|
+
|
|
84
|
+
# Tallies the session's browser, device and page URL into the group's facet
# counters, reading "userAgent" and "url" from the session metadata.
def tally_facets(group, metadata)
  agent_string = metadata["userAgent"]
  page_url = metadata["url"]

  bounded_tally(group[:browsers], UserAgent.browser(agent_string))
  bounded_tally(group[:devices], UserAgent.device(agent_string))
  bounded_tally(group[:pages], page_url)
end
|
|
90
|
+
|
|
91
|
+
# Increments counts[value] for a non-empty String value. Once the tally
# holds MAX_FACET_VALUES distinct keys, new keys are dropped while existing
# keys keep counting.
def bounded_tally(counts, value)
  return if !value.is_a?(String) || value.empty?

  already_tracked = counts.key?(value)
  counts[value] += 1 if already_tracked || counts.size < MAX_FACET_VALUES
end
|
|
97
|
+
|
|
98
|
+
# The payload's "source" when it is a non-empty String, otherwise nil.
def source_of(payload)
  candidate = payload["source"]
  candidate if candidate.is_a?(String) && !candidate.empty?
end
|
|
102
|
+
|
|
103
|
+
# The payload's "lineno" when it is an Integer, otherwise nil.
def line_of(payload)
  number = payload["lineno"]
  number if number.is_a?(Integer)
end
|
|
107
|
+
|
|
108
|
+
# Orders groups for display. "recency" sorts by last_seen_at, newest first;
# any other value sorts by count (highest first), breaking ties by recency.
# Groups without a last_seen_at sort as if last seen at 0.
def sort_groups(groups, sort_by)
  newest_first = ->(group) { -(group[:last_seen_at] || 0) }
  return groups.sort_by(&newest_first) if "recency" == sort_by

  groups.sort_by { |group| [-group[:count], newest_first.call(group)] }
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sentiero
|
|
4
|
+
module Analytics
|
|
5
|
+
# rrweb protocol constants shared by analyzers.
|
|
6
|
+
module Events
  # rrweb EventType values, compared against an event's "type".
  INCREMENTAL = 3 # EventType.IncrementalSnapshot
  META = 4 # EventType.Meta
  CUSTOM = 5 # EventType.Custom

  # rrweb IncrementalSource values, compared against an incremental
  # event's data "source".
  SOURCE_MOUSE_INTERACTION = 2 # IncrementalSource.MouseInteraction
  SOURCE_SCROLL = 3 # IncrementalSource.Scroll
  SOURCE_INPUT = 5 # IncrementalSource.Input
end
|
|
20
|
+
end
|
|
21
|
+
end
|