sentiero 1.0.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +7 -0
- data/README.md +679 -0
- data/lib/sentiero/analytics/analyzer.rb +91 -0
- data/lib/sentiero/analytics/bounded.rb +29 -0
- data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
- data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
- data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
- data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
- data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
- data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
- data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
- data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
- data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
- data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
- data/lib/sentiero/analytics/entry_attribution.rb +71 -0
- data/lib/sentiero/analytics/error_discovery.rb +118 -0
- data/lib/sentiero/analytics/events.rb +21 -0
- data/lib/sentiero/analytics/exporter.rb +242 -0
- data/lib/sentiero/analytics/form_analyzer.rb +153 -0
- data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
- data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
- data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
- data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
- data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
- data/lib/sentiero/analytics/problem_detail.rb +97 -0
- data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
- data/lib/sentiero/analytics/segmenter.rb +133 -0
- data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
- data/lib/sentiero/analytics/stats.rb +30 -0
- data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
- data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
- data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
- data/lib/sentiero/configuration.rb +184 -0
- data/lib/sentiero/erasure.rb +48 -0
- data/lib/sentiero/fingerprint.rb +34 -0
- data/lib/sentiero/ip_anonymizer.rb +29 -0
- data/lib/sentiero/redaction/config.rb +61 -0
- data/lib/sentiero/redaction.rb +207 -0
- data/lib/sentiero/reporter/configuration.rb +50 -0
- data/lib/sentiero/reporter/context.rb +31 -0
- data/lib/sentiero/reporter/dispatcher.rb +91 -0
- data/lib/sentiero/reporter/http_transport.rb +57 -0
- data/lib/sentiero/reporter/log_transport.rb +26 -0
- data/lib/sentiero/reporter/middleware.rb +62 -0
- data/lib/sentiero/reporter/normalizer.rb +14 -0
- data/lib/sentiero/reporter/null_transport.rb +18 -0
- data/lib/sentiero/reporter/report_context.rb +29 -0
- data/lib/sentiero/reporter/scrubber.rb +47 -0
- data/lib/sentiero/reporter/test_helper.rb +32 -0
- data/lib/sentiero/reporter/test_transport.rb +28 -0
- data/lib/sentiero/reporter.rb +214 -0
- data/lib/sentiero/roda.rb +47 -0
- data/lib/sentiero/store/error_store.rb +220 -0
- data/lib/sentiero/store/limits.rb +31 -0
- data/lib/sentiero/store/session_store.rb +118 -0
- data/lib/sentiero/store.rb +72 -0
- data/lib/sentiero/stores/file.rb +566 -0
- data/lib/sentiero/stores/memory.rb +362 -0
- data/lib/sentiero/stores/redis/keys.rb +59 -0
- data/lib/sentiero/stores/redis/lua.rb +119 -0
- data/lib/sentiero/stores/redis.rb +665 -0
- data/lib/sentiero/stores/sqlite/schema.rb +79 -0
- data/lib/sentiero/stores/sqlite.rb +626 -0
- data/lib/sentiero/user_agent.rb +32 -0
- data/lib/sentiero/version.rb +5 -0
- data/lib/sentiero/web/analytics_app.rb +538 -0
- data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
- data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
- data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
- data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
- data/lib/sentiero/web/assets/manifest.json +11 -0
- data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
- data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
- data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
- data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
- data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
- data/lib/sentiero/web/assets_app.rb +42 -0
- data/lib/sentiero/web/base_app.rb +319 -0
- data/lib/sentiero/web/basic_auth.rb +27 -0
- data/lib/sentiero/web/basic_auth_check.rb +41 -0
- data/lib/sentiero/web/body_reader.rb +44 -0
- data/lib/sentiero/web/csv_writer.rb +45 -0
- data/lib/sentiero/web/dashboard_app.rb +236 -0
- data/lib/sentiero/web/errors_app.rb +97 -0
- data/lib/sentiero/web/escaping.rb +37 -0
- data/lib/sentiero/web/events_app.rb +196 -0
- data/lib/sentiero/web/formatting.rb +43 -0
- data/lib/sentiero/web/ingest_app.rb +92 -0
- data/lib/sentiero/web/manifest.rb +43 -0
- data/lib/sentiero/web/monitoring_app.rb +316 -0
- data/lib/sentiero/web/script_tag.rb +57 -0
- data/lib/sentiero/web/shareable_replay.rb +88 -0
- data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
- data/lib/sentiero/web/templates/_brand.html.erb +18 -0
- data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
- data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
- data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
- data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
- data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
- data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
- data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
- data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
- data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
- data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
- data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
- data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
- data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
- data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
- data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
- data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
- data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
- data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
- data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
- data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
- data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
- data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
- data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
- data/lib/sentiero/web/templates/event_show.html.erb +52 -0
- data/lib/sentiero/web/templates/events_index.html.erb +177 -0
- data/lib/sentiero/web/templates/export_index.html.erb +69 -0
- data/lib/sentiero/web/templates/forms.html.erb +105 -0
- data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
- data/lib/sentiero/web/templates/import.html.erb +39 -0
- data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
- data/lib/sentiero/web/templates/segments.html.erb +114 -0
- data/lib/sentiero/web/templates/session_show.html.erb +195 -0
- data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
- data/lib/sentiero/web/track_app.rb +57 -0
- data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
- data/lib/sentiero/web/views/analyzer_view.rb +27 -0
- data/lib/sentiero/web/views/base_view.rb +76 -0
- data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
- data/lib/sentiero/web/views/conversions_view.rb +41 -0
- data/lib/sentiero/web/views/engagement_view.rb +67 -0
- data/lib/sentiero/web/views/errors_index_view.rb +37 -0
- data/lib/sentiero/web/views/event_show_view.rb +20 -0
- data/lib/sentiero/web/views/events_index_view.rb +56 -0
- data/lib/sentiero/web/views/export_view.rb +23 -0
- data/lib/sentiero/web/views/forms_view.rb +28 -0
- data/lib/sentiero/web/views/frustration_view.rb +15 -0
- data/lib/sentiero/web/views/funnel_view.rb +36 -0
- data/lib/sentiero/web/views/heatmap_view.rb +34 -0
- data/lib/sentiero/web/views/import_view.rb +13 -0
- data/lib/sentiero/web/views/page_report_view.rb +43 -0
- data/lib/sentiero/web/views/problem_show_view.rb +46 -0
- data/lib/sentiero/web/views/scroll_view.rb +23 -0
- data/lib/sentiero/web/views/segments_view.rb +28 -0
- data/lib/sentiero/web/views/session_show_view.rb +105 -0
- data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
- data/lib/sentiero/web/views/vitals_view.rb +45 -0
- data/lib/sentiero/web/views.rb +24 -0
- data/lib/sentiero/window_ref.rb +6 -0
- data/lib/sentiero.rb +69 -0
- metadata +232 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "analyzer"
|
|
4
|
+
require_relative "frustration/detectors"
|
|
5
|
+
|
|
6
|
+
module Sentiero
|
|
7
|
+
module Analytics
|
|
8
|
+
# Cross-session frustration signals per page URL: rage clicks (bursts at
|
|
9
|
+
# the same spot) and dead clicks (clicks the page never responds to), plus
|
|
10
|
+
# top rage-clicked elements and per-incident replay links.
|
|
11
|
+
#
|
|
12
|
+
# Detection itself lives in Frustration::Detectors (pure Ruby ports of the
|
|
13
|
+
# JS detectors, frontend/src/dashboard/frustration.js, pinned by ported
|
|
14
|
+
# tests so the two can't drift). Over the detectors' raw dead clicks this
|
|
15
|
+
# class layers cross-session aggregation and a de-noising pass: an
|
|
16
|
+
# app-level custom event in the dead window counts as a page response; the
|
|
17
|
+
# final click of a segment navigated away from is withdrawn; an
|
|
18
|
+
# error-coincident dead click is kept and tagged kind: "error".
|
|
19
|
+
class FrustrationAnalyzer < Analyzer
|
|
20
|
+
# Custom-event tag carrying the clicked element's CSS selector.
|
|
21
|
+
CLICK_TAG = "__click"
|
|
22
|
+
|
|
23
|
+
# Recorder-internal annotation prefix and the browser JS-error tag;
|
|
24
|
+
# neither proves the page responded to a click.
|
|
25
|
+
INTERNAL_TAG_PREFIX = "__"
|
|
26
|
+
ERROR_TAG = "error"
|
|
27
|
+
|
|
28
|
+
# Max ms a "__click" annotation may sit from a rage cluster's first
|
|
29
|
+
# click and still be attributed to it.
|
|
30
|
+
NEAREST_CLICK_TOLERANCE_MS = 500
|
|
31
|
+
|
|
32
|
+
# Accumulation caps during the scan (sessions scan newest-first).
|
|
33
|
+
MAX_URLS = 200
|
|
34
|
+
MAX_SELECTORS_PER_URL = 200
|
|
35
|
+
MAX_INCIDENTS_PER_URL = 20
|
|
36
|
+
TOP_SELECTORS_LIMIT = 10
|
|
37
|
+
|
|
38
|
+
# Class-level shortcut to the raw detectors, kept stable for callers outside
# this class (EngagementAnalyzer, PageReportAnalyzer) that want incidents
# without the cross-session aggregation performed by #analyze.
#
# events - the event list, passed through untouched.
#
# Returns whatever Frustration::Detectors.detect_frustration_events returns.
def self.detect_frustration_events(events)
  Frustration::Detectors.detect_frustration_events(events)
end
|
|
42
|
+
|
|
43
|
+
# Scans sessions and aggregates frustration incidents per page URL.
#
# Detectors run over the FULL window (their response semantics span page
# boundaries by design); each surviving incident is then attributed to the
# page segment its click happened on.
#
# limit:, since:, until_time: are forwarded unchanged to scan_sessions.
#
# Returns {pages: {url => summary}, was_truncated: ...}; was_truncated is
# truthy when the scan reported hitting its cap, a URL row was refused, or
# record_incident reported a swallowed selector.
def analyze(limit: nil, since: nil, until_time: nil)
  pages = {}
  accumulation_capped = false

  _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    detected = Frustration::Detectors.detect_frustration_events(events)
    next if detected.empty?

    refined = refine_incidents(detected, page_segments(events))
    next if refined.empty?

    annotations = click_annotations(events)

    refined.each do |incident|
      page = page_for(pages, incident[:url])

      if page
        recorded = record_incident(page, incident, summary[:session_id], window_id, annotations)
        accumulation_capped = true unless recorded
        page[:session_ids][summary[:session_id]] = true
      else
        # URL-row cap is full: the incident is dropped, so flag truncation.
        accumulation_capped = true
      end
    end
  end

  summaries = pages.transform_values { |page| summarize(page) }
  {pages: summaries, was_truncated: accumulation_capped || hit_cap}
end
|
|
77
|
+
|
|
78
|
+
private
|
|
79
|
+
|
|
80
|
+
# Materializes each_page_segment's yields as an array of [url, segment]
# pairs (the anchor it also yields is not needed here).
def page_segments(events)
  [].tap do |pairs|
    each_page_segment(events) { |url, segment, _anchor| pairs.push([url, segment]) }
  end
end
|
|
87
|
+
|
|
88
|
+
# Attributes every incident to the segment holding its click event and
# de-noises dead clicks. The segment is located by object identity (equal?),
# not timestamp, which avoids mis-attribution at a same-millisecond Meta
# boundary.
#
# Dropped: incidents whose click is in no segment or in a URL-less segment;
# dead clicks answered by an app-level custom event; dead clicks that are
# the last click of a segment that was navigated away from. A dead click
# coincident with an error is kept and tagged kind: "error".
#
# Returns the surviving incidents, each merged with :url and :kind.
def refine_incidents(incidents, segments)
  incidents.filter_map do |incident|
    clicked = incident[:event]
    index = segments.index { |(_url, segment)| segment.any? { |e| e.equal?(clicked) } }
    next unless index

    url, segment = segments[index]
    next unless url

    kind = nil
    if incident[:subtype] == "dead_click"
      click_ts = incident[:timestamp]
      next if custom_response?(segment, click_ts)

      if error_coincident?(segment, click_ts)
        kind = "error"
      elsif navigated_away_final_click?(segments, index, clicked)
        next
      end
    end

    incident.merge(url: url, kind: kind)
  end
end
|
|
113
|
+
|
|
114
|
+
# True when an app-level custom event sits in the click's dead window,
# meaning the page reacted (the pure detectors only see META/mutation/input).
# Recorder-internal tags ("__" prefix) and the error tag do not count.
def custom_response?(segment, click_ts)
  any_custom_in_window?(segment, click_ts) do |tag|
    internal = tag.start_with?(INTERNAL_TAG_PREFIX)
    !(internal || tag == ERROR_TAG)
  end
end
|
|
121
|
+
|
|
122
|
+
# True when a custom event tagged ERROR_TAG falls inside the click's dead window.
def error_coincident?(segment, click_ts)
  any_custom_in_window?(segment, click_ts) do |tag|
    tag == ERROR_TAG
  end
end
|
|
125
|
+
|
|
126
|
+
# True when the segment holds a CUSTOM event whose timestamp lies within
# [click_ts, click_ts + DEAD_WINDOW_MS] and whose tag satisfies the block.
# Same-tick INclusive: the recorder emits navigation/error customs in the same
# tick as the native click, which the detectors' strictly-after rule would miss.
#
# Events with a non-Numeric timestamp, a "data" value that is not a Hash, or
# a non-String tag are skipped. The Hash check matters: recorded events are
# external input, and Hash#dig raises TypeError on a String/Array "data".
#
# segment  - events of one page segment.
# click_ts - Numeric timestamp of the click under test.
#
# Yields each candidate tag (String); the block returns truthy to accept it.
def any_custom_in_window?(segment, click_ts)
  deadline = click_ts + Frustration::Detectors::DEAD_WINDOW_MS

  segment.any? do |event|
    next false unless event["type"] == CUSTOM

    ts = event["timestamp"]
    next false unless ts.is_a?(Numeric) && ts >= click_ts && ts <= deadline

    data = event["data"]
    next false unless data.is_a?(Hash)

    tag = data["tag"]
    tag.is_a?(String) && yield(tag)
  end
end
|
|
141
|
+
|
|
142
|
+
# True when event is (by identity) the last click of a non-final segment.
#
# Such a click likely CAUSED a navigation slower than the dead window, so the
# caller withdraws its dead verdict. The window's FINAL segment is exempt: a
# window that just ends proves no navigation, and the inert-button bounce is
# the signal this page exists for.
def navigated_away_final_click?(segments, index, event)
  on_final_segment = index >= segments.size - 1
  return false if on_final_segment

  _url, segment_events = segments[index]
  # Reuse the detectors' click? so "last click" can't drift from theirs.
  final_click = segment_events.reverse_each.find do |candidate|
    Frustration::Detectors.click?(candidate)
  end
  final_click.equal?(event)
end
|
|
153
|
+
|
|
154
|
+
# Fetches the accumulator for url via bounded_fetch (capped at MAX_URLS),
# supplying a zeroed accumulator for a URL not yet present. Per the original
# contract this yields nil once the URL-row cap is full.
def page_for(pages, url)
  bounded_fetch(pages, url, MAX_URLS) {
    {
      rage_count: 0,
      dead_count: 0,
      session_ids: {},
      selectors: Hash.new(0),
      incidents: []
    }
  }
end
|
|
160
|
+
|
|
161
|
+
# [timestamp, selector] pairs from the window's "__click" annotations.
#
# Only CUSTOM events tagged CLICK_TAG with a Hash payload, a non-empty String
# selector and a Numeric timestamp contribute; anything else is skipped.
# The payload is type-checked explicitly because recorded events are external
# input and Hash#dig raises TypeError when "payload" is a String or Array.
def click_annotations(events)
  events.filter_map do |event|
    next unless event["type"] == CUSTOM

    data = event["data"]
    next unless data.is_a?(Hash) && data["tag"] == CLICK_TAG

    payload = data["payload"]
    next unless payload.is_a?(Hash)

    selector = payload["selector"]
    next unless selector.is_a?(String) && !selector.empty?

    timestamp = event["timestamp"]
    next unless timestamp.is_a?(Numeric)

    [timestamp, selector]
  end
end
|
|
175
|
+
|
|
176
|
+
# Folds one incident into its page accumulator.
#
# Rage clicks bump :rage_count and, when a nearby "__click" annotation names
# a selector, that selector's tally; every other subtype bumps :dead_count.
# An incident row is appended only while fewer than MAX_INCIDENTS_PER_URL
# rows exist (a display bound; the counts above are unaffected).
#
# Returns false when the per-URL selector cap swallowed a new selector (the
# only lossy path), true otherwise.
def record_incident(page, incident, session_id, window_id, annotations)
  selector = nil
  complete = true

  if incident[:subtype] == "rage_click"
    page[:rage_count] += 1
    selector = nearest_selector(annotations, incident[:timestamp])

    if selector
      tallies = page[:selectors]
      if tallies.key?(selector) || tallies.size < MAX_SELECTORS_PER_URL
        tallies[selector] += 1
      else
        complete = false
      end
    end
  else
    page[:dead_count] += 1
  end

  rows = page[:incidents]
  if rows.size < MAX_INCIDENTS_PER_URL
    rows << {
      subtype: incident[:subtype],
      session_id: session_id,
      window_id: window_id,
      # Negative offsets are floored at zero before rounding.
      offset_ms: [incident[:offset], 0].max.round,
      count: incident[:count],
      selector: selector,
      kind: incident[:kind]
    }
  end

  complete
end
|
|
210
|
+
|
|
211
|
+
# Picks the "__click" annotation whose timestamp is closest to timestamp and
# returns its selector, provided the gap is at most
# NEAREST_CLICK_TOLERANCE_MS. Returns nil when there are no annotations or
# the closest one is too far away.
#
# annotations - array of [timestamp, selector] pairs.
def nearest_selector(annotations, timestamp)
  closest = annotations.min_by { |pair| (pair[0] - timestamp).abs }
  return nil unless closest

  ts, selector = closest
  gap = (ts - timestamp).abs
  (gap <= NEAREST_CLICK_TOLERANCE_MS) ? selector : nil
end
|
|
218
|
+
|
|
219
|
+
# Converts a page accumulator into its reported shape: raw counts, the number
# of distinct sessions affected, the top selectors and the incident rows.
def summarize(page)
  rage, dead, rows = page.values_at(:rage_count, :dead_count, :incidents)

  {
    rage_count: rage,
    dead_count: dead,
    sessions_affected: page[:session_ids].size,
    top_selectors: top_selectors(page[:selectors]),
    incidents: rows
  }
end
|
|
228
|
+
|
|
229
|
+
# Ranks selectors with top_counts (at most TOP_SELECTORS_LIMIT) and reshapes
# each ranked pair into a {selector:, count:} row.
def top_selectors(selectors)
  ranked = top_counts(selectors, limit: TOP_SELECTORS_LIMIT)
  ranked.map do |(name, hits)|
    {selector: name, count: hits}
  end
end
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
end
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "analyzer"
|
|
4
|
+
|
|
5
|
+
module Sentiero
|
|
6
|
+
module Analytics
|
|
7
|
+
# Custom-event funnel: ordered step conversion across sessions. A session
|
|
8
|
+
# reaches step N+1 only when an event with that tag occurs strictly after
|
|
9
|
+
# the step-N match. Greedy-earliest chain matching is optimal for subsequence
|
|
10
|
+
# reachability, so "how far did this session get" is exact.
|
|
11
|
+
class FunnelAnalyzer < Analyzer
|
|
12
|
+
# Excluded as a funnel step; has its own ErrorDiscovery surface.
|
|
13
|
+
ERROR_TAG = "error"
|
|
14
|
+
|
|
15
|
+
# Prefix of recorder-internal annotation tags (__perf, __click, ...).
|
|
16
|
+
INTERNAL_TAG_PREFIX = "__"
|
|
17
|
+
|
|
18
|
+
MAX_STEPS = 3
|
|
19
|
+
|
|
20
|
+
MAX_TAGS = 200
|
|
21
|
+
|
|
22
|
+
# Bounds per-session memory.
|
|
23
|
+
MAX_STEP_EVENTS_PER_SESSION = 100
|
|
24
|
+
|
|
25
|
+
MAX_EXAMPLES_PER_STEP = 10
|
|
26
|
+
|
|
27
|
+
class << self
  # True for anything that cannot be a funnel step: a non-String, an empty
  # string, a recorder-internal tag ("__" prefix) or the error tag.
  def internal_tag?(tag)
    return true unless tag.is_a?(String)
    return true if tag.empty?

    tag.start_with?(INTERNAL_TAG_PREFIX) || tag == ERROR_TAG
  end

  # Coerces tags to an array, drops unusable entries and keeps at most the
  # first MAX_STEPS of what remains (order preserved).
  def usable_steps(tags)
    Array(tags).select { |tag| !internal_tag?(tag) }.take(MAX_STEPS)
  end
end
|
|
36
|
+
|
|
37
|
+
# Scans sessions, collecting the custom-tag vocabulary and, per session, the
# step-tagged events needed to compute the funnel.
#
# steps - candidate step tags; filtered through usable_steps. Fewer than 2
#         usable steps yields steps: [] but still collects vocabulary.
# limit:, since:, until_time: are forwarded unchanged to scan_sessions.
#
# Returns {tags: sorted vocabulary, steps: per-step summaries,
# was_truncated: ...}; was_truncated is truthy when the scan reported its
# cap, a new tag was refused past MAX_TAGS, or a session exceeded
# MAX_STEP_EVENTS_PER_SESSION.
def analyze(steps = [], limit: nil, since: nil, until_time: nil)
  usable = self.class.usable_steps(steps)
  steps = (usable.size < 2) ? [] : usable
  wanted = steps.uniq

  vocabulary = {}
  step_events = {}
  capped = false

  _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    session_id = summary[:session_id]
    anchor = events.first&.fetch("timestamp", nil)

    events.each do |event|
      tag = custom_tag(event)
      next unless tag

      capped = true unless tally_tag(vocabulary, tag)
      # With no funnel requested, wanted is empty and nothing passes here.
      next unless wanted.include?(tag)

      timestamp = event["timestamp"]
      next unless timestamp.is_a?(Numeric)

      entries = (step_events[session_id] ||= [])
      if entries.size < MAX_STEP_EVENTS_PER_SESSION
        entries << {
          tag: tag,
          timestamp: timestamp,
          window_id: window_id,
          offset_ms: offset_ms(anchor, timestamp)
        }
      else
        capped = true
      end
    end
  end

  {
    tags: vocabulary.keys.sort,
    steps: summarize_steps(steps, step_events),
    was_truncated: capped || hit_cap
  }
end
|
|
80
|
+
|
|
81
|
+
private
|
|
82
|
+
|
|
83
|
+
# Extracts the tag of a CUSTOM event. Returns nil when the event is not a
# Hash, is not of type CUSTOM, carries non-Hash data, or its tag is rejected
# by internal_tag?.
def custom_tag(event)
  return nil unless event.is_a?(Hash)
  return nil unless event["type"] == CUSTOM

  data = event["data"]
  return nil unless data.is_a?(Hash)

  candidate = data["tag"]
  return nil if self.class.internal_tag?(candidate)

  candidate
end
|
|
90
|
+
|
|
91
|
+
# Registers tag in the vocabulary hash. Known tags and newly added tags
# return true; a new tag arriving once MAX_TAGS are held is not stored and
# returns false (signals truncation).
def tally_tag(tags, tag)
  unless tags.key?(tag)
    return false if tags.size >= MAX_TAGS

    tags[tag] = true
  end

  true
end
|
|
98
|
+
|
|
99
|
+
# Builds one summary row per funnel step from the per-session step events.
#
# For each session the chain of matched steps is computed; every reached
# step's session count is incremented, the gap between consecutive matches
# is recorded, and a session that stopped short of the last step is kept as
# a drop-off example for the last step it reached (at most
# MAX_EXAMPLES_PER_STEP per step).
#
# Returns [] when steps is empty, otherwise an array of
# {tag:, sessions:, conversion_pct:, median_ms_from_previous:,
# drop_off_examples:} in step order. conversion_pct is relative to step one
# and nil when no session reached it.
def summarize_steps(steps, sessions)
  return [] if steps.empty?

  total = steps.size
  counts = Array.new(total, 0)
  inter_times = Array.new(total) { [] }
  examples = Array.new(total) { [] }

  sessions.each do |session_id, entries|
    matches = chain(steps, entries)
    reached = matches.size
    next if reached.zero?

    reached.times { |i| counts[i] += 1 }
    matches.each_cons(2).with_index(1) do |(earlier, later), i|
      inter_times[i] << (later[:timestamp] - earlier[:timestamp])
    end

    # Converted sessions never drop off.
    next if reached >= total

    bucket = examples[reached - 1]
    next if bucket.size >= MAX_EXAMPLES_PER_STEP

    furthest = matches.last
    bucket << {session_id: session_id, window_id: furthest[:window_id], offset_ms: furthest[:offset_ms]}
  end

  entered = counts[0]
  steps.each_with_index.map do |tag, i|
    pct = entered.zero? ? nil : (counts[i].to_f / entered * 100).round(1)
    gap = i.zero? ? nil : median(inter_times[i])

    {
      tag: tag,
      sessions: counts[i],
      conversion_pct: pct,
      median_ms_from_previous: gap,
      drop_off_examples: examples[i]
    }
  end
end
|
|
134
|
+
|
|
135
|
+
# Greedy earliest chain over time-sorted step events: an event matches when
# its tag is the next pending step and its timestamp is strictly after the
# previous match.
#
# The sort breaks timestamp ties by original position. Ruby's sort_by is not
# guaranteed stable, so without the tie-break two same-tag entries sharing a
# timestamp could be picked in either order, making the matched entry's
# window_id/offset_ms nondeterministic.
#
# steps   - ordered step tags.
# entries - hashes with at least :tag and a Numeric :timestamp.
#
# Returns the matched entries in step order (possibly fewer than steps.size).
def chain(steps, entries)
  matches = []
  last_ts = nil

  ordered = entries.sort_by.with_index { |entry, position| [entry[:timestamp], position] }
  ordered.each do |entry|
    break if matches.size >= steps.size
    next unless entry[:tag] == steps[matches.size]
    next unless last_ts.nil? || entry[:timestamp] > last_ts

    matches << entry
    last_ts = entry[:timestamp]
  end

  matches
end
|
|
153
|
+
|
|
154
|
+
# 50th percentile of values (sorted before being handed to percentile);
# nil for an empty list.
def median(values)
  values.empty? ? nil : percentile(values.sort, 50)
end
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "analyzer"
|
|
4
|
+
require_relative "collectors/click_collector"
|
|
5
|
+
|
|
6
|
+
module Sentiero
|
|
7
|
+
module Analytics
|
|
8
|
+
# Aggregates click coordinates for a single page URL into a normalized density
|
|
9
|
+
# grid plus a most-clicked-elements table. The per-segment density math lives
|
|
10
|
+
# in ClickCollector (shared with PageReportAnalyzer).
|
|
11
|
+
class HeatmapAnalyzer < Analyzer
|
|
12
|
+
CLICK_TAG = ClickCollector::CLICK_TAG
|
|
13
|
+
GRID_SIZE = ClickCollector::GRID_SIZE
|
|
14
|
+
|
|
15
|
+
TOP_ELEMENTS_LIMIT = 20
|
|
16
|
+
MAX_URLS = 200
|
|
17
|
+
|
|
18
|
+
# Aggregates clicks recorded on target_url into a ClickCollector.
#
# Clicks are attributed per page segment (Meta-href boundaries): only
# segments whose URL equals target_url are collected. The first window whose
# collect call returns non-nil becomes the representative window.
#
# limit:, since:, until_time: are forwarded unchanged to scan_sessions.
#
# Returns {clicks_by_bucket:, top_elements:, total_clicks:,
# representative_window:, was_truncated:}.
def analyze(target_url, limit: nil, since: nil, until_time: nil)
  collector = ClickCollector.new
  representative = nil

  _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    session_id = summary[:session_id]

    each_page_segment(events) do |url, segment, _anchor|
      next if url != target_url
      next if collector.collect(segment).nil?

      representative ||= {session_id: session_id, window_id: window_id}
    end
  end

  {
    clicks_by_bucket: collector.buckets,
    top_elements: top_elements(collector.selectors),
    total_clicks: collector.total,
    representative_window: representative,
    was_truncated: hit_cap
  }
end
|
|
42
|
+
|
|
43
|
+
# Builds a url => top-elements table across all scanned sessions.
#
# "__click" selectors are tallied per page-segment URL; URL-less segments are
# ignored and at most MAX_URLS distinct URLs are tracked (later new URLs are
# skipped). The result is keyed in ascending URL order.
#
# since:, until_time: are forwarded unchanged to scan_sessions.
def build_heatmap_table(since: nil, until_time: nil)
  tallies = {}

  scan_sessions(since: since, until_time: until_time) do |_summary, _window_id, events|
    each_page_segment(events) do |url, segment, _anchor|
      next unless url
      next unless tallies.key?(url) || tallies.size < MAX_URLS

      counts = (tallies[url] ||= Hash.new(0))
      segment.each { |event| tally_selector(counts, event) }
    end
  end

  tallies.keys.sort.each_with_object({}) do |url, table|
    table[url] = top_elements(tallies[url])
  end
end
|
|
63
|
+
|
|
64
|
+
# Distinct page-segment URLs seen across all scanned sessions, in first-seen
# order, capped at MAX_URLS (new URLs past the cap are ignored).
def recorded_urls
  seen = {}

  scan_sessions do |_summary, _window_id, events|
    each_page_segment(events) do |url, _segment, _anchor|
      next unless url
      next if !seen.key?(url) && seen.size >= MAX_URLS

      seen[url] = true
    end
  end

  seen.keys
end
|
|
75
|
+
|
|
76
|
+
private
|
|
77
|
+
|
|
78
|
+
# Increments selectors[selector] when event is a CUSTOM "__click" annotation
# carrying a non-empty String selector; otherwise does nothing.
#
# The payload is type-checked explicitly: recorded events are external input
# and Hash#dig raises TypeError when "payload" is a String or Array, which
# would abort the whole scan.
#
# selectors - counting Hash (missing keys must default to 0).
# event     - one recorded event.
def tally_selector(selectors, event)
  return unless event["type"] == CUSTOM

  data = event["data"]
  return unless data.is_a?(Hash) && data["tag"] == CLICK_TAG

  payload = data["payload"]
  return unless payload.is_a?(Hash)

  selector = payload["selector"]
  selectors[selector] += 1 if selector.is_a?(String) && !selector.empty?
end
|
|
86
|
+
|
|
87
|
+
# Ranks selectors with top_counts (at most TOP_ELEMENTS_LIMIT) and reshapes
# each ranked pair into a {selector:, count:} row.
def top_elements(selectors)
  ranked = top_counts(selectors, limit: TOP_ELEMENTS_LIMIT)
  ranked.map do |(name, hits)|
    {selector: name, count: hits}
  end
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "analyzer"
|
|
4
|
+
require_relative "frustration_analyzer"
|
|
5
|
+
require_relative "collectors/click_collector"
|
|
6
|
+
require_relative "collectors/scroll_collector"
|
|
7
|
+
require_relative "collectors/vitals_collector"
|
|
8
|
+
require_relative "collectors/error_collector"
|
|
9
|
+
require_relative "collectors/custom_tag_collector"
|
|
10
|
+
require_relative "collectors/form_collector"
|
|
11
|
+
require_relative "collectors/frustration_collector"
|
|
12
|
+
|
|
13
|
+
module Sentiero
|
|
14
|
+
module Analytics
|
|
15
|
+
# Per-URL drill-down: composes the suite's metrics (heatmap, scroll, forms,
|
|
16
|
+
# vitals, frustration, errors, custom tags) for one URL into one report via
|
|
17
|
+
# ONE bounded Store#each_session_events scan. Per-segment math lives in
|
|
18
|
+
# shared Collectors::. Frustration is the exception: only detection is shared
|
|
19
|
+
# (FrustrationAnalyzer.detect_frustration_events); the cross-session
|
|
20
|
+
# aggregation differs, so this URL's attribution uses FrustrationCollector.
|
|
21
|
+
class PageReportAnalyzer < Analyzer
|
|
22
|
+
# Output bounds — each caps a collector (flips #capped on hit).
|
|
23
|
+
MAX_SELECTORS = 200
|
|
24
|
+
MAX_SAMPLES_PER_METRIC = 2000
|
|
25
|
+
MAX_ERROR_GROUPS = 200
|
|
26
|
+
MAX_ERROR_OCCURRENCES = 50
|
|
27
|
+
MAX_CUSTOM_TAGS = 200
|
|
28
|
+
MAX_FIELDS = 500
|
|
29
|
+
|
|
30
|
+
# Display limits.
|
|
31
|
+
TOP_ELEMENTS_LIMIT = 20
|
|
32
|
+
TOP_SELECTORS_LIMIT = 10
|
|
33
|
+
|
|
34
|
+
# Builds the per-URL report for target_url in a single session scan.
#
# since/until_time are epoch seconds. Every result key is always present.
#
# Per window: frustration is detected once over the whole window, then each
# page segment whose URL equals target_url feeds every collector. After the
# segments, entry/exit/bounce are decided from the order of segments, and the
# scroll collector is flushed for the window.
def analyze(target_url, limit: nil, since: nil, until_time: nil)
  acc = new_accumulator

  _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    session_id = summary[:session_id]

    # Detect over the FULL window: frustration semantics span page
    # boundaries. FrustrationCollector then attributes each incident to a
    # segment by object identity.
    frustration = FrustrationAnalyzer.detect_frustration_events(events)

    # One boolean per segment, in order: was that segment the target URL?
    visits = []

    each_page_segment(events) do |url, segment, anchor|
      on_target = (url == target_url)
      visits << on_target
      next unless on_target

      acc[:page_views] += 1
      acc[:sessions][session_id] = true

      collect_time_on_page(acc, segment)
      collect_heatmap(acc, segment, session_id, window_id)
      %i[vitals errors].each do |key|
        acc[key].collect(segment, session_id: session_id, window_id: window_id, anchor: anchor)
      end
      acc[:custom_tags].collect(segment)
      acc[:forms].collect(session_id, url, segment)
      acc[:scroll].observe(target_url, segment)
      acc[:frustration].collect(frustration, segment)
    end

    # entry/exit/bounce decided once per window from the segment order.
    if visits.any?
      acc[:windows_on_page] += 1
      acc[:entries] += 1 if visits.first
      acc[:exits] += 1 if visits.last
      acc[:bounces] += 1 if visits.size == 1 && visits.first
    end

    # One scroll sample per (session, window): deepest wins.
    acc[:scroll].flush_window
  end

  build_result(target_url, acc, hit_cap)
end
|
|
87
|
+
|
|
88
|
+
private
|
|
89
|
+
|
|
90
|
+
# Builds the empty accumulator that the analysis pass fills in.
#
# Scalar tallies start at zero. :sessions is a Hash used as a set (callers
# store session_id => true and later read its size), :dwell_samples is an
# Array of per-visit durations, and :representative stays nil until
# collect_heatmap picks a window. Every other entry is a collector built
# with the bound named by the matching MAX_* constant.
#
# Returns a new Hash (with new collectors) on every call.
def new_accumulator
  {
    page_views: 0,
    sessions: {},
    dwell_samples: [],
    windows_on_page: 0,
    entries: 0,
    exits: 0,
    bounces: 0,
    representative: nil,
    clicks: ClickCollector.new(max_selectors: MAX_SELECTORS),
    scroll: ScrollCollector.new,
    vitals: VitalsCollector.new(max_samples: MAX_SAMPLES_PER_METRIC),
    errors: ErrorCollector.new(max_groups: MAX_ERROR_GROUPS, max_occurrences: MAX_ERROR_OCCURRENCES),
    custom_tags: CustomTagCollector.new(max_tags: MAX_CUSTOM_TAGS),
    forms: FormCollector.new(max_fields: MAX_FIELDS),
    frustration: FrustrationCollector.new(max_selectors: MAX_SELECTORS)
  }
end
|
|
109
|
+
|
|
110
|
+
# Records one dwell sample for a page segment: latest timestamp minus
# earliest timestamp (presumably milliseconds, given the *_ms keys the
# summary uses — confirm against the event producer).
#
# An A→B→A revisit yields TWO target segments, hence TWO dwell samples —
# intended ("time on page per visit").
#
# Events whose "timestamp" is not Numeric are ignored. A segment with fewer
# than two usable timestamps has no measurable duration and adds nothing.
#
# acc     - accumulator Hash; the sample is appended to acc[:dwell_samples].
# segment - Array of event Hashes for one visit to the page.
#
# Returns the samples Array, or nil when no sample was recorded.
def collect_time_on_page(acc, segment)
  # Read each timestamp once, then keep only numeric values.
  timestamps = segment.map { |event| event["timestamp"] }.grep(Numeric)
  return if timestamps.size < 2

  earliest, latest = timestamps.minmax
  acc[:dwell_samples] << (latest - earliest)
end
|
|
117
|
+
|
|
118
|
+
# Feeds a segment's clicks to the click collector and, the first time a
# segment is accepted, remembers its window as the representative one.
#
# A segment with no valid Meta width/height contributes zero clicks
# (collect returns nil) and never becomes the representative window.
# Any non-nil result from collect counts as accepted.
#
# acc        - accumulator Hash; reads acc[:clicks], may set acc[:representative].
# segment    - Array of event Hashes for one visit to the page.
# session_id - session identifier stored in the representative entry.
# window_id  - window identifier stored in the representative entry.
#
# Returns the representative Hash, or nil when the segment was rejected.
def collect_heatmap(acc, segment, session_id, window_id)
  added = acc[:clicks].collect(segment)
  return if added.nil?

  # First accepted segment wins; later ones leave the choice untouched.
  acc[:representative] ||= {session_id: session_id, window_id: window_id}
end
|
|
124
|
+
|
|
125
|
+
# Assembles the final page-report Hash from the filled accumulator.
#
# target_url - the URL the report is about; echoed back under :url and
#              passed to the scroll summary.
# acc        - accumulator Hash as produced by new_accumulator.
# hit_cap    - truthy when the caller stopped early at some cap (the cap
#              itself is decided outside this method).
#
# :bounce_rate is bounces / entries, or 0.0 when there were no entries.
# :was_truncated is truthy when any collector reports capped or hit_cap is
# truthy. It is evaluated last, after every summary call has run, so it
# sees whatever capped state those calls leave behind.
#
# Returns the report Hash.
def build_result(target_url, acc, hit_cap)
  collectors = acc.values_at(:clicks, :scroll, :vitals, :errors, :custom_tags, :forms, :frustration)
  entries = acc[:entries]
  bounce_rate = entries.zero? ? 0.0 : acc[:bounces].to_f / entries

  {
    url: target_url,
    sessions: acc[:sessions].size,
    page_views: acc[:page_views],
    time_on_page: summarize_time_on_page(acc[:dwell_samples]),
    entry_exit: {
      entries: entries,
      exits: acc[:exits],
      bounce_rate: bounce_rate,
      windows_on_page: acc[:windows_on_page]
    },
    heatmap: {
      top_elements: top_selectors(acc[:clicks].selectors, TOP_ELEMENTS_LIMIT),
      total_clicks: acc[:clicks].total,
      representative_window: acc[:representative]
    },
    scroll: acc[:scroll].summarize(target_url),
    forms: build_forms_section(acc[:forms]),
    vitals: build_vitals_section(acc[:vitals]),
    errors: acc[:errors].summarize,
    frustration: {
      rage_count: acc[:frustration].rage_count,
      dead_count: acc[:frustration].dead_count,
      top_selectors: top_selectors(acc[:frustration].selectors, TOP_SELECTORS_LIMIT)
    },
    custom_events: acc[:custom_tags].top(MAX_CUSTOM_TAGS),
    was_truncated: collectors.any?(&:capped) || hit_cap
  }
end
|
|
156
|
+
|
|
157
|
+
# Reduces the collected dwell samples to mean, median and sample count.
#
# samples - Array of numeric durations (one per page visit).
#
# With no samples, both statistics are nil rather than zero so an absent
# measurement cannot be mistaken for an instant visit. The mean is rounded
# to the nearest integer; the median is whatever percentile returns for
# the 50th percentile of the sorted samples. The input Array is not
# modified.
#
# Returns a Hash with :mean_ms, :median_ms and :samples.
def summarize_time_on_page(samples)
  count = samples.size
  return {mean_ms: nil, median_ms: nil, samples: 0} if count.zero?

  {
    mean_ms: (samples.sum.to_f / count).round,
    median_ms: percentile(samples.sort, 50),
    samples: count
  }
end
|
|
167
|
+
|
|
168
|
+
# Builds the :forms part of the page report from the form collector.
#
# forms - collector responding to started_count, submitted_count,
#         total_submits, summarize_fields(started) and summarize_drop_off.
#
# "completed" here = session submitted on the target URL at all
# (submitted_count), not FormAnalyzer's stricter completed_count.
# :completion_rate is submitted / started, or 0.0 when nothing was started.
#
# Returns a Hash with :started, :completed, :completion_rate,
# :total_submits, :fields and :drop_off_fields.
def build_forms_section(forms)
  started = forms.started_count
  # Read once so :completed and :completion_rate cannot disagree.
  submitted = forms.submitted_count

  {
    started: started,
    completed: submitted,
    completion_rate: started.zero? ? 0.0 : submitted.to_f / started,
    total_submits: forms.total_submits,
    fields: forms.summarize_fields(started),
    drop_off_fields: forms.summarize_drop_off
  }
end
|
|
181
|
+
|
|
182
|
+
# Builds the :vitals part of the page report from the vitals collector.
#
# The collector's worst-sample carries :value (WebVitalsAnalyzer needs it);
# the page report exposes only the replay-link coordinates
# (:session_id, :window_id, :offset_ms).
#
# The trimmed copy is built from new Hashes instead of editing the summary
# in place, so whatever the collector handed out keeps its full
# worst-sample even if that structure is shared or reused elsewhere.
#
# vitals - collector whose summarize returns a Hash with a :metrics Hash
#          of per-metric Hashes.
#
# Returns a Hash equal to the summary, with each metric's :worst reduced
# to the three coordinates (or nil when the metric has no worst sample).
def build_vitals_section(vitals)
  summary = vitals.summarize
  trimmed_metrics = summary[:metrics].transform_values do |metric|
    metric.merge(worst: metric[:worst]&.slice(:session_id, :window_id, :offset_ms))
  end
  summary.merge(metrics: trimmed_metrics)
end
|
|
191
|
+
|
|
192
|
+
# Shapes the most frequent selectors into report rows.
#
# selectors - selector => count tallies, passed straight to top_counts.
# limit     - maximum number of rows, forwarded as top_counts' limit:.
#
# Ordering and truncation are entirely top_counts' responsibility; this
# method only converts each (selector, count) pair into a Hash.
#
# Returns an Array of {selector:, count:} Hashes.
def top_selectors(selectors, limit)
  top_counts(selectors, limit: limit).map do |selector, count|
    {selector: selector, count: count}
  end
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|