sentiero 1.0.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +7 -0
- data/README.md +679 -0
- data/lib/sentiero/analytics/analyzer.rb +91 -0
- data/lib/sentiero/analytics/bounded.rb +29 -0
- data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
- data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
- data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
- data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
- data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
- data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
- data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
- data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
- data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
- data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
- data/lib/sentiero/analytics/entry_attribution.rb +71 -0
- data/lib/sentiero/analytics/error_discovery.rb +118 -0
- data/lib/sentiero/analytics/events.rb +21 -0
- data/lib/sentiero/analytics/exporter.rb +242 -0
- data/lib/sentiero/analytics/form_analyzer.rb +153 -0
- data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
- data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
- data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
- data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
- data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
- data/lib/sentiero/analytics/problem_detail.rb +97 -0
- data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
- data/lib/sentiero/analytics/segmenter.rb +133 -0
- data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
- data/lib/sentiero/analytics/stats.rb +30 -0
- data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
- data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
- data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
- data/lib/sentiero/configuration.rb +184 -0
- data/lib/sentiero/erasure.rb +48 -0
- data/lib/sentiero/fingerprint.rb +34 -0
- data/lib/sentiero/ip_anonymizer.rb +29 -0
- data/lib/sentiero/redaction/config.rb +61 -0
- data/lib/sentiero/redaction.rb +207 -0
- data/lib/sentiero/reporter/configuration.rb +50 -0
- data/lib/sentiero/reporter/context.rb +31 -0
- data/lib/sentiero/reporter/dispatcher.rb +91 -0
- data/lib/sentiero/reporter/http_transport.rb +57 -0
- data/lib/sentiero/reporter/log_transport.rb +26 -0
- data/lib/sentiero/reporter/middleware.rb +62 -0
- data/lib/sentiero/reporter/normalizer.rb +14 -0
- data/lib/sentiero/reporter/null_transport.rb +18 -0
- data/lib/sentiero/reporter/report_context.rb +29 -0
- data/lib/sentiero/reporter/scrubber.rb +47 -0
- data/lib/sentiero/reporter/test_helper.rb +32 -0
- data/lib/sentiero/reporter/test_transport.rb +28 -0
- data/lib/sentiero/reporter.rb +214 -0
- data/lib/sentiero/roda.rb +47 -0
- data/lib/sentiero/store/error_store.rb +220 -0
- data/lib/sentiero/store/limits.rb +31 -0
- data/lib/sentiero/store/session_store.rb +118 -0
- data/lib/sentiero/store.rb +72 -0
- data/lib/sentiero/stores/file.rb +566 -0
- data/lib/sentiero/stores/memory.rb +362 -0
- data/lib/sentiero/stores/redis/keys.rb +59 -0
- data/lib/sentiero/stores/redis/lua.rb +119 -0
- data/lib/sentiero/stores/redis.rb +665 -0
- data/lib/sentiero/stores/sqlite/schema.rb +79 -0
- data/lib/sentiero/stores/sqlite.rb +626 -0
- data/lib/sentiero/user_agent.rb +32 -0
- data/lib/sentiero/version.rb +5 -0
- data/lib/sentiero/web/analytics_app.rb +538 -0
- data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
- data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
- data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
- data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
- data/lib/sentiero/web/assets/manifest.json +11 -0
- data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
- data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
- data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
- data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
- data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
- data/lib/sentiero/web/assets_app.rb +42 -0
- data/lib/sentiero/web/base_app.rb +319 -0
- data/lib/sentiero/web/basic_auth.rb +27 -0
- data/lib/sentiero/web/basic_auth_check.rb +41 -0
- data/lib/sentiero/web/body_reader.rb +44 -0
- data/lib/sentiero/web/csv_writer.rb +45 -0
- data/lib/sentiero/web/dashboard_app.rb +236 -0
- data/lib/sentiero/web/errors_app.rb +97 -0
- data/lib/sentiero/web/escaping.rb +37 -0
- data/lib/sentiero/web/events_app.rb +196 -0
- data/lib/sentiero/web/formatting.rb +43 -0
- data/lib/sentiero/web/ingest_app.rb +92 -0
- data/lib/sentiero/web/manifest.rb +43 -0
- data/lib/sentiero/web/monitoring_app.rb +316 -0
- data/lib/sentiero/web/script_tag.rb +57 -0
- data/lib/sentiero/web/shareable_replay.rb +88 -0
- data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
- data/lib/sentiero/web/templates/_brand.html.erb +18 -0
- data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
- data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
- data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
- data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
- data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
- data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
- data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
- data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
- data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
- data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
- data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
- data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
- data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
- data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
- data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
- data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
- data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
- data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
- data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
- data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
- data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
- data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
- data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
- data/lib/sentiero/web/templates/event_show.html.erb +52 -0
- data/lib/sentiero/web/templates/events_index.html.erb +177 -0
- data/lib/sentiero/web/templates/export_index.html.erb +69 -0
- data/lib/sentiero/web/templates/forms.html.erb +105 -0
- data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
- data/lib/sentiero/web/templates/import.html.erb +39 -0
- data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
- data/lib/sentiero/web/templates/segments.html.erb +114 -0
- data/lib/sentiero/web/templates/session_show.html.erb +195 -0
- data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
- data/lib/sentiero/web/track_app.rb +57 -0
- data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
- data/lib/sentiero/web/views/analyzer_view.rb +27 -0
- data/lib/sentiero/web/views/base_view.rb +76 -0
- data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
- data/lib/sentiero/web/views/conversions_view.rb +41 -0
- data/lib/sentiero/web/views/engagement_view.rb +67 -0
- data/lib/sentiero/web/views/errors_index_view.rb +37 -0
- data/lib/sentiero/web/views/event_show_view.rb +20 -0
- data/lib/sentiero/web/views/events_index_view.rb +56 -0
- data/lib/sentiero/web/views/export_view.rb +23 -0
- data/lib/sentiero/web/views/forms_view.rb +28 -0
- data/lib/sentiero/web/views/frustration_view.rb +15 -0
- data/lib/sentiero/web/views/funnel_view.rb +36 -0
- data/lib/sentiero/web/views/heatmap_view.rb +34 -0
- data/lib/sentiero/web/views/import_view.rb +13 -0
- data/lib/sentiero/web/views/page_report_view.rb +43 -0
- data/lib/sentiero/web/views/problem_show_view.rb +46 -0
- data/lib/sentiero/web/views/scroll_view.rb +23 -0
- data/lib/sentiero/web/views/segments_view.rb +28 -0
- data/lib/sentiero/web/views/session_show_view.rb +105 -0
- data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
- data/lib/sentiero/web/views/vitals_view.rb +45 -0
- data/lib/sentiero/web/views.rb +24 -0
- data/lib/sentiero/window_ref.rb +6 -0
- data/lib/sentiero.rb +69 -0
- metadata +232 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "events"
|
|
4
|
+
require_relative "stats"
|
|
5
|
+
require_relative "bounded"
|
|
6
|
+
require_relative "entry_attribution"
|
|
7
|
+
|
|
8
|
+
module Sentiero
|
|
9
|
+
# Compute-on-read analytics: query the store and aggregate at request time.
|
|
10
|
+
module Analytics
|
|
11
|
+
class Analyzer
  include Events
  include Stats
  include Bounded
  include EntryAttribution

  attr_reader :store

  # store: the backing store every scan reads from; defaults to Sentiero.store.
  def initialize(store = Sentiero.store)
    @store = store
  end

  private

  # The standard bounded session scan: yields each window's
  # [summary, window_id, events] up to the scan cap, and returns
  # [sessions_scanned, hit_cap]. Counts DISTINCT sessions (not windows), so
  # `hit_cap` is correct even when a session spans several windows. Callers
  # build was_truncated as `collector.capped || hit_cap`.
  #
  # `limit` overrides store.limits.analytics_max_scan_sessions. When neither
  # yields a cap (nil), hit_cap is reported as false instead of raising on the
  # Integer/nil comparison.
  # NOTE(review): whether the store itself accepts `limit: nil` is not visible
  # from this file — confirm against the store implementations.
  def scan_sessions(limit: nil, since: nil, until_time: nil)
    scan_cap = limit || store.limits.analytics_max_scan_sessions
    seen = {}
    store.each_session_events(limit: scan_cap, since: since, until_time: until_time) do |summary, window_id, events|
      seen[summary[:session_id]] = true
      yield summary, window_id, events
    end
    [seen.size, !scan_cap.nil? && seen.size >= scan_cap]
  end

  # Absolute difference between summary[:last_event_at] and
  # summary[:first_event_at]; nil when either one is missing.
  def duration_ms(summary)
    first = summary[:first_event_at]
    last = summary[:last_event_at]
    return nil unless first && last

    (last - first).abs
  end

  # First event whose type is META and whose "data" is a Hash, or nil.
  # Non-Hash entries are skipped (same guard as meta_href) rather than raising.
  def meta_event(events)
    events.find do |event|
      event.is_a?(Hash) && event["type"] == META && event["data"].is_a?(Hash)
    end
  end

  # Splits a window's events into per-page segments on Meta href boundaries
  # (one non-SPA window spans every page). Yields [url, segment_events,
  # anchor_ts]; consecutive same-href Metas (same-URL reloads) stay in one
  # segment. anchor_ts is the WINDOW's first timestamp for every segment:
  # replay deep-links (?t=offset) are window-relative, never segment-local.
  #
  # When no event carries a usable href, the whole window is yielded once
  # with a nil url. Yields nothing for an empty event list.
  def each_page_segment(events)
    return if events.empty?

    first_event = events.first
    # A malformed (non-Hash) first event gives a nil anchor instead of raising.
    anchor_ts = first_event.is_a?(Hash) ? first_event["timestamp"] : nil

    boundaries = [] # [start_index, url] per href change
    events.each_with_index do |event, index|
      url = meta_href(event)
      next unless url

      boundaries << [index, url] if boundaries.empty? || boundaries.last[1] != url
    end

    if boundaries.empty?
      yield nil, events, anchor_ts
      return
    end

    boundaries.each_with_index do |(start, url), i|
      start = 0 if i.zero? # pre-first-Meta events belong to the first page
      stop = boundaries[i + 1]&.first || events.size
      yield url, events[start...stop], anchor_ts
    end
  end

  # The non-empty String href of a META event, or nil for anything else
  # (non-Hash event, other type, non-Hash data, missing/blank/non-String href).
  def meta_href(event)
    return nil unless event.is_a?(Hash) && event["type"] == META

    data = event["data"]
    href = data.is_a?(Hash) ? data["href"] : nil
    (href.is_a?(String) && !href.empty?) ? href : nil
  end
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sentiero
|
|
4
|
+
module Analytics
|
|
5
|
+
# Cap primitives shared by the compute-on-read collectors/analyzers, which
|
|
6
|
+
# bound memory during a scan. Both leave the collection unchanged once full,
|
|
7
|
+
# so the caller flips its own `@capped` flag on a false/nil return.
|
|
8
|
+
module Bounded
  # Counter cap: bump counts[key], adding a NEW key only while under `cap`
  # (nil = unbounded). Returns true if counted, false if the cap dropped it.
  #
  # counts is normally a Hash defaulting to 0; a Hash whose lookup yields nil
  # for a new key (e.g. a plain `{}`) is also accepted and starts that key at 0.
  # by: the amount to add (default 1).
  def bounded_increment(counts, key, cap, by: 1)
    return false unless counts.key?(key) || cap.nil? || counts.size < cap

    # `|| 0` keeps the Hash's own default when it has one and avoids
    # `nil + by` when it does not.
    counts[key] = (counts[key] || 0) + by
    true
  end

  # Slot cap: the existing entry, a freshly built one (yielded, while under
  # `cap`), or nil when the store already holds `cap` distinct keys.
  # The block runs only when a new entry is actually created.
  def bounded_fetch(store, key, cap)
    return store[key] if store.key?(key)
    return nil if cap && store.size >= cap

    store[key] = yield
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "analyzer"
|
|
4
|
+
|
|
5
|
+
module Sentiero
|
|
6
|
+
module Analytics
|
|
7
|
+
# Cross-session newest-first listing of browser custom events (rrweb type==5)
|
|
8
|
+
# excluding the "error" tag (errors have ErrorDiscovery). Each row carries
|
|
9
|
+
# session/window + offset(ms) from window start for replay deep-links (?t=).
|
|
10
|
+
class BrowserEventDiscovery < Analyzer
  ERROR_TAG = "error"

  MAX_ROWS = 500

  # Trim cap during the scan so a busy store can't balloon memory. Mid-scan
  # trimming is safe: it keeps the globally-newest seen, and newer events
  # from later sessions still get added and survive the next trim.
  ACCUMULATION_LIMIT = MAX_ROWS * 4

  # Scans sessions in the optional [since, until_time] range and returns
  #   {rows: [...], was_truncated: true/false}
  # where rows holds at most MAX_ROWS browser custom events, newest first,
  # and was_truncated is true when rows were trimmed (mid-scan or at the end)
  # or the session scan hit its cap.
  def recent_events(since: nil, until_time: nil)
    rows = []
    trimmed = false

    _scanned, hit_cap = scan_sessions(since: since, until_time: until_time) do |summary, window_id, events|
      first_event = events.first
      # Window-relative anchor; a malformed first event gives nil, not a crash.
      anchor = first_event.is_a?(Hash) ? first_event["timestamp"] : nil

      events.each do |event|
        rows << build_row(summary, window_id, anchor, event) if browser_event?(event)
      end

      next unless rows.size > ACCUMULATION_LIMIT

      rows = sort_rows_newest_first(rows).first(MAX_ROWS)
      trimmed = true
    end

    rows = sort_rows_newest_first(rows)
    {
      rows: rows.first(MAX_ROWS),
      was_truncated: trimmed || hit_cap || rows.size > MAX_ROWS
    }
  end

  private

  # Rows ordered by descending timestamp. Rows whose timestamp is missing or
  # not numeric sort with key 0 so one malformed event cannot abort the sort.
  def sort_rows_newest_first(rows)
    rows.sort_by do |row|
      ts = row[:timestamp]
      ts.is_a?(Numeric) ? -ts : 0
    end
  end

  # True for a CUSTOM event with Hash data whose tag is not the error tag.
  def browser_event?(event)
    return false unless event.is_a?(Hash) && event["type"] == CUSTOM

    data = event["data"]
    data.is_a?(Hash) && data["tag"] != ERROR_TAG
  end

  # One listing row for a browser event: tag name, owning session/window,
  # raw timestamp, offset from the window anchor, and the payload (only when
  # it is a Hash; anything else becomes nil).
  def build_row(summary, window_id, anchor, event)
    data = event["data"] || {}
    ts = event["timestamp"]
    payload = data["payload"]
    {
      name: data["tag"].to_s,
      session_id: summary[:session_id],
      window_id: window_id,
      timestamp: ts,
      offset_ms: offset_ms(anchor, ts),
      payload: payload.is_a?(Hash) ? payload : nil
    }
  end
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../events"
|
|
4
|
+
require_relative "../bounded"
|
|
5
|
+
|
|
6
|
+
module Sentiero
|
|
7
|
+
module Analytics
|
|
8
|
+
# Per-URL click density grid + element selectors across page segments. A
|
|
9
|
+
# click's viewport y becomes a page coordinate by adding the latest scroll
|
|
10
|
+
# offset, normalized against the estimated page height (deepest scroll +
|
|
11
|
+
# viewport); x by viewport width. Both bucket into a GRID_SIZE x GRID_SIZE grid.
|
|
12
|
+
class ClickCollector
  include Events
  include Bounded

  MOUSE_CLICK = 2

  # Carries the clicked element's CSS selector; rrweb's own click event only
  # references an internal node id, not a stable selector.
  CLICK_TAG = "__click"

  # Grid resolution per axis.
  GRID_SIZE = 20

  attr_reader :total, :buckets, :selectors, :capped

  # max_selectors: nil unbounded; an Integer caps distinct selectors,
  # flipping #capped once a new selector is dropped.
  def initialize(max_selectors: nil)
    @max_selectors = max_selectors
    @buckets = Hash.new(0)   # [x_index, y_index] => click count
    @selectors = Hash.new(0) # selector string => click count
    @total = 0
    @capped = false
  end

  # Returns clicks added, or nil when the segment has no usable viewport
  # (callers branch on the nil).
  def collect(segment)
    viewport = viewport_size(segment)
    return nil unless viewport

    page_height = estimate_page_height(segment, viewport)
    scroll_y = 0
    added = 0

    segment.each do |event|
      next unless event.is_a?(Hash) # skip malformed entries

      scroll_y = 0 if event["type"] == META # a new Meta resets the scroll offset
      y = document_scroll_y(event)
      scroll_y = y if y

      if click?(event)
        data = event["data"]
        @buckets[bucket(data["x"], data["y"] + scroll_y, viewport, page_height)] += 1
        added += 1
      end
      tally_selector(event)
    end

    @total += added
    added
  end

  private

  # {width:, height:} from the first META event with Hash data, or nil when
  # there is none or either dimension is not a positive number.
  def viewport_size(events)
    meta = events.find do |event|
      event.is_a?(Hash) && event["type"] == META && event["data"].is_a?(Hash)
    end
    return nil unless meta

    width = meta["data"]["width"]
    height = meta["data"]["height"]
    return nil unless width.is_a?(Numeric) && height.is_a?(Numeric)
    return nil unless width > 0 && height > 0

    {width: width, height: height}
  end

  # Deepest document scroll offset seen in the segment plus viewport height.
  def estimate_page_height(segment, viewport)
    max_scroll = 0
    segment.each do |event|
      y = document_scroll_y(event)
      max_scroll = y if y && y > max_scroll
    end
    max_scroll + viewport[:height]
  end

  # The non-negative numeric "y" of a scroll event whose "id" is nil or 1;
  # nil for every other event.
  # NOTE(review): id 1 is presumably the document node — confirm with the recorder.
  def document_scroll_y(event)
    return nil unless event.is_a?(Hash) && event["type"] == INCREMENTAL

    data = event["data"]
    return nil unless data.is_a?(Hash) && data["source"] == SOURCE_SCROLL

    id = data["id"]
    return nil unless id.nil? || id == 1

    y = data["y"]
    (y.is_a?(Numeric) && y >= 0) ? y : nil
  end

  # True for a mouse-interaction click event carrying numeric x and y.
  def click?(event)
    return false unless event.is_a?(Hash) && event["type"] == INCREMENTAL

    data = event["data"]
    return false unless data.is_a?(Hash)

    data["source"] == SOURCE_MOUSE_INTERACTION &&
      data["type"] == MOUSE_CLICK &&
      data["x"].is_a?(Numeric) &&
      data["y"].is_a?(Numeric)
  end

  # Grid cell [x_index, y_index]: x scaled by viewport width, page_y by the
  # estimated page height.
  def bucket(x, page_y, viewport, page_height)
    [
      bucket_index(x, viewport[:width]),
      bucket_index(page_y, page_height)
    ]
  end

  # Maps value in [0, axis_length) onto 0...GRID_SIZE, clamping out-of-range
  # values into the first/last cell.
  def bucket_index(value, axis_length)
    index = (value.to_f / axis_length * GRID_SIZE).floor
    index.clamp(0, GRID_SIZE - 1)
  end

  # Counts the selector of a CLICK_TAG custom event, honouring the selector
  # cap. A payload that is not a Hash is ignored: calling dig through a
  # String/Integer payload would raise TypeError.
  def tally_selector(event)
    return unless event.is_a?(Hash) && event["type"] == CUSTOM

    data = event["data"]
    return unless data.is_a?(Hash) && data["tag"] == CLICK_TAG

    payload = data["payload"]
    selector = payload.is_a?(Hash) ? payload["selector"] : nil
    return unless selector.is_a?(String) && !selector.empty?

    @capped = true unless bounded_increment(@selectors, selector, @max_selectors)
  end
end
|
|
134
|
+
end
|
|
135
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../events"
|
|
4
|
+
require_relative "../bounded"
|
|
5
|
+
require_relative "../stats"
|
|
6
|
+
|
|
7
|
+
module Sentiero
|
|
8
|
+
module Analytics
|
|
9
|
+
# Custom-event tag tally across page segments. The single definition of
|
|
10
|
+
# which tags are "internal" and how the rest are counted and ranked.
|
|
11
|
+
class CustomTagCollector
  include Events
  include Bounded
  include Stats

  # Recorder-internal annotations (__perf, __click, …); never on the panel.
  INTERNAL_TAG_PREFIX = "__"
  # The JS-error tag is also internal — it has its own panel.
  ERROR_TAG = "error"
  MAX_CUSTOM_TAGS = 200

  attr_reader :tags, :capped

  # max_tags: nil unbounded; an Integer caps distinct tags, flipping #capped.
  def initialize(max_tags: nil)
    @max_tags = max_tags
    @tags = Hash.new(0) # tag => occurrence count
    @capped = false
  end

  # True when tag starts with INTERNAL_TAG_PREFIX or equals ERROR_TAG.
  def internal_tag?(tag)
    tag.start_with?(INTERNAL_TAG_PREFIX) || tag == ERROR_TAG
  end

  # Returns true when counted, false when internal or capped — callers gate
  # per-tag side-effects on this.
  def tally(tag)
    return false if internal_tag?(tag)

    counted = bounded_increment(@tags, tag, @max_tags)
    @capped = true unless counted
    counted
  end

  # Tallies the tag of every CUSTOM event in the segment that has a
  # non-empty String tag. Events that are not Hashes, or whose "data" is not
  # a Hash, are skipped (dig through a non-Hash "data" would raise TypeError).
  def collect(segment)
    segment.each do |event|
      next unless event.is_a?(Hash) && event["type"] == CUSTOM

      data = event["data"]
      tag = data.is_a?(Hash) ? data["tag"] : nil
      next unless tag.is_a?(String) && !tag.empty?

      tally(tag)
    end
  end

  # The n highest-ranked tags (ranking delegated to top_counts) as
  # [{tag:, count:}, ...].
  def top(n)
    top_counts(@tags, limit: n).map { |tag, count| {tag: tag, count: count} }
  end
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../events"
|
|
4
|
+
require_relative "../stats"
|
|
5
|
+
require_relative "../bounded"
|
|
6
|
+
|
|
7
|
+
module Sentiero
|
|
8
|
+
module Analytics
|
|
9
|
+
# Per-URL error grouping across page segments. Groups JS errors by a
|
|
10
|
+
# normalized key (see group_key) so messages differing only by an
|
|
11
|
+
# id/count/line number collapse into one row. Each occurrence records
|
|
12
|
+
# offset_ms from the window's first event so the UI can deep-link via ?t=.
|
|
13
|
+
# The three helpers are class methods so ErrorDiscovery can reuse them with
|
|
14
|
+
# its own group shape without instantiating an accumulator.
|
|
15
|
+
class ErrorCollector
  include Events
  include Stats
  include Bounded

  ERROR_TAG = "error"
  MAX_KEY_LENGTH = 200

  attr_reader :groups, :capped

  # Integer caps distinct groups (flips #capped) / occurrences per group; nil unbounded.
  def initialize(max_groups: nil, max_occurrences: nil)
    @max_groups = max_groups
    @max_occurrences = max_occurrences
    @groups = {} # group_key => {message:, count:, occurrences: []}
    @capped = false
  end

  # anchor is the window's first-event timestamp; offset_ms is relative to it
  # so replay ?t= deep-links stay consistent across segments of the window.
  #
  # Every error event bumps its group's count; occurrence details are kept
  # only while the group holds fewer than max_occurrences of them. Errors
  # whose group cannot be created because of max_groups are dropped and flip
  # #capped.
  def collect(segment, session_id:, window_id:, anchor:)
    segment.each do |event|
      next unless self.class.error_event?(event)

      message = self.class.extract_message(event)
      key = self.class.group_key(message)

      group = bounded_fetch(@groups, key, @max_groups) { {message: message, count: 0, occurrences: []} }
      if group.nil?
        @capped = true
        next
      end

      group[:count] += 1
      next unless @max_occurrences.nil? || group[:occurrences].size < @max_occurrences

      group[:occurrences] << {
        session_id: session_id,
        window_id: window_id,
        offset_ms: offset_ms(anchor, event["timestamp"])
      }
    end
  end

  # {groups: [...], total: n}: groups sorted by descending count, each a
  # fresh {message:, count:, occurrences:} hash; total sums every group's count.
  def summarize
    all_groups = @groups.values
    {
      groups: all_groups
        .sort_by { |g| -g[:count] }
        .map { |g| {message: g[:message], count: g[:count], occurrences: g[:occurrences]} },
      total: all_groups.sum { |g| g[:count] }
    }
  end

  # True for a CUSTOM event with Hash data tagged ERROR_TAG. Non-Hash events
  # are rejected instead of raising.
  def self.error_event?(event)
    return false unless event.is_a?(Hash) && event["type"] == CUSTOM

    data = event["data"]
    data.is_a?(Hash) && data["tag"] == ERROR_TAG
  end

  # Normalized grouping key: the message's first line, stripped, with every
  # digit run replaced by "#", truncated to MAX_KEY_LENGTH characters.
  # A nil message is treated as an empty string.
  def self.group_key(message)
    message.to_s.lines.first.to_s.strip.gsub(/\d+/, "#")[0, MAX_KEY_LENGTH]
  end

  # The payload's "message" as a String, or "Unknown error" when the event
  # has no Hash data/payload or the message is nil/blank. Type checks are used
  # instead of dig so a non-Hash "data" cannot raise TypeError when this
  # helper is called without a prior error_event? check.
  def self.extract_message(event)
    data = event.is_a?(Hash) ? event["data"] : nil
    payload = data.is_a?(Hash) ? data["payload"] : nil
    message = payload.is_a?(Hash) ? payload["message"] : nil
    return "Unknown error" if message.nil? || message.to_s.strip.empty?

    message.to_s
  end
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../events"
|
|
4
|
+
require_relative "../bounded"
|
|
5
|
+
|
|
6
|
+
module Sentiero
|
|
7
|
+
module Analytics
|
|
8
|
+
# Per-URL form interaction math. The single definition of what an "input"
|
|
9
|
+
# and a "submit" are in rrweb terms. Two completion semantics for two callers:
|
|
10
|
+
# #completed_count — sessions with inputs where EVERY input segment was
|
|
11
|
+
# submitted (strict; one abandoned segment disqualifies).
|
|
12
|
+
# #submitted_count — sessions with ANY __form_submit event, regardless of
|
|
13
|
+
# input timing.
|
|
14
|
+
class FormCollector
  include Events
  include Bounded

  # Recorder tag for a document-level form submit.
  SUBMIT_TAG = "__form_submit"

  # Output cap for the drop-off table.
  DROP_OFF_LIMIT = 50

  attr_reader :total_submits, :capped

  # max_fields: nil unbounded; an Integer caps the fields hash, flipping #capped.
  def initialize(max_fields: nil)
    @max_fields = max_fields
    @total_submits = 0
    @fields = {}            # [url, node_id] => field-stats hash
    @drop_off = Hash.new(0) # [url, node_id] => abandon count
    @started = {}           # session_id => true (≥1 input event seen)
    @submitted = {}         # session_id => true (≥1 submit event, any segment)
    @abandoned = {}         # session_id => true (≥1 input segment not submitted)
    @capped = false
  end

  # Returns the count of input events found. labels: {node_id => label} from
  # the segment's DOM snapshot; {} omits them.
  #
  # Submits are counted for every segment, even one without inputs. A segment
  # with inputs but no submit at/after the first input marks the session
  # abandoned and charges the drop-off to the LAST input's field.
  def collect(session_id, url, segment, labels: {})
    submits = segment.count { |event| submit?(event) }
    @total_submits += submits
    @submitted[session_id] = true if submits.positive?

    inputs = segment.select { |event| input?(event) }
    return 0 if inputs.empty?

    @started[session_id] = true
    record_fields(session_id, url, inputs, labels)

    unless segment_submitted?(segment, inputs.first["timestamp"])
      @abandoned[session_id] = true
      @drop_off[[url, node_id(inputs.last)]] += 1
    end

    inputs.size
  end

  # Number of distinct sessions with at least one input event.
  def started_count
    @started.size
  end

  # Counts a submit on the target URL even when inputs landed on a prior segment.
  def submitted_count
    @submitted.size
  end

  # Sessions with inputs where NO input segment was abandoned; a submit on a
  # later page never masks an abandonment.
  def completed_count
    @started.count { |id, _| !@abandoned.key?(id) }
  end

  # One row per tracked field, ordered by sessions (desc), then url, then id.
  # started: the denominator for completion_rate (0 yields a rate of 0.0).
  # include_labels: adds a :label entry to each row.
  def summarize_fields(started, include_labels: false)
    ordered = @fields.sort_by { |(url, id), stats| [-stats[:sessions], url.to_s, id] }
    ordered.map do |(url, id), stats|
      row = {field_id: id}
      row[:label] = stats[:label] if include_labels
      row[:url] = url
      row[:sessions] = stats[:sessions]
      row[:completion_rate] = started.zero? ? 0.0 : stats[:sessions].to_f / started
      row[:avg_time_to_fill_ms] = stats[:units].zero? ? 0.0 : stats[:total_fill_ms] / stats[:units]
      row[:total_refills] = stats[:total_refills]
      row
    end
  end

  # Up to DROP_OFF_LIMIT rows, ordered by abandon count (desc), then url,
  # then id. include_labels: adds a :label entry, nil when the field has no
  # stats entry (e.g. it was dropped by the max_fields cap).
  def summarize_drop_off(include_labels: false)
    ranked = @drop_off.sort_by { |(url, id), count| [-count, url.to_s, id] }
    ranked.first(DROP_OFF_LIMIT).map do |(url, id), count|
      row = {field_id: id}
      row[:label] = @fields.dig([url, id], :label) if include_labels
      row[:url] = url
      row[:count] = count
      row
    end
  end

  private

  # Truthy for an INCREMENTAL event with Hash data, SOURCE_INPUT source and an
  # Integer node id (returns that id rather than a strict boolean).
  def input?(event)
    return false unless event.is_a?(Hash) && event["type"] == INCREMENTAL

    data = event["data"]
    data.is_a?(Hash) && data["source"] == SOURCE_INPUT && node_id(event)
  end

  # The event's data["id"] when it is an Integer, otherwise nil. Type checks
  # are used instead of dig, which raises TypeError on a non-Hash "data".
  def node_id(event)
    data = event.is_a?(Hash) ? event["data"] : nil
    id = data.is_a?(Hash) ? data["id"] : nil
    id.is_a?(Integer) ? id : nil
  end

  # True for a CUSTOM event tagged SUBMIT_TAG. Events or "data" values that
  # are not Hashes are simply not submits.
  def submit?(event)
    return false unless event.is_a?(Hash) && event["type"] == CUSTOM

    data = event["data"]
    data.is_a?(Hash) && data["tag"] == SUBMIT_TAG
  end

  # A segment counts as submitted only when a __form_submit lands at or
  # after the first input; an earlier submit belongs to a prior interaction
  # (counting it resurrects "navigating away counts as submitting").
  # When first_input_at is not numeric, any submit in the segment counts.
  def segment_submitted?(segment, first_input_at)
    segment.any? do |event|
      next false unless submit?(event)

      ts = event["timestamp"]
      !first_input_at.is_a?(Numeric) || (ts.is_a?(Numeric) && ts >= first_input_at)
    end
  end

  # Keyed by [url, node_id]: rrweb node ids reset on every full-page load, so
  # the url scope keeps unrelated fields from conflating across pages.
  #
  # Per field and call: one more unit, fill time = span between its earliest
  # and latest numeric input timestamp (0 when none is numeric, instead of
  # raising), refills = inputs beyond the first.
  def record_fields(session_id, url, inputs, labels)
    inputs.group_by { |event| [url, node_id(event)] }.each do |key, field_inputs|
      stats = bounded_fetch(@fields, key, @max_fields) do
        {sessions: 0, units: 0, total_fill_ms: 0.0, total_refills: 0, last_session: nil, label: nil}
      end
      if stats.nil?
        @capped = true
        next
      end

      # Only the previous session id is remembered, so a session is counted
      # once per field as long as its segments arrive back to back.
      # NOTE(review): assumes callers feed a session's segments contiguously — confirm.
      stats[:sessions] += 1 unless stats[:last_session] == session_id
      stats[:last_session] = session_id
      stats[:units] += 1

      timestamps = field_inputs.map { |event| event["timestamp"] }.grep(Numeric)
      stats[:total_fill_ms] += (timestamps.max - timestamps.min).to_f unless timestamps.empty?
      stats[:total_refills] += field_inputs.size - 1
      stats[:label] ||= labels[key.last]
    end
  end
end
|
|
155
|
+
end
|
|
156
|
+
end
|