sentiero 1.0.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +7 -0
- data/README.md +679 -0
- data/lib/sentiero/analytics/analyzer.rb +91 -0
- data/lib/sentiero/analytics/bounded.rb +29 -0
- data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
- data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
- data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
- data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
- data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
- data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
- data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
- data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
- data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
- data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
- data/lib/sentiero/analytics/entry_attribution.rb +71 -0
- data/lib/sentiero/analytics/error_discovery.rb +118 -0
- data/lib/sentiero/analytics/events.rb +21 -0
- data/lib/sentiero/analytics/exporter.rb +242 -0
- data/lib/sentiero/analytics/form_analyzer.rb +153 -0
- data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
- data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
- data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
- data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
- data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
- data/lib/sentiero/analytics/problem_detail.rb +97 -0
- data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
- data/lib/sentiero/analytics/segmenter.rb +133 -0
- data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
- data/lib/sentiero/analytics/stats.rb +30 -0
- data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
- data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
- data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
- data/lib/sentiero/configuration.rb +184 -0
- data/lib/sentiero/erasure.rb +48 -0
- data/lib/sentiero/fingerprint.rb +34 -0
- data/lib/sentiero/ip_anonymizer.rb +29 -0
- data/lib/sentiero/redaction/config.rb +61 -0
- data/lib/sentiero/redaction.rb +207 -0
- data/lib/sentiero/reporter/configuration.rb +50 -0
- data/lib/sentiero/reporter/context.rb +31 -0
- data/lib/sentiero/reporter/dispatcher.rb +91 -0
- data/lib/sentiero/reporter/http_transport.rb +57 -0
- data/lib/sentiero/reporter/log_transport.rb +26 -0
- data/lib/sentiero/reporter/middleware.rb +62 -0
- data/lib/sentiero/reporter/normalizer.rb +14 -0
- data/lib/sentiero/reporter/null_transport.rb +18 -0
- data/lib/sentiero/reporter/report_context.rb +29 -0
- data/lib/sentiero/reporter/scrubber.rb +47 -0
- data/lib/sentiero/reporter/test_helper.rb +32 -0
- data/lib/sentiero/reporter/test_transport.rb +28 -0
- data/lib/sentiero/reporter.rb +214 -0
- data/lib/sentiero/roda.rb +47 -0
- data/lib/sentiero/store/error_store.rb +220 -0
- data/lib/sentiero/store/limits.rb +31 -0
- data/lib/sentiero/store/session_store.rb +118 -0
- data/lib/sentiero/store.rb +72 -0
- data/lib/sentiero/stores/file.rb +566 -0
- data/lib/sentiero/stores/memory.rb +362 -0
- data/lib/sentiero/stores/redis/keys.rb +59 -0
- data/lib/sentiero/stores/redis/lua.rb +119 -0
- data/lib/sentiero/stores/redis.rb +665 -0
- data/lib/sentiero/stores/sqlite/schema.rb +79 -0
- data/lib/sentiero/stores/sqlite.rb +626 -0
- data/lib/sentiero/user_agent.rb +32 -0
- data/lib/sentiero/version.rb +5 -0
- data/lib/sentiero/web/analytics_app.rb +538 -0
- data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
- data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
- data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
- data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
- data/lib/sentiero/web/assets/manifest.json +11 -0
- data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
- data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
- data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
- data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
- data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
- data/lib/sentiero/web/assets_app.rb +42 -0
- data/lib/sentiero/web/base_app.rb +319 -0
- data/lib/sentiero/web/basic_auth.rb +27 -0
- data/lib/sentiero/web/basic_auth_check.rb +41 -0
- data/lib/sentiero/web/body_reader.rb +44 -0
- data/lib/sentiero/web/csv_writer.rb +45 -0
- data/lib/sentiero/web/dashboard_app.rb +236 -0
- data/lib/sentiero/web/errors_app.rb +97 -0
- data/lib/sentiero/web/escaping.rb +37 -0
- data/lib/sentiero/web/events_app.rb +196 -0
- data/lib/sentiero/web/formatting.rb +43 -0
- data/lib/sentiero/web/ingest_app.rb +92 -0
- data/lib/sentiero/web/manifest.rb +43 -0
- data/lib/sentiero/web/monitoring_app.rb +316 -0
- data/lib/sentiero/web/script_tag.rb +57 -0
- data/lib/sentiero/web/shareable_replay.rb +88 -0
- data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
- data/lib/sentiero/web/templates/_brand.html.erb +18 -0
- data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
- data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
- data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
- data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
- data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
- data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
- data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
- data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
- data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
- data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
- data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
- data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
- data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
- data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
- data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
- data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
- data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
- data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
- data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
- data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
- data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
- data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
- data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
- data/lib/sentiero/web/templates/event_show.html.erb +52 -0
- data/lib/sentiero/web/templates/events_index.html.erb +177 -0
- data/lib/sentiero/web/templates/export_index.html.erb +69 -0
- data/lib/sentiero/web/templates/forms.html.erb +105 -0
- data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
- data/lib/sentiero/web/templates/import.html.erb +39 -0
- data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
- data/lib/sentiero/web/templates/segments.html.erb +114 -0
- data/lib/sentiero/web/templates/session_show.html.erb +195 -0
- data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
- data/lib/sentiero/web/track_app.rb +57 -0
- data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
- data/lib/sentiero/web/views/analyzer_view.rb +27 -0
- data/lib/sentiero/web/views/base_view.rb +76 -0
- data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
- data/lib/sentiero/web/views/conversions_view.rb +41 -0
- data/lib/sentiero/web/views/engagement_view.rb +67 -0
- data/lib/sentiero/web/views/errors_index_view.rb +37 -0
- data/lib/sentiero/web/views/event_show_view.rb +20 -0
- data/lib/sentiero/web/views/events_index_view.rb +56 -0
- data/lib/sentiero/web/views/export_view.rb +23 -0
- data/lib/sentiero/web/views/forms_view.rb +28 -0
- data/lib/sentiero/web/views/frustration_view.rb +15 -0
- data/lib/sentiero/web/views/funnel_view.rb +36 -0
- data/lib/sentiero/web/views/heatmap_view.rb +34 -0
- data/lib/sentiero/web/views/import_view.rb +13 -0
- data/lib/sentiero/web/views/page_report_view.rb +43 -0
- data/lib/sentiero/web/views/problem_show_view.rb +46 -0
- data/lib/sentiero/web/views/scroll_view.rb +23 -0
- data/lib/sentiero/web/views/segments_view.rb +28 -0
- data/lib/sentiero/web/views/session_show_view.rb +105 -0
- data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
- data/lib/sentiero/web/views/vitals_view.rb +45 -0
- data/lib/sentiero/web/views.rb +24 -0
- data/lib/sentiero/window_ref.rb +6 -0
- data/lib/sentiero.rb +69 -0
- metadata +232 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../events"
|
|
4
|
+
require_relative "../bounded"
|
|
5
|
+
|
|
6
|
+
module Sentiero
|
|
7
|
+
module Analytics
|
|
8
|
+
# Per-segment frustration attribution. Attributes incidents to segments by
|
|
9
|
+
# object identity (e.equal?(incident[:event])) so a same-millisecond Meta
|
|
10
|
+
# boundary cannot mis-attribute one.
|
|
11
|
+
#
|
|
12
|
+
# IMPORTANT: works on the RAW detector output (before refine_incidents
|
|
13
|
+
# de-noise), so dead_count may EXCEED /analytics/frustration for the same
|
|
14
|
+
# URL. Intentional: completeness over precision (no rages a de-noise rule
|
|
15
|
+
# might withdraw are missed).
|
|
16
|
+
class FrustrationCollector
|
|
17
|
+
include Events
|
|
18
|
+
include Bounded
|
|
19
|
+
|
|
20
|
+
# Recorder tag carrying the clicked element's CSS selector.
|
|
21
|
+
CLICK_TAG = "__click"
|
|
22
|
+
|
|
23
|
+
attr_reader :rage_count, :dead_count, :selectors, :capped
|
|
24
|
+
|
|
25
|
+
# Starts every tally at zero.
#
# max_selectors: stored as-is and later passed to bounded_increment whenever
# a rage-click selector is tallied; nil is the default.
def initialize(max_selectors: nil)
  @capped = false
  @selectors = Hash.new(0) # selector => rage-click count
  @dead_count = 0
  @rage_count = 0
  @max_selectors = max_selectors
end
|
|
32
|
+
|
|
33
|
+
# Attributes the incidents that belong to `segment` and updates the tallies.
#
# An incident belongs to the segment when incident[:event] is the very same
# object (identity, not ==) as one of the segment's events. An attributed
# incident whose :subtype is "rage_click" bumps rage_count and, when
# nearest_click_selector finds a selector, the per-selector tally; any other
# attributed incident bumps dead_count.
#
# The segment is indexed by identity once up front, so attribution is linear
# rather than rescanning the whole segment for every incident.
#
# Returns the number attributed to this segment.
def collect(incidents, segment)
  return 0 if incidents.empty?

  members = {}.compare_by_identity
  segment.each { |event| members[event] = true }

  attributed = 0
  incidents.each do |incident|
    next unless members.key?(incident[:event])

    if incident[:subtype] == "rage_click"
      @rage_count += 1
      selector = nearest_click_selector(segment, incident[:timestamp])
      # A falsy result from bounded_increment flips #capped.
      @capped = true if selector && !bounded_increment(@selectors, selector, @max_selectors)
    else
      @dead_count += 1
    end

    attributed += 1
  end
  attributed
end
|
|
55
|
+
|
|
56
|
+
private
|
|
57
|
+
|
|
58
|
+
# Selector of the "__click" custom event whose timestamp is closest to
# `timestamp`. No distance ceiling — the segment is bounded to one page.
#
# An event is considered only when it is a CUSTOM event tagged CLICK_TAG
# whose payload is a Hash carrying a non-empty String selector, and whose
# own timestamp is Numeric. On a distance tie the earliest such event in the
# segment wins.
#
# Returns nil when nothing qualifies, or when `timestamp` is not Numeric
# (there is nothing to measure against; the subtraction used to raise).
def nearest_click_selector(segment, timestamp)
  return nil unless timestamp.is_a?(Numeric)

  nearest = nil
  nearest_distance = nil
  segment.each do |event|
    next unless event["type"] == CUSTOM

    data = event["data"]
    next unless data.is_a?(Hash) && data["tag"] == CLICK_TAG

    # A malformed (non-Hash) payload is skipped instead of raising from dig.
    payload = data["payload"]
    next unless payload.is_a?(Hash)

    selector = payload["selector"]
    next unless selector.is_a?(String) && !selector.empty?

    ts = event["timestamp"]
    next unless ts.is_a?(Numeric)

    distance = (ts - timestamp).abs
    if nearest_distance.nil? || distance < nearest_distance
      nearest_distance = distance
      nearest = selector
    end
  end
  nearest
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../events"
|
|
4
|
+
require_relative "../stats"
|
|
5
|
+
require_relative "../bounded"
|
|
6
|
+
|
|
7
|
+
module Sentiero
|
|
8
|
+
module Analytics
|
|
9
|
+
# Per-URL scroll depth across page segments and windows.
|
|
10
|
+
#
|
|
11
|
+
# rrweb Metas carry the viewport height but NOT the document height, so page
|
|
12
|
+
# height is ESTIMATED as the deepest (max scroll + viewport) any sample
|
|
13
|
+
# reached — exact when somebody read to the end, a lower bound otherwise.
|
|
14
|
+
# Viewport-less samples fall back to pixels (no percentage derivable).
|
|
15
|
+
#
|
|
16
|
+
# Per window: #observe each segment, then #flush_window once to commit each
|
|
17
|
+
# URL's deepest segment as ONE sample.
|
|
18
|
+
class ScrollCollector
|
|
19
|
+
include Events
|
|
20
|
+
include Stats
|
|
21
|
+
include Bounded
|
|
22
|
+
|
|
23
|
+
DISTRIBUTION_BINS = %w[0-25 25-50 50-75 75-100].freeze
|
|
24
|
+
|
|
25
|
+
attr_reader :capped
|
|
26
|
+
|
|
27
|
+
# max_urls: nil (the default) or an Integer. It is stored and later passed
# to bounded_fetch by #flush_window; #capped flips when bounded_fetch
# refuses a URL.
def initialize(max_urls: nil)
  @capped = false
  @window = {} # url => deepest segment depth in the current window
  @samples_by_url = {} # url => [{max_y:, viewport_height:}, ...]
  @max_urls = max_urls
end
|
|
34
|
+
|
|
35
|
+
# Records `segment` as the current window's candidate for `url` when it
# scrolled deeper (larger :max_y) than the candidate already held. Segments
# for which segment_depth returns nothing are ignored.
def observe(url, segment)
  candidate = segment_depth(segment)
  return unless candidate

  held = @window[url]
  deeper = held.nil? || candidate[:max_y] > held[:max_y]
  @window[url] = candidate if deeper
end
|
|
43
|
+
|
|
44
|
+
# Commits the current window: each URL's held depth is appended as one
# sample to that URL's list, obtained through bounded_fetch. When
# bounded_fetch returns nil the sample is dropped and #capped is set.
# The window is then reset to empty.
def flush_window
  @window.each_pair do |url, deepest|
    bucket = bounded_fetch(@samples_by_url, url, @max_urls) { [] }
    if bucket.nil?
      @capped = true
    else
      bucket << deepest
    end
  end
  @window = {}
end
|
|
56
|
+
|
|
57
|
+
# Summary (see summarize_samples) of the samples committed for `url`;
# nil when the URL has no samples.
def summarize(url)
  recorded = @samples_by_url[url]
  return nil if !recorded || recorded.empty?

  summarize_samples(recorded)
end
|
|
64
|
+
|
|
65
|
+
# Hash of url => summary (see summarize_samples) for every URL that has a
# sample list.
def pages
  @samples_by_url.each_with_object({}) do |(url, samples), summaries|
    summaries[url] = summarize_samples(samples)
  end
end
|
|
68
|
+
|
|
69
|
+
private
|
|
70
|
+
|
|
71
|
+
# Depth record for one segment: {max_y:, viewport_height:}, where max_y is
# the largest value scroll_y yields for the segment's events. Returns nil
# when that maximum is missing or not greater than zero.
def segment_depth(segment)
  offsets = segment.filter_map { |event| scroll_y(event) }
  deepest = offsets.max || 0
  return nil unless deepest.positive?

  {max_y: deepest, viewport_height: viewport_height(segment)}
end
|
|
77
|
+
|
|
78
|
+
# Vertical offset carried by a document-level scroll event, else nil.
#
# Only the document scroll (node id nil or 1) counts as page depth; inner
# scroll containers (id > 1) would otherwise inflate it. Mirrors
# ClickCollector#document_scroll_y so both agree on "page scroll".
#
# nil is returned unless the event is INCREMENTAL, its data is a Hash with
# source SOURCE_SCROLL, and its "y" is Numeric.
def scroll_y(event)
  return nil if event["type"] != INCREMENTAL

  payload = event["data"]
  return nil unless payload.is_a?(Hash)
  return nil unless payload["source"] == SOURCE_SCROLL

  node_id = payload["id"]
  return nil unless node_id.nil? || node_id == 1

  offset = payload["y"]
  offset if offset.is_a?(Numeric)
end
|
|
93
|
+
|
|
94
|
+
# Height reported under data.height by the segment's Meta event (see
# meta_event). Returns nil when there is no such event or the height is not
# a Numeric greater than zero.
def viewport_height(segment)
  meta = meta_event(segment)
  reported = meta&.dig("data", "height")
  return nil unless reported.is_a?(Numeric) && reported > 0

  reported
end
|
|
98
|
+
|
|
99
|
+
def meta_event(events)
|
|
100
|
+
events.find { |event| event["type"] == META && event["data"].is_a?(Hash) }
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Builds the summary Hash for one URL's samples.
#
# page_height_px is the largest viewport bottom (max_y + viewport height)
# among samples that have a viewport height; it is nil when none do.
# avg_depth_pct is nil when no sample yields a percentage.
def summarize_samples(samples)
  page_height = samples.filter_map { |sample| viewport_bottom(sample) }.max
  percentages = samples.filter_map { |sample| depth_pct(sample, page_height) }
  depths_px = samples.map { |sample| sample[:max_y] }
  average_px = mean(depths_px)
  average_pct = mean(percentages) unless percentages.empty?

  {
    session_count: samples.size,
    avg_depth_px: average_px,
    avg_depth_pct: average_pct,
    page_height_px: page_height,
    fold_lines: fold_lines(percentages),
    distribution: distribution(samples, page_height)
  }
end
|
|
117
|
+
|
|
118
|
+
# max_y plus the sample's viewport height; nil when the sample has no
# viewport height.
def viewport_bottom(sample)
  viewport = sample[:viewport_height]
  return nil unless viewport

  sample[:max_y] + viewport
end
|
|
122
|
+
|
|
123
|
+
# The sample's viewport bottom as a percentage of `page_height`, capped at
# 100.0. Returns nil when either the viewport bottom or the page height is
# unavailable.
def depth_pct(sample, page_height)
  bottom = viewport_bottom(sample)
  return nil unless page_height && bottom

  percentage = bottom.to_f / page_height * 100
  [percentage, 100.0].min
end
|
|
129
|
+
|
|
130
|
+
# {p50:, p75:, p90:} taken from the sorted percentages via percentile.
# All three are nil when there are no percentages.
def fold_lines(pcts)
  ranks = {p50: 50, p75: 75, p90: 90}
  return ranks.transform_values { nil } if pcts.empty?

  ordered = pcts.sort
  ranks.transform_values { |rank| percentile(ordered, rank) }
end
|
|
136
|
+
|
|
137
|
+
# Counts samples per DISTRIBUTION_BINS label.
#
# A sample's percentage comes from depth_pct; viewport-less samples (no
# percentage derivable) fall back to their pixel depth relative to the
# deepest sample so they still land in a bin.
def distribution(samples, page_height)
  counts = {}
  DISTRIBUTION_BINS.each { |label| counts[label] = 0 }
  deepest_px = samples.map { |sample| sample[:max_y] }.max

  samples.each do |sample|
    pct = depth_pct(sample, page_height)
    pct ||= sample[:max_y].to_f / deepest_px * 100
    counts[bin_for(pct)] += 1
  end
  counts
end
|
|
149
|
+
|
|
150
|
+
# Label of the 25-point-wide bin for a percentage. Upper edges are
# inclusive (25 falls in "0-25"); values at or below 0 use the first bin and
# values above 100 use the last.
def bin_for(pct)
  quarter = (pct / 25.0).ceil
  DISTRIBUTION_BINS[quarter.clamp(1, 4) - 1]
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../events"
|
|
4
|
+
require_relative "../stats"
|
|
5
|
+
|
|
6
|
+
module Sentiero
|
|
7
|
+
module Analytics
|
|
8
|
+
# Per-segment web-vitals accumulator. The recorder emits one "__perf" custom
|
|
9
|
+
# event per metric report, with data.payload {metric, value, rating}.
|
|
10
|
+
#
|
|
11
|
+
# Within a single segment, multiple reports for the same metric collapse to
|
|
12
|
+
# the LAST (the web-vitals library re-reports as the page evolves; only the
|
|
13
|
+
# final report is authoritative). Samples accumulate across all segments.
|
|
14
|
+
#
|
|
15
|
+
# #summarize's worst carries :value too; PageReportAnalyzer strips it afterward.
|
|
16
|
+
class VitalsCollector
|
|
17
|
+
include Events
|
|
18
|
+
include Stats
|
|
19
|
+
|
|
20
|
+
# Recorder tag for a web-vitals report.
|
|
21
|
+
PERF_TAG = "__perf"
|
|
22
|
+
|
|
23
|
+
attr_reader :capped
|
|
24
|
+
|
|
25
|
+
# max_samples: nil leaves each metric's value list unbounded; an Integer
# caps it, and #capped flips once a sample is refused.
def initialize(max_samples: nil)
  @capped = false
  @metrics = {} # metric => {values:, ratings:, worst:}
  @max_samples = max_samples
end
|
|
31
|
+
|
|
32
|
+
# Folds one segment's web-vitals reports into the per-metric accumulators.
#
# Within the segment only the last report per metric is kept. Each kept
# sample appends its value, counts its rating when that is a non-empty
# String, and becomes the metric's worst when its value is higher than the
# current worst. A metric already holding max_samples values takes nothing
# more and sets #capped.
#
# session_id/window_id/anchor attribute the worst (highest-value) sample.
# anchor is the window's first-event timestamp; offset_ms is relative to it
# so replay deep-links target the window start, not the segment.
def collect(segment, session_id:, window_id:, anchor:)
  latest = segment.each_with_object({}) do |event, by_metric|
    parsed = parse_sample(event)
    by_metric[parsed[:metric]] = parsed if parsed
  end

  latest.each_value do |sample|
    entry = (@metrics[sample[:metric]] ||= {values: [], ratings: Hash.new(0), worst: nil})
    if @max_samples && entry[:values].size >= @max_samples
      @capped = true
      next
    end

    value = sample[:value]
    entry[:values] << value

    rating = sample[:rating]
    entry[:ratings][rating] += 1 if rating.is_a?(String) && !rating.empty?

    worst = entry[:worst]
    next unless worst.nil? || value > worst[:value]

    entry[:worst] = {
      session_id: session_id,
      window_id: window_id,
      offset_ms: offset_ms(anchor, sample[:timestamp]),
      value: value
    }
  end
end
|
|
64
|
+
|
|
65
|
+
# {sample_count:, metrics:} where metrics maps each metric name to its
# summarize_metric result and sample_count totals their :samples.
def summarize
  per_metric = @metrics.transform_values { |entry| summarize_metric(entry) }
  total = per_metric.each_value.sum { |metric| metric[:samples] }
  {sample_count: total, metrics: per_metric}
end
|
|
72
|
+
|
|
73
|
+
private
|
|
74
|
+
|
|
75
|
+
# Extracts {metric:, value:, rating:, timestamp:} from a web-vitals report.
#
# Returns nil unless the event is CUSTOM, tagged PERF_TAG, and has a Hash
# payload with a non-empty String metric and a Numeric value. rating and
# timestamp are passed through unvalidated.
def parse_sample(event)
  return nil if event["type"] != CUSTOM

  body = event["data"]
  return nil unless body.is_a?(Hash)
  return nil unless body["tag"] == PERF_TAG

  report = body["payload"]
  return nil unless report.is_a?(Hash)

  name = report["metric"]
  reading = report["value"]
  return nil unless name.is_a?(String) && !name.empty?
  return nil unless reading.is_a?(Numeric)

  {metric: name, value: reading, rating: report["rating"], timestamp: event["timestamp"]}
end
|
|
90
|
+
|
|
91
|
+
# Summary of one metric accumulator: sample count, p50/p75/p90 of the
# sorted values (via percentile), plus the entry's ratings tally and worst
# sample passed through as-is.
def summarize_metric(entry)
  ordered = entry[:values].sort
  summary = {samples: ordered.size}
  {p50: 50, p75: 75, p90: 90}.each do |key, rank|
    summary[key] = percentile(ordered, rank)
  end
  summary[:ratings] = entry[:ratings]
  summary[:worst] = entry[:worst]
  summary
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
require_relative "analyzer"
|
|
6
|
+
require_relative "funnel_analyzer"
|
|
7
|
+
|
|
8
|
+
module Sentiero
|
|
9
|
+
module Analytics
|
|
10
|
+
# Conversion rate by acquisition dimension (entry page, referrer host, UTM)
|
|
11
|
+
# for one custom-event tag. A session counts as converting at most once,
|
|
12
|
+
# regardless of how many times/windows the tag fired.
|
|
13
|
+
class ConversionAnalyzer < Analyzer
|
|
14
|
+
TOP_ROWS = 15
|
|
15
|
+
|
|
16
|
+
# Below this many sessions a rate is too thin; rows flagged low_volume.
|
|
17
|
+
MIN_SESSIONS_FOR_RATE = 5
|
|
18
|
+
|
|
19
|
+
# A new key past the cap is dropped and sets was_truncated.
|
|
20
|
+
MAX_DIMENSION_KEYS = 200
|
|
21
|
+
|
|
22
|
+
DIRECT = "(direct / none)"
|
|
23
|
+
|
|
24
|
+
# Matched case-insensitively.
|
|
25
|
+
UTM_PARAMS = %w[utm_source utm_medium utm_campaign].freeze
|
|
26
|
+
|
|
27
|
+
# Scans sessions (via scan_sessions) and reports conversion facets for `tag`.
#
# The tag is first passed through FunnelAnalyzer.usable_steps; when nothing
# usable remains the facets are empty, but the tag vocabulary is still
# collected. was_truncated is set when a vocabulary/dimension cap dropped a
# key or when scan_sessions reports hitting its cap.
def analyze(tag = nil, limit: nil, since: nil, until_time: nil)
  selected = FunnelAnalyzer.usable_steps([tag].compact).first

  @truncated = false
  vocabulary = {}
  states = {}

  _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    # One state per session, created from the first window that is scanned.
    state = (states[summary[:session_id]] ||= new_state(summary, window_id))

    update_entry_candidate(state, events)
    collect_vocabulary(vocabulary, events)
    record_conversion(state, selected, window_id, events) if selected
  end

  facets = if selected
    build_facets(states)
  else
    empty_facets
  end

  {
    tags: vocabulary.keys.sort,
    selected_tag: selected,
    entry_pages: facets[:entry_pages],
    referrers: facets[:referrers],
    utm: facets[:utm],
    was_truncated: @truncated || hit_cap
  }
end
|
|
55
|
+
|
|
56
|
+
private
|
|
57
|
+
|
|
58
|
+
# Fresh per-session state built from the session summary's metadata.
#
# entry_url precedence: an explicit metadata entry_url is authoritative —
# its anchor is -Infinity so no later Meta can displace it. Without one the
# anchor starts nil, so the first Meta href wins.
# referrer prefers metadata "entry_referrer" over "referrer".
def new_state(summary, first_window)
  meta = summary[:metadata] || {}
  explicit_entry = meta["entry_url"]
  anchor = explicit_entry ? -Float::INFINITY : nil

  {
    session_id: summary[:session_id],
    entry_url: explicit_entry,
    entry_anchor: anchor,
    referrer: meta["entry_referrer"] || meta["referrer"],
    converted: false,
    convert_window: nil,
    convert_offset: nil,
    convert_anchor: nil,
    first_window: first_window
  }
end
|
|
75
|
+
|
|
76
|
+
# Adds the tag of every CUSTOM event in `events` to the `tags` vocabulary
# (tag => true), skipping tags FunnelAnalyzer.internal_tag? rejects. Once the
# vocabulary holds FunnelAnalyzer::MAX_TAGS entries, new tags are dropped and
# @truncated is set.
#
# NOTE(review): a non-String tag is stored as-is unless internal_tag?
# rejects it — confirm, since #analyze sorts the collected keys.
def collect_vocabulary(tags, events)
  events.each do |event|
    next unless event.is_a?(Hash) && event["type"] == CUSTOM

    data = event["data"]
    next unless data.is_a?(Hash)

    tag = data["tag"]
    next if FunnelAnalyzer.internal_tag?(tag) || tags.key?(tag)

    if tags.size < FunnelAnalyzer::MAX_TAGS
      tags[tag] = true
    else
      @truncated = true
    end
  end
end
|
|
92
|
+
|
|
93
|
+
# Records in `state` that the session converted in this window, if any event
# in `events` is a CUSTOM event tagged `tag` with a Numeric timestamp.
#
# Keeps the earliest conversion across windows so example coordinates are
# deterministic (earlier window start, or same start with earlier offset):
# an already-converted state is only overwritten when earlier_match? says
# this match is earlier.
#
# The window anchor is the first event's "timestamp". A first event that is
# not a Hash gives a nil anchor rather than raising, matching the
# is_a?(Hash) guard applied to every event in the search below.
def record_conversion(state, tag, window_id, events)
  first = events.first
  anchor = first.is_a?(Hash) ? first["timestamp"] : nil

  match = events.find do |event|
    event.is_a?(Hash) && event["type"] == CUSTOM &&
      event["data"].is_a?(Hash) && event["data"]["tag"] == tag &&
      event["timestamp"].is_a?(Numeric)
  end
  return unless match

  offset = offset_ms(anchor, match["timestamp"])
  return if state[:converted] && !earlier_match?(anchor, offset, state)

  state[:converted] = true
  state[:convert_window] = window_id
  state[:convert_offset] = offset
  state[:convert_anchor] = anchor
end
|
|
112
|
+
|
|
113
|
+
# True when a conversion at (anchor, offset) precedes the one recorded in
# `state`: an earlier window anchor wins outright, and equal anchors fall
# back to the smaller offset. Always false when either anchor is not Numeric.
def earlier_match?(anchor, offset, state)
  recorded = state[:convert_anchor]
  return false unless anchor.is_a?(Numeric) && recorded.is_a?(Numeric)

  case anchor <=> recorded
  when -1 then true
  when 1 then false
  else offset < state[:convert_offset]
  end
end
|
|
122
|
+
|
|
123
|
+
# Facet result with no rows, used when no tag is selected.
def empty_facets
  utm = {source: [], medium: [], campaign: []}
  {entry_pages: [], referrers: [], utm: utm}
end
|
|
126
|
+
|
|
127
|
+
# Folds every session state into the entry-page, referrer and UTM facets and
# returns their rows (see rows_for).
#
# Runs after the scan, so every window of every session has been seen.
# A session whose entry URL cannot be normalized has no acquisition data and
# contributes to no facet.
def build_facets(sessions)
  entry_pages = new_facet
  referrers = new_facet
  utm = %i[source medium campaign].to_h { |part| [part, new_facet] }

  sessions.each_value do |state|
    entry_url = state[:entry_url]
    entry_key = normalize_entry(entry_url)
    next if entry_key.nil?

    fold(entry_pages, entry_key, state)
    fold(referrers, referrer_key(state[:referrer], entry_url), state)
    fold_utm(utm, entry_url, state)
  end

  {
    entry_pages: rows_for(entry_pages),
    referrers: rows_for(referrers),
    utm: utm.transform_values { |facet| rows_for(facet) }
  }
end
|
|
154
|
+
|
|
155
|
+
# Empty accumulator for one dimension: per-key session and conversion
# counters (defaulting to 0) plus one example session per key for the
# converting and non-converting sides.
def new_facet
  facet = {}
  facet[:sessions] = Hash.new(0)
  facet[:conversions] = Hash.new(0)
  facet[:converting] = {}
  facet[:non_converting] = {}
  facet
end
|
|
158
|
+
|
|
159
|
+
# Counts one session under `key` in `facet`.
#
# A nil key is ignored. A key not yet present is dropped, and @truncated set,
# once the facet already holds MAX_DIMENSION_KEYS keys. Otherwise the session
# counter is bumped, the conversion counter too when the session converted,
# and the first session seen on each side is remembered as that key's
# example (converting: its conversion window/offset; non-converting: its
# first window at offset 0).
def fold(facet, key, state)
  return if key.nil?

  seen = facet[:sessions]
  if !seen.key?(key) && seen.size >= MAX_DIMENSION_KEYS
    @truncated = true
    return
  end

  seen[key] += 1
  converted = state[:converted]
  facet[:conversions][key] += 1 if converted

  side = converted ? :converting : :non_converting
  facet[side][key] ||= {
    session_id: state[:session_id],
    window_id: converted ? state[:convert_window] : state[:first_window],
    offset_ms: converted ? state[:convert_offset] : 0
  }
end
|
|
183
|
+
|
|
184
|
+
# Folds the session into the source/medium/campaign facets using the UTM
# parameters found on its entry URL; a missing parameter yields a nil key,
# which fold ignores.
def fold_utm(utm, entry_url, state)
  source, medium, campaign = utm_params(entry_url).values_at("utm_source", "utm_medium", "utm_campaign")

  fold(utm[:source], source, state)
  fold(utm[:medium], medium, state)
  fold(utm[:campaign], campaign, state)
end
|
|
190
|
+
|
|
191
|
+
# Reduces an entry URL to "scheme://host[:port]path", dropping the query and
# fragment. The port is kept only when it differs from the scheme's default.
#
# Returns nil for anything that is not a non-empty String, for URLs lacking
# a scheme or host, and for URLs URI.parse rejects.
def normalize_entry(url)
  return nil unless url.is_a?(String) && !url.empty?

  parsed = URI.parse(url)
  scheme = parsed.scheme
  host = parsed.host
  return nil unless scheme && host

  explicit_port = parsed.port
  suffix = (explicit_port && explicit_port != parsed.default_port) ? ":#{explicit_port}" : ""
  [scheme, "://", host, suffix, parsed.path].join
rescue URI::InvalidURIError
  nil
end
|
|
202
|
+
|
|
203
|
+
# Facet key for a session's referrer.
#
# Same-origin referrers are from within the site, not acquisition, so they
# yield nil (dropped). Otherwise the key is the referrer's host, or DIRECT
# when the referrer is missing, has no host, or cannot be parsed.
def referrer_key(referrer, entry_url)
  return nil if same_origin?(referrer, entry_url)
  return DIRECT unless referrer.is_a?(String) && !referrer.empty?

  host = URI.parse(referrer).host
  return DIRECT if host.nil? || host.empty?

  host
rescue URI::InvalidURIError
  DIRECT
end
|
|
213
|
+
|
|
214
|
+
def utm_params(url)
|
|
215
|
+
out = {}
|
|
216
|
+
return out unless url.is_a?(String) && url.include?("?")
|
|
217
|
+
|
|
218
|
+
query = url.split("?", 2)[1].split("#", 2)[0]
|
|
219
|
+
URI.decode_www_form(query).each do |key, value|
|
|
220
|
+
name = key.to_s.downcase
|
|
221
|
+
next unless UTM_PARAMS.include?(name)
|
|
222
|
+
next if out.key?(name)
|
|
223
|
+
stripped = value.to_s.strip
|
|
224
|
+
out[name] = stripped unless stripped.empty?
|
|
225
|
+
end
|
|
226
|
+
out
|
|
227
|
+
rescue ArgumentError
|
|
228
|
+
out
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
# Report rows for one facet: the top TOP_ROWS keys by session count (as
# chosen by top_counts), each with its conversion count, conversion rate in
# percent rounded to one decimal (nil when there are no sessions), a
# low_volume flag for keys with fewer than MIN_SESSIONS_FOR_RATE sessions,
# and the example sessions remembered for the key.
def rows_for(facet)
  top_counts(facet[:sessions], limit: TOP_ROWS).map do |key, sessions|
    conversions = facet[:conversions][key]
    rate = (conversions.to_f / sessions * 100).round(1) unless sessions.zero?

    {
      key: key,
      sessions: sessions,
      conversions: conversions,
      conversion_rate: rate,
      low_volume: sessions < MIN_SESSIONS_FOR_RATE,
      converting_example: facet[:converting][key],
      non_converting_example: facet[:non_converting][key]
    }
  end
end
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
end
|