sentiero 1.0.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +7 -0
  3. data/README.md +679 -0
  4. data/lib/sentiero/analytics/analyzer.rb +91 -0
  5. data/lib/sentiero/analytics/bounded.rb +29 -0
  6. data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
  7. data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
  8. data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
  9. data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
  10. data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
  11. data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
  12. data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
  13. data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
  14. data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
  15. data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
  16. data/lib/sentiero/analytics/entry_attribution.rb +71 -0
  17. data/lib/sentiero/analytics/error_discovery.rb +118 -0
  18. data/lib/sentiero/analytics/events.rb +21 -0
  19. data/lib/sentiero/analytics/exporter.rb +242 -0
  20. data/lib/sentiero/analytics/form_analyzer.rb +153 -0
  21. data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
  22. data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
  23. data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
  24. data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
  25. data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
  26. data/lib/sentiero/analytics/problem_detail.rb +97 -0
  27. data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
  28. data/lib/sentiero/analytics/segmenter.rb +133 -0
  29. data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
  30. data/lib/sentiero/analytics/stats.rb +30 -0
  31. data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
  32. data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
  33. data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
  34. data/lib/sentiero/configuration.rb +184 -0
  35. data/lib/sentiero/erasure.rb +48 -0
  36. data/lib/sentiero/fingerprint.rb +34 -0
  37. data/lib/sentiero/ip_anonymizer.rb +29 -0
  38. data/lib/sentiero/redaction/config.rb +61 -0
  39. data/lib/sentiero/redaction.rb +207 -0
  40. data/lib/sentiero/reporter/configuration.rb +50 -0
  41. data/lib/sentiero/reporter/context.rb +31 -0
  42. data/lib/sentiero/reporter/dispatcher.rb +91 -0
  43. data/lib/sentiero/reporter/http_transport.rb +57 -0
  44. data/lib/sentiero/reporter/log_transport.rb +26 -0
  45. data/lib/sentiero/reporter/middleware.rb +62 -0
  46. data/lib/sentiero/reporter/normalizer.rb +14 -0
  47. data/lib/sentiero/reporter/null_transport.rb +18 -0
  48. data/lib/sentiero/reporter/report_context.rb +29 -0
  49. data/lib/sentiero/reporter/scrubber.rb +47 -0
  50. data/lib/sentiero/reporter/test_helper.rb +32 -0
  51. data/lib/sentiero/reporter/test_transport.rb +28 -0
  52. data/lib/sentiero/reporter.rb +214 -0
  53. data/lib/sentiero/roda.rb +47 -0
  54. data/lib/sentiero/store/error_store.rb +220 -0
  55. data/lib/sentiero/store/limits.rb +31 -0
  56. data/lib/sentiero/store/session_store.rb +118 -0
  57. data/lib/sentiero/store.rb +72 -0
  58. data/lib/sentiero/stores/file.rb +566 -0
  59. data/lib/sentiero/stores/memory.rb +362 -0
  60. data/lib/sentiero/stores/redis/keys.rb +59 -0
  61. data/lib/sentiero/stores/redis/lua.rb +119 -0
  62. data/lib/sentiero/stores/redis.rb +665 -0
  63. data/lib/sentiero/stores/sqlite/schema.rb +79 -0
  64. data/lib/sentiero/stores/sqlite.rb +626 -0
  65. data/lib/sentiero/user_agent.rb +32 -0
  66. data/lib/sentiero/version.rb +5 -0
  67. data/lib/sentiero/web/analytics_app.rb +538 -0
  68. data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
  69. data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
  70. data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
  71. data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
  72. data/lib/sentiero/web/assets/manifest.json +11 -0
  73. data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
  74. data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
  75. data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
  76. data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
  77. data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
  78. data/lib/sentiero/web/assets_app.rb +42 -0
  79. data/lib/sentiero/web/base_app.rb +319 -0
  80. data/lib/sentiero/web/basic_auth.rb +27 -0
  81. data/lib/sentiero/web/basic_auth_check.rb +41 -0
  82. data/lib/sentiero/web/body_reader.rb +44 -0
  83. data/lib/sentiero/web/csv_writer.rb +45 -0
  84. data/lib/sentiero/web/dashboard_app.rb +236 -0
  85. data/lib/sentiero/web/errors_app.rb +97 -0
  86. data/lib/sentiero/web/escaping.rb +37 -0
  87. data/lib/sentiero/web/events_app.rb +196 -0
  88. data/lib/sentiero/web/formatting.rb +43 -0
  89. data/lib/sentiero/web/ingest_app.rb +92 -0
  90. data/lib/sentiero/web/manifest.rb +43 -0
  91. data/lib/sentiero/web/monitoring_app.rb +316 -0
  92. data/lib/sentiero/web/script_tag.rb +57 -0
  93. data/lib/sentiero/web/shareable_replay.rb +88 -0
  94. data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
  95. data/lib/sentiero/web/templates/_brand.html.erb +18 -0
  96. data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
  97. data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
  98. data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
  99. data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
  100. data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
  101. data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
  102. data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
  103. data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
  104. data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
  105. data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
  106. data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
  107. data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
  108. data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
  109. data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
  110. data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
  111. data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
  112. data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
  113. data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
  114. data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
  115. data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
  116. data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
  117. data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
  118. data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
  119. data/lib/sentiero/web/templates/event_show.html.erb +52 -0
  120. data/lib/sentiero/web/templates/events_index.html.erb +177 -0
  121. data/lib/sentiero/web/templates/export_index.html.erb +69 -0
  122. data/lib/sentiero/web/templates/forms.html.erb +105 -0
  123. data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
  124. data/lib/sentiero/web/templates/import.html.erb +39 -0
  125. data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
  126. data/lib/sentiero/web/templates/segments.html.erb +114 -0
  127. data/lib/sentiero/web/templates/session_show.html.erb +195 -0
  128. data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
  129. data/lib/sentiero/web/track_app.rb +57 -0
  130. data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
  131. data/lib/sentiero/web/views/analyzer_view.rb +27 -0
  132. data/lib/sentiero/web/views/base_view.rb +76 -0
  133. data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
  134. data/lib/sentiero/web/views/conversions_view.rb +41 -0
  135. data/lib/sentiero/web/views/engagement_view.rb +67 -0
  136. data/lib/sentiero/web/views/errors_index_view.rb +37 -0
  137. data/lib/sentiero/web/views/event_show_view.rb +20 -0
  138. data/lib/sentiero/web/views/events_index_view.rb +56 -0
  139. data/lib/sentiero/web/views/export_view.rb +23 -0
  140. data/lib/sentiero/web/views/forms_view.rb +28 -0
  141. data/lib/sentiero/web/views/frustration_view.rb +15 -0
  142. data/lib/sentiero/web/views/funnel_view.rb +36 -0
  143. data/lib/sentiero/web/views/heatmap_view.rb +34 -0
  144. data/lib/sentiero/web/views/import_view.rb +13 -0
  145. data/lib/sentiero/web/views/page_report_view.rb +43 -0
  146. data/lib/sentiero/web/views/problem_show_view.rb +46 -0
  147. data/lib/sentiero/web/views/scroll_view.rb +23 -0
  148. data/lib/sentiero/web/views/segments_view.rb +28 -0
  149. data/lib/sentiero/web/views/session_show_view.rb +105 -0
  150. data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
  151. data/lib/sentiero/web/views/vitals_view.rb +45 -0
  152. data/lib/sentiero/web/views.rb +24 -0
  153. data/lib/sentiero/window_ref.rb +6 -0
  154. data/lib/sentiero.rb +69 -0
  155. metadata +232 -0
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../events"
4
+ require_relative "../bounded"
5
+
6
+ module Sentiero
7
+ module Analytics
8
+ # Per-segment frustration attribution. Attributes incidents to segments by
9
+ # object identity (e.equal?(incident[:event])) so a same-millisecond Meta
10
+ # boundary cannot mis-attribute one.
11
+ #
12
+ # IMPORTANT: works on the RAW detector output (before refine_incidents
13
+ # de-noise), so dead_count may EXCEED /analytics/frustration for the same
14
+ # URL. Intentional: completeness over precision (no rages a de-noise rule
15
+ # might withdraw are missed).
16
class FrustrationCollector
  include Events
  include Bounded

  # Recorder tag carrying the clicked element's CSS selector.
  CLICK_TAG = "__click"

  attr_reader :rage_count, :dead_count, :selectors, :capped

  # max_selectors is handed to bounded_increment as the limit for the
  # selector tally (presumably nil means unbounded; confirm against Bounded).
  def initialize(max_selectors: nil)
    @max_selectors = max_selectors
    @rage_count = 0
    @dead_count = 0
    @selectors = Hash.new(0)
    @capped = false
  end

  # Tallies every incident whose :event is, by object identity, one of the
  # segment's events. A "rage_click" subtype bumps rage_count and the tally
  # of the nearest click selector; any other subtype bumps dead_count.
  #
  # Returns the number attributed to this segment.
  def collect(incidents, segment)
    return 0 if incidents.empty?

    # Identity-keyed lookup built once, so the segment is not rescanned for
    # every incident. Same semantics as e.equal?(incident[:event]).
    members = segment.each_with_object({}.compare_by_identity) { |event, seen| seen[event] = true }

    attributed = 0
    incidents.each do |incident|
      next unless members.key?(incident[:event])

      if incident[:subtype] == "rage_click"
        @rage_count += 1
        selector = nearest_click_selector(segment, incident[:timestamp])
        if selector
          # A refused increment means the selector tally hit its limit.
          @capped = true unless bounded_increment(@selectors, selector, @max_selectors)
        end
      else
        @dead_count += 1
      end

      attributed += 1
    end
    attributed
  end

  private

  # Nearest "__click" selector by timestamp. No distance ceiling — the
  # segment is bounded to one page. On a tie the earliest event in the
  # segment wins. Returns nil when the segment has no usable click event or
  # when the incident timestamp is not numeric (the rage click is still
  # counted, just without a selector).
  def nearest_click_selector(segment, timestamp)
    # Without this guard a nil/non-numeric timestamp raises TypeError in the
    # distance subtraction below.
    return nil unless timestamp.is_a?(Numeric)

    nearest = nil
    nearest_distance = nil
    segment.each do |event|
      next unless event["type"] == CUSTOM

      data = event["data"]
      next unless data.is_a?(Hash) && data["tag"] == CLICK_TAG

      selector = data.dig("payload", "selector")
      next unless selector.is_a?(String) && !selector.empty?

      ts = event["timestamp"]
      next unless ts.is_a?(Numeric)

      distance = (ts - timestamp).abs
      if nearest_distance.nil? || distance < nearest_distance
        nearest_distance = distance
        nearest = selector
      end
    end
    nearest
  end
end
84
+ end
85
+ end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../events"
4
+ require_relative "../stats"
5
+ require_relative "../bounded"
6
+
7
+ module Sentiero
8
+ module Analytics
9
+ # Per-URL scroll depth across page segments and windows.
10
+ #
11
+ # rrweb Metas carry the viewport height but NOT the document height, so page
12
+ # height is ESTIMATED as the deepest (max scroll + viewport) any sample
13
+ # reached — exact when somebody read to the end, a lower bound otherwise.
14
+ # Viewport-less samples fall back to pixels (no percentage derivable).
15
+ #
16
+ # Per window: #observe each segment, then #flush_window once to commit each
17
+ # URL's deepest segment as ONE sample.
18
class ScrollCollector
  include Events
  include Stats
  include Bounded

  DISTRIBUTION_BINS = %w[0-25 25-50 50-75 75-100].freeze

  attr_reader :capped

  # max_urls: nil unbounded; an Integer caps distinct URLs, flipping #capped.
  def initialize(max_urls: nil)
    @max_urls = max_urls
    @samples_by_url = {} # url => [{max_y:, viewport_height:}, ...]
    @window = {} # url => deepest segment depth in the current window
    @capped = false
  end

  # Remembers the segment's depth for url when it beats what the current
  # window already holds; a segment without any scroll is skipped.
  def observe(url, segment)
    candidate = segment_depth(segment)
    return unless candidate

    best = @window[url]
    deeper = best.nil? || candidate[:max_y] > best[:max_y]
    @window[url] = candidate if deeper
  end

  # Commits the window's per-url winner as a single sample each, then starts
  # a fresh window. A url refused by the bound only flips #capped.
  def flush_window
    @window.each_pair do |url, deepest|
      bucket = bounded_fetch(@samples_by_url, url, @max_urls) { [] }
      unless bucket.nil?
        bucket << deepest
        next
      end
      @capped = true
    end
    @window = {}
  end

  # Summary for one URL, or nil if it has no samples.
  def summarize(url)
    recorded = @samples_by_url[url]
    return nil if !recorded || recorded.empty?

    summarize_samples(recorded)
  end

  # url => summary for every URL that holds samples.
  def pages
    @samples_by_url.to_h { |url, recorded| [url, summarize_samples(recorded)] }
  end

  private

  # {max_y:, viewport_height:} for the segment, or nil when it never
  # scrolled below the top.
  def segment_depth(segment)
    offsets = segment.filter_map { |event| scroll_y(event) }
    deepest = offsets.max
    return nil unless deepest && deepest > 0

    {max_y: deepest, viewport_height: viewport_height(segment)}
  end

  # Y offset of a document-level scroll event, else nil. Scrolls on nodes
  # other than nil/1 are inner containers and do not count as page depth.
  def scroll_y(event)
    payload = event["data"] if event["type"] == INCREMENTAL
    return nil unless payload.is_a?(Hash) && payload["source"] == SOURCE_SCROLL

    node_id = payload["id"]
    return nil unless node_id.nil? || node_id == 1

    offset = payload["y"]
    offset if offset.is_a?(Numeric)
  end

  # Positive numeric height reported by the segment's first Meta, else nil.
  def viewport_height(segment)
    meta = meta_event(segment)
    return nil if meta.nil?

    reported = meta.dig("data", "height")
    reported if reported.is_a?(Numeric) && reported > 0
  end

  # First Meta event that carries a Hash payload.
  def meta_event(events)
    events.find do |event|
      next false unless event["type"] == META

      event["data"].is_a?(Hash)
    end
  end

  # Aggregates one URL's samples. Page height is the deepest viewport bottom
  # seen; percentages are only derived for samples that have a viewport.
  def summarize_samples(samples)
    depths_px = samples.map { |sample| sample[:max_y] }
    tallest = samples.filter_map { |sample| viewport_bottom(sample) }.max
    shares = samples.filter_map { |sample| depth_pct(sample, tallest) }
    average_share = shares.empty? ? nil : mean(shares)

    {
      session_count: samples.size,
      avg_depth_px: mean(depths_px),
      avg_depth_pct: average_share,
      page_height_px: tallest,
      fold_lines: fold_lines(shares),
      distribution: distribution(samples, tallest)
    }
  end

  # Scroll offset plus viewport height; nil for viewport-less samples.
  def viewport_bottom(sample)
    viewport = sample[:viewport_height]
    return nil unless viewport

    sample[:max_y] + viewport
  end

  # Viewport bottom as a percentage of the page height, capped at 100.
  def depth_pct(sample, page_height)
    reached = viewport_bottom(sample)
    return nil unless reached && page_height

    share = reached.to_f / page_height * 100
    [share, 100.0].min
  end

  # p50/p75/p90 of the percentages; all nil when there are none.
  def fold_lines(pcts)
    ordered = pcts.empty? ? nil : pcts.sort
    {p50: 50, p75: 75, p90: 90}.transform_values do |rank|
      ordered && percentile(ordered, rank)
    end
  end

  # Counts samples per quartile bin. A sample without a percentage is placed
  # by its pixel depth relative to the deepest sample instead.
  def distribution(samples, page_height)
    deepest_px = samples.map { |sample| sample[:max_y] }.max
    tally = DISTRIBUTION_BINS.each_with_object({}) { |label, acc| acc[label] = 0 }

    samples.each_with_object(tally) do |sample, acc|
      share = depth_pct(sample, page_height)
      share ||= sample[:max_y].to_f / deepest_px * 100
      acc[bin_for(share)] += 1
    end
  end

  # Bin label for a percentage; values on a boundary fall in the lower bin.
  def bin_for(pct)
    quartile = (pct / 25.0).ceil.clamp(1, 4)
    DISTRIBUTION_BINS[quartile - 1]
  end
end
155
+ end
156
+ end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../events"
4
+ require_relative "../stats"
5
+
6
+ module Sentiero
7
+ module Analytics
8
+ # Per-segment web-vitals accumulator. The recorder emits one "__perf" custom
9
+ # event per metric report, with data.payload {metric, value, rating}.
10
+ #
11
+ # Within a single segment, multiple reports for the same metric collapse to
12
+ # the LAST (the web-vitals library re-reports as the page evolves; only the
13
+ # final report is authoritative). Samples accumulate across all segments.
14
+ #
15
+ # #summarize's worst carries :value too; PageReportAnalyzer strips it afterward.
16
class VitalsCollector
  include Events
  include Stats

  # Recorder tag for a web-vitals report.
  PERF_TAG = "__perf"

  attr_reader :capped

  # max_samples: nil unbounded; an Integer caps each metric's values, flipping #capped.
  def initialize(max_samples: nil)
    @max_samples = max_samples
    @metrics = {} # metric => {values:, ratings:, worst:}
    @capped = false
  end

  # Folds one segment into the running totals. Only the last report per
  # metric within the segment is kept. session_id/window_id/anchor are stored
  # on the worst (highest-value) sample; its offset_ms is computed from
  # anchor and the sample's timestamp.
  def collect(segment, session_id:, window_id:, anchor:)
    latest = segment.each_with_object({}) do |event, by_metric|
      parsed = parse_sample(event)
      by_metric[parsed[:metric]] = parsed if parsed
    end

    latest.each_value do |sample|
      bucket = (@metrics[sample[:metric]] ||= {values: [], ratings: Hash.new(0), worst: nil})
      if @max_samples && bucket[:values].size >= @max_samples
        @capped = true
        next
      end

      record_sample(bucket, sample, session_id, window_id, anchor)
    end
  end

  # {sample_count:, metrics: {metric => per-metric summary}}.
  def summarize
    per_metric = @metrics.to_h { |name, entry| [name, summarize_metric(entry)] }
    total = per_metric.each_value.sum { |summary| summary[:samples] }

    {sample_count: total, metrics: per_metric}
  end

  private

  # Appends the value, counts a non-empty String rating, and replaces the
  # metric's worst sample when this value is strictly higher.
  def record_sample(bucket, sample, session_id, window_id, anchor)
    value = sample[:value]
    bucket[:values] << value

    rating = sample[:rating]
    bucket[:ratings][rating] += 1 if rating.is_a?(String) && !rating.empty?

    worst = bucket[:worst]
    return unless worst.nil? || value > worst[:value]

    bucket[:worst] = {
      session_id: session_id,
      window_id: window_id,
      offset_ms: offset_ms(anchor, sample[:timestamp]),
      value: value
    }
  end

  # {metric:, value:, rating:, timestamp:} for a well-formed "__perf" custom
  # event (non-empty String metric, Numeric value); nil for anything else.
  def parse_sample(event)
    data = event["data"] if event["type"] == CUSTOM
    payload = data["payload"] if data.is_a?(Hash) && data["tag"] == PERF_TAG
    return nil unless payload.is_a?(Hash)

    name, reading = payload.values_at("metric", "value")
    named = name.is_a?(String) && !name.empty?
    return nil unless named && reading.is_a?(Numeric)

    {metric: name, value: reading, rating: payload["rating"], timestamp: event["timestamp"]}
  end

  # Sample count, p50/p75/p90, rating tally and worst sample for one metric.
  def summarize_metric(entry)
    ordered = entry[:values].sort
    cuts = {p50: 50, p75: 75, p90: 90}.transform_values { |rank| percentile(ordered, rank) }

    {samples: ordered.size, **cuts, ratings: entry[:ratings], worst: entry[:worst]}
  end
end
103
+ end
104
+ end
@@ -0,0 +1,247 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+
5
+ require_relative "analyzer"
6
+ require_relative "funnel_analyzer"
7
+
8
+ module Sentiero
9
+ module Analytics
10
+ # Conversion rate by acquisition dimension (entry page, referrer host, UTM)
11
+ # for one custom-event tag. A session counts as converting at most once,
12
+ # regardless of how many times/windows the tag fired.
13
class ConversionAnalyzer < Analyzer
  TOP_ROWS = 15

  # Below this many sessions a rate is too thin; rows flagged low_volume.
  MIN_SESSIONS_FOR_RATE = 5

  # A new key past the cap is dropped and sets was_truncated.
  MAX_DIMENSION_KEYS = 200

  DIRECT = "(direct / none)"

  # Matched case-insensitively.
  UTM_PARAMS = %w[utm_source utm_medium utm_campaign].freeze

  # No tag selected → empty facets, but tag vocabulary is still collected.
  #
  # Returns {tags:, selected_tag:, entry_pages:, referrers:, utm:,
  # was_truncated:}. was_truncated is set when the scan reported hitting its
  # cap, or when a tag/dimension key was dropped for exceeding a limit.
  def analyze(tag = nil, limit: nil, since: nil, until_time: nil)
    selected = FunnelAnalyzer.usable_steps([tag].compact).first

    tags = {}
    sessions = {}
    @truncated = false

    # One state per session, shared by all of that session's windows.
    _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
      session_id = summary[:session_id]
      state = sessions[session_id] ||= new_state(summary, window_id)

      update_entry_candidate(state, events)
      collect_vocabulary(tags, events)
      record_conversion(state, selected, window_id, events) if selected
    end

    facets = selected ? build_facets(sessions) : empty_facets

    {
      tags: tags.keys.sort,
      selected_tag: selected,
      entry_pages: facets[:entry_pages],
      referrers: facets[:referrers],
      utm: facets[:utm],
      was_truncated: @truncated || hit_cap
    }
  end

  private

  # entry_url precedence: an explicit entry_url is authoritative (anchor
  # -Infinity so no later Meta can displace it), else the first Meta href wins.
  def new_state(summary, first_window)
    metadata = summary[:metadata] || {}
    entry_url = metadata["entry_url"]
    {
      session_id: summary[:session_id],
      entry_url: entry_url,
      entry_anchor: entry_url ? -Float::INFINITY : nil,
      referrer: metadata["entry_referrer"] || metadata["referrer"],
      converted: false,
      convert_window: nil,
      convert_offset: nil,
      convert_anchor: nil,
      first_window: first_window
    }
  end

  # Adds every custom-event tag that FunnelAnalyzer does not treat as
  # internal to the vocabulary. Once FunnelAnalyzer::MAX_TAGS distinct tags
  # are held, new ones are dropped and the result is marked truncated.
  def collect_vocabulary(tags, events)
    events.each do |event|
      next unless event.is_a?(Hash) && event["type"] == CUSTOM
      data = event["data"]
      next unless data.is_a?(Hash)
      tag = data["tag"]
      next if FunnelAnalyzer.internal_tag?(tag)

      next if tags.key?(tag)
      if tags.size >= FunnelAnalyzer::MAX_TAGS
        @truncated = true
        next
      end
      tags[tag] = true
    end
  end

  # Keeps the earliest conversion across windows so example coordinates are
  # deterministic (earlier window start, or same start with earlier offset).
  def record_conversion(state, tag, window_id, events)
    # The window's first event supplies the anchor. Check it is a Hash, as
    # every other event access here does, so a malformed first entry yields
    # a nil anchor instead of raising.
    first = events.first
    anchor = first.is_a?(Hash) ? first.fetch("timestamp", nil) : nil
    match = events.find do |event|
      event.is_a?(Hash) && event["type"] == CUSTOM &&
        event["data"].is_a?(Hash) && event["data"]["tag"] == tag &&
        event["timestamp"].is_a?(Numeric)
    end
    return unless match

    offset = offset_ms(anchor, match["timestamp"])
    return if state[:converted] && !earlier_match?(anchor, offset, state)

    state[:converted] = true
    state[:convert_window] = window_id
    state[:convert_offset] = offset
    state[:convert_anchor] = anchor
  end

  # True when (anchor, offset) precedes the conversion already stored in
  # state. If either anchor is non-numeric the stored conversion is kept.
  def earlier_match?(anchor, offset, state)
    cur_anchor = state[:convert_anchor]
    if anchor.is_a?(Numeric) && cur_anchor.is_a?(Numeric)
      return true if anchor < cur_anchor
      return false if anchor > cur_anchor
      return offset < state[:convert_offset]
    end
    false
  end

  # Facet shape returned when no tag is selected.
  def empty_facets
    {entry_pages: [], referrers: [], utm: {source: [], medium: [], campaign: []}}
  end

  # Runs after the scan, so every window of every session has been seen.
  def build_facets(sessions)
    entry_pages = new_facet
    referrers = new_facet
    utm = {source: new_facet, medium: new_facet, campaign: new_facet}

    sessions.each_value do |state|
      entry_url = state[:entry_url]
      entry_key = normalize_entry(entry_url)
      # No resolvable entry page => no acquisition data; contribute to no facet.
      next unless entry_key

      fold(entry_pages, entry_key, state)
      fold(referrers, referrer_key(state[:referrer], entry_url), state)
      fold_utm(utm, entry_url, state)
    end

    {
      entry_pages: rows_for(entry_pages),
      referrers: rows_for(referrers),
      utm: {
        source: rows_for(utm[:source]),
        medium: rows_for(utm[:medium]),
        campaign: rows_for(utm[:campaign])
      }
    }
  end

  # Accumulator for one dimension: per-key session and conversion counts plus
  # one converting and one non-converting example per key.
  def new_facet
    {sessions: Hash.new(0), conversions: Hash.new(0), converting: {}, non_converting: {}}
  end

  # Counts the session under key (nil keys are ignored). The first session
  # seen for a key in each outcome becomes that key's example. A new key past
  # MAX_DIMENSION_KEYS is dropped and marks the result truncated.
  def fold(facet, key, state)
    return if key.nil?

    if !facet[:sessions].key?(key) && facet[:sessions].size >= MAX_DIMENSION_KEYS
      @truncated = true
      return
    end

    facet[:sessions][key] += 1
    if state[:converted]
      facet[:conversions][key] += 1
      facet[:converting][key] ||= {
        session_id: state[:session_id],
        window_id: state[:convert_window],
        offset_ms: state[:convert_offset]
      }
    else
      facet[:non_converting][key] ||= {
        session_id: state[:session_id],
        window_id: state[:first_window],
        offset_ms: 0
      }
    end
  end

  # Folds the session into each UTM facet its entry URL has a value for.
  def fold_utm(utm, entry_url, state)
    params = utm_params(entry_url)
    fold(utm[:source], params["utm_source"], state)
    fold(utm[:medium], params["utm_medium"], state)
    fold(utm[:campaign], params["utm_campaign"], state)
  end

  # scheme://host[:non-default-port]/path, without query or fragment. nil
  # when the URL is blank, unparseable, or lacks a scheme or host.
  def normalize_entry(url)
    return nil unless url.is_a?(String) && !url.empty?

    uri = URI.parse(url)
    return nil unless uri.scheme && uri.host

    port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ""
    "#{uri.scheme}://#{uri.host}#{port}#{uri.path}"
  rescue URI::InvalidURIError
    nil
  end

  # Same-origin referrers are from within the site, not acquisition, so dropped.
  # A blank, host-less or unparseable referrer counts as DIRECT.
  def referrer_key(referrer, entry_url)
    return nil if same_origin?(referrer, entry_url)
    return DIRECT unless referrer.is_a?(String) && !referrer.empty?

    host = URI.parse(referrer).host
    (host && !host.empty?) ? host : DIRECT
  rescue URI::InvalidURIError
    DIRECT
  end

  # {"utm_source" => ..., ...} from the URL's query string. Names match
  # case-insensitively, the first occurrence of each wins, and blank values
  # are skipped. A query that URI.decode_www_form rejects yields {}.
  def utm_params(url)
    out = {}
    return out unless url.is_a?(String) && url.include?("?")

    # "".split("#", 2) is [], so a URL ending in a bare "?" used to pass nil
    # to URI.decode_www_form, raising NoMethodError (not rescued below).
    query = url.split("?", 2)[1].to_s.split("#", 2)[0].to_s
    return out if query.empty?

    URI.decode_www_form(query).each do |key, value|
      name = key.to_s.downcase
      next unless UTM_PARAMS.include?(name)
      next if out.key?(name)
      stripped = value.to_s.strip
      out[name] = stripped unless stripped.empty?
    end
    out
  rescue ArgumentError
    out
  end

  # Output rows for the facet's top TOP_ROWS keys (as chosen by top_counts),
  # each with a percentage conversion rate rounded to one decimal.
  def rows_for(facet)
    top_counts(facet[:sessions], limit: TOP_ROWS).map do |key, sessions|
      conversions = facet[:conversions][key]
      {
        key: key,
        sessions: sessions,
        conversions: conversions,
        conversion_rate: sessions.zero? ? nil : (conversions.to_f / sessions * 100).round(1),
        low_volume: sessions < MIN_SESSIONS_FOR_RATE,
        converting_example: facet[:converting][key],
        non_converting_example: facet[:non_converting][key]
      }
    end
  end
end
246
+ end
247
+ end