sentiero 1.0.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +7 -0
  3. data/README.md +679 -0
  4. data/lib/sentiero/analytics/analyzer.rb +91 -0
  5. data/lib/sentiero/analytics/bounded.rb +29 -0
  6. data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
  7. data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
  8. data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
  9. data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
  10. data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
  11. data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
  12. data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
  13. data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
  14. data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
  15. data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
  16. data/lib/sentiero/analytics/entry_attribution.rb +71 -0
  17. data/lib/sentiero/analytics/error_discovery.rb +118 -0
  18. data/lib/sentiero/analytics/events.rb +21 -0
  19. data/lib/sentiero/analytics/exporter.rb +242 -0
  20. data/lib/sentiero/analytics/form_analyzer.rb +153 -0
  21. data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
  22. data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
  23. data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
  24. data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
  25. data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
  26. data/lib/sentiero/analytics/problem_detail.rb +97 -0
  27. data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
  28. data/lib/sentiero/analytics/segmenter.rb +133 -0
  29. data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
  30. data/lib/sentiero/analytics/stats.rb +30 -0
  31. data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
  32. data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
  33. data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
  34. data/lib/sentiero/configuration.rb +184 -0
  35. data/lib/sentiero/erasure.rb +48 -0
  36. data/lib/sentiero/fingerprint.rb +34 -0
  37. data/lib/sentiero/ip_anonymizer.rb +29 -0
  38. data/lib/sentiero/redaction/config.rb +61 -0
  39. data/lib/sentiero/redaction.rb +207 -0
  40. data/lib/sentiero/reporter/configuration.rb +50 -0
  41. data/lib/sentiero/reporter/context.rb +31 -0
  42. data/lib/sentiero/reporter/dispatcher.rb +91 -0
  43. data/lib/sentiero/reporter/http_transport.rb +57 -0
  44. data/lib/sentiero/reporter/log_transport.rb +26 -0
  45. data/lib/sentiero/reporter/middleware.rb +62 -0
  46. data/lib/sentiero/reporter/normalizer.rb +14 -0
  47. data/lib/sentiero/reporter/null_transport.rb +18 -0
  48. data/lib/sentiero/reporter/report_context.rb +29 -0
  49. data/lib/sentiero/reporter/scrubber.rb +47 -0
  50. data/lib/sentiero/reporter/test_helper.rb +32 -0
  51. data/lib/sentiero/reporter/test_transport.rb +28 -0
  52. data/lib/sentiero/reporter.rb +214 -0
  53. data/lib/sentiero/roda.rb +47 -0
  54. data/lib/sentiero/store/error_store.rb +220 -0
  55. data/lib/sentiero/store/limits.rb +31 -0
  56. data/lib/sentiero/store/session_store.rb +118 -0
  57. data/lib/sentiero/store.rb +72 -0
  58. data/lib/sentiero/stores/file.rb +566 -0
  59. data/lib/sentiero/stores/memory.rb +362 -0
  60. data/lib/sentiero/stores/redis/keys.rb +59 -0
  61. data/lib/sentiero/stores/redis/lua.rb +119 -0
  62. data/lib/sentiero/stores/redis.rb +665 -0
  63. data/lib/sentiero/stores/sqlite/schema.rb +79 -0
  64. data/lib/sentiero/stores/sqlite.rb +626 -0
  65. data/lib/sentiero/user_agent.rb +32 -0
  66. data/lib/sentiero/version.rb +5 -0
  67. data/lib/sentiero/web/analytics_app.rb +538 -0
  68. data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
  69. data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
  70. data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
  71. data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
  72. data/lib/sentiero/web/assets/manifest.json +11 -0
  73. data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
  74. data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
  75. data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
  76. data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
  77. data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
  78. data/lib/sentiero/web/assets_app.rb +42 -0
  79. data/lib/sentiero/web/base_app.rb +319 -0
  80. data/lib/sentiero/web/basic_auth.rb +27 -0
  81. data/lib/sentiero/web/basic_auth_check.rb +41 -0
  82. data/lib/sentiero/web/body_reader.rb +44 -0
  83. data/lib/sentiero/web/csv_writer.rb +45 -0
  84. data/lib/sentiero/web/dashboard_app.rb +236 -0
  85. data/lib/sentiero/web/errors_app.rb +97 -0
  86. data/lib/sentiero/web/escaping.rb +37 -0
  87. data/lib/sentiero/web/events_app.rb +196 -0
  88. data/lib/sentiero/web/formatting.rb +43 -0
  89. data/lib/sentiero/web/ingest_app.rb +92 -0
  90. data/lib/sentiero/web/manifest.rb +43 -0
  91. data/lib/sentiero/web/monitoring_app.rb +316 -0
  92. data/lib/sentiero/web/script_tag.rb +57 -0
  93. data/lib/sentiero/web/shareable_replay.rb +88 -0
  94. data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
  95. data/lib/sentiero/web/templates/_brand.html.erb +18 -0
  96. data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
  97. data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
  98. data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
  99. data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
  100. data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
  101. data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
  102. data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
  103. data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
  104. data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
  105. data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
  106. data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
  107. data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
  108. data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
  109. data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
  110. data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
  111. data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
  112. data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
  113. data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
  114. data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
  115. data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
  116. data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
  117. data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
  118. data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
  119. data/lib/sentiero/web/templates/event_show.html.erb +52 -0
  120. data/lib/sentiero/web/templates/events_index.html.erb +177 -0
  121. data/lib/sentiero/web/templates/export_index.html.erb +69 -0
  122. data/lib/sentiero/web/templates/forms.html.erb +105 -0
  123. data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
  124. data/lib/sentiero/web/templates/import.html.erb +39 -0
  125. data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
  126. data/lib/sentiero/web/templates/segments.html.erb +114 -0
  127. data/lib/sentiero/web/templates/session_show.html.erb +195 -0
  128. data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
  129. data/lib/sentiero/web/track_app.rb +57 -0
  130. data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
  131. data/lib/sentiero/web/views/analyzer_view.rb +27 -0
  132. data/lib/sentiero/web/views/base_view.rb +76 -0
  133. data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
  134. data/lib/sentiero/web/views/conversions_view.rb +41 -0
  135. data/lib/sentiero/web/views/engagement_view.rb +67 -0
  136. data/lib/sentiero/web/views/errors_index_view.rb +37 -0
  137. data/lib/sentiero/web/views/event_show_view.rb +20 -0
  138. data/lib/sentiero/web/views/events_index_view.rb +56 -0
  139. data/lib/sentiero/web/views/export_view.rb +23 -0
  140. data/lib/sentiero/web/views/forms_view.rb +28 -0
  141. data/lib/sentiero/web/views/frustration_view.rb +15 -0
  142. data/lib/sentiero/web/views/funnel_view.rb +36 -0
  143. data/lib/sentiero/web/views/heatmap_view.rb +34 -0
  144. data/lib/sentiero/web/views/import_view.rb +13 -0
  145. data/lib/sentiero/web/views/page_report_view.rb +43 -0
  146. data/lib/sentiero/web/views/problem_show_view.rb +46 -0
  147. data/lib/sentiero/web/views/scroll_view.rb +23 -0
  148. data/lib/sentiero/web/views/segments_view.rb +28 -0
  149. data/lib/sentiero/web/views/session_show_view.rb +105 -0
  150. data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
  151. data/lib/sentiero/web/views/vitals_view.rb +45 -0
  152. data/lib/sentiero/web/views.rb +24 -0
  153. data/lib/sentiero/window_ref.rb +6 -0
  154. data/lib/sentiero.rb +69 -0
  155. metadata +232 -0
@@ -0,0 +1,235 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+ require_relative "frustration/detectors"
5
+
6
+ module Sentiero
7
+ module Analytics
8
+ # Cross-session frustration signals per page URL: rage clicks (bursts at
9
+ # the same spot) and dead clicks (clicks the page never responds to), plus
10
+ # top rage-clicked elements and per-incident replay links.
11
+ #
12
+ # Detection itself lives in Frustration::Detectors (pure Ruby ports of the
13
+ # JS detectors, frontend/src/dashboard/frustration.js, pinned by ported
14
+ # tests so the two can't drift). Over the detectors' raw dead clicks this
15
+ # class layers cross-session aggregation and a de-noising pass: an
16
+ # app-level custom event in the dead window counts as a page response; the
17
+ # final click of a segment navigated away from is withdrawn; an
18
+ # error-coincident dead click is kept and tagged kind: "error".
19
+ class FrustrationAnalyzer < Analyzer
20
+ # Custom-event tag carrying the clicked element's CSS selector.
21
+ CLICK_TAG = "__click"
22
+
23
+ # Recorder-internal annotation prefix and the browser JS-error tag;
24
+ # neither proves the page responded to a click.
25
+ INTERNAL_TAG_PREFIX = "__"
26
+ ERROR_TAG = "error"
27
+
28
+ # Max ms a "__click" annotation may sit from a rage cluster's first
29
+ # click and still be attributed to it.
30
+ NEAREST_CLICK_TOLERANCE_MS = 500
31
+
32
+ # Accumulation caps during the scan (sessions scan newest-first).
33
+ MAX_URLS = 200
34
+ MAX_SELECTORS_PER_URL = 200
35
+ MAX_INCIDENTS_PER_URL = 20
36
+ TOP_SELECTORS_LIMIT = 10
37
+
38
# Raw-detection hook for callers outside this class (EngagementAnalyzer,
# PageReportAnalyzer). Delegates straight to Frustration::Detectors and
# performs none of the cross-session aggregation done by #analyze.
def self.detect_frustration_events(events)
  Frustration::Detectors.detect_frustration_events(events)
end
42
+
43
# Aggregates frustration incidents per page URL across scanned sessions.
#
# The detectors run over each window's FULL event list (their response
# semantics span page boundaries by design); refine_incidents then pins
# every incident to the page segment its click happened on.
#
# Returns {pages: {url => summary}, was_truncated: ...}. was_truncated is
# truthy when the session scan reported its cap, when the URL-row cap
# rejected a page, or when record_incident reported a dropped selector.
def analyze(limit: nil, since: nil, until_time: nil)
  pages = {}
  lossy = false

  _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    raw = Frustration::Detectors.detect_frustration_events(events)
    next if raw.empty?

    refined = refine_incidents(raw, page_segments(events))
    next if refined.empty?

    annotations = click_annotations(events)

    refined.each do |incident|
      page = page_for(pages, incident[:url])
      unless page
        # URL-row cap is full: the incident is dropped, so flag the loss.
        lossy = true
        next
      end

      recorded = record_incident(page, incident, summary[:session_id], window_id, annotations)
      lossy = true unless recorded
      page[:session_ids][summary[:session_id]] = true
    end
  end

  summaries = pages.transform_values { |page| summarize(page) }
  {pages: summaries, was_truncated: lossy || hit_cap}
end
77
+
78
+ private
79
+
80
# Collects what each_page_segment yields into an ordered array of
# [url, segment] pairs; the anchor it also yields is not needed here.
def page_segments(events)
  [].tap do |pairs|
    each_page_segment(events) { |url, segment, _anchor| pairs << [url, segment] }
  end
end
87
+
88
# Attributes every incident to the segment containing its click event and
# de-noises dead clicks. The segment is located by object identity
# (equal?), not by timestamp, so a click sharing a millisecond with a Meta
# boundary cannot land on the wrong page.
#
# An incident is dropped when its click is in no segment, when the segment
# has no URL, when a dead click saw an app-level custom response, or when a
# dead click was the final click of a segment that was navigated away from.
# A dead click coinciding with an error event is kept with kind: "error".
# Survivors are returned merged with :url and :kind.
def refine_incidents(incidents, segments)
  incidents.filter_map do |incident|
    click = incident[:event]
    position = segments.index { |(_url, segment)| segment.any? { |candidate| candidate.equal?(click) } }
    next unless position

    url, segment = segments[position]
    next unless url

    kind = nil
    if incident[:subtype] == "dead_click"
      clicked_at = incident[:timestamp]
      next if custom_response?(segment, clicked_at)

      if error_coincident?(segment, clicked_at)
        kind = "error"
      elsif navigated_away_final_click?(segments, position, click)
        next
      end
    end

    incident.merge(url: url, kind: kind)
  end
end
113
+
114
# True when an app-level custom event falls inside the click's dead window,
# i.e. the page did react. Recorder-internal tags (INTERNAL_TAG_PREFIX) and
# the JS-error tag (ERROR_TAG) do not count as a response.
def custom_response?(segment, click_ts)
  any_custom_in_window?(segment, click_ts) do |tag|
    next false if tag == ERROR_TAG

    !tag.start_with?(INTERNAL_TAG_PREFIX)
  end
end
121
+
122
# True when a custom event tagged ERROR_TAG falls inside the click's dead
# window (same window rules as any_custom_in_window?).
def error_coincident?(segment, click_ts)
  any_custom_in_window?(segment, click_ts) do |tag|
    ERROR_TAG == tag
  end
end
125
+
126
# True when the segment holds a CUSTOM event whose string tag satisfies the
# block and whose timestamp lies in [click_ts, click_ts + DEAD_WINDOW_MS].
#
# Both ends are inclusive: the recorder emits navigation/error customs in
# the same tick as the native click, which the detectors' strictly-after
# rule would miss.
#
# Malformed input is skipped rather than raised on: a non-Hash event, a
# non-numeric timestamp, a non-Hash "data" value (Hash#dig would raise
# TypeError on e.g. a String), or a non-String tag.
def any_custom_in_window?(segment, click_ts)
  deadline = click_ts + Frustration::Detectors::DEAD_WINDOW_MS
  segment.any? do |event|
    next false unless event.is_a?(Hash) && event["type"] == CUSTOM

    ts = event["timestamp"]
    next false unless ts.is_a?(Numeric) && ts >= click_ts && ts <= deadline

    data = event["data"]
    tag = data["tag"] if data.is_a?(Hash)
    tag.is_a?(String) && yield(tag)
  end
end
141
+
142
# True when `event` is (by identity) the last click of a segment that is
# followed by another segment. Such a click likely CAUSED a navigation
# slower than the dead window, so its dead verdict is withdrawn.
#
# The window's FINAL segment is exempt: a window that merely ends proves no
# navigation. "Click" is decided by Frustration::Detectors.click? so the
# definition cannot drift from the detectors'.
def navigated_away_final_click?(segments, index, event)
  return false unless index < segments.size - 1

  segment_events = segments[index][1]
  final_click = segment_events.reverse_each.find do |candidate|
    Frustration::Detectors.click?(candidate)
  end
  final_click.equal?(event)
end
153
+
154
# Looks up (or lazily creates) the accumulator for a URL via bounded_fetch,
# capped at MAX_URLS rows; the caller treats a falsy result as "cap full".
# A fresh accumulator starts with zeroed counters and empty collections.
def page_for(pages, url)
  bounded_fetch(pages, url, MAX_URLS) do
    {
      rage_count: 0,
      dead_count: 0,
      session_ids: {},
      selectors: Hash.new(0),
      incidents: []
    }
  end
end
160
+
161
# Extracts [timestamp, selector] pairs from the window's "__click"
# annotations (CUSTOM events tagged CLICK_TAG).
#
# Events are skipped, never raised on, when they are not Hashes, when
# "data" or "payload" is not a Hash (Hash#dig would raise TypeError on a
# String/Array payload), when the selector is not a non-empty String, or
# when the timestamp is not Numeric.
def click_annotations(events)
  events.filter_map do |event|
    next unless event.is_a?(Hash) && event["type"] == CUSTOM

    data = event["data"]
    next unless data.is_a?(Hash) && data["tag"] == CLICK_TAG

    payload = data["payload"]
    selector = payload["selector"] if payload.is_a?(Hash)
    next unless selector.is_a?(String) && !selector.empty?

    timestamp = event["timestamp"]
    next unless timestamp.is_a?(Numeric)

    [timestamp, selector]
  end
end
175
+
176
# Folds one incident into a page accumulator.
#
# Rage clicks bump :rage_count and, when a nearby "__click" annotation
# supplies a selector, that selector's tally; anything else bumps
# :dead_count. An incident row is appended while fewer than
# MAX_INCIDENTS_PER_URL rows exist (a display bound; counts stay complete).
#
# Returns false only when the per-URL selector cap swallowed a NEW selector
# (the one lossy path); otherwise true.
def record_incident(page, incident, session_id, window_id, annotations)
  subtype = incident[:subtype]
  selector = nil
  within_cap = true

  if subtype == "rage_click"
    page[:rage_count] += 1
    selector = nearest_selector(annotations, incident[:timestamp])
    if selector
      tallies = page[:selectors]
      within_cap = tallies.key?(selector) || tallies.size < MAX_SELECTORS_PER_URL
      tallies[selector] += 1 if within_cap
    end
  else
    page[:dead_count] += 1
  end

  rows = page[:incidents]
  if rows.size < MAX_INCIDENTS_PER_URL
    # Negative offsets are clamped to 0 before rounding.
    rows << {
      subtype: subtype,
      session_id: session_id,
      window_id: window_id,
      offset_ms: [incident[:offset], 0].max.round,
      count: incident[:count],
      selector: selector,
      kind: incident[:kind]
    }
  end

  within_cap
end
210
+
211
# Picks the selector of the [timestamp, selector] annotation closest in time
# to `timestamp`. Returns nil when there are no annotations or the closest
# one is more than NEAREST_CLICK_TOLERANCE_MS away.
def nearest_selector(annotations, timestamp)
  closest_ts, selector = annotations.min_by { |(candidate_ts, _)| (candidate_ts - timestamp).abs }
  return nil if closest_ts.nil?

  selector if (closest_ts - timestamp).abs <= NEAREST_CLICK_TOLERANCE_MS
end
218
+
219
# Converts a page accumulator into its reported form: the raw counters, the
# number of distinct sessions seen, the ranked selectors and the incident
# rows (passed through untouched).
def summarize(page)
  rage, dead, session_ids, selectors, incidents =
    page.values_at(:rage_count, :dead_count, :session_ids, :selectors, :incidents)

  {
    rage_count: rage,
    dead_count: dead,
    sessions_affected: session_ids.size,
    top_selectors: top_selectors(selectors),
    incidents: incidents
  }
end
228
+
229
# Ranks selector tallies with top_counts (at most TOP_SELECTORS_LIMIT) and
# reshapes each pair into {selector:, count:}.
def top_selectors(selectors)
  ranked = top_counts(selectors, limit: TOP_SELECTORS_LIMIT)
  ranked.map do |(selector, count)|
    {selector: selector, count: count}
  end
end
233
+ end
234
+ end
235
+ end
@@ -0,0 +1,160 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+
5
+ module Sentiero
6
+ module Analytics
7
+ # Custom-event funnel: ordered step conversion across sessions. A session
8
+ # reaches step N+1 only when an event with that tag occurs strictly after
9
+ # the step-N match. Greedy-earliest chain matching is optimal for subsequence
10
+ # reachability, so "how far did this session get" is exact.
11
+ class FunnelAnalyzer < Analyzer
12
+ # Excluded as a funnel step; has its own ErrorDiscovery surface.
13
+ ERROR_TAG = "error"
14
+
15
+ # Prefix of recorder-internal annotation tags (__perf, __click, ...).
16
+ INTERNAL_TAG_PREFIX = "__"
17
+
18
+ MAX_STEPS = 3
19
+
20
+ MAX_TAGS = 200
21
+
22
+ # Bounds per-session memory.
23
+ MAX_STEP_EVENTS_PER_SESSION = 100
24
+
25
+ MAX_EXAMPLES_PER_STEP = 10
26
+
27
class << self
  # True for any value that may not serve as a funnel step: non-strings,
  # the empty string, recorder-internal tags (INTERNAL_TAG_PREFIX) and the
  # error tag (ERROR_TAG).
  def internal_tag?(tag)
    return true unless tag.is_a?(String)
    return true if tag.empty?

    tag.start_with?(INTERNAL_TAG_PREFIX) || tag == ERROR_TAG
  end

  # Coerces the input with Array(), drops unusable tags and keeps at most
  # the first MAX_STEPS survivors, in their original order.
  def usable_steps(tags)
    candidates = Array(tags)
    candidates.select { |tag| !internal_tag?(tag) }.take(MAX_STEPS)
  end
end
36
+
37
# Scans sessions, collecting the custom-tag vocabulary and (when at least
# two usable steps were given) per-session step events for the funnel.
#
# With fewer than 2 usable steps the result has steps: [] but the tag
# vocabulary is still collected. Returns {tags:, steps:, was_truncated:};
# was_truncated is truthy when the scan reported its cap, the MAX_TAGS
# vocabulary cap rejected a tag, or a session exceeded
# MAX_STEP_EVENTS_PER_SESSION step events.
def analyze(steps = [], limit: nil, since: nil, until_time: nil)
  usable = self.class.usable_steps(steps)
  funnel = (usable.size < 2) ? [] : usable
  wanted = funnel.uniq

  vocabulary = {}
  by_session = {}
  lossy = false

  _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    session_id = summary[:session_id]
    # Offsets are measured from the window's first event.
    anchor = events.first&.fetch("timestamp", nil)

    events.each do |event|
      tag = custom_tag(event)
      next unless tag

      lossy = true unless tally_tag(vocabulary, tag)
      # An empty funnel has an empty `wanted`, so nothing is a step event.
      next unless wanted.include?(tag)

      timestamp = event["timestamp"]
      next unless timestamp.is_a?(Numeric)

      entries = (by_session[session_id] ||= [])
      if entries.size >= MAX_STEP_EVENTS_PER_SESSION
        lossy = true
        next
      end

      entries << {
        tag: tag,
        timestamp: timestamp,
        window_id: window_id,
        offset_ms: offset_ms(anchor, timestamp)
      }
    end
  end

  {
    tags: vocabulary.keys.sort,
    steps: summarize_steps(funnel, by_session),
    was_truncated: lossy || hit_cap
  }
end
80
+
81
+ private
82
+
83
# Returns the tag of a CUSTOM event, or nil when the event is not a Hash,
# is not of type CUSTOM, has non-Hash "data", or carries a tag the class
# rejects via internal_tag?.
def custom_tag(event)
  return nil unless event.is_a?(Hash)
  return nil unless event["type"] == CUSTOM

  data = event["data"]
  return nil unless data.is_a?(Hash)

  tag = data["tag"]
  tag unless self.class.internal_tag?(tag)
end
90
+
91
# Registers a tag in the vocabulary hash. Returns true when the tag is
# already known or was added; false when it is new but the hash already
# holds MAX_TAGS entries (the caller reads that as truncation).
def tally_tag(tags, tag)
  if tags.key?(tag)
    true
  elsif tags.size >= MAX_TAGS
    false
  else
    tags[tag] = true
  end
end
98
+
99
# Builds one result row per funnel step from the per-session step events.
#
# For each session, chain decides how many steps were reached; every reached
# step's session count is bumped and the time gap to the previous step is
# sampled. A session that stopped short contributes a drop-off example (up
# to MAX_EXAMPLES_PER_STEP) to the last step it reached, pointing at that
# match's window and offset. Fully converted sessions never drop off.
#
# conversion_pct is relative to step one (nil when no session reached it);
# median_ms_from_previous is nil for the first step.
def summarize_steps(steps, sessions)
  return [] if steps.empty?

  step_count = steps.size
  reached_counts = Array.new(step_count, 0)
  gaps = Array.new(step_count) { [] }
  drop_offs = Array.new(step_count) { [] }

  sessions.each do |session_id, entries|
    matched = chain(steps, entries)
    depth = matched.size
    next if depth.zero?

    depth.times { |i| reached_counts[i] += 1 }
    matched.each_cons(2).with_index(1) do |(earlier, later), i|
      gaps[i] << later[:timestamp] - earlier[:timestamp]
    end

    next if depth >= step_count

    bucket = drop_offs[depth - 1]
    next if bucket.size >= MAX_EXAMPLES_PER_STEP

    furthest = matched.last
    bucket << {session_id: session_id, window_id: furthest[:window_id], offset_ms: furthest[:offset_ms]}
  end

  entered = reached_counts[0]
  steps.each_with_index.map do |tag, i|
    pct = entered.zero? ? nil : (reached_counts[i].to_f / entered * 100).round(1)
    {
      tag: tag,
      sessions: reached_counts[i],
      conversion_pct: pct,
      median_ms_from_previous: i.zero? ? nil : median(gaps[i]),
      drop_off_examples: drop_offs[i]
    }
  end
end
134
+
135
# Greedy earliest chain over time-ordered step events: an entry matches when
# its tag is the next pending step and its timestamp is strictly after the
# previous match. Returns the matched entries, one per reached step.
#
# Entries are ordered by [timestamp, original position]. sort_by alone is
# not guaranteed stable, so without the positional tie-break two entries
# sharing a timestamp could be visited in either order, making the chosen
# match (and the window/offset reported for it) arbitrary.
def chain(steps, entries)
  matches = []
  last_ts = nil

  ordered = entries.each_with_index.sort_by { |entry, position| [entry[:timestamp], position] }
  ordered.each do |entry, _position|
    break if matches.size >= steps.size
    next unless entry[:tag] == steps[matches.size]
    next unless last_ts.nil? || entry[:timestamp] > last_ts

    matches << entry
    last_ts = entry[:timestamp]
  end

  matches
end
153
+
154
# Median of the samples via percentile(sorted, 50); nil for an empty list.
def median(values)
  percentile(values.sort, 50) unless values.empty?
end
158
+ end
159
+ end
160
+ end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+ require_relative "collectors/click_collector"
5
+
6
+ module Sentiero
7
+ module Analytics
8
+ # Aggregates click coordinates for a single page URL into a normalized density
9
+ # grid plus a most-clicked-elements table. The per-segment density math lives
10
+ # in ClickCollector (shared with PageReportAnalyzer).
11
+ class HeatmapAnalyzer < Analyzer
12
+ CLICK_TAG = ClickCollector::CLICK_TAG
13
+ GRID_SIZE = ClickCollector::GRID_SIZE
14
+
15
+ TOP_ELEMENTS_LIMIT = 20
16
+ MAX_URLS = 200
17
+
18
# Builds the heatmap for one page URL. Clicks are attributed per page
# segment (Meta-href boundaries); only segments whose URL equals target_url
# are fed to the ClickCollector.
#
# representative_window is the first session/window whose segment the
# collector accepted (collect returned non-nil). was_truncated mirrors the
# scan's cap flag.
def analyze(target_url, limit: nil, since: nil, until_time: nil)
  collector = ClickCollector.new
  representative = nil

  _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    session_id = summary[:session_id]

    each_page_segment(events) do |url, segment, _anchor|
      next if url != target_url

      outcome = collector.collect(segment)
      next if outcome.nil?

      representative ||= {session_id: session_id, window_id: window_id}
    end
  end

  {
    clicks_by_bucket: collector.buckets,
    top_elements: top_elements(collector.selectors),
    total_clicks: collector.total,
    representative_window: representative,
    was_truncated: hit_cap
  }
end
42
+
43
# Builds {url => top clicked elements} across all scanned sessions, keyed in
# sorted URL order. At most MAX_URLS distinct URLs are tracked; segments for
# further new URLs (and URL-less segments) are ignored.
def build_heatmap_table(since: nil, until_time: nil)
  tallies = {}

  scan_sessions(since: since, until_time: until_time) do |_summary, _window_id, events|
    each_page_segment(events) do |url, segment, _anchor|
      next unless url
      next unless tallies.key?(url) || tallies.size < MAX_URLS

      per_url = (tallies[url] ||= Hash.new(0))
      segment.each { |event| tally_selector(per_url, event) }
    end
  end

  tallies.keys.sort.to_h { |url| [url, top_elements(tallies[url])] }
end
63
+
64
# Lists the distinct page URLs seen across all scanned sessions, in first-seen
# order, capped at MAX_URLS entries. URL-less segments are ignored.
def recorded_urls
  seen = {}

  scan_sessions do |_summary, _window_id, events|
    each_page_segment(events) do |url, _segment, _anchor|
      next unless url
      next if seen.size >= MAX_URLS && !seen.key?(url)

      seen[url] = true
    end
  end

  seen.keys
end
75
+
76
+ private
77
+
78
# Increments the tally for the selector carried by a "__click" annotation
# (a CUSTOM event tagged CLICK_TAG). Anything else is ignored.
#
# Malformed events are skipped rather than raised on: a non-Hash event, a
# non-Hash "data" or "payload" (Hash#dig would raise TypeError on a
# String/Array payload), or a selector that is not a non-empty String.
def tally_selector(selectors, event)
  return unless event.is_a?(Hash) && event["type"] == CUSTOM

  data = event["data"]
  return unless data.is_a?(Hash) && data["tag"] == CLICK_TAG

  payload = data["payload"]
  return unless payload.is_a?(Hash)

  selector = payload["selector"]
  selectors[selector] += 1 if selector.is_a?(String) && !selector.empty?
end
86
+
87
# Ranks selector tallies with top_counts (at most TOP_ELEMENTS_LIMIT) and
# reshapes each pair into {selector:, count:}.
def top_elements(selectors)
  ranked = top_counts(selectors, limit: TOP_ELEMENTS_LIMIT)
  ranked.map do |(selector, count)|
    {selector: selector, count: count}
  end
end
91
+ end
92
+ end
93
+ end
@@ -0,0 +1,198 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+ require_relative "frustration_analyzer"
5
+ require_relative "collectors/click_collector"
6
+ require_relative "collectors/scroll_collector"
7
+ require_relative "collectors/vitals_collector"
8
+ require_relative "collectors/error_collector"
9
+ require_relative "collectors/custom_tag_collector"
10
+ require_relative "collectors/form_collector"
11
+ require_relative "collectors/frustration_collector"
12
+
13
+ module Sentiero
14
+ module Analytics
15
+ # Per-URL drill-down: composes the suite's metrics (heatmap, scroll, forms,
16
+ # vitals, frustration, errors, custom tags) for one URL into one report via
17
+ # ONE bounded Store#each_session_events scan. Per-segment math lives in
18
+ # shared Collectors::. Frustration is the exception: only detection is shared
19
+ # (FrustrationAnalyzer.detect_frustration_events); the cross-session
20
+ # aggregation differs, so this URL's attribution uses FrustrationCollector.
21
+ class PageReportAnalyzer < Analyzer
22
+ # Output bounds — each caps a collector (flips #capped on hit).
23
+ MAX_SELECTORS = 200
24
+ MAX_SAMPLES_PER_METRIC = 2000
25
+ MAX_ERROR_GROUPS = 200
26
+ MAX_ERROR_OCCURRENCES = 50
27
+ MAX_CUSTOM_TAGS = 200
28
+ MAX_FIELDS = 500
29
+
30
+ # Display limits.
31
+ TOP_ELEMENTS_LIMIT = 20
32
+ TOP_SELECTORS_LIMIT = 10
33
+
34
# Builds the full per-URL report in a single session scan. since/until_time
# are epoch seconds. Every result key is always present.
#
# Frustration is detected over each window's FULL event list (its semantics
# span page boundaries); the FrustrationCollector then attributes incidents
# to the target segment. Entry/exit/bounce are decided once per window from
# the order of its segments, and the scroll collector is flushed once per
# window.
def analyze(target_url, limit: nil, since: nil, until_time: nil)
  acc = new_accumulator

  _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    session_id = summary[:session_id]
    frustration = FrustrationAnalyzer.detect_frustration_events(events)

    segments_seen = 0
    # Positions (0-based, within this window) of segments on the target URL.
    target_positions = []

    each_page_segment(events) do |url, segment, anchor|
      position = segments_seen
      segments_seen += 1
      next unless url == target_url

      target_positions << position
      acc[:page_views] += 1
      acc[:sessions][session_id] = true

      collect_time_on_page(acc, segment)
      collect_heatmap(acc, segment, session_id, window_id)
      acc[:vitals].collect(segment, session_id: session_id, window_id: window_id, anchor: anchor)
      acc[:errors].collect(segment, session_id: session_id, window_id: window_id, anchor: anchor)
      acc[:custom_tags].collect(segment)
      acc[:forms].collect(session_id, url, segment)
      acc[:scroll].observe(target_url, segment)
      acc[:frustration].collect(frustration, segment)
    end

    unless target_positions.empty?
      starts_on_target = target_positions.first.zero?
      acc[:windows_on_page] += 1
      acc[:entries] += 1 if starts_on_target
      acc[:exits] += 1 if target_positions.last == segments_seen - 1
      acc[:bounces] += 1 if starts_on_target && segments_seen == 1
    end

    # One scroll sample per (session, window): deepest wins.
    acc[:scroll].flush_window
  end

  build_result(target_url, acc, hit_cap)
end
87
+
88
+ private
89
+
90
# Fresh scan state: plain counters/containers first, then one collector per
# metric, each constructed with this class's output bounds.
def new_accumulator
  counters = {
    page_views: 0,
    sessions: {},
    dwell_samples: [],
    windows_on_page: 0,
    entries: 0,
    exits: 0,
    bounces: 0,
    representative: nil
  }

  counters.merge(
    clicks: ClickCollector.new(max_selectors: MAX_SELECTORS),
    scroll: ScrollCollector.new,
    vitals: VitalsCollector.new(max_samples: MAX_SAMPLES_PER_METRIC),
    errors: ErrorCollector.new(max_groups: MAX_ERROR_GROUPS, max_occurrences: MAX_ERROR_OCCURRENCES),
    custom_tags: CustomTagCollector.new(max_tags: MAX_CUSTOM_TAGS),
    forms: FormCollector.new(max_fields: MAX_FIELDS),
    frustration: FrustrationCollector.new(max_selectors: MAX_SELECTORS)
  )
end
109
+
110
# Appends one dwell sample (latest minus earliest numeric timestamp) for a
# segment; segments with fewer than two numeric timestamps add nothing.
# An A→B→A revisit yields TWO target segments, hence TWO samples — intended
# ("time on page per visit").
def collect_time_on_page(acc, segment)
  stamps = segment.map { |event| event["timestamp"] }.grep(Numeric)
  return if stamps.size < 2

  acc[:dwell_samples] << (stamps.max - stamps.min)
end
117
+
118
# Feeds a segment to the click collector. When collect returns nil (per the
# original note: a segment with no valid Meta width/height) nothing else
# happens; otherwise the first such window becomes the representative one.
def collect_heatmap(acc, segment, session_id, window_id)
  outcome = acc[:clicks].collect(segment)
  return if outcome.nil?

  acc[:representative] ||= {session_id: session_id, window_id: window_id}
end
124
+
125
# Assembles the final report hash from the accumulator. Sections are
# computed in the same order as the keys appear in the result, and the
# truncation flag is read last: it is truthy when any collector reports
# capped or the scan reported its cap. bounce_rate is bounces / entries
# (0.0 when there were no entries).
def build_result(target_url, acc, hit_cap)
  clicks = acc[:clicks]
  frustration = acc[:frustration]
  entries = acc[:entries]

  session_count = acc[:sessions].size
  time_on_page = summarize_time_on_page(acc[:dwell_samples])
  entry_exit = {
    entries: entries,
    exits: acc[:exits],
    bounce_rate: entries.zero? ? 0.0 : acc[:bounces].to_f / entries,
    windows_on_page: acc[:windows_on_page]
  }
  heatmap = {
    top_elements: top_selectors(clicks.selectors, TOP_ELEMENTS_LIMIT),
    total_clicks: clicks.total,
    representative_window: acc[:representative]
  }
  scroll = acc[:scroll].summarize(target_url)
  forms = build_forms_section(acc[:forms])
  vitals = build_vitals_section(acc[:vitals])
  errors = acc[:errors].summarize
  frustration_section = {
    rage_count: frustration.rage_count,
    dead_count: frustration.dead_count,
    top_selectors: top_selectors(frustration.selectors, TOP_SELECTORS_LIMIT)
  }
  custom_events = acc[:custom_tags].top(MAX_CUSTOM_TAGS)

  collectors = acc.values_at(:clicks, :scroll, :vitals, :errors, :custom_tags, :forms, :frustration)
  truncated = collectors.any?(&:capped) || hit_cap

  {
    url: target_url,
    sessions: session_count,
    page_views: acc[:page_views],
    time_on_page: time_on_page,
    entry_exit: entry_exit,
    heatmap: heatmap,
    scroll: scroll,
    forms: forms,
    vitals: vitals,
    errors: errors,
    frustration: frustration_section,
    custom_events: custom_events,
    was_truncated: truncated
  }
end
156
+
157
# Summarizes dwell samples as rounded mean, median (percentile 50 over the
# sorted samples) and sample count; all-nil stats with samples: 0 when
# there are none.
def summarize_time_on_page(samples)
  count = samples.size
  return {mean_ms: nil, median_ms: nil, samples: 0} if count.zero?

  mean = (samples.sum.to_f / count).round
  {mean_ms: mean, median_ms: percentile(samples.sort, 50), samples: count}
end
167
+
168
# Shapes the form collector's numbers for the page report. "completed" here
# is the collector's submitted_count (session submitted on the target URL
# at all), not FormAnalyzer's stricter completed_count. completion_rate is
# submitted / started, or 0.0 when nothing was started.
def build_forms_section(forms)
  started = forms.started_count
  submitted = forms.submitted_count
  rate = started.zero? ? 0.0 : submitted.to_f / started

  {
    started: started,
    completed: submitted,
    completion_rate: rate,
    total_submits: forms.total_submits,
    fields: forms.summarize_fields(started),
    drop_off_fields: forms.summarize_drop_off
  }
end
181
+
182
# Returns the vitals summary with each metric's :worst sample reduced, in
# place, to its replay-link coordinates (:session_id, :window_id,
# :offset_ms). The collector's worst sample also carries :value, which the
# page report does not expose. A nil :worst stays nil.
def build_vitals_section(vitals)
  vitals.summarize.tap do |summary|
    summary[:metrics].each_value do |metric|
      worst = metric[:worst]
      metric[:worst] = worst&.slice(:session_id, :window_id, :offset_ms)
    end
  end
end
191
+
192
# Ranks selector tallies with top_counts (at most `limit`) and reshapes each
# pair into {selector:, count:}.
def top_selectors(selectors, limit)
  ranked = top_counts(selectors, limit: limit)
  ranked.map do |(selector, count)|
    {selector: selector, count: count}
  end
end
196
+ end
197
+ end
198
+ end