sentiero 1.0.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +7 -0
  3. data/README.md +679 -0
  4. data/lib/sentiero/analytics/analyzer.rb +91 -0
  5. data/lib/sentiero/analytics/bounded.rb +29 -0
  6. data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
  7. data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
  8. data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
  9. data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
  10. data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
  11. data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
  12. data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
  13. data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
  14. data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
  15. data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
  16. data/lib/sentiero/analytics/entry_attribution.rb +71 -0
  17. data/lib/sentiero/analytics/error_discovery.rb +118 -0
  18. data/lib/sentiero/analytics/events.rb +21 -0
  19. data/lib/sentiero/analytics/exporter.rb +242 -0
  20. data/lib/sentiero/analytics/form_analyzer.rb +153 -0
  21. data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
  22. data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
  23. data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
  24. data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
  25. data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
  26. data/lib/sentiero/analytics/problem_detail.rb +97 -0
  27. data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
  28. data/lib/sentiero/analytics/segmenter.rb +133 -0
  29. data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
  30. data/lib/sentiero/analytics/stats.rb +30 -0
  31. data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
  32. data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
  33. data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
  34. data/lib/sentiero/configuration.rb +184 -0
  35. data/lib/sentiero/erasure.rb +48 -0
  36. data/lib/sentiero/fingerprint.rb +34 -0
  37. data/lib/sentiero/ip_anonymizer.rb +29 -0
  38. data/lib/sentiero/redaction/config.rb +61 -0
  39. data/lib/sentiero/redaction.rb +207 -0
  40. data/lib/sentiero/reporter/configuration.rb +50 -0
  41. data/lib/sentiero/reporter/context.rb +31 -0
  42. data/lib/sentiero/reporter/dispatcher.rb +91 -0
  43. data/lib/sentiero/reporter/http_transport.rb +57 -0
  44. data/lib/sentiero/reporter/log_transport.rb +26 -0
  45. data/lib/sentiero/reporter/middleware.rb +62 -0
  46. data/lib/sentiero/reporter/normalizer.rb +14 -0
  47. data/lib/sentiero/reporter/null_transport.rb +18 -0
  48. data/lib/sentiero/reporter/report_context.rb +29 -0
  49. data/lib/sentiero/reporter/scrubber.rb +47 -0
  50. data/lib/sentiero/reporter/test_helper.rb +32 -0
  51. data/lib/sentiero/reporter/test_transport.rb +28 -0
  52. data/lib/sentiero/reporter.rb +214 -0
  53. data/lib/sentiero/roda.rb +47 -0
  54. data/lib/sentiero/store/error_store.rb +220 -0
  55. data/lib/sentiero/store/limits.rb +31 -0
  56. data/lib/sentiero/store/session_store.rb +118 -0
  57. data/lib/sentiero/store.rb +72 -0
  58. data/lib/sentiero/stores/file.rb +566 -0
  59. data/lib/sentiero/stores/memory.rb +362 -0
  60. data/lib/sentiero/stores/redis/keys.rb +59 -0
  61. data/lib/sentiero/stores/redis/lua.rb +119 -0
  62. data/lib/sentiero/stores/redis.rb +665 -0
  63. data/lib/sentiero/stores/sqlite/schema.rb +79 -0
  64. data/lib/sentiero/stores/sqlite.rb +626 -0
  65. data/lib/sentiero/user_agent.rb +32 -0
  66. data/lib/sentiero/version.rb +5 -0
  67. data/lib/sentiero/web/analytics_app.rb +538 -0
  68. data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
  69. data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
  70. data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
  71. data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
  72. data/lib/sentiero/web/assets/manifest.json +11 -0
  73. data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
  74. data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
  75. data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
  76. data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
  77. data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
  78. data/lib/sentiero/web/assets_app.rb +42 -0
  79. data/lib/sentiero/web/base_app.rb +319 -0
  80. data/lib/sentiero/web/basic_auth.rb +27 -0
  81. data/lib/sentiero/web/basic_auth_check.rb +41 -0
  82. data/lib/sentiero/web/body_reader.rb +44 -0
  83. data/lib/sentiero/web/csv_writer.rb +45 -0
  84. data/lib/sentiero/web/dashboard_app.rb +236 -0
  85. data/lib/sentiero/web/errors_app.rb +97 -0
  86. data/lib/sentiero/web/escaping.rb +37 -0
  87. data/lib/sentiero/web/events_app.rb +196 -0
  88. data/lib/sentiero/web/formatting.rb +43 -0
  89. data/lib/sentiero/web/ingest_app.rb +92 -0
  90. data/lib/sentiero/web/manifest.rb +43 -0
  91. data/lib/sentiero/web/monitoring_app.rb +316 -0
  92. data/lib/sentiero/web/script_tag.rb +57 -0
  93. data/lib/sentiero/web/shareable_replay.rb +88 -0
  94. data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
  95. data/lib/sentiero/web/templates/_brand.html.erb +18 -0
  96. data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
  97. data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
  98. data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
  99. data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
  100. data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
  101. data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
  102. data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
  103. data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
  104. data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
  105. data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
  106. data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
  107. data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
  108. data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
  109. data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
  110. data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
  111. data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
  112. data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
  113. data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
  114. data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
  115. data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
  116. data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
  117. data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
  118. data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
  119. data/lib/sentiero/web/templates/event_show.html.erb +52 -0
  120. data/lib/sentiero/web/templates/events_index.html.erb +177 -0
  121. data/lib/sentiero/web/templates/export_index.html.erb +69 -0
  122. data/lib/sentiero/web/templates/forms.html.erb +105 -0
  123. data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
  124. data/lib/sentiero/web/templates/import.html.erb +39 -0
  125. data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
  126. data/lib/sentiero/web/templates/segments.html.erb +114 -0
  127. data/lib/sentiero/web/templates/session_show.html.erb +195 -0
  128. data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
  129. data/lib/sentiero/web/track_app.rb +57 -0
  130. data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
  131. data/lib/sentiero/web/views/analyzer_view.rb +27 -0
  132. data/lib/sentiero/web/views/base_view.rb +76 -0
  133. data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
  134. data/lib/sentiero/web/views/conversions_view.rb +41 -0
  135. data/lib/sentiero/web/views/engagement_view.rb +67 -0
  136. data/lib/sentiero/web/views/errors_index_view.rb +37 -0
  137. data/lib/sentiero/web/views/event_show_view.rb +20 -0
  138. data/lib/sentiero/web/views/events_index_view.rb +56 -0
  139. data/lib/sentiero/web/views/export_view.rb +23 -0
  140. data/lib/sentiero/web/views/forms_view.rb +28 -0
  141. data/lib/sentiero/web/views/frustration_view.rb +15 -0
  142. data/lib/sentiero/web/views/funnel_view.rb +36 -0
  143. data/lib/sentiero/web/views/heatmap_view.rb +34 -0
  144. data/lib/sentiero/web/views/import_view.rb +13 -0
  145. data/lib/sentiero/web/views/page_report_view.rb +43 -0
  146. data/lib/sentiero/web/views/problem_show_view.rb +46 -0
  147. data/lib/sentiero/web/views/scroll_view.rb +23 -0
  148. data/lib/sentiero/web/views/segments_view.rb +28 -0
  149. data/lib/sentiero/web/views/session_show_view.rb +105 -0
  150. data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
  151. data/lib/sentiero/web/views/vitals_view.rb +45 -0
  152. data/lib/sentiero/web/views.rb +24 -0
  153. data/lib/sentiero/window_ref.rb +6 -0
  154. data/lib/sentiero.rb +69 -0
  155. metadata +232 -0
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "events"
4
+ require_relative "stats"
5
+ require_relative "bounded"
6
+ require_relative "entry_attribution"
7
+
8
module Sentiero
  # Analytics are computed on read: each report queries the store and
  # aggregates while the request is being served.
  module Analytics
    # Common base for analyzers. Holds the store and provides the shared
    # bounded session scan plus the page-segmentation helpers.
    class Analyzer
      include Events
      include Stats
      include Bounded
      include EntryAttribution

      attr_reader :store

      # store defaults to the globally configured Sentiero.store.
      def initialize(store = Sentiero.store)
        @store = store
      end

      private

      # Bounded walk over the store's sessions. Every window is yielded as
      # [summary, window_id, events]; the scan cap is `limit`, falling back to
      # store.limits.analytics_max_scan_sessions. Returns
      # [sessions_scanned, hit_cap], where the count is over DISTINCT session
      # ids (a session may contribute several windows). Callers combine
      # `hit_cap` with their collector's own `capped` flag to report truncation.
      def scan_sessions(limit: nil, since: nil, until_time: nil)
        cap = limit || store.limits.analytics_max_scan_sessions
        session_ids = {}
        store.each_session_events(limit: cap, since: since, until_time: until_time) do |summary, window_id, events|
          session_ids[summary[:session_id]] = true
          yield summary, window_id, events
        end
        scanned = session_ids.size
        [scanned, scanned >= cap]
      end

      # Absolute distance between the summary's first and last event
      # timestamps, or nil when either one is missing.
      def duration_ms(summary)
        started_at = summary[:first_event_at]
        ended_at = summary[:last_event_at]
        (ended_at - started_at).abs if started_at && ended_at
      end

      # First Meta event whose data is a Hash, or nil when there is none.
      def meta_event(events)
        events.find do |event|
          event["type"] == META && event["data"].is_a?(Hash)
        end
      end

      # Cuts one window's events into per-page segments wherever the Meta href
      # changes, yielding [url, segment_events, anchor_ts] for each. Repeated
      # Metas with the same href (same-URL reloads) do not open a new segment,
      # and a window without any usable Meta href is yielded once with a nil
      # url. anchor_ts is always the WINDOW's first timestamp because replay
      # deep-links (?t=offset) are window-relative, never segment-local.
      def each_page_segment(events)
        return if events.empty?

        anchor_ts = events.first&.fetch("timestamp", nil)

        # [start_index, url] for every point where the href changes.
        boundaries = events.each_with_index.each_with_object([]) do |(event, index), found|
          url = meta_href(event)
          next unless url

          found << [index, url] unless found.last&.last == url
        end

        if boundaries.empty?
          yield nil, events, anchor_ts
          return
        end

        boundaries.each_with_index do |(first_index, url), position|
          # Events recorded before the first Meta are attributed to the first page.
          from = position.zero? ? 0 : first_index
          upto = boundaries[position + 1]&.first || events.size
          yield url, events[from...upto], anchor_ts
        end
      end

      # The non-empty String href of a Meta event; nil for any other event or
      # when the href is absent, blank, or not a String.
      def meta_href(event)
        return unless event.is_a?(Hash) && event["type"] == META

        data = event["data"]
        return unless data.is_a?(Hash)

        href = data["href"]
        href if href.is_a?(String) && !href.empty?
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Sentiero
  module Analytics
    # Cap primitives shared by the compute-on-read collectors/analyzers, which
    # bound memory during a scan. Both leave the collection unchanged once it
    # is full, so the caller flips its own `@capped` flag on a false/nil return.
    module Bounded
      # Counter cap: adds `by` to counts[key]. A NEW key is only admitted while
      # the hash holds fewer than `cap` keys (nil = unbounded); existing keys
      # are always incremented.
      #
      # Works both with a Hash that defaults to 0 and with a plain Hash: a
      # missing (nil) count is treated as 0 instead of raising on `nil + by`.
      #
      # @param counts [Hash] key => running count (mutated in place)
      # @param key [Object] counter to bump
      # @param cap [Integer, nil] maximum number of distinct keys, nil for no cap
      # @param by [Numeric] amount to add
      # @return [Boolean] true if counted, false if the cap dropped it
      def bounded_increment(counts, key, cap, by: 1)
        return false unless counts.key?(key) || cap.nil? || counts.size < cap

        counts[key] = (counts[key] || 0) + by
        true
      end

      # Slot cap: returns the existing entry for `key`, or stores and returns a
      # freshly built one (the block's value) while under `cap`. The block is
      # only invoked when a new entry is actually created.
      #
      # @param store [Hash] key => entry (mutated in place)
      # @param key [Object] slot to look up or create
      # @param cap [Integer, nil] maximum number of distinct keys, nil for no cap
      # @yieldreturn [Object] the entry to store for a new key
      # @return [Object, nil] the entry, or nil when the store already holds
      #   `cap` distinct keys and `key` is not among them
      def bounded_fetch(store, key, cap)
        return store[key] if store.key?(key)
        return nil if cap && store.size >= cap

        store[key] = yield
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+
5
module Sentiero
  module Analytics
    # Newest-first listing, across sessions, of browser custom events (rrweb
    # type==5). Events tagged "error" are left out (errors have
    # ErrorDiscovery). Every row carries its session/window plus the offset(ms)
    # from the window start so the UI can build replay deep-links (?t=).
    class BrowserEventDiscovery < Analyzer
      ERROR_TAG = "error"

      MAX_ROWS = 500

      # Size at which the accumulator is trimmed during the scan, so a busy
      # store can't balloon memory. Trimming mid-scan is safe: the newest rows
      # seen so far are kept, and newer events from later sessions are still
      # appended and survive the next trim.
      ACCUMULATION_LIMIT = MAX_ROWS * 4

      # Returns {rows:, was_truncated:}. rows holds at most MAX_ROWS entries,
      # newest first; was_truncated is truthy when rows were trimmed, the
      # session scan hit its cap, or more than MAX_ROWS rows were found.
      def recent_events(since: nil, until_time: nil)
        collected = []
        trimmed = false
        # Rows without a timestamp sort as 0, i.e. last.
        newest_first = ->(row) { -(row[:timestamp] || 0) }

        _scanned, hit_cap = scan_sessions(since: since, until_time: until_time) do |summary, window_id, events|
          window_start = events.first&.fetch("timestamp", nil)
          events.each do |event|
            collected << build_row(summary, window_id, window_start, event) if browser_event?(event)
          end

          if collected.size > ACCUMULATION_LIMIT
            collected = collected.sort_by(&newest_first).first(MAX_ROWS)
            trimmed = true
          end
        end

        ordered = collected.sort_by(&newest_first)
        {
          rows: ordered.first(MAX_ROWS),
          was_truncated: trimmed || hit_cap || ordered.size > MAX_ROWS
        }
      end

      private

      # True for a custom event with Hash data whose tag is not the error tag.
      def browser_event?(event)
        if event["type"] == CUSTOM
          data = event["data"]
          data.is_a?(Hash) && data["tag"] != ERROR_TAG
        else
          false
        end
      end

      # One listing row; payload is kept only when it is a Hash.
      def build_row(summary, window_id, anchor, event)
        details = event["data"] || {}
        occurred_at = event["timestamp"]
        raw_payload = details["payload"]
        {
          name: details["tag"].to_s,
          session_id: summary[:session_id],
          window_id: window_id,
          timestamp: occurred_at,
          offset_ms: offset_ms(anchor, occurred_at),
          payload: (raw_payload if raw_payload.is_a?(Hash))
        }
      end
    end
  end
end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../events"
4
+ require_relative "../bounded"
5
+
6
module Sentiero
  module Analytics
    # Per-URL click density grid + element selectors across page segments. A
    # click's viewport y becomes a page coordinate by adding the latest scroll
    # offset, normalized against the estimated page height (deepest scroll +
    # viewport); x by viewport width. Both bucket into a GRID_SIZE x GRID_SIZE grid.
    class ClickCollector
      include Events
      include Bounded

      # Mouse-interaction subtype counted as a click.
      MOUSE_CLICK = 2

      # Carries the clicked element's CSS selector; rrweb's own click event only
      # references an internal node id, not a stable selector.
      CLICK_TAG = "__click"

      # Grid resolution per axis.
      GRID_SIZE = 20

      # total:     clicks bucketed so far
      # buckets:   [x_index, y_index] => click count
      # selectors: CSS selector => count
      # capped:    true once a new selector was dropped by max_selectors
      attr_reader :total, :buckets, :selectors, :capped

      # max_selectors: nil unbounded; an Integer caps distinct selectors,
      # flipping #capped when one is dropped.
      def initialize(max_selectors: nil)
        @max_selectors = max_selectors
        @buckets = Hash.new(0)
        @selectors = Hash.new(0)
        @total = 0
        @capped = false
      end

      # Buckets every click of the segment and tallies its __click selectors.
      # Returns clicks added, or nil when the segment has no usable viewport
      # (callers branch on the nil); in that case nothing is recorded.
      def collect(segment)
        viewport = viewport_size(segment)
        return nil unless viewport

        page_height = estimate_page_height(segment, viewport)
        scroll_y = 0
        added = 0

        segment.each do |event|
          # A Meta event resets the tracked scroll offset to the top.
          scroll_y = 0 if event["type"] == META
          if (y = document_scroll_y(event))
            scroll_y = y
          end
          if click?(event)
            data = event["data"]
            @buckets[bucket(data["x"], data["y"] + scroll_y, viewport, page_height)] += 1
            added += 1
          end
          tally_selector(event)
        end

        @total += added
        added
      end

      private

      # {width:, height:} from the segment's first Meta event with Hash data,
      # or nil when there is none or its dimensions are not positive numbers.
      def viewport_size(events)
        meta = events.find { |event| event["type"] == META && event["data"].is_a?(Hash) }
        return nil unless meta

        width = meta.dig("data", "width")
        height = meta.dig("data", "height")
        return nil unless width.is_a?(Numeric) && height.is_a?(Numeric)
        return nil unless width > 0 && height > 0

        {width: width, height: height}
      end

      # Deepest document scroll offset seen in the segment plus one viewport height.
      def estimate_page_height(segment, viewport)
        max_scroll = 0
        segment.each do |event|
          y = document_scroll_y(event)
          max_scroll = y if y && y > max_scroll
        end
        max_scroll + viewport[:height]
      end

      # The non-negative y of a scroll event whose node id is absent or 1;
      # nil for everything else (other events, other nodes, bad values).
      def document_scroll_y(event)
        return nil unless event["type"] == INCREMENTAL

        data = event["data"]
        return nil unless data.is_a?(Hash) && data["source"] == SOURCE_SCROLL

        id = data["id"]
        return nil unless id.nil? || id == 1

        y = data["y"]
        (y.is_a?(Numeric) && y >= 0) ? y : nil
      end

      # True for a mouse-interaction click carrying numeric x and y.
      def click?(event)
        return false unless event["type"] == INCREMENTAL

        data = event["data"]
        return false unless data.is_a?(Hash)

        data["source"] == SOURCE_MOUSE_INTERACTION &&
          data["type"] == MOUSE_CLICK &&
          data["x"].is_a?(Numeric) &&
          data["y"].is_a?(Numeric)
      end

      # [x_index, y_index] grid cell for a click at viewport x / page y.
      def bucket(x, page_y, viewport, page_height)
        [
          bucket_index(x, viewport[:width]),
          bucket_index(page_y, page_height)
        ]
      end

      # Maps value in [0, axis_length) onto 0...GRID_SIZE; out-of-range values
      # are clamped into the first/last cell.
      def bucket_index(value, axis_length)
        index = (value.to_f / axis_length * GRID_SIZE).floor
        index.clamp(0, GRID_SIZE - 1)
      end

      # Counts the selector carried by a __click custom event, subject to the
      # max_selectors cap. Events with a missing or malformed payload are
      # ignored.
      def tally_selector(event)
        return unless event["type"] == CUSTOM

        data = event["data"]
        return unless data.is_a?(Hash) && data["tag"] == CLICK_TAG

        # Recorded payloads are not guaranteed to be Hashes; digging through a
        # String/Array/Integer payload would raise TypeError and abort the scan.
        payload = data["payload"]
        selector = payload.is_a?(Hash) ? payload["selector"] : nil
        return unless selector.is_a?(String) && !selector.empty?

        @capped = true unless bounded_increment(@selectors, selector, @max_selectors)
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../events"
4
+ require_relative "../bounded"
5
+ require_relative "../stats"
6
+
7
module Sentiero
  module Analytics
    # Custom-event tag tally across page segments. The single definition of
    # which tags are "internal" and how the rest are counted and ranked.
    class CustomTagCollector
      include Events
      include Bounded
      include Stats

      # Recorder-internal annotations (__perf, __click, …); never on the panel.
      INTERNAL_TAG_PREFIX = "__"
      # The JS-error tag is also internal — it has its own panel.
      ERROR_TAG = "error"
      MAX_CUSTOM_TAGS = 200

      # tags:   tag => count
      # capped: true once a new tag was dropped by max_tags
      attr_reader :tags, :capped

      # max_tags: nil unbounded; an Integer caps distinct tags, flipping #capped.
      def initialize(max_tags: nil)
        @max_tags = max_tags
        @tags = Hash.new(0)
        @capped = false
      end

      # True for tags starting with INTERNAL_TAG_PREFIX and for the error tag.
      def internal_tag?(tag)
        tag.start_with?(INTERNAL_TAG_PREFIX) || tag == ERROR_TAG
      end

      # Returns true when counted, false when internal or capped — callers gate
      # per-tag side-effects on this.
      def tally(tag)
        return false if internal_tag?(tag)

        counted = bounded_increment(@tags, tag, @max_tags)
        @capped = true unless counted
        counted
      end

      # Tallies the tag of every custom event in the segment. Events whose
      # data is not a Hash, or whose tag is not a non-empty String, are skipped.
      def collect(segment)
        segment.each do |event|
          next unless event["type"] == CUSTOM

          # Guard the shape before reading the tag: digging through a non-Hash
          # `data` (String, Array, number) would raise TypeError and abort the
          # whole scan on one malformed event.
          data = event["data"]
          tag = data.is_a?(Hash) ? data["tag"] : nil
          next unless tag.is_a?(String) && !tag.empty?

          tally(tag)
        end
      end

      # The n most frequent tags as [{tag:, count:}, …], ranked by top_counts.
      def top(n)
        top_counts(@tags, limit: n).map { |tag, count| {tag: tag, count: count} }
      end
    end
  end
end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../events"
4
+ require_relative "../stats"
5
+ require_relative "../bounded"
6
+
7
module Sentiero
  module Analytics
    # Per-URL error grouping across page segments. Groups JS errors by a
    # normalized key (see group_key) so messages differing only by an
    # id/count/line number collapse into one row. Each occurrence records
    # offset_ms from the window's first event so the UI can deep-link via ?t=.
    # The three helpers are class methods so ErrorDiscovery can reuse them with
    # its own group shape without instantiating an accumulator.
    class ErrorCollector
      include Events
      include Stats
      include Bounded

      ERROR_TAG = "error"
      MAX_KEY_LENGTH = 200

      # groups: group key => {message:, count:, occurrences:}
      # capped: true once a new group was dropped by max_groups
      attr_reader :groups, :capped

      # Integer caps distinct groups (flips #capped) / occurrences per group; nil unbounded.
      def initialize(max_groups: nil, max_occurrences: nil)
        @max_groups = max_groups
        @max_occurrences = max_occurrences
        @groups = {}
        @capped = false
      end

      # Adds every error event of the segment to its group. The group keeps
      # the message of the first occurrence seen; count keeps growing even
      # after the per-group occurrence list is full.
      #
      # anchor is the window's first-event timestamp; offset_ms is relative to it
      # so replay ?t= deep-links stay consistent across segments of the window.
      def collect(segment, session_id:, window_id:, anchor:)
        segment.each do |event|
          next unless self.class.error_event?(event)

          message = self.class.extract_message(event)
          key = self.class.group_key(message)

          group = bounded_fetch(@groups, key, @max_groups) { {message: message, count: 0, occurrences: []} }
          if group.nil?
            @capped = true
            next
          end

          group[:count] += 1

          if @max_occurrences.nil? || group[:occurrences].size < @max_occurrences
            group[:occurrences] << {
              session_id: session_id,
              window_id: window_id,
              offset_ms: offset_ms(anchor, event["timestamp"])
            }
          end
        end
      end

      # {groups: [...most frequent first...], total: sum of all group counts}.
      def summarize
        {
          groups: @groups.values
            .sort_by { |g| -g[:count] }
            .map { |g| {message: g[:message], count: g[:count], occurrences: g[:occurrences]} },
          total: @groups.values.sum { |g| g[:count] }
        }
      end

      # True for a custom event with Hash data tagged ERROR_TAG.
      def self.error_event?(event)
        return false unless event["type"] == CUSTOM

        data = event["data"]
        data.is_a?(Hash) && data["tag"] == ERROR_TAG
      end

      # Normalized grouping key: the first non-blank line of the message,
      # stripped, with every digit run replaced by "#", cut to MAX_KEY_LENGTH.
      #
      # Leading whitespace/newlines are skipped before taking the line;
      # otherwise every message that starts with a blank line would share the
      # empty key and unrelated errors would merge. nil is treated as "".
      def self.group_key(message)
        first_line = message.to_s.lstrip.each_line.first.to_s.strip
        first_line.gsub(/\d+/, "#")[0, MAX_KEY_LENGTH]
      end

      # The payload's message as a String, or "Unknown error" when data or
      # payload is not a Hash or the message is missing/blank. Safe to call on
      # events that were not pre-checked with error_event?.
      def self.extract_message(event)
        data = event["data"]
        payload = data.is_a?(Hash) ? data["payload"] : nil
        message = payload.is_a?(Hash) ? payload["message"] : nil

        text = message.to_s
        text.strip.empty? ? "Unknown error" : text
      end
    end
  end
end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../events"
4
+ require_relative "../bounded"
5
+
6
module Sentiero
  module Analytics
    # Per-URL form interaction math. The single definition of what an "input"
    # and a "submit" are in rrweb terms. Two completion semantics for two callers:
    # #completed_count — sessions with inputs where EVERY input segment was
    # submitted (strict; one abandoned segment disqualifies).
    # #submitted_count — sessions with ANY __form_submit event, regardless of
    # input timing.
    class FormCollector
      include Events
      include Bounded

      # Recorder tag for a document-level form submit.
      SUBMIT_TAG = "__form_submit"

      # Output cap for the drop-off table.
      DROP_OFF_LIMIT = 50

      attr_reader :total_submits, :capped

      # max_fields: nil unbounded; an Integer caps the fields hash, flipping #capped.
      def initialize(max_fields: nil)
        @max_fields = max_fields
        @total_submits = 0
        @fields = {} # [url, node_id] => field-stats hash
        @drop_off = Hash.new(0) # [url, node_id] => abandon count
        @started = {} # session_id => true (≥1 input event seen)
        @submitted = {} # session_id => true (≥1 submit event, any segment)
        @abandoned = {} # session_id => true (≥1 input segment not submitted)
        @capped = false
      end

      # Folds one page segment into the totals. Returns the count of input
      # events found. labels: {node_id => label} from the segment's DOM
      # snapshot; {} omits them.
      def collect(session_id, url, segment, labels: {})
        submits = segment.count { |e| submit?(e) }
        @total_submits += submits
        @submitted[session_id] = true if submits.positive?

        inputs = segment.select { |e| input?(e) }
        return 0 if inputs.empty?

        @started[session_id] = true
        record_fields(session_id, url, inputs, labels)

        first_input_at = inputs.first["timestamp"]
        unless segment_submitted?(segment, first_input_at)
          @abandoned[session_id] = true
          # The last field touched is where the visitor gave up.
          @drop_off[[url, node_id(inputs.last)]] += 1
        end

        inputs.size
      end

      # Sessions with at least one input event.
      def started_count
        @started.size
      end

      # Counts a submit on the target URL even when inputs landed on a prior segment.
      def submitted_count
        @submitted.size
      end

      # Sessions with inputs where NO input segment was abandoned; a submit on a
      # later page never masks an abandonment.
      def completed_count
        @started.count { |id, _| !@abandoned.key?(id) }
      end

      # One row per [url, field], most sessions first. completion_rate is the
      # field's session count divided by `started` (0.0 when started is zero).
      def summarize_fields(started, include_labels: false)
        @fields
          .sort_by { |(url, id), stats| [-stats[:sessions], url.to_s, id] }
          .map do |(url, id), stats|
            row = {}
            row[:field_id] = id
            row[:label] = stats[:label] if include_labels
            row[:url] = url
            row[:sessions] = stats[:sessions]
            row[:completion_rate] = started.zero? ? 0.0 : stats[:sessions].to_f / started
            row[:avg_time_to_fill_ms] = stats[:units].zero? ? 0.0 : stats[:total_fill_ms] / stats[:units]
            row[:total_refills] = stats[:total_refills]
            row
          end
      end

      # Up to DROP_OFF_LIMIT rows of [url, field] abandon counts, highest first.
      # The label is nil for a field that was never recorded in @fields (e.g.
      # it was dropped by the max_fields cap).
      def summarize_drop_off(include_labels: false)
        @drop_off
          .sort_by { |(url, id), count| [-count, url.to_s, id] }
          .first(DROP_OFF_LIMIT)
          .map do |(url, id), count|
            row = {}
            row[:field_id] = id
            row[:label] = @fields.dig([url, id], :label) if include_labels
            row[:url] = url
            row[:count] = count
            row
          end
      end

      private

      # Truthy for an input event with Hash data and an Integer node id.
      def input?(event)
        return false unless event["type"] == INCREMENTAL

        data = event["data"]
        data.is_a?(Hash) && data["source"] == SOURCE_INPUT && node_id(event)
      end

      # The event's Integer node id, or nil when data is not a Hash or the id
      # is missing / not an Integer.
      def node_id(event)
        data = event["data"]
        id = data.is_a?(Hash) ? data["id"] : nil
        id.is_a?(Integer) ? id : nil
      end

      # True for a custom event tagged SUBMIT_TAG. The Hash guard keeps a
      # malformed event (String/Array/number data) from raising TypeError,
      # which `dig` would do.
      def submit?(event)
        return false unless event["type"] == CUSTOM

        data = event["data"]
        data.is_a?(Hash) && data["tag"] == SUBMIT_TAG
      end

      # A segment counts as submitted only when a __form_submit lands at or
      # after the first input; an earlier submit belongs to a prior interaction
      # (counting it resurrects "navigating away counts as submitting"). When
      # the first input has no numeric timestamp, any submit counts.
      def segment_submitted?(segment, first_input_at)
        segment.any? do |event|
          next false unless submit?(event)

          ts = event["timestamp"]
          !first_input_at.is_a?(Numeric) || (ts.is_a?(Numeric) && ts >= first_input_at)
        end
      end

      # Keyed by [url, node_id]: rrweb node ids reset on every full-page load, so
      # the url scope keeps unrelated fields from conflating across pages.
      def record_fields(session_id, url, inputs, labels)
        inputs.group_by { |e| [url, node_id(e)] }.each do |key, field_inputs|
          stats = bounded_fetch(@fields, key, @max_fields) do
            {sessions: 0, units: 0, total_fill_ms: 0.0, total_refills: 0, last_session: nil, label: nil}
          end
          if stats.nil?
            @capped = true
            next
          end

          # Distinct-session count relies on last_session: a session is counted
          # once as long as its segments arrive back to back.
          stats[:sessions] += 1 unless stats[:last_session] == session_id
          stats[:last_session] = session_id
          stats[:units] += 1

          # Only numeric timestamps take part in the fill-time span; an input
          # event without one contributes nothing instead of crashing min/max.
          earliest, latest = field_inputs.map { |e| e["timestamp"] }.grep(Numeric).minmax
          stats[:total_fill_ms] += (latest - earliest).to_f if earliest

          stats[:total_refills] += field_inputs.size - 1
          stats[:label] ||= labels[node_id(field_inputs.first)]
        end
      end
    end
  end
end