sentiero 1.0.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +7 -0
  3. data/README.md +679 -0
  4. data/lib/sentiero/analytics/analyzer.rb +91 -0
  5. data/lib/sentiero/analytics/bounded.rb +29 -0
  6. data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
  7. data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
  8. data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
  9. data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
  10. data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
  11. data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
  12. data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
  13. data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
  14. data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
  15. data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
  16. data/lib/sentiero/analytics/entry_attribution.rb +71 -0
  17. data/lib/sentiero/analytics/error_discovery.rb +118 -0
  18. data/lib/sentiero/analytics/events.rb +21 -0
  19. data/lib/sentiero/analytics/exporter.rb +242 -0
  20. data/lib/sentiero/analytics/form_analyzer.rb +153 -0
  21. data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
  22. data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
  23. data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
  24. data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
  25. data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
  26. data/lib/sentiero/analytics/problem_detail.rb +97 -0
  27. data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
  28. data/lib/sentiero/analytics/segmenter.rb +133 -0
  29. data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
  30. data/lib/sentiero/analytics/stats.rb +30 -0
  31. data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
  32. data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
  33. data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
  34. data/lib/sentiero/configuration.rb +184 -0
  35. data/lib/sentiero/erasure.rb +48 -0
  36. data/lib/sentiero/fingerprint.rb +34 -0
  37. data/lib/sentiero/ip_anonymizer.rb +29 -0
  38. data/lib/sentiero/redaction/config.rb +61 -0
  39. data/lib/sentiero/redaction.rb +207 -0
  40. data/lib/sentiero/reporter/configuration.rb +50 -0
  41. data/lib/sentiero/reporter/context.rb +31 -0
  42. data/lib/sentiero/reporter/dispatcher.rb +91 -0
  43. data/lib/sentiero/reporter/http_transport.rb +57 -0
  44. data/lib/sentiero/reporter/log_transport.rb +26 -0
  45. data/lib/sentiero/reporter/middleware.rb +62 -0
  46. data/lib/sentiero/reporter/normalizer.rb +14 -0
  47. data/lib/sentiero/reporter/null_transport.rb +18 -0
  48. data/lib/sentiero/reporter/report_context.rb +29 -0
  49. data/lib/sentiero/reporter/scrubber.rb +47 -0
  50. data/lib/sentiero/reporter/test_helper.rb +32 -0
  51. data/lib/sentiero/reporter/test_transport.rb +28 -0
  52. data/lib/sentiero/reporter.rb +214 -0
  53. data/lib/sentiero/roda.rb +47 -0
  54. data/lib/sentiero/store/error_store.rb +220 -0
  55. data/lib/sentiero/store/limits.rb +31 -0
  56. data/lib/sentiero/store/session_store.rb +118 -0
  57. data/lib/sentiero/store.rb +72 -0
  58. data/lib/sentiero/stores/file.rb +566 -0
  59. data/lib/sentiero/stores/memory.rb +362 -0
  60. data/lib/sentiero/stores/redis/keys.rb +59 -0
  61. data/lib/sentiero/stores/redis/lua.rb +119 -0
  62. data/lib/sentiero/stores/redis.rb +665 -0
  63. data/lib/sentiero/stores/sqlite/schema.rb +79 -0
  64. data/lib/sentiero/stores/sqlite.rb +626 -0
  65. data/lib/sentiero/user_agent.rb +32 -0
  66. data/lib/sentiero/version.rb +5 -0
  67. data/lib/sentiero/web/analytics_app.rb +538 -0
  68. data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
  69. data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
  70. data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
  71. data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
  72. data/lib/sentiero/web/assets/manifest.json +11 -0
  73. data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
  74. data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
  75. data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
  76. data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
  77. data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
  78. data/lib/sentiero/web/assets_app.rb +42 -0
  79. data/lib/sentiero/web/base_app.rb +319 -0
  80. data/lib/sentiero/web/basic_auth.rb +27 -0
  81. data/lib/sentiero/web/basic_auth_check.rb +41 -0
  82. data/lib/sentiero/web/body_reader.rb +44 -0
  83. data/lib/sentiero/web/csv_writer.rb +45 -0
  84. data/lib/sentiero/web/dashboard_app.rb +236 -0
  85. data/lib/sentiero/web/errors_app.rb +97 -0
  86. data/lib/sentiero/web/escaping.rb +37 -0
  87. data/lib/sentiero/web/events_app.rb +196 -0
  88. data/lib/sentiero/web/formatting.rb +43 -0
  89. data/lib/sentiero/web/ingest_app.rb +92 -0
  90. data/lib/sentiero/web/manifest.rb +43 -0
  91. data/lib/sentiero/web/monitoring_app.rb +316 -0
  92. data/lib/sentiero/web/script_tag.rb +57 -0
  93. data/lib/sentiero/web/shareable_replay.rb +88 -0
  94. data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
  95. data/lib/sentiero/web/templates/_brand.html.erb +18 -0
  96. data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
  97. data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
  98. data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
  99. data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
  100. data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
  101. data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
  102. data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
  103. data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
  104. data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
  105. data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
  106. data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
  107. data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
  108. data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
  109. data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
  110. data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
  111. data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
  112. data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
  113. data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
  114. data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
  115. data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
  116. data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
  117. data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
  118. data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
  119. data/lib/sentiero/web/templates/event_show.html.erb +52 -0
  120. data/lib/sentiero/web/templates/events_index.html.erb +177 -0
  121. data/lib/sentiero/web/templates/export_index.html.erb +69 -0
  122. data/lib/sentiero/web/templates/forms.html.erb +105 -0
  123. data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
  124. data/lib/sentiero/web/templates/import.html.erb +39 -0
  125. data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
  126. data/lib/sentiero/web/templates/segments.html.erb +114 -0
  127. data/lib/sentiero/web/templates/session_show.html.erb +195 -0
  128. data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
  129. data/lib/sentiero/web/track_app.rb +57 -0
  130. data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
  131. data/lib/sentiero/web/views/analyzer_view.rb +27 -0
  132. data/lib/sentiero/web/views/base_view.rb +76 -0
  133. data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
  134. data/lib/sentiero/web/views/conversions_view.rb +41 -0
  135. data/lib/sentiero/web/views/engagement_view.rb +67 -0
  136. data/lib/sentiero/web/views/errors_index_view.rb +37 -0
  137. data/lib/sentiero/web/views/event_show_view.rb +20 -0
  138. data/lib/sentiero/web/views/events_index_view.rb +56 -0
  139. data/lib/sentiero/web/views/export_view.rb +23 -0
  140. data/lib/sentiero/web/views/forms_view.rb +28 -0
  141. data/lib/sentiero/web/views/frustration_view.rb +15 -0
  142. data/lib/sentiero/web/views/funnel_view.rb +36 -0
  143. data/lib/sentiero/web/views/heatmap_view.rb +34 -0
  144. data/lib/sentiero/web/views/import_view.rb +13 -0
  145. data/lib/sentiero/web/views/page_report_view.rb +43 -0
  146. data/lib/sentiero/web/views/problem_show_view.rb +46 -0
  147. data/lib/sentiero/web/views/scroll_view.rb +23 -0
  148. data/lib/sentiero/web/views/segments_view.rb +28 -0
  149. data/lib/sentiero/web/views/session_show_view.rb +105 -0
  150. data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
  151. data/lib/sentiero/web/views/vitals_view.rb +45 -0
  152. data/lib/sentiero/web/views.rb +24 -0
  153. data/lib/sentiero/window_ref.rb +6 -0
  154. data/lib/sentiero.rb +69 -0
  155. metadata +232 -0
@@ -0,0 +1,346 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+ require_relative "collectors/custom_tag_collector"
5
+ require_relative "../user_agent"
6
+
7
module Sentiero
  module Analytics
    # Scans stored session events for a time window and tallies them into an
    # Accumulator (event types, custom tags, browsers/devices, entry pages,
    # referrers, durations, per-day series). The finished tallies are handed to
    # ResultBuilder, which produces the value returned to callers.
    class StatsAggregator < Analyzer
      # Required here, not at file top: ResultBuilder reopens this class, and
      # requiring it before the `< Analyzer` superclass is established would
      # raise a superclass mismatch.
      require_relative "stats_aggregator/result_builder"

      TOP_LIST_LIMIT = 10
      TOP_TAGS_LIMIT = 20
      TOP_PROBLEMS_LIMIT = 5

      NAVIGATION_TAG = "navigation"

      # Metadata keys that tally_metadata skips.
      INTERNAL_METADATA_KEYS = %w[userAgent url referrer viewport has_errors entry_url entry_referrer].freeze

      MAX_NAV_KEYS = 200
      MAX_METADATA_KEYS = 50
      MAX_METADATA_VALUES_PER_KEY = 50
      MAX_TAG_SERIES_KEYS = 200
      MAX_OVERLAY_PROBLEMS = 200
      MAX_OCCURRENCES_PER_PROBLEM = 500

      # [label, exclusive upper bound in milliseconds]; the nil bound is the
      # catch-all bucket.
      DURATION_BUCKETS = [
        ["0-30s", 30_000],
        ["30s-2m", 120_000],
        ["2-5m", 300_000],
        ["5-15m", 900_000],
        ["15m+", nil]
      ].freeze

      WEEK_BUCKET_THRESHOLD_DAYS = 45

      # Aggregates one window. `since` / `until_time` are epoch seconds; when
      # `since` is omitted it is derived from `range_days`. With
      # `server_exception_overlay: true` the per-day server-occurrence series is
      # collected as well.
      def aggregate(range_days: 30, since: nil, until_time: nil, server_exception_overlay: false)
        scan_cap = store.limits.analytics_max_scan_sessions
        since ||= default_since(range_days, until_time)

        acc = new_accumulator(since, until_time)
        seen_sessions = {}

        store.each_session_events(limit: scan_cap, since: since, until_time: until_time) do |summary, _window_id, events|
          accumulate_window(acc, seen_sessions, summary, events)
        end

        finalize(acc, seen_sessions, scan_cap, overlay: server_exception_overlay)
      end

      # Derives BOTH the current-window aggregate and the equal-length
      # prior-window aggregate from a SINGLE widened scan over
      # [prior_since, until_time], partitioning each session into the current or
      # prior bucket by its updated_at. Returns {current:, prior:}; prior is nil
      # when no comparison is possible (zero-length window) or when the widened
      # scan is truncated — in which case the current aggregate is recomputed
      # from an exact single-window scan so the displayed numbers stay correct
      # (deltas are dropped on truncation anyway).
      def aggregate_with_prior(range_days: 30, since: nil, until_time: nil)
        scan_cap = store.limits.analytics_max_scan_sessions
        since ||= default_since(range_days, until_time)
        window_until = until_time || Time.now.to_f
        span = window_until - since

        return {current: aggregate(since: since, until_time: until_time, server_exception_overlay: true), prior: nil} unless span > 0

        prior_since = since - span
        # One millisecond before `since`, so the two windows do not overlap.
        prior_until = since - 0.001

        current = {acc: new_accumulator(since, until_time), seen: {}}
        prior = {acc: new_accumulator(prior_since, prior_until), seen: {}}

        store.each_session_events(limit: scan_cap, since: prior_since, until_time: until_time) do |summary, _window_id, events|
          bucket = (summary[:updated_at] >= since) ? current : prior
          accumulate_window(bucket[:acc], bucket[:seen], summary, events)
        end

        if current[:seen].size + prior[:seen].size >= scan_cap
          return {current: aggregate(since: since, until_time: until_time, server_exception_overlay: true), prior: nil}
        end

        {
          current: finalize(current[:acc], current[:seen], scan_cap, overlay: true),
          prior: finalize(prior[:acc], prior[:seen], scan_cap, overlay: false)
        }
      end

      private

      # Folds one window's events into the accumulator; session-level tallies
      # run only the first time a session id is seen.
      def accumulate_window(acc, seen_sessions, summary, events)
        session_id = summary[:session_id]
        collect_session(acc, summary, seen_sessions) unless seen_sessions.key?(session_id)
        update_entry_candidate(seen_sessions[session_id], events)
        collect_events(acc, events)
      end

      # Completes the deferred entry tallies, optionally collects the server
      # overlay, and builds the result.
      def finalize(acc, seen_sessions, scan_cap, overlay:)
        tally_entries(acc, seen_sessions)
        overlay_truncated = overlay ? collect_server_overlay(acc) : false
        ResultBuilder.new(store).build(acc, seen_sessions.size, scan_cap, overlay_truncated)
      end

      # range_days - 1: the start day is itself one of the range_days buckets.
      # Returns the UTC midnight of the start day as epoch seconds.
      def default_since(range_days, until_time)
        end_date = (until_time ? Time.at(until_time) : Time.now).utc.to_date
        start_date = end_date - (range_days - 1)
        Time.utc(start_date.year, start_date.month, start_date.day).to_f
      end

      # Mutable bag of the running tallies for one aggregate scan. A Struct (not a
      # Hash) so the ~24 fields are named accessors threaded through the tally_*
      # methods rather than string-typed acc.key lookups.
      Accumulator = Struct.new(
        :event_types, :custom_tags, :browser_tags, :browsers, :devices,
        :entry_pages, :entry_page_errors, :referrers, :duration_buckets,
        :total_events, :durations, :since, :until_time,
        :per_day_events, :per_day_sessions, :per_day_errors, :per_day_tags,
        :per_day_server_errors, :nav_internal, :nav_external, :nav_texts,
        :metadata_keys, :metadata_values, :sessions_with_errors,
        keyword_init: true
      )

      # Fresh, zeroed accumulator bound to the [since, until_time] window.
      def new_accumulator(since, until_time)
        Accumulator.new(
          event_types: Hash.new(0),
          custom_tags: CustomTagCollector.new,
          browser_tags: Hash.new(0),
          browsers: Hash.new(0),
          devices: Hash.new(0),
          entry_pages: Hash.new(0),
          entry_page_errors: Hash.new(0),
          referrers: Hash.new(0),
          duration_buckets: DURATION_BUCKETS.to_h { |label, _| [label, 0] },
          total_events: 0,
          durations: [],
          since: since,
          until_time: until_time,
          per_day_events: Hash.new(0),
          per_day_sessions: Hash.new(0),
          per_day_errors: Hash.new(0),
          per_day_tags: {},
          per_day_server_errors: Hash.new(0),
          nav_internal: Hash.new(0),
          nav_external: Hash.new(0),
          nav_texts: Hash.new(0),
          metadata_keys: Hash.new(0),
          metadata_values: {},
          sessions_with_errors: 0
        )
      end

      # Runs once per session (windows share metadata and one duration).
      def collect_session(acc, summary, seen_sessions)
        metadata = summary[:metadata] || {}
        entry_url = metadata["entry_url"]
        seen_sessions[summary[:session_id]] = {
          entry_url: entry_url,
          # A real entry_url is authoritative; a first-Meta href may only claim
          # the slot when we started from nil.
          entry_anchor: entry_url ? -Float::INFINITY : nil,
          referrer: metadata["entry_referrer"] || metadata["referrer"],
          has_errors: !!metadata["has_errors"]
        }

        tally_browser_device(acc, metadata["userAgent"])
        acc.sessions_with_errors += 1 if metadata["has_errors"]

        tally_metadata(acc, metadata)
        record_duration(acc, summary)
        record_session_day(acc, summary)
      end

      # Deferred until every window is seen: entry page is the first Meta href,
      # not the metadata URL the recorder overwrites on each navigation.
      # Same-origin referrers dropped so Top Referrers shows only acquisition.
      def tally_entries(acc, seen_sessions)
        seen_sessions.each_value do |state|
          entry_url = state[:entry_url]
          tally(acc.entry_pages, entry_url)
          tally(acc.entry_page_errors, entry_url) if state[:has_errors]

          tally(acc.referrers, state[:referrer]) unless same_origin?(state[:referrer], entry_url)
        end
      end

      # Values are tracked only for keys that survived the key cap.
      def tally_metadata(acc, metadata)
        metadata.each do |key, value|
          next unless key.is_a?(String) && !key.empty?
          next if INTERNAL_METADATA_KEYS.include?(key)
          next unless bounded_tally(acc.metadata_keys, key, MAX_METADATA_KEYS)

          values = acc.metadata_values[key] ||= Hash.new(0)
          bounded_tally(values, value.to_s, MAX_METADATA_VALUES_PER_KEY)
        end
      end

      # Tallies every in-window event: type counts, custom tags, per-day event
      # and error series.
      def collect_events(acc, events)
        events.each do |event|
          next unless in_window?(acc, event["timestamp"])

          type = event["type"]
          acc.event_types[type] += 1
          acc.total_events += 1
          tally_custom_tag(acc, event) if type == CUSTOM
          record_event_day(acc, event)
          record_error_day(acc, event) if error_event?(event)
        end
      end

      # Clamps per-event tallies to [since, until_time]: an in-range session can
      # carry out-of-window events that must not inflate totals. Events without a
      # numeric timestamp are kept (unplaceable).
      def in_window?(acc, timestamp_ms)
        return true unless timestamp_ms.is_a?(Numeric)

        ts = timestamp_ms / 1000.0
        ts >= acc.since && (acc.until_time.nil? || ts <= acc.until_time)
      end

      # Counts the session's browser and device; either may be nil and is then
      # not counted.
      def tally_browser_device(acc, user_agent)
        browser = UserAgent.browser(user_agent)
        device = UserAgent.device(user_agent)
        acc.browsers[browser] += 1 if browser
        acc.devices[device] += 1 if device
      end

      def tally_custom_tag(acc, event)
        data = event["data"]
        return unless data.is_a?(Hash)
        tag = data["tag"]
        return unless tag.is_a?(String)

        tally_navigation(acc, data["payload"]) if tag == NAVIGATION_TAG

        # Branch on #tally's return so browser_tags and per-day series share its
        # gate (internal "__" annotations and the JS-error tag are excluded).
        return unless acc.custom_tags.tally(tag)

        acc.browser_tags[tag] += 1
        record_tag_day(acc, tag, event["timestamp"])
      end

      # New series bounded by MAX_TAG_SERIES_KEYS; existing tags count past it.
      def record_tag_day(acc, tag, timestamp_ms)
        date = day_string(timestamp_ms)
        return unless date

        series = acc.per_day_tags[tag]
        series = acc.per_day_tags[tag] = Hash.new(0) if series.nil? && acc.per_day_tags.size < MAX_TAG_SERIES_KEYS
        series[date] += 1 if series
      end

      # Tallies a navigation payload's url (internal vs external bucket) and
      # link text, each capped at MAX_NAV_KEYS distinct values.
      def tally_navigation(acc, payload)
        return unless payload.is_a?(Hash)

        bucket = payload["external"] ? acc.nav_external : acc.nav_internal
        bounded_tally(bucket, payload["url"], MAX_NAV_KEYS)
        bounded_tally(acc.nav_texts, payload["text"], MAX_NAV_KEYS)
      end

      # Cap a per-value tally, ignoring blank/non-string values.
      def bounded_tally(counts, value, cap)
        return false unless value.is_a?(String) && !value.empty?

        bounded_increment(counts, value, cap)
      end

      # Uncapped tally, ignoring blank/non-string values.
      def tally(counts, value)
        counts[value] += 1 if value.is_a?(String) && !value.empty?
      end

      # Records the session duration (absolute difference of the summary's first
      # and last event times) and bumps its duration bucket.
      def record_duration(acc, summary)
        first = summary[:first_event_at]
        last = summary[:last_event_at]
        return unless first && last

        duration = (last - first).abs
        acc.durations << duration
        label = bucket_label(duration)
        acc.duration_buckets[label] += 1
      end

      # Label of the first bucket whose bound exceeds the duration (the nil
      # bound matches everything).
      def bucket_label(duration_ms)
        label, _bound = DURATION_BUCKETS.find { |_label, bound| bound.nil? || duration_ms < bound }
        label
      end

      # NOTE(review): day_string treats its argument as epoch milliseconds;
      # confirm summary[:created_at] uses the same unit as first_event_at.
      def record_session_day(acc, summary)
        date = day_string(summary[:first_event_at] || summary[:created_at])
        acc.per_day_sessions[date] += 1 if date
      end

      def record_event_day(acc, event)
        date = day_string(event["timestamp"])
        acc.per_day_events[date] += 1 if date
      end

      # True for custom events tagged "error".
      def error_event?(event)
        return false unless event["type"] == CUSTOM
        data = event["data"]
        data.is_a?(Hash) && data["tag"] == "error"
      end

      def record_error_day(acc, event)
        date = day_string(event["timestamp"])
        acc.per_day_errors[date] += 1 if date
      end

      # Per-day server-occurrence counts (occurrence timestamps are epoch
      # seconds); returns whether either cap was hit. since filters problems by
      # last_seen (safe: an in-window occurrence implies last_seen >= since).
      # until_time is applied per occurrence, not to list_problems, since a
      # still-active problem can own in-window occurrences.
      def collect_server_overlay(acc)
        since = acc.since
        until_time = acc.until_time
        problems = store.list_problems(project: nil, limit: MAX_OVERLAY_PROBLEMS, since: since)
        truncated = problems.size >= MAX_OVERLAY_PROBLEMS

        problems.each do |problem|
          occurrences = store.get_occurrences(problem[:id], after: since, limit: MAX_OCCURRENCES_PER_PROBLEM)
          truncated = true if occurrences.size >= MAX_OCCURRENCES_PER_PROBLEM

          occurrences.each do |occurrence|
            ts = occurrence["timestamp"].to_f
            next if until_time && ts > until_time
            acc.per_day_server_errors[Time.at(ts).utc.to_date.to_s] += 1
          end
        end

        truncated
      end

      # UTC calendar day ("YYYY-MM-DD") for an epoch-millisecond timestamp, or
      # nil when the value cannot be placed on a day. in_window? deliberately
      # lets events with non-numeric timestamps through, so anything that is not
      # a Numeric (e.g. a String) must yield nil here rather than raise on the
      # division; RangeError covers FloatDomainError for NaN/Infinity.
      def day_string(timestamp_ms)
        return nil unless timestamp_ms.is_a?(Numeric)

        Time.at(timestamp_ms / 1000.0).utc.to_date.to_s
      rescue TypeError, ArgumentError, RangeError
        nil
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+ require_relative "collectors/vitals_collector"
5
+
6
module Sentiero
  module Analytics
    # Per-page-URL Web Vitals report, computed by scanning the store at read
    # time. The recorder emits one "__perf" custom event per metric report
    # carrying {metric, value, rating}; ratings come from the client web-vitals
    # library and are tallied as-is. The per-segment math (last-wins collapse,
    # rating histogram, worst-sample) lives in VitalsCollector, which is shared
    # with PageReportAnalyzer.
    class WebVitalsAnalyzer < Analyzer
      # Upper bound on distinct URLs tracked; sessions scan newest-first, so the
      # most recently visited URLs are the ones kept.
      MAX_URLS = 200

      MAX_SAMPLES_PER_METRIC = 2000

      # Attributes samples per page segment so each report lands on the page it
      # measured. Within a segment, repeated reports of the same metric collapse
      # to the LAST one (one sample == one page view's final value), so
      # re-emitted candidates and reloads cannot inflate counts or skew
      # percentiles.
      #
      # Returns {pages: {url => summary}, was_truncated:}; was_truncated is
      # truthy when the session scan hit its cap, the URL-row cap was full, or a
      # collector reported it was capped.
      def analyze(limit: nil, since: nil, until_time: nil)
        collectors_by_url = {}
        rows_capped = false

        _count, scan_capped = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
          each_page_segment(events) do |url, segment, anchor|
            next unless url

            if (collector = collector_for(collectors_by_url, url))
              collector.collect(segment, session_id: summary[:session_id], window_id: window_id, anchor: anchor)
              rows_capped = true if collector.capped
            else
              # No row available for this URL: the sample is dropped.
              rows_capped = true
            end
          end
        end

        summaries = collectors_by_url.transform_values(&:summarize)
        {pages: summaries, was_truncated: rows_capped || scan_capped}
      end

      private

      # The URL's VitalsCollector (created on first use), or nil when the
      # URL-row cap is full.
      def collector_for(pages, url)
        bounded_fetch(pages, url, MAX_URLS) do
          VitalsCollector.new(max_samples: MAX_SAMPLES_PER_METRIC)
        end
      end
    end
  end
end
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
module Sentiero
  # Central settings object: recorder/rrweb options, ingest and storage caps,
  # privacy switches and dashboard authentication. Defaults are assigned in
  # #initialize.
  class Configuration
    attr_reader :store

    attr_accessor :cors_origins,
      :auth_callback,
      :flush_interval_ms,
      :flush_event_threshold,
      :max_events_per_page,
      :max_events_per_request,
      :max_sessions,
      :max_events_per_session,
      :max_problems,
      :max_server_events,
      :ingest_keys,
      :cross_tab_sessions,
      :capture_metadata,
      :capture_errors,
      :track_navigation,
      :track_custom_events,
      :capture_clicks,
      :track_forms,
      # snake_case rrweb recorder options; converted to camelCase for the frontend.
      :mask_all_inputs,
      :mask_input_options,
      :block_selector,
      :mask_text_selector,
      :ignore_selector,
      :sampling,
      :inline_stylesheet,
      :checkout_every_n_ms,
      # Raw camelCase hash passed to rrweb verbatim; first-class attributes above
      # take precedence for overlapping keys.
      :recorder_options,
      :capture_web_vitals,
      :analytics_max_scan_sessions,
      :user_opt_out,
      :opt_out_cookie_name,
      :respect_gpc,
      :retention_period,
      :anonymize_ip,
      :redaction,
      :audit_log,
      :shareable_replays,
      :basic_auth,
      # Escape hatch: serve the dashboard/analytics/monitoring UIs with NO auth
      # (see Configuration#initialize). Off by default so the UI fails closed.
      :allow_insecure_dashboard

    # session_idle_timeout / session_max_age have validating writers below (a
    # bad value here is serialized straight into client-side session-rotation
    # logic), so they're declared separately from the plain attr_accessor list.
    attr_reader :session_idle_timeout, :session_max_age

    # Recorder options that always win over user-supplied values in
    # #effective_recorder_options.
    ENFORCED_PRIVACY = {
      maskInputOptions: {password: true}
    }.freeze

    # Replay sessions follow a user journey, so the idle boundary is generous
    # (a lunch break shouldn't split a journey); max age is the hard cap that
    # keeps the identifier from living forever on never-idle tabs.
    DEFAULT_SESSION_IDLE_TIMEOUT = 6 * 60 * 60
    DEFAULT_SESSION_MAX_AGE = 7 * 24 * 60 * 60

    # Composition root for store caps: a store assigned to the configuration is
    # bound to the configuration's caps here, so the store itself never reads
    # global state. Set caps before assigning the store; inject explicit
    # Store::Limits on the store afterward to override.
    def store=(store)
      store.limits = Store::Limits.from_configuration(self) if store.respond_to?(:limits=)
      @store = store
    end

    # Reach the Rails / Reporter config from the one core object, e.g.
    # Sentiero.configure { |c| c.reporter.endpoint = "..." }. They remain separate
    # instances so the reporter stays usable as a standalone client.
    def reporter
      require_subsystem!("Sentiero::Reporter", 'require "sentiero/reporter"')
      Reporter.configuration
    end

    def rails
      require_subsystem!("Sentiero::Rails", "the sentiero-rails gem")
      Rails.configuration
    end

    # Raises Error (with a hint on how to load it) unless the named constant is
    # already defined.
    private def require_subsystem!(const_name, hint)
      return if Object.const_defined?(const_name)

      raise Error, "#{const_name} is not loaded — #{hint} to configure it."
    end

    # A non-positive or non-numeric value would either disable rotation
    # (never expire) or break the client's Date.now() arithmetic once
    # serialized into the config JSON, so it silently falls back instead of
    # raising.
    def session_idle_timeout=(value)
      @session_idle_timeout = clamp_positive_seconds(value, DEFAULT_SESSION_IDLE_TIMEOUT)
    end

    def session_max_age=(value)
      @session_max_age = clamp_positive_seconds(value, DEFAULT_SESSION_MAX_AGE)
    end

    # Returns value when it is a finite, positive number; otherwise default.
    private def clamp_positive_seconds(value, default)
      (value.is_a?(Numeric) && value.finite? && value > 0) ? value : default
    end

    def initialize
      @store = nil
      @cors_origins = []
      @auth_callback = nil
      @flush_interval_ms = 10_000
      @flush_event_threshold = 50
      @max_events_per_page = 1_000
      @max_problems = 5_000
      @max_server_events = 50_000
      @ingest_keys = {}
      @cross_tab_sessions = true
      @capture_metadata = false
      @capture_errors = false
      @track_navigation = false
      @track_custom_events = false
      @capture_clicks = false
      @track_forms = false

      @mask_all_inputs = true
      @mask_input_options = {}
      @block_selector = "[data-rr-block]"
      @mask_text_selector = "[data-rr-mask]"
      @ignore_selector = "[data-rr-ignore]"
      @sampling = {scroll: 150, input: "last"}
      @inline_stylesheet = nil
      @checkout_every_n_ms = nil
      @recorder_options = {}

      @capture_web_vitals = false
      @analytics_max_scan_sessions = 5000
      @user_opt_out = false
      @opt_out_cookie_name = "sentiero_optout"
      @respect_gpc = true
      @retention_period = nil
      @session_idle_timeout = DEFAULT_SESSION_IDLE_TIMEOUT
      @session_max_age = DEFAULT_SESSION_MAX_AGE
      @redaction = Sentiero::Redaction::Config.new
      @anonymize_ip = true
      @audit_log = nil
      # Opt-in: a share file is a full session dump leaving the operator's
      # infrastructure, so export/import routes 404 until explicitly enabled.
      @shareable_replays = false
      @basic_auth = nil
      # The dashboard exposes recordings/analytics, so with neither basic_auth nor
      # auth_callback set it fails closed (403). Set true to opt into serving it
      # unauthenticated (e.g. behind a trusted proxy or in local dev).
      @allow_insecure_dashboard = false
    end

    # The recorder options actually handed to the frontend: recorder_options,
    # overridden by the first-class attributes, overridden by ENFORCED_PRIVACY
    # (hash-valued enforced entries are merged into the existing hash rather
    # than replacing it).
    #
    # Top-level option names (and the keys of a hash being merged with an
    # enforced hash) are normalised to symbols first. Without that, a
    # string-keyed entry such as "maskAllInputs" or {"password" => false} is
    # never overridden by the symbol-keyed first-class/enforced value; it
    # survives as a duplicate key, leaving precedence to whatever the
    # serializer and parser do with duplicates. A nil recorder_options is
    # treated as empty.
    def effective_recorder_options
      first_class = {
        maskAllInputs: mask_all_inputs,
        maskInputOptions: mask_input_options,
        blockSelector: block_selector,
        maskTextSelector: mask_text_selector,
        ignoreSelector: ignore_selector,
        sampling: sampling
      }

      first_class[:inlineStylesheet] = inline_stylesheet unless inline_stylesheet.nil?
      first_class[:checkoutEveryNms] = checkout_every_n_ms unless checkout_every_n_ms.nil?

      symbolize_option_keys(recorder_options)
        .merge(first_class)
        .merge(ENFORCED_PRIVACY) { |_key, existing, enforced|
          if enforced.is_a?(Hash)
            symbolize_option_keys(existing).merge(enforced)
          else
            enforced
          end
        }
    end

    # Copy of options with every key that responds to to_sym symbolized; any
    # non-Hash input (including nil) yields an empty hash.
    private def symbolize_option_keys(options)
      return {} unless options.is_a?(Hash)

      options.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module Sentiero
  # Right-to-erasure helpers (GDPR Art. 17). Store-agnostic.
  # Erasure is destructive and irreversible; deleted sessions cannot be recovered.
  module Erasure
    module_function

    # Deletes the given sessions and returns how many were erased.
    #
    # Every id is validated against Store::VALID_ID before anything is deleted;
    # an invalid id raises ArgumentError. Ids that are not present in the store
    # are skipped. Duplicate ids are collapsed first: presence is checked for
    # all ids before any deletion, so a repeated id would otherwise be deleted
    # and counted more than once.
    def erase_sessions(store, ids)
      unique_ids = ids.uniq
      unique_ids.each do |id|
        raise ArgumentError, "Invalid ID: #{id.inspect}" unless Store::VALID_ID.match?(id.to_s)
      end
      present = unique_ids.select { |id| store.get_session(id) }
      present.each { |id| store.delete_session(id) }
      present.size
    end

    # At least one bound is required to guard against erasing everything; the
    # range is inclusive. Lists/deletes in capped batches (paging) until a scan
    # is short, so one call erases every match regardless of count. Returns the
    # number of sessions erased; raises ArgumentError when no bound is given or
    # since is after until_time.
    def erase_where(store, since: nil, until_time: nil)
      raise ArgumentError, "provide since: and/or until_time:" if since.nil? && until_time.nil?
      if since && until_time && since.to_f > until_time.to_f
        raise ArgumentError, "since: must not be after until_time:"
      end

      cap = store.limits.analytics_max_scan_sessions
      erased = 0

      loop do
        ids = store.list_sessions(
          limit: cap,
          since: since,
          until_time: until_time
        ).map { |summary| summary[:session_id] }

        ids.each { |id| store.delete_session(id) }
        erased += ids.size

        # Each listed session matched and was deleted, so the set shrinks;
        # a short batch means the matches are exhausted. An empty batch always
        # ends the loop: with a cap of 0 the list is empty every time and
        # `0 < 0` alone would never break.
        break if ids.empty? || ids.size < cap
      end

      erased
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
+ module Sentiero
6
+ # Computes the grouping fingerprint for an exception occurrence. The
7
+ # normalization regexes are deliberately linear-time (simple character classes
8
+ # with a single quantifier, no nesting) so untrusted backtraces cannot trigger
9
+ # catastrophic backtracking (ReDoS).
10
+ module Fingerprint
11
+ # Only the top frames drive grouping (deeper frames vary by call site).
12
+ MAX_FRAMES = 5
13
+ MAX_FRAME_LENGTH = 1000
14
+
15
+ module_function
16
+
17
+ def compute(exception_class:, backtrace:, project:)
18
+ frames = Array(backtrace).first(MAX_FRAMES).map { |frame| normalize_frame(frame.to_s) }
19
+ input = "#{project}\n#{exception_class}\n#{frames.join("\n")}"
20
+ Digest::SHA256.hexdigest(input)[0, 40]
21
+ end
22
+
23
+ # Strips per-occurrence noise (memory addresses, line numbers). Digits inside
24
+ # identifiers (e.g. `step_1`, `V2::Api`) are preserved so distinct methods do
25
+ # not collapse into one group.
26
+ def normalize_frame(frame)
27
+ frame = frame[0, MAX_FRAME_LENGTH].strip
28
+ frame
29
+ .gsub(/0x[0-9a-fA-F]+/, "0xHEX") # memory addresses
30
+ .gsub(/:[0-9]+(?=:in )/, ":N") # `path:LINE:in 'method'`
31
+ .gsub(/:[0-9]+\z/, ":N") # `path:LINE` (top-level frame, no method)
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ipaddr"
4
+
5
module Sentiero
  # Coarsens client IPs before they reach a store or log when
  # +config.anonymize_ip+ is on. Standard practice for GDPR/CCPA "reasonable"
  # anonymization: keep the IPv4 /24 (last octet zeroed) and the IPv6 /48 (last
  # 80 bits zeroed). An IPv4-mapped IPv6 address collapses to its anonymized
  # dotted-quad form.
  #
  # Anonymization is one-way and best-effort, not a re-identification guarantee.
  module IpAnonymizer
    module_function

    # Returns the anonymized address as a String, or nil when the input is not
    # a String, is blank, looks like a CIDR, or cannot be parsed as an IP.
    def anonymize(ip)
      candidate = ip.strip if ip.is_a?(String)
      # A client IP is a bare host, not a CIDR, so anything with "/" is refused.
      return nil if candidate.nil? || candidate.empty? || candidate.include?("/")

      parsed = IPAddr.new(candidate)
      parsed = parsed.native if parsed.ipv4_mapped?
      kept_prefix_bits = parsed.ipv4? ? 24 : 48
      parsed.mask(kept_prefix_bits).to_s
    rescue IPAddr::Error
      nil
    end
  end
end