sentiero 1.0.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155):
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +7 -0
  3. data/README.md +679 -0
  4. data/lib/sentiero/analytics/analyzer.rb +91 -0
  5. data/lib/sentiero/analytics/bounded.rb +29 -0
  6. data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
  7. data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
  8. data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
  9. data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
  10. data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
  11. data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
  12. data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
  13. data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
  14. data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
  15. data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
  16. data/lib/sentiero/analytics/entry_attribution.rb +71 -0
  17. data/lib/sentiero/analytics/error_discovery.rb +118 -0
  18. data/lib/sentiero/analytics/events.rb +21 -0
  19. data/lib/sentiero/analytics/exporter.rb +242 -0
  20. data/lib/sentiero/analytics/form_analyzer.rb +153 -0
  21. data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
  22. data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
  23. data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
  24. data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
  25. data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
  26. data/lib/sentiero/analytics/problem_detail.rb +97 -0
  27. data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
  28. data/lib/sentiero/analytics/segmenter.rb +133 -0
  29. data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
  30. data/lib/sentiero/analytics/stats.rb +30 -0
  31. data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
  32. data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
  33. data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
  34. data/lib/sentiero/configuration.rb +184 -0
  35. data/lib/sentiero/erasure.rb +48 -0
  36. data/lib/sentiero/fingerprint.rb +34 -0
  37. data/lib/sentiero/ip_anonymizer.rb +29 -0
  38. data/lib/sentiero/redaction/config.rb +61 -0
  39. data/lib/sentiero/redaction.rb +207 -0
  40. data/lib/sentiero/reporter/configuration.rb +50 -0
  41. data/lib/sentiero/reporter/context.rb +31 -0
  42. data/lib/sentiero/reporter/dispatcher.rb +91 -0
  43. data/lib/sentiero/reporter/http_transport.rb +57 -0
  44. data/lib/sentiero/reporter/log_transport.rb +26 -0
  45. data/lib/sentiero/reporter/middleware.rb +62 -0
  46. data/lib/sentiero/reporter/normalizer.rb +14 -0
  47. data/lib/sentiero/reporter/null_transport.rb +18 -0
  48. data/lib/sentiero/reporter/report_context.rb +29 -0
  49. data/lib/sentiero/reporter/scrubber.rb +47 -0
  50. data/lib/sentiero/reporter/test_helper.rb +32 -0
  51. data/lib/sentiero/reporter/test_transport.rb +28 -0
  52. data/lib/sentiero/reporter.rb +214 -0
  53. data/lib/sentiero/roda.rb +47 -0
  54. data/lib/sentiero/store/error_store.rb +220 -0
  55. data/lib/sentiero/store/limits.rb +31 -0
  56. data/lib/sentiero/store/session_store.rb +118 -0
  57. data/lib/sentiero/store.rb +72 -0
  58. data/lib/sentiero/stores/file.rb +566 -0
  59. data/lib/sentiero/stores/memory.rb +362 -0
  60. data/lib/sentiero/stores/redis/keys.rb +59 -0
  61. data/lib/sentiero/stores/redis/lua.rb +119 -0
  62. data/lib/sentiero/stores/redis.rb +665 -0
  63. data/lib/sentiero/stores/sqlite/schema.rb +79 -0
  64. data/lib/sentiero/stores/sqlite.rb +626 -0
  65. data/lib/sentiero/user_agent.rb +32 -0
  66. data/lib/sentiero/version.rb +5 -0
  67. data/lib/sentiero/web/analytics_app.rb +538 -0
  68. data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
  69. data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
  70. data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
  71. data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
  72. data/lib/sentiero/web/assets/manifest.json +11 -0
  73. data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
  74. data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
  75. data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
  76. data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
  77. data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
  78. data/lib/sentiero/web/assets_app.rb +42 -0
  79. data/lib/sentiero/web/base_app.rb +319 -0
  80. data/lib/sentiero/web/basic_auth.rb +27 -0
  81. data/lib/sentiero/web/basic_auth_check.rb +41 -0
  82. data/lib/sentiero/web/body_reader.rb +44 -0
  83. data/lib/sentiero/web/csv_writer.rb +45 -0
  84. data/lib/sentiero/web/dashboard_app.rb +236 -0
  85. data/lib/sentiero/web/errors_app.rb +97 -0
  86. data/lib/sentiero/web/escaping.rb +37 -0
  87. data/lib/sentiero/web/events_app.rb +196 -0
  88. data/lib/sentiero/web/formatting.rb +43 -0
  89. data/lib/sentiero/web/ingest_app.rb +92 -0
  90. data/lib/sentiero/web/manifest.rb +43 -0
  91. data/lib/sentiero/web/monitoring_app.rb +316 -0
  92. data/lib/sentiero/web/script_tag.rb +57 -0
  93. data/lib/sentiero/web/shareable_replay.rb +88 -0
  94. data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
  95. data/lib/sentiero/web/templates/_brand.html.erb +18 -0
  96. data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
  97. data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
  98. data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
  99. data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
  100. data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
  101. data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
  102. data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
  103. data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
  104. data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
  105. data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
  106. data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
  107. data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
  108. data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
  109. data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
  110. data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
  111. data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
  112. data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
  113. data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
  114. data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
  115. data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
  116. data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
  117. data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
  118. data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
  119. data/lib/sentiero/web/templates/event_show.html.erb +52 -0
  120. data/lib/sentiero/web/templates/events_index.html.erb +177 -0
  121. data/lib/sentiero/web/templates/export_index.html.erb +69 -0
  122. data/lib/sentiero/web/templates/forms.html.erb +105 -0
  123. data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
  124. data/lib/sentiero/web/templates/import.html.erb +39 -0
  125. data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
  126. data/lib/sentiero/web/templates/segments.html.erb +114 -0
  127. data/lib/sentiero/web/templates/session_show.html.erb +195 -0
  128. data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
  129. data/lib/sentiero/web/track_app.rb +57 -0
  130. data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
  131. data/lib/sentiero/web/views/analyzer_view.rb +27 -0
  132. data/lib/sentiero/web/views/base_view.rb +76 -0
  133. data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
  134. data/lib/sentiero/web/views/conversions_view.rb +41 -0
  135. data/lib/sentiero/web/views/engagement_view.rb +67 -0
  136. data/lib/sentiero/web/views/errors_index_view.rb +37 -0
  137. data/lib/sentiero/web/views/event_show_view.rb +20 -0
  138. data/lib/sentiero/web/views/events_index_view.rb +56 -0
  139. data/lib/sentiero/web/views/export_view.rb +23 -0
  140. data/lib/sentiero/web/views/forms_view.rb +28 -0
  141. data/lib/sentiero/web/views/frustration_view.rb +15 -0
  142. data/lib/sentiero/web/views/funnel_view.rb +36 -0
  143. data/lib/sentiero/web/views/heatmap_view.rb +34 -0
  144. data/lib/sentiero/web/views/import_view.rb +13 -0
  145. data/lib/sentiero/web/views/page_report_view.rb +43 -0
  146. data/lib/sentiero/web/views/problem_show_view.rb +46 -0
  147. data/lib/sentiero/web/views/scroll_view.rb +23 -0
  148. data/lib/sentiero/web/views/segments_view.rb +28 -0
  149. data/lib/sentiero/web/views/session_show_view.rb +105 -0
  150. data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
  151. data/lib/sentiero/web/views/vitals_view.rb +45 -0
  152. data/lib/sentiero/web/views.rb +24 -0
  153. data/lib/sentiero/window_ref.rb +6 -0
  154. data/lib/sentiero.rb +69 -0
  155. metadata +232 -0
@@ -0,0 +1,236 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_app"
4
+ require_relative "analytics_app"
5
+ require_relative "monitoring_app"
6
+
7
+ module Sentiero
8
+ module Web
9
+ class DashboardApp < BaseApp
10
# Runs BaseApp's initializer, then asks BaseApp to emit its one-time
# "unauthenticated" warning (warn_unauthenticated_once).
def initialize
  super
  BaseApp.warn_unauthenticated_once
end
14
+
15
# Rack entry point for the dashboard.
#
# Normalizes the request path, answers the two unauthenticated routes
# (static assets and the /recorder.js alias), enforces authorization, and
# then dispatches on the path to the matching handler or sub-app.
def call(env)
  # Rack::Builder#map "/sentiero" leaves PATH_INFO empty (not "/") for a
  # request to the bare mount point; treat it as the index.
  raw_path = env["PATH_INFO"]
  path = (raw_path.nil? || raw_path.empty?) ? "/" : raw_path
  method = env["REQUEST_METHOD"]

  # Static assets are served before auth: they carry no session data, and
  # the standalone AssetsApp endpoint serves the same files unauthenticated.
  asset_path = path[%r{\A/assets/(.+)\z}, 1]
  return handle_asset(asset_path) if asset_path

  # Stable alias for the content-hashed recorder bundle, so pages outside
  # the Ruby helpers (static HTML) can hardcode one URL that survives
  # rebuilds. Sibling of the /events mount, so the recorder's
  # currentScript fallback derives eventsUrl correctly from it.
  return handle_recorder_alias if path == "/recorder.js"

  return unauthorized_response unless authorized?(env)

  case path
  when "/"
    handle_index(env)
  when %r{\A/sessions/([^/]+)/windows/([^/]+)\z}
    sid = Regexp.last_match(1)
    wid = Regexp.last_match(2)
    rejection = guard(sid, wid)
    if rejection
      rejection
    elsif delete_request?(method, env)
      handle_delete_window(env, sid, wid)
    else
      handle_show(env, sid, wid)
    end
  when %r{\A/api/sessions/([^/]+)/windows/([^/]+)/events\z}
    sid = Regexp.last_match(1)
    wid = Regexp.last_match(2)
    guard(sid, wid) || handle_events_api(env, sid, wid)
  when "/sessions/bulk_delete"
    # Must stay above the generic /sessions/:id pattern, which would
    # otherwise capture "bulk_delete" as a session id.
    post_only(method) || handle_bulk_delete(env)
  when %r{\A/sessions/([^/]+)\z}
    sid = Regexp.last_match(1)
    # The delete branch keys off delete_request? (DELETE, or POST with
    # ?_method=delete), so the id guard nests inside the method dispatch
    # rather than using the get_only/post_only combinators.
    if method != "GET" && !delete_request?(method, env)
      not_found
    else
      guard(sid) ||
        ((method == "GET") ? handle_session_redirect(env, sid) : handle_delete(env, sid))
    end
  when %r{\A/custom-events(?:/.*)?\z}, %r{\A/issues(?:/.*)?\z}
    Sentiero::Web::MonitoringApp.new.call(env)
  when %r{\A/analytics(?:/.*)?\z}
    Sentiero::Web::AnalyticsApp.new.call(env)
  else
    not_found
  end
end
70
+
71
+ private
72
+
73
# Serves the recorder bundle named by the manifest's "recorder" entry under
# the stable /recorder.js URL. Responds with not_found when the manifest has
# no such entry.
def handle_recorder_alias
  bundle_name = Manifest.manifest["recorder"]
  return not_found unless bundle_name

  status, headers, body = handle_asset(bundle_name)
  if status == 200
    # The alias serves new bundle contents after an upgrade, so it must not
    # inherit the fingerprinted file's year-long immutable cache.
    headers["cache-control"] = "public, max-age=300"
  end
  [status, headers, body]
end
83
+
84
# Returns +path+ with the request's QUERY_STRING appended after "?", or
# +path+ unchanged when the query string is absent or empty.
def qs_url(path, env)
  query = env["QUERY_STRING"]
  return path unless query
  return path if query.empty?

  "#{path}?#{query}"
end
88
+
89
# Renders the paginated session list.
#
# Reads paging, date-range, sort, search and has_errors options from the
# query string, fetches one row more than a page (so take_page can report
# whether a next page exists), records a :list_sessions audit entry and
# renders SessionsIndexView.
def handle_index(env)
  params = query_params(env)
  page, per_page, offset = paginate(params, default: 20, max: 100)
  since, until_time = parse_range_params(params)

  # Only whitelisted sort columns are forwarded to the store.
  requested_sort = params["sort_by"]
  sort_by = requested_sort if %w[updated_at created_at event_count].include?(requested_sort)

  # A blank search term is treated as "no search".
  term = params["search"]&.strip
  search = (term.nil? || term.empty?) ? nil : term

  has_errors_filter = params["has_errors"] == "true"

  fetched = fetch_sessions(
    has_errors_filter: has_errors_filter,
    per_page: per_page,
    offset: offset,
    since: since,
    until_time: until_time,
    sort_by: sort_by,
    search: search
  )
  sessions, has_next = take_page(fetched, per_page)

  audit!(env, action: :list_sessions)

  view = Views::SessionsIndexView.new(
    sessions: sessions,
    page: page,
    per_page: per_page,
    has_next: has_next,
    search: search || "",
    sort_by: sort_by || "updated_at",
    since: params["since"] || "",
    until_param: params["until"] || "",
    has_errors: has_errors_filter
  )
  render_page(env, view)
end
125
+
126
# Fetches up to per_page + 1 sessions for the index page.
#
# Without the has_errors filter the store paginates directly. With it, the
# store is asked for up to analytics_max_scan_sessions rows from offset 0,
# the rows whose metadata carries a truthy "has_errors" are kept, and the
# requested page is sliced out of that list (empty array when the offset
# is past the end).
def fetch_sessions(has_errors_filter:, per_page:, offset:, since:, until_time:, sort_by:, search:)
  filters = {since: since, until_time: until_time, sort_by: sort_by, search: search}

  unless has_errors_filter
    return Sentiero.store.list_sessions(limit: per_page + 1, offset: offset, **filters)
  end

  # The store has no has_errors index, so filter compute-on-read: scan up to
  # analytics_max_scan_sessions matching sessions, then slice the requested
  # page. Very large session counts make this a full scan.
  scan_cap = Sentiero.store.limits.analytics_max_scan_sessions
  scanned = Sentiero.store.list_sessions(limit: scan_cap, offset: 0, **filters)
  with_errors = scanned.select do |session|
    metadata = session[:metadata]
    metadata && metadata["has_errors"]
  end

  with_errors.slice(offset, per_page + 1) || []
end
153
+
154
# Redirects /sessions/:id to the session's window with the greatest
# last_event_at (windows without that value count as 0). Answers a plain
# text 404 when the session is unknown or has no windows.
def handle_session_redirect(env, session_id)
  plain_404 = ->(text) { [404, {"content-type" => "text/plain"}, [text]] }

  session = Sentiero.store.get_session(session_id)
  return plain_404.call("Session not found") unless session

  windows = session[:windows]
  return plain_404.call("No windows found") unless windows && !windows.empty?

  latest = windows.max_by { |window| window[:last_event_at] || 0 }
  redirect("#{base_path(env)}/sessions/#{session_id}/windows/#{latest[:window_id]}")
end
164
+
165
# Renders the replay page for one window of a session.
#
# Answers a plain-text 404 when the session does not exist. Otherwise it
# records a :view_session audit entry, loads up to 100 server-side
# exception occurrences and up to 100 server events for the session, merges
# them into one list ordered newest first, and renders SessionShowView.
def handle_show(env, session_id, window_id)
  session = Sentiero.store.get_session(session_id)
  return [404, {"content-type" => "text/plain"}, ["Session not found"]] unless session

  audit!(env, action: :view_session, session_id: session_id, window_id: window_id)

  exceptions = Sentiero.store.occurrences_for_session(session_id, limit: 100).map do |occurrence|
    {kind: "exception", timestamp: occurrence["timestamp"].to_f, occurrence: occurrence}
  end
  events = Sentiero.store.server_events_for_session(session_id, limit: 100).map do |event|
    {kind: "event", timestamp: event["timestamp"].to_f, event: event}
  end
  # Negated key => descending timestamp order.
  server_activity = (exceptions + events).sort_by { |entry| -entry[:timestamp] }

  view = Views::SessionShowView.new(
    session: session,
    session_id: session_id,
    window_id: window_id,
    shareable_replays: Sentiero.configuration.shareable_replays,
    server_activity: server_activity
  )
  render_page(env, view)
end
186
+
187
# JSON API returning one page of recorded events for a window.
#
# The optional "after" query parameter is passed to the store as the
# cursor, and the page size is configuration.max_events_per_page. The read
# is audited as :view_session.
def handle_events_api(env, session_id, window_id)
  cursor = query_params(env)["after"]
  page_size = Sentiero.configuration.max_events_per_page
  window = Sentiero::WindowRef.new(session_id, window_id)

  events = Sentiero.store.get_events(window, after: cursor, limit: page_size)

  audit!(env, action: :view_session, session_id: session_id, window_id: window_id)

  [200, json_headers, [JSON.generate(events)]]
end
198
+
199
# Deletes a whole session and redirects to the dashboard index.
# When verify_csrf returns a response, that response is returned instead and
# nothing is deleted.
def handle_delete(env, session_id)
  csrf_rejection = verify_csrf(env)
  return csrf_rejection if csrf_rejection

  audit!(env, action: :delete_session, session_id: session_id)
  Sentiero.store.delete_session(session_id)
  redirect("#{base_path(env)}/")
end
206
+
207
# Deletes a single window of a session.
#
# When verify_csrf returns a response, that response is returned instead and
# nothing is deleted. Afterwards the user is redirected to the session's most
# recently active remaining window, or to the dashboard index when the
# session is gone or has no windows left.
def handle_delete_window(env, session_id, window_id)
  csrf_rejection = verify_csrf(env)
  return csrf_rejection if csrf_rejection

  audit!(env, action: :delete_session, session_id: session_id, window_id: window_id)
  Sentiero.store.delete_window(Sentiero::WindowRef.new(session_id, window_id))

  # Deleting the last window removes the session
  session = Sentiero.store.get_session(session_id)
  remaining = session && session[:windows]
  return redirect("#{base_path(env)}/") if !remaining || remaining.empty?

  latest = remaining.max_by { |window| window[:last_event_at] || 0 }
  redirect("#{base_path(env)}/sessions/#{session_id}/windows/#{latest[:window_id]}")
end
222
+
223
# Deletes every session listed in the POSTed "session_ids" field, skipping
# entries that fail valid_id?, then redirects to the dashboard index.
# When verify_csrf returns a response, that response is returned instead and
# nothing is deleted.
def handle_bulk_delete(env)
  csrf_rejection = verify_csrf(env)
  return csrf_rejection if csrf_rejection

  submitted_ids = Array(Rack::Request.new(env).POST["session_ids"])
  submitted_ids.each do |sid|
    if valid_id?(sid)
      audit!(env, action: :delete_session, session_id: sid)
      Sentiero.store.delete_session(sid)
    end
  end

  redirect("#{base_path(env)}/")
end
234
+ end
235
+ end
236
+ end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "ingest_app"
4
+ require_relative "../fingerprint"
5
+ require_relative "../redaction"
6
+ require_relative "../ip_anonymizer"
7
+
8
+ module Sentiero
9
+ module Web
10
+ # Server-lane ingest for exceptions. Computes the grouping fingerprint
11
+ # server-side, then persists via Sentiero.store.save_occurrence.
12
+ class ErrorsApp < IngestApp
13
+ MAX_BACKTRACE_FRAMES = 100
14
+ MAX_MESSAGE_LENGTH = 4000
15
+ MAX_CONTEXT_BYTES = 16_384
16
+
17
+ private
18
+
19
# Validates, redacts and persists one reported exception.
#
# Responds 400 when exception_class or message is missing or not a
# non-empty String, when a supplied session_id/window_id fails
# valid_optional_id?, or when the store raises ArgumentError. On success
# responds 200 with the computed fingerprint.
def handle(env, project, data)
  exception_class = data["exception_class"]
  message = data["message"]

  if !exception_class.is_a?(String) || exception_class.empty?
    return json_response(400, {error: "exception_class is required"})
  end
  if !message.is_a?(String) || message.empty?
    return json_response(400, {error: "message is required"})
  end

  session_id = data["session_id"]
  window_id = data["window_id"]
  return json_response(400, {error: "invalid session_id"}) if session_id && !valid_optional_id?(session_id)
  return json_response(400, {error: "invalid window_id"}) if window_id && !valid_optional_id?(window_id)

  # Redact before fingerprinting so grouping is stable whether or not the
  # client already redacted.
  redaction = Sentiero.configuration.redaction
  message = Redaction.redact_text(message, redaction)

  # Only the first MAX_BACKTRACE_FRAMES frames are kept; a non-array
  # backtrace is ignored.
  frames = data["backtrace"]
  backtrace =
    if frames.is_a?(Array)
      frames.first(MAX_BACKTRACE_FRAMES).map { |frame| Redaction.redact_text(frame.to_s, redaction) }
    end

  timestamp = numeric_timestamp(data["timestamp"])

  fingerprint = Fingerprint.compute(
    exception_class: exception_class,
    backtrace: backtrace,
    project: project
  )

  occurrence = {
    "fingerprint" => fingerprint,
    "project" => project,
    "exception_class" => exception_class,
    "message" => message[0, MAX_MESSAGE_LENGTH],
    "timestamp" => timestamp
  }
  occurrence["backtrace"] = backtrace if backtrace

  raw_context = data["context"]
  if raw_context.is_a?(Hash)
    redacted_context = Redaction.deep_redact_strings(capped_context(raw_context), redaction)
    occurrence["context"] = anonymize_request_ip(redacted_context)
  end

  occurrence["session_id"] = session_id if session_id
  occurrence["window_id"] = window_id if window_id

  begin
    Sentiero.store.save_occurrence(occurrence)
  rescue ArgumentError => e
    return json_response(400, {error: e.message})
  end

  json_response(200, {status: "ok", fingerprint: fingerprint})
end
79
+
80
# Returns +context+ unchanged when its JSON encoding fits within
# MAX_CONTEXT_BYTES; otherwise returns the placeholder {"_truncated" => true}.
def capped_context(context)
  return context if JSON.generate(context).bytesize <= MAX_CONTEXT_BYTES

  {"_truncated" => true}
end
83
+
84
# Backstop for reporters that don't (or can't) honor anonymize_ip
# themselves: mirrors Reporter::Middleware#client_ip's truncation so a
# server configured with anonymize_ip: true never persists a raw IP.
#
# Returns +context+ untouched when anonymize_ip is off or when
# context["request"]["ip"] is not a String; otherwise returns a copy with
# that IP passed through IpAnonymizer.anonymize (the input is not mutated).
def anonymize_request_ip(context)
  return context unless Sentiero.configuration.anonymize_ip

  request = context["request"]
  raw_ip = request["ip"] if request.is_a?(Hash)
  return context unless raw_ip.is_a?(String)

  anonymized_request = request.merge("ip" => IpAnonymizer.anonymize(raw_ip))
  context.merge("request" => anonymized_request)
end
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cgi/escape"
4
+ require "json"
5
+
6
+ module Sentiero
7
+ module Web
8
# HTML and JavaScript escaping for safe template rendering using only stdlib.
module Escaping
  # Chars safe in JSON but unsafe in HTML <script>: prevents </script>
  # breakout and HTML entity interpretation (mirrors ERB::Util.json_escape).
  HTML_UNSAFE_IN_SCRIPT = {
    "<" => '\u003c',
    ">" => '\u003e',
    "&" => '\u0026',
    "\u2028" => '\u2028',
    "\u2029" => '\u2029'
  }.freeze

  HTML_UNSAFE_IN_SCRIPT_PATTERN = Regexp.union(HTML_UNSAFE_IN_SCRIPT.keys).freeze

  # JSON encoding escapes the double quote and the backslash but leaves the
  # apostrophe alone, so JSON-escaped text is not safe inside a
  # single-quoted JS literal. The \u0027 escape denotes the same character
  # in JavaScript and cannot terminate either quote style.
  JS_STRING_UNSAFE = HTML_UNSAFE_IN_SCRIPT.merge("'" => '\u0027').freeze

  JS_STRING_UNSAFE_PATTERN = Regexp.union(JS_STRING_UNSAFE.keys).freeze

  # Escapes +text+ (converted with to_s) for HTML text or attribute context.
  def escape_html(text)
    CGI.escapeHTML(text.to_s)
  end

  # Escapes for embedding in a JS string literal; returns content WITHOUT surrounding quotes.
  #
  # The result is safe inside both "double-quoted" and 'single-quoted'
  # literals placed in an HTML <script> element: quotes and backslashes are
  # escaped, and <, >, &, U+2028 and U+2029 are emitted as \uXXXX escapes.
  # It is not intended for template (backtick) literals.
  def escape_js_string(text)
    quoted = JSON.generate(text.to_s)
    # Strip the surrounding double quotes JSON.generate adds.
    quoted[1..-2].gsub(JS_STRING_UNSAFE_PATTERN, JS_STRING_UNSAFE)
  end

  # Makes an already-serialized JSON document safe to inline in an HTML
  # <script> element by \u-escaping <, >, &, U+2028 and U+2029.
  def escape_json(json_string)
    json_string.gsub(HTML_UNSAFE_IN_SCRIPT_PATTERN, HTML_UNSAFE_IN_SCRIPT)
  end
end
36
+ end
37
+ end
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "rack/utils"
5
+ require_relative "body_reader"
6
+ require_relative "../analytics/events"
7
+ require_relative "../redaction"
8
+ require_relative "../store"
9
+
10
+ module Sentiero
11
+ module Web
12
+ class EventsApp
13
# Rack entry point for the browser event ingest endpoint.
#
# POST stores a batch and OPTIONS answers the CORS preflight. Any other
# method gets a JSON 405 that carries the Allow header HTTP requires on
# 405 responses, listing the methods this endpoint accepts.
def call(env)
  case env["REQUEST_METHOD"]
  when "POST"
    handle_post(env)
  when "OPTIONS"
    handle_options(env)
  else
    headers = {"content-type" => "application/json", "allow" => "POST, OPTIONS"}
    with_cors(env, [405, headers, ['{"error":"method not allowed"}']])
  end
end
25
+
26
+ private
27
+
28
+ def handle_post(env)
29
+ # Drop the batch (silently, 204, same as the opt-out path so clients
30
+ # treat it as success and don't retry) when the user opted out via
31
+ # cookie, or sent Sec-GPC and the server is configured to honor it.
32
+ # This backstops the client, which is expected to not even start
33
+ # recording for GPC users, but a stale bundle or non-Sentiero caller
34
+ # could still POST here.
35
+ return dropped(env) if opted_out?(env) || gpc_signaled?(env)
36
+
37
+ raw_body, error = BodyReader.read(env)
38
+ if error
39
+ status, message = BodyReader::ERRORS[error]
40
+ return cors_error(env, status, message)
41
+ end
42
+
43
+ begin
44
+ data = JSON.parse(raw_body)
45
+ rescue JSON::ParserError
46
+ return cors_error(env, 400, "invalid JSON body")
47
+ end
48
+
49
+ session_id = data["sessionId"]
50
+ window_id = data["windowId"]
51
+ events = data["events"]
52
+
53
+ unless session_id.is_a?(String) && session_id.match?(Store::VALID_ID)
54
+ return cors_error(env, 400, "sessionId must be 1-128 alphanumeric, hyphen, or underscore characters")
55
+ end
56
+
57
+ unless window_id.is_a?(String) && window_id.match?(Store::VALID_ID)
58
+ return cors_error(env, 400, "windowId must be 1-128 alphanumeric, hyphen, or underscore characters")
59
+ end
60
+
61
+ unless events.is_a?(Array) && !events.empty? && events.all? { |e| e.is_a?(Hash) }
62
+ return cors_error(env, 400, "events must be a non-empty array of objects")
63
+ end
64
+
65
+ max_per_request = Sentiero.configuration.max_events_per_request
66
+ if max_per_request && events.size > max_per_request
67
+ return cors_error(env, 400, "too many events (max #{max_per_request})")
68
+ end
69
+
70
+ events, error = normalize_timestamps(env, events)
71
+ return error if error
72
+
73
+ events = redact_events(events)
74
+
75
+ Sentiero.store.save_events(Sentiero::WindowRef.new(session_id, window_id), events)
76
+
77
+ # Save optional session metadata if present, plus a monotonic has_errors
78
+ # flag computed from the incoming batch when error capture is enabled.
79
+ metadata = data["metadata"]
80
+ metadata = {} unless metadata.is_a?(Hash)
81
+
82
+ if Sentiero.configuration.capture_errors && batch_has_errors?(events)
83
+ metadata = metadata.merge("has_errors" => true)
84
+ end
85
+
86
+ unless metadata.empty?
87
+ metadata = Sentiero::Redaction.redact_metadata(metadata, Sentiero.configuration.redaction)
88
+ Sentiero.store.save_metadata(session_id, metadata)
89
+ end
90
+
91
+ with_cors(env, [200, {"content-type" => "application/json"}, ['{"status":"ok"}']])
92
+ end
93
+
94
# Coerces every event's "timestamp" (when the key is present) to a finite
# Float, in place.
#
# Returns [events, nil] on success, or [nil, error_response] where
# error_response is a 400 built by cors_error as soon as one value cannot
# be converted or is not finite.
def normalize_timestamps(env, events)
  events.each do |event|
    next unless event.key?("timestamp")

    value = event["timestamp"]
    # Float(..., exception: false) yields nil for anything unparsable.
    parsed = value.is_a?(Numeric) ? value.to_f : Float(value, exception: false)
    return [nil, cors_error(env, 400, "invalid timestamp value")] unless parsed&.finite?

    event["timestamp"] = parsed
  end

  [events, nil]
end
111
+
112
# Field-aware redaction (defense-in-depth; the client already redacts) plus
# the optional Ruby-only server_proc. A raising or nil/false server_proc
# drops the event (we never store unsanitized data).
#
# Returns the list of events to store: each event after
# Redaction.redact_event and, when a callable server_proc is configured,
# replaced by whatever that proc returns.
def redact_events(events)
  redaction = Sentiero.configuration.redaction
  server_proc = redaction.server_proc
  custom_hook = server_proc.respond_to?(:call)

  events.filter_map do |event|
    redacted = Sentiero::Redaction.redact_event(event, redaction)
    if custom_hook
      begin
        server_proc.call(redacted)
      rescue => e
        warn "[Sentiero] redaction server_proc raised #{e.class}: #{e.message}; dropping event"
        nil
      end
    else
      redacted
    end
  end
end
131
+
132
# True when user opt-out is enabled in the configuration and the request
# carries the opt-out cookie with a value other than "", "0" or "false".
def opted_out?(env)
  settings = Sentiero.configuration
  return false unless settings.user_opt_out

  flag = Rack::Utils.parse_cookies(env)[settings.opt_out_cookie_name]
  return false if flag.nil? || flag.empty?

  !%w[0 false].include?(flag)
end
140
+
141
# Truthy when the configuration honors Global Privacy Control and the
# request carries "Sec-GPC: 1".
def gpc_signaled?(env)
  honor_gpc = Sentiero.configuration.respect_gpc
  honor_gpc && env["HTTP_SEC_GPC"] == "1"
end
144
+
145
# Response for a batch that was intentionally discarded: an empty 204 with
# the usual CORS headers. No content-type is sent because the Rack SPEC
# forbids that header on 204 responses (Rack::Lint rejects it).
def dropped(env)
  with_cors(env, [204, {}, []])
end
148
+
149
# rrweb custom events arrive as { type: 5, data: { tag, payload } }; the
# recorder tags error events "error", so a batch "has errors" when any
# custom event carries that tag.
def batch_has_errors?(events)
  custom_type = Sentiero::Analytics::Events::CUSTOM

  events.any? do |event|
    next false unless event["type"] == custom_type

    payload = event["data"]
    payload.is_a?(Hash) && payload["tag"] == "error"
  end
end
158
+
159
# Answers the CORS preflight: an empty 204 advertising POST, the request
# headers the recorder sends, and a one-day preflight cache lifetime.
# No content-type is sent because the Rack SPEC forbids that header on 204
# responses (Rack::Lint rejects it).
def handle_options(env)
  headers = {
    "access-control-allow-methods" => "POST",
    "access-control-allow-headers" => "Content-Type, Content-Encoding",
    "access-control-max-age" => "86400"
  }

  with_cors(env, [204, headers, []])
end
169
+
170
# Decorates a Rack response triplet with "x-content-type-options: nosniff"
# and, when the request's Origin is listed in configuration.cors_origins,
# the matching access-control-allow-origin plus "vary: Origin".
# The headers hash of +response+ is modified in place.
def with_cors(env, response)
  status, headers, body = response
  headers["x-content-type-options"] = "nosniff"

  allowed_origins = Sentiero.configuration.cors_origins
  cors_enabled = allowed_origins && !allowed_origins.empty?
  origin = cors_enabled ? env["HTTP_ORIGIN"] : nil

  if origin && allowed_origins.include?(origin)
    headers["access-control-allow-origin"] = origin
    headers["vary"] = "Origin"
  end

  [status, headers, body]
end
186
+
187
# Builds a JSON error response ({"error": message}) with the given status
# and passes it through with_cors.
def cors_error(env, status, message)
  payload = json_error(message)
  with_cors(env, [status, {"content-type" => "application/json"}, [payload]])
end
190
+
191
# Serializes +message+ as the JSON document {"error": message}.
def json_error(message)
  JSON.generate("error" => message)
end
194
+ end
195
+ end
196
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../user_agent"
4
+
5
+ module Sentiero
6
+ module Web
7
# Small presentation formatters shared by the Rack apps and the view layer.
module Formatting
  # Device label for a User-Agent string, as reported by Sentiero::UserAgent.
  def parse_device(user_agent)
    Sentiero::UserAgent.device(user_agent)
  end

  # Browser label for a User-Agent string, as reported by Sentiero::UserAgent.
  def parse_browser(user_agent)
    Sentiero::UserAgent.browser(user_agent)
  end

  # CLS is a unitless ratio (3 decimals); the other Web Vitals are millisecond durations.
  def format_vital(metric, value)
    if metric == "CLS"
      format("%.3f", value)
    else
      "#{value.round} ms"
    end
  end

  # Human-readable length of the span between two event timestamps:
  # "42s", "3m 5s", "2h 10m" (zero-valued trailing units are omitted), or
  # "N/A" when either timestamp is missing.
  def format_duration(first_event_at, last_event_at)
    return "N/A" unless first_event_at && last_event_at

    # Event timestamps are in milliseconds
    total_seconds = ((last_event_at - first_event_at).abs / 1000.0).round
    return "#{total_seconds}s" if total_seconds < 60

    if total_seconds < 3600
      minutes, seconds = total_seconds.divmod(60)
      return seconds.zero? ? "#{minutes}m" : "#{minutes}m #{seconds}s"
    end

    hours, leftover = total_seconds.divmod(3600)
    minutes = leftover / 60
    minutes.zero? ? "#{hours}h" : "#{hours}h #{minutes}m"
  end
end
42
+ end
43
+ end