sentiero 1.0.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +7 -0
- data/README.md +679 -0
- data/lib/sentiero/analytics/analyzer.rb +91 -0
- data/lib/sentiero/analytics/bounded.rb +29 -0
- data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
- data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
- data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
- data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
- data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
- data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
- data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
- data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
- data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
- data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
- data/lib/sentiero/analytics/entry_attribution.rb +71 -0
- data/lib/sentiero/analytics/error_discovery.rb +118 -0
- data/lib/sentiero/analytics/events.rb +21 -0
- data/lib/sentiero/analytics/exporter.rb +242 -0
- data/lib/sentiero/analytics/form_analyzer.rb +153 -0
- data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
- data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
- data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
- data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
- data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
- data/lib/sentiero/analytics/problem_detail.rb +97 -0
- data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
- data/lib/sentiero/analytics/segmenter.rb +133 -0
- data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
- data/lib/sentiero/analytics/stats.rb +30 -0
- data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
- data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
- data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
- data/lib/sentiero/configuration.rb +184 -0
- data/lib/sentiero/erasure.rb +48 -0
- data/lib/sentiero/fingerprint.rb +34 -0
- data/lib/sentiero/ip_anonymizer.rb +29 -0
- data/lib/sentiero/redaction/config.rb +61 -0
- data/lib/sentiero/redaction.rb +207 -0
- data/lib/sentiero/reporter/configuration.rb +50 -0
- data/lib/sentiero/reporter/context.rb +31 -0
- data/lib/sentiero/reporter/dispatcher.rb +91 -0
- data/lib/sentiero/reporter/http_transport.rb +57 -0
- data/lib/sentiero/reporter/log_transport.rb +26 -0
- data/lib/sentiero/reporter/middleware.rb +62 -0
- data/lib/sentiero/reporter/normalizer.rb +14 -0
- data/lib/sentiero/reporter/null_transport.rb +18 -0
- data/lib/sentiero/reporter/report_context.rb +29 -0
- data/lib/sentiero/reporter/scrubber.rb +47 -0
- data/lib/sentiero/reporter/test_helper.rb +32 -0
- data/lib/sentiero/reporter/test_transport.rb +28 -0
- data/lib/sentiero/reporter.rb +214 -0
- data/lib/sentiero/roda.rb +47 -0
- data/lib/sentiero/store/error_store.rb +220 -0
- data/lib/sentiero/store/limits.rb +31 -0
- data/lib/sentiero/store/session_store.rb +118 -0
- data/lib/sentiero/store.rb +72 -0
- data/lib/sentiero/stores/file.rb +566 -0
- data/lib/sentiero/stores/memory.rb +362 -0
- data/lib/sentiero/stores/redis/keys.rb +59 -0
- data/lib/sentiero/stores/redis/lua.rb +119 -0
- data/lib/sentiero/stores/redis.rb +665 -0
- data/lib/sentiero/stores/sqlite/schema.rb +79 -0
- data/lib/sentiero/stores/sqlite.rb +626 -0
- data/lib/sentiero/user_agent.rb +32 -0
- data/lib/sentiero/version.rb +5 -0
- data/lib/sentiero/web/analytics_app.rb +538 -0
- data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
- data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
- data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
- data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
- data/lib/sentiero/web/assets/manifest.json +11 -0
- data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
- data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
- data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
- data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
- data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
- data/lib/sentiero/web/assets_app.rb +42 -0
- data/lib/sentiero/web/base_app.rb +319 -0
- data/lib/sentiero/web/basic_auth.rb +27 -0
- data/lib/sentiero/web/basic_auth_check.rb +41 -0
- data/lib/sentiero/web/body_reader.rb +44 -0
- data/lib/sentiero/web/csv_writer.rb +45 -0
- data/lib/sentiero/web/dashboard_app.rb +236 -0
- data/lib/sentiero/web/errors_app.rb +97 -0
- data/lib/sentiero/web/escaping.rb +37 -0
- data/lib/sentiero/web/events_app.rb +196 -0
- data/lib/sentiero/web/formatting.rb +43 -0
- data/lib/sentiero/web/ingest_app.rb +92 -0
- data/lib/sentiero/web/manifest.rb +43 -0
- data/lib/sentiero/web/monitoring_app.rb +316 -0
- data/lib/sentiero/web/script_tag.rb +57 -0
- data/lib/sentiero/web/shareable_replay.rb +88 -0
- data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
- data/lib/sentiero/web/templates/_brand.html.erb +18 -0
- data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
- data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
- data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
- data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
- data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
- data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
- data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
- data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
- data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
- data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
- data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
- data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
- data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
- data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
- data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
- data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
- data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
- data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
- data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
- data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
- data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
- data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
- data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
- data/lib/sentiero/web/templates/event_show.html.erb +52 -0
- data/lib/sentiero/web/templates/events_index.html.erb +177 -0
- data/lib/sentiero/web/templates/export_index.html.erb +69 -0
- data/lib/sentiero/web/templates/forms.html.erb +105 -0
- data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
- data/lib/sentiero/web/templates/import.html.erb +39 -0
- data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
- data/lib/sentiero/web/templates/segments.html.erb +114 -0
- data/lib/sentiero/web/templates/session_show.html.erb +195 -0
- data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
- data/lib/sentiero/web/track_app.rb +57 -0
- data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
- data/lib/sentiero/web/views/analyzer_view.rb +27 -0
- data/lib/sentiero/web/views/base_view.rb +76 -0
- data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
- data/lib/sentiero/web/views/conversions_view.rb +41 -0
- data/lib/sentiero/web/views/engagement_view.rb +67 -0
- data/lib/sentiero/web/views/errors_index_view.rb +37 -0
- data/lib/sentiero/web/views/event_show_view.rb +20 -0
- data/lib/sentiero/web/views/events_index_view.rb +56 -0
- data/lib/sentiero/web/views/export_view.rb +23 -0
- data/lib/sentiero/web/views/forms_view.rb +28 -0
- data/lib/sentiero/web/views/frustration_view.rb +15 -0
- data/lib/sentiero/web/views/funnel_view.rb +36 -0
- data/lib/sentiero/web/views/heatmap_view.rb +34 -0
- data/lib/sentiero/web/views/import_view.rb +13 -0
- data/lib/sentiero/web/views/page_report_view.rb +43 -0
- data/lib/sentiero/web/views/problem_show_view.rb +46 -0
- data/lib/sentiero/web/views/scroll_view.rb +23 -0
- data/lib/sentiero/web/views/segments_view.rb +28 -0
- data/lib/sentiero/web/views/session_show_view.rb +105 -0
- data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
- data/lib/sentiero/web/views/vitals_view.rb +45 -0
- data/lib/sentiero/web/views.rb +24 -0
- data/lib/sentiero/window_ref.rb +6 -0
- data/lib/sentiero.rb +69 -0
- metadata +232 -0
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "analyzer"
|
|
4
|
+
require_relative "collectors/custom_tag_collector"
|
|
5
|
+
require_relative "../user_agent"
|
|
6
|
+
|
|
7
|
+
module Sentiero
|
|
8
|
+
module Analytics
|
|
9
|
+
class StatsAggregator < Analyzer
|
|
10
|
+
# Required here, not at file top: ResultBuilder reopens this class, and
|
|
11
|
+
# requiring it before the `< Analyzer` superclass is established would
|
|
12
|
+
# raise a superclass mismatch.
|
|
13
|
+
require_relative "stats_aggregator/result_builder"
|
|
14
|
+
|
|
15
|
+
TOP_LIST_LIMIT = 10
|
|
16
|
+
TOP_TAGS_LIMIT = 20
|
|
17
|
+
TOP_PROBLEMS_LIMIT = 5
|
|
18
|
+
|
|
19
|
+
NAVIGATION_TAG = "navigation"
|
|
20
|
+
|
|
21
|
+
INTERNAL_METADATA_KEYS = %w[userAgent url referrer viewport has_errors entry_url entry_referrer].freeze
|
|
22
|
+
|
|
23
|
+
MAX_NAV_KEYS = 200
|
|
24
|
+
MAX_METADATA_KEYS = 50
|
|
25
|
+
MAX_METADATA_VALUES_PER_KEY = 50
|
|
26
|
+
MAX_TAG_SERIES_KEYS = 200
|
|
27
|
+
MAX_OVERLAY_PROBLEMS = 200
|
|
28
|
+
MAX_OCCURRENCES_PER_PROBLEM = 500
|
|
29
|
+
|
|
30
|
+
DURATION_BUCKETS = [
|
|
31
|
+
["0-30s", 30_000],
|
|
32
|
+
["30s-2m", 120_000],
|
|
33
|
+
["2-5m", 300_000],
|
|
34
|
+
["5-15m", 900_000],
|
|
35
|
+
["15m+", nil]
|
|
36
|
+
].freeze
|
|
37
|
+
|
|
38
|
+
WEEK_BUCKET_THRESHOLD_DAYS = 45
|
|
39
|
+
|
|
40
|
+
# Builds the aggregate for a single time window.
#
# When since is not given it is derived from default_since(range_days,
# until_time). Every window yielded by store.each_session_events (limited to
# the store's analytics_max_scan_sessions) is folded into a fresh accumulator,
# and finalize turns the tallies into the result. server_exception_overlay is
# forwarded to finalize as overlay:.
def aggregate(range_days: 30, since: nil, until_time: nil, server_exception_overlay: false)
  session_cap = store.limits.analytics_max_scan_sessions
  since = default_since(range_days, until_time) unless since

  tallies = new_accumulator(since, until_time)
  sessions = {}

  store.each_session_events(limit: session_cap, since: since, until_time: until_time) do |summary, _window_id, events|
    accumulate_window(tallies, sessions, summary, events)
  end

  finalize(tallies, sessions, session_cap, overlay: server_exception_overlay)
end
|
|
53
|
+
|
|
54
|
+
# Produces the current-window aggregate together with the aggregate of the
# equal-length window immediately before it, using ONE widened scan over
# [prior_since, until_time]. Each yielded window is routed to the current or
# the prior accumulator according to the session summary's updated_at.
#
# Returns {current:, prior:}. prior is nil when the window has no positive
# length, or when the widened scan reached the scan cap; in both cases the
# current aggregate comes from a dedicated single-window #aggregate call so the
# displayed numbers stay exact.
def aggregate_with_prior(range_days: 30, since: nil, until_time: nil)
  scan_cap = store.limits.analytics_max_scan_sessions
  since ||= default_since(range_days, until_time)
  span = (until_time || Time.now.to_f) - since

  # Fallback used whenever no trustworthy prior window can be produced.
  current_only = lambda do
    {current: aggregate(since: since, until_time: until_time, server_exception_overlay: true), prior: nil}
  end
  return current_only.call unless span > 0

  prior_since = since - span
  prior_until = since - 0.001

  current_acc = new_accumulator(since, until_time)
  current_seen = {}
  prior_acc = new_accumulator(prior_since, prior_until)
  prior_seen = {}

  store.each_session_events(limit: scan_cap, since: prior_since, until_time: until_time) do |summary, _window_id, events|
    if summary[:updated_at] >= since
      accumulate_window(current_acc, current_seen, summary, events)
    else
      accumulate_window(prior_acc, prior_seen, summary, events)
    end
  end

  # Reaching the cap means the widened scan may be incomplete.
  return current_only.call if current_seen.size + prior_seen.size >= scan_cap

  {
    current: finalize(current_acc, current_seen, scan_cap, overlay: true),
    prior: finalize(prior_acc, prior_seen, scan_cap, overlay: false)
  }
end
|
|
90
|
+
|
|
91
|
+
private
|
|
92
|
+
|
|
93
|
+
# Folds one yielded window into the accumulator. Session-level tallies run only
# the first time a session id is seen; the entry candidate and the per-event
# tallies run for every window.
def accumulate_window(acc, seen_sessions, summary, events)
  sid = summary[:session_id]
  first_sighting = !seen_sessions.key?(sid)
  collect_session(acc, summary, seen_sessions) if first_sighting

  state = seen_sessions[sid]
  update_entry_candidate(state, events)
  collect_events(acc, events)
end
|
|
99
|
+
|
|
100
|
+
# Completes a scan: tallies the deferred per-session entries, optionally adds
# the server overlay (overlay: true), and hands everything to ResultBuilder.
# The overlay's truncation flag is false when the overlay is skipped.
def finalize(acc, seen_sessions, scan_cap, overlay:)
  tally_entries(acc, seen_sessions)

  overlay_truncated =
    if overlay
      collect_server_overlay(acc)
    else
      false
    end

  builder = ResultBuilder.new(store)
  builder.build(acc, seen_sessions.size, scan_cap, overlay_truncated)
end
|
|
105
|
+
|
|
106
|
+
# Start of the default window as epoch seconds (Float): midnight UTC of the day
# that lies range_days - 1 days before the end day, because the start day is
# itself one of the range_days buckets. The end day is the UTC date of
# until_time, or of the current time when until_time is nil.
def default_since(range_days, until_time)
  anchor = until_time ? Time.at(until_time) : Time.now
  first_day = anchor.utc.to_date - (range_days - 1)
  Time.utc(first_day.year, first_day.month, first_day.day).to_f
end
|
|
112
|
+
|
|
113
|
+
# Running tallies for one aggregate scan. Declared as a keyword-initialised
# Struct (rather than a Hash) so each field is a named accessor that the
# tally_* methods read and write.
Accumulator = Struct.new(
  *%i[
    event_types custom_tags browser_tags browsers devices
    entry_pages entry_page_errors referrers duration_buckets
    total_events durations since until_time
    per_day_events per_day_sessions per_day_errors per_day_tags
    per_day_server_errors nav_internal nav_external nav_texts
    metadata_keys metadata_values sessions_with_errors
  ],
  keyword_init: true
)
|
|
125
|
+
|
|
126
|
+
# Returns a fresh Accumulator for the window [since, until_time]: every counter
# field gets its own zero-default Hash, duration_buckets is pre-seeded with a 0
# for each DURATION_BUCKETS label, and the scalar totals start at 0.
def new_accumulator(since, until_time)
  acc = Accumulator.new(since: since, until_time: until_time)

  # Each field receives a distinct Hash so tallies never share state.
  counter_fields = %i[
    event_types browser_tags browsers devices entry_pages entry_page_errors
    referrers per_day_events per_day_sessions per_day_errors
    per_day_server_errors nav_internal nav_external nav_texts metadata_keys
  ]
  counter_fields.each { |field| acc[field] = Hash.new(0) }

  acc.custom_tags = CustomTagCollector.new
  acc.duration_buckets = DURATION_BUCKETS.each_with_object({}) { |(label, _bound), zeroed| zeroed[label] = 0 }
  acc.durations = []
  acc.total_events = 0
  acc.sessions_with_errors = 0
  # Plain hashes: their nested series are created on demand by the tally methods.
  acc.per_day_tags = {}
  acc.metadata_values = {}
  acc
end
|
|
154
|
+
|
|
155
|
+
# Session-level tallies; called once per session because all of a session's
# windows share one metadata hash and one duration. Registers the session's
# entry state in seen_sessions, then tallies browser/device, the error flag,
# custom metadata, duration, and the session's day.
def collect_session(acc, summary, seen_sessions)
  metadata = summary[:metadata] || {}
  entry_url = metadata["entry_url"]
  errored = metadata["has_errors"]

  # A recorded entry_url is authoritative, so its anchor is -Infinity; a nil
  # anchor leaves the slot open for a first-Meta href to claim later.
  anchor = entry_url ? -Float::INFINITY : nil

  seen_sessions[summary[:session_id]] = {
    entry_url: entry_url,
    entry_anchor: anchor,
    referrer: metadata["entry_referrer"] || metadata["referrer"],
    has_errors: errored ? true : false
  }

  tally_browser_device(acc, metadata["userAgent"])
  acc.sessions_with_errors += 1 if errored

  tally_metadata(acc, metadata)
  record_duration(acc, summary)
  record_session_day(acc, summary)
end
|
|
175
|
+
|
|
176
|
+
# Runs after every window has been seen, because the entry page is the first
# Meta href rather than the metadata URL the recorder overwrites on each
# navigation. Counts each session's entry page (and again under
# entry_page_errors when the session had errors). Referrers judged same-origin
# with the entry page are skipped so Top Referrers shows only acquisition.
def tally_entries(acc, seen_sessions)
  seen_sessions.each_value do |state|
    page, referrer = state.values_at(:entry_url, :referrer)

    tally(acc.entry_pages, page)
    tally(acc.entry_page_errors, page) if state[:has_errors]

    next if same_origin?(referrer, page)

    tally(acc.referrers, referrer)
  end
end
|
|
188
|
+
|
|
189
|
+
# Counts custom metadata keys and, per key, their stringified values. Keys that
# are not non-empty Strings, or that appear in INTERNAL_METADATA_KEYS, are
# ignored. Values are tracked only for keys that survived the key cap.
def tally_metadata(acc, metadata)
  metadata.each do |key, value|
    custom_key = key.is_a?(String) && !key.empty? && !INTERNAL_METADATA_KEYS.include?(key)
    next unless custom_key && bounded_tally(acc.metadata_keys, key, MAX_METADATA_KEYS)

    acc.metadata_values[key] ||= Hash.new(0)
    bounded_tally(acc.metadata_values[key], value.to_s, MAX_METADATA_VALUES_PER_KEY)
  end
end
|
|
200
|
+
|
|
201
|
+
# Per-event tallies for one window's events. Events rejected by in_window? are
# skipped entirely; the rest bump the total and per-type counts, the per-day
# event series, the custom-tag tallies (CUSTOM events only), and the per-day
# error series (error events only).
def collect_events(acc, events)
  events.each do |event|
    next unless in_window?(acc, event["timestamp"])

    kind = event["type"]
    acc.event_types[kind] += 1
    acc.total_events += 1
    tally_custom_tag(acc, event) if kind == CUSTOM
    record_event_day(acc, event)

    next unless error_event?(event)

    record_error_day(acc, event)
  end
end
|
|
213
|
+
|
|
214
|
+
# Whether an event timestamp (milliseconds) lies inside the accumulator's
# [since, until_time] window (seconds; a nil until_time leaves the window
# open-ended). An in-range session can carry out-of-window events, and those
# must not inflate totals. Timestamps that are not Numeric cannot be placed and
# are kept (true).
def in_window?(acc, timestamp_ms)
  return true unless timestamp_ms.is_a?(Numeric)

  seconds = timestamp_ms / 1000.0
  return false unless seconds >= acc.since

  upper = acc.until_time
  upper.nil? || seconds <= upper
end
|
|
223
|
+
|
|
224
|
+
# Counts the browser and device that UserAgent derives from the user-agent
# value; either one is skipped when UserAgent returns nothing for it.
def tally_browser_device(acc, user_agent)
  browser_name, device_kind = UserAgent.browser(user_agent), UserAgent.device(user_agent)

  acc.browsers[browser_name] += 1 if browser_name
  acc.devices[device_kind] += 1 if device_kind
end
|
|
230
|
+
|
|
231
|
+
# Tallies one custom event's tag. Events whose data is not a Hash, or whose tag
# is not a String, are ignored. Navigation tags additionally feed
# tally_navigation with the event payload.
def tally_custom_tag(acc, event)
  data = event["data"]
  tag = data["tag"] if data.is_a?(Hash)
  return unless tag.is_a?(String)

  tally_navigation(acc, data["payload"]) if tag == NAVIGATION_TAG

  # The collector's #tally result is the shared gate: browser_tags and the
  # per-day series only count tags it accepted (internal "__" annotations and
  # the JS-error tag are excluded).
  if acc.custom_tags.tally(tag)
    acc.browser_tags[tag] += 1
    record_tag_day(acc, tag, event["timestamp"])
  end
end
|
|
246
|
+
|
|
247
|
+
# Adds one to the tag's per-day series for the event's day. A series for a new
# tag is only created while fewer than MAX_TAG_SERIES_KEYS series exist; tags
# that already have a series keep counting past the cap. Events whose day
# cannot be determined are skipped.
def record_tag_day(acc, tag, timestamp_ms)
  day = day_string(timestamp_ms)
  return unless day

  by_tag = acc.per_day_tags
  by_tag[tag] = Hash.new(0) if by_tag[tag].nil? && by_tag.size < MAX_TAG_SERIES_KEYS

  counts = by_tag[tag]
  counts[day] += 1 if counts
end
|
|
256
|
+
|
|
257
|
+
# Tallies a navigation payload: its url goes to the external or internal
# bucket depending on the payload's "external" flag, and its text goes to
# nav_texts. Both tallies are capped at MAX_NAV_KEYS. Non-Hash payloads are
# ignored.
def tally_navigation(acc, payload)
  return unless payload.is_a?(Hash)

  destinations =
    if payload["external"]
      acc.nav_external
    else
      acc.nav_internal
    end

  bounded_tally(destinations, payload["url"], MAX_NAV_KEYS)
  bounded_tally(acc.nav_texts, payload["text"], MAX_NAV_KEYS)
end
|
|
264
|
+
|
|
265
|
+
# Capped tally for one value. Returns false without counting when the value is
# not a non-empty String; otherwise returns whatever bounded_increment returns.
def bounded_tally(counts, value, cap)
  return false if !value.is_a?(String) || value.empty?

  bounded_increment(counts, value, cap)
end
|
|
271
|
+
|
|
272
|
+
# Uncapped tally: adds one to counts[value] when value is a non-empty String
# and returns the new count; returns nil (and counts nothing) otherwise.
def tally(counts, value)
  return unless value.is_a?(String)
  return if value.empty?

  counts[value] += 1
end
|
|
275
|
+
|
|
276
|
+
# Records the session's duration (absolute difference between the summary's
# first_event_at and last_event_at) in the raw list and in its duration bucket.
# Sessions missing either timestamp are skipped.
def record_duration(acc, summary)
  started = summary[:first_event_at]
  ended = summary[:last_event_at]
  return unless started && ended

  elapsed = (ended - started).abs
  acc.durations << elapsed
  acc.duration_buckets[bucket_label(elapsed)] += 1
end
|
|
286
|
+
|
|
287
|
+
# Label of the first DURATION_BUCKETS entry whose upper bound is nil (the
# open-ended bucket) or strictly greater than duration_ms; nil if none matches.
def bucket_label(duration_ms)
  DURATION_BUCKETS.each do |name, upper|
    return name if upper.nil? || duration_ms < upper
  end

  nil
end
|
|
291
|
+
|
|
292
|
+
# Counts the session under the day of its first_event_at, falling back to
# created_at when first_event_at is missing. Sessions whose day cannot be
# determined are not counted.
def record_session_day(acc, summary)
  started_at = summary[:first_event_at] || summary[:created_at]
  day = day_string(started_at)
  return unless day

  acc.per_day_sessions[day] += 1
end
|
|
296
|
+
|
|
297
|
+
# Counts the event under the day of its "timestamp"; events whose day cannot
# be determined are not counted.
def record_event_day(acc, event)
  day = day_string(event["timestamp"])
  return unless day

  acc.per_day_events[day] += 1
end
|
|
301
|
+
|
|
302
|
+
# True only for CUSTOM events whose data is a Hash tagged "error".
def error_event?(event)
  data = event["data"]
  event["type"] == CUSTOM && data.is_a?(Hash) && data["tag"] == "error"
end
|
|
307
|
+
|
|
308
|
+
# Counts the error event under the day of its "timestamp"; events whose day
# cannot be determined are not counted.
def record_error_day(acc, event)
  day = day_string(event["timestamp"])
  return unless day

  acc.per_day_errors[day] += 1
end
|
|
312
|
+
|
|
313
|
+
# Fills per_day_server_errors with per-day counts of server occurrences
# (occurrence timestamps are epoch seconds) and returns whether either cap
# (MAX_OVERLAY_PROBLEMS or MAX_OCCURRENCES_PER_PROBLEM) was reached.
#
# since filters problems by last_seen, which is safe because an in-window
# occurrence implies last_seen >= since. until_time is applied per occurrence
# rather than to list_problems, since a still-active problem can own in-window
# occurrences.
def collect_server_overlay(acc)
  lower = acc.since
  upper = acc.until_time

  problems = store.list_problems(project: nil, limit: MAX_OVERLAY_PROBLEMS, since: lower)
  capped = problems.size >= MAX_OVERLAY_PROBLEMS

  problems.each do |problem|
    batch = store.get_occurrences(problem[:id], after: lower, limit: MAX_OCCURRENCES_PER_PROBLEM)
    capped ||= batch.size >= MAX_OCCURRENCES_PER_PROBLEM

    batch.each do |occurrence|
      seconds = occurrence["timestamp"].to_f
      next if upper && seconds > upper

      day = Time.at(seconds).utc.to_date.to_s
      acc.per_day_server_errors[day] += 1
    end
  end

  capped
end
|
|
337
|
+
|
|
338
|
+
# Converts an epoch-milliseconds timestamp into its UTC calendar day
# ("YYYY-MM-DD"), or nil when the timestamp cannot be placed on a day.
#
# Event timestamps come from recorded payloads and are not guaranteed to be
# numbers: in_window? deliberately keeps events whose timestamp is not Numeric,
# so such values reach this method. Anything that is not Numeric therefore
# yields nil instead of raising NoMethodError on the division, and non-finite
# floats (Time.at raises FloatDomainError, a RangeError) are rescued as well,
# so one malformed event cannot abort the whole aggregate.
#
# @param timestamp_ms [Numeric, Object, nil] epoch milliseconds
# @return [String, nil] ISO day string, or nil when undeterminable
def day_string(timestamp_ms)
  return nil unless timestamp_ms.is_a?(Numeric)

  Time.at(timestamp_ms / 1000.0).utc.to_date.to_s
rescue TypeError, ArgumentError, RangeError
  nil
end
|
|
344
|
+
end
|
|
345
|
+
end
|
|
346
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "analyzer"
|
|
4
|
+
require_relative "collectors/vitals_collector"
|
|
5
|
+
|
|
6
|
+
module Sentiero
|
|
7
|
+
module Analytics
|
|
8
|
+
# Aggregates Web Vitals per page URL, scanning the store on read. The recorder
|
|
9
|
+
# emits one "__perf" custom event per metric report carrying {metric, value,
|
|
10
|
+
# rating}; ratings come from the client web-vitals library and are tallied
|
|
11
|
+
# as-is. Per-segment math (last-wins collapse, rating histogram, worst-sample)
|
|
12
|
+
# lives in VitalsCollector, shared with PageReportAnalyzer.
|
|
13
|
+
class WebVitalsAnalyzer < Analyzer
|
|
14
|
+
# Cap on distinct URLs tracked; sessions scan newest-first, so the cap keeps
|
|
15
|
+
# the most recently visited URLs.
|
|
16
|
+
MAX_URLS = 200
|
|
17
|
+
|
|
18
|
+
MAX_SAMPLES_PER_METRIC = 2000
|
|
19
|
+
|
|
20
|
+
# Aggregates Web Vitals per page URL over the scanned sessions.
#
# Samples are attributed per page segment so each report lands on the page it
# measured. Within a segment, repeated reports of the same metric collapse to
# the LAST one (one sample == one page view's final value) so re-emitted
# candidates and reloads cannot inflate counts or skew percentiles.
#
# Returns {pages:, was_truncated:}: pages maps each URL to its collector's
# summary; was_truncated is truthy when the session scan hit its cap, the URL
# cap rejected a page, or a collector reported itself capped.
def analyze(limit: nil, since: nil, until_time: nil)
  collectors = {} # url => VitalsCollector
  overflowed = false

  _scanned, hit_cap = scan_sessions(limit: limit, since: since, until_time: until_time) do |summary, window_id, events|
    each_page_segment(events) do |url, segment, anchor|
      next unless url

      collector = collector_for(collectors, url)
      if collector
        collector.collect(segment, session_id: summary[:session_id], window_id: window_id, anchor: anchor)
        overflowed = true if collector.capped
      else
        # URL-row cap is full: remember the drop and move to the next segment.
        overflowed = true
        next
      end
    end
  end

  {
    pages: collectors.transform_values(&:summarize),
    was_truncated: overflowed || hit_cap
  }
end
|
|
48
|
+
|
|
49
|
+
private
|
|
50
|
+
|
|
51
|
+
# Looks up (or lazily builds) the VitalsCollector for a URL through
# bounded_fetch with the MAX_URLS cap; yields nil when the URL-row cap is full.
def collector_for(pages, url)
  bounded_fetch(pages, url, MAX_URLS) do
    VitalsCollector.new(max_samples: MAX_SAMPLES_PER_METRIC)
  end
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sentiero
|
|
4
|
+
class Configuration
|
|
5
|
+
attr_reader :store
|
|
6
|
+
|
|
7
|
+
attr_accessor :cors_origins,
|
|
8
|
+
:auth_callback,
|
|
9
|
+
:flush_interval_ms,
|
|
10
|
+
:flush_event_threshold,
|
|
11
|
+
:max_events_per_page,
|
|
12
|
+
:max_events_per_request,
|
|
13
|
+
:max_sessions,
|
|
14
|
+
:max_events_per_session,
|
|
15
|
+
:max_problems,
|
|
16
|
+
:max_server_events,
|
|
17
|
+
:ingest_keys,
|
|
18
|
+
:cross_tab_sessions,
|
|
19
|
+
:capture_metadata,
|
|
20
|
+
:capture_errors,
|
|
21
|
+
:track_navigation,
|
|
22
|
+
:track_custom_events,
|
|
23
|
+
:capture_clicks,
|
|
24
|
+
:track_forms,
|
|
25
|
+
# snake_case rrweb recorder options; converted to camelCase for the frontend.
|
|
26
|
+
:mask_all_inputs,
|
|
27
|
+
:mask_input_options,
|
|
28
|
+
:block_selector,
|
|
29
|
+
:mask_text_selector,
|
|
30
|
+
:ignore_selector,
|
|
31
|
+
:sampling,
|
|
32
|
+
:inline_stylesheet,
|
|
33
|
+
:checkout_every_n_ms,
|
|
34
|
+
# Raw camelCase hash passed to rrweb verbatim; first-class attributes above
|
|
35
|
+
# take precedence for overlapping keys.
|
|
36
|
+
:recorder_options,
|
|
37
|
+
:capture_web_vitals,
|
|
38
|
+
:analytics_max_scan_sessions,
|
|
39
|
+
:user_opt_out,
|
|
40
|
+
:opt_out_cookie_name,
|
|
41
|
+
:respect_gpc,
|
|
42
|
+
:retention_period,
|
|
43
|
+
:anonymize_ip,
|
|
44
|
+
:redaction,
|
|
45
|
+
:audit_log,
|
|
46
|
+
:shareable_replays,
|
|
47
|
+
:basic_auth,
|
|
48
|
+
# Escape hatch: serve the dashboard/analytics/monitoring UIs with NO auth
|
|
49
|
+
# (see Configuration#initialize). Off by default so the UI fails closed.
|
|
50
|
+
:allow_insecure_dashboard
|
|
51
|
+
|
|
52
|
+
# session_idle_timeout / session_max_age have validating writers below (a
|
|
53
|
+
# bad value here is serialized straight into client-side session-rotation
|
|
54
|
+
# logic), so they're declared separately from the plain attr_accessor list.
|
|
55
|
+
attr_reader :session_idle_timeout, :session_max_age
|
|
56
|
+
|
|
57
|
+
ENFORCED_PRIVACY = {
|
|
58
|
+
maskInputOptions: {password: true}
|
|
59
|
+
}.freeze
|
|
60
|
+
|
|
61
|
+
# Replay sessions follow a user journey, so the idle boundary is generous
|
|
62
|
+
# (a lunch break shouldn't split a journey); max age is the hard cap that
|
|
63
|
+
# keeps the identifier from living forever on never-idle tabs.
|
|
64
|
+
DEFAULT_SESSION_IDLE_TIMEOUT = 6 * 60 * 60
|
|
65
|
+
DEFAULT_SESSION_MAX_AGE = 7 * 24 * 60 * 60
|
|
66
|
+
|
|
67
|
+
# Composition root for store caps. A store that accepts limits= is bound to
# this configuration's caps (Store::Limits.from_configuration) at assignment
# time, so the store itself never reads global state. Set the caps before
# assigning the store; to override, inject explicit Store::Limits on the store
# afterward.
def store=(store)
  if store.respond_to?(:limits=)
    store.limits = Store::Limits.from_configuration(self)
  end

  @store = store
end
|
|
75
|
+
|
|
76
|
+
# Reach the Reporter config from the one core object, e.g.
# Sentiero.configure { |c| c.reporter.endpoint = "..." }. It remains a separate
# instance so the reporter stays usable as a standalone client. Raises (via
# require_subsystem!) when Sentiero::Reporter has not been loaded.
def reporter
  subsystem = "Sentiero::Reporter"
  require_subsystem!(subsystem, 'require "sentiero/reporter"')

  Reporter.configuration
end
|
|
83
|
+
|
|
84
|
+
# Reach the Rails integration's config from the core object. Raises (via
# require_subsystem!) when Sentiero::Rails has not been loaded.
def rails
  subsystem = "Sentiero::Rails"
  require_subsystem!(subsystem, "the sentiero-rails gem")

  Rails.configuration
end
|
|
88
|
+
|
|
89
|
+
# Guard for optional subsystems: does nothing when the named constant is
# defined, otherwise raises Error with a hint on how to load it.
private def require_subsystem!(const_name, hint)
  loaded = Object.const_defined?(const_name)
  raise Error, "#{const_name} is not loaded — #{hint} to configure it." unless loaded
end
|
|
94
|
+
|
|
95
|
+
# Validating writer. A non-positive or non-numeric value would either disable
# rotation (never expire) or break the client's Date.now() arithmetic once
# serialized into the config JSON, so such a value silently falls back to
# DEFAULT_SESSION_IDLE_TIMEOUT instead of raising.
def session_idle_timeout=(value)
  fallback = DEFAULT_SESSION_IDLE_TIMEOUT
  @session_idle_timeout = clamp_positive_seconds(value, fallback)
end
|
|
102
|
+
|
|
103
|
+
# Validating writer: values rejected by clamp_positive_seconds fall back to
# DEFAULT_SESSION_MAX_AGE.
def session_max_age=(value)
  fallback = DEFAULT_SESSION_MAX_AGE
  @session_max_age = clamp_positive_seconds(value, fallback)
end
|
|
106
|
+
|
|
107
|
+
# Returns value when it is a finite Numeric greater than zero, otherwise
# default.
private def clamp_positive_seconds(value, default)
  return default unless value.is_a?(Numeric)
  return default unless value.finite? && value > 0

  value
end
|
|
110
|
+
|
|
111
|
+
# Seeds every setting with its default. The defaults are written straight to
# the instance variables, so no writer method (e.g. store=) runs here.
def initialize
  defaults = {
    store: nil,
    cors_origins: [],
    auth_callback: nil,
    flush_interval_ms: 10_000,
    flush_event_threshold: 50,
    max_events_per_page: 1_000,
    max_problems: 5_000,
    max_server_events: 50_000,
    ingest_keys: {},
    cross_tab_sessions: true,
    capture_metadata: false,
    capture_errors: false,
    track_navigation: false,
    track_custom_events: false,
    capture_clicks: false,
    track_forms: false,

    # Recorder (rrweb) options.
    mask_all_inputs: true,
    mask_input_options: {},
    block_selector: "[data-rr-block]",
    mask_text_selector: "[data-rr-mask]",
    ignore_selector: "[data-rr-ignore]",
    sampling: {scroll: 150, input: "last"},
    inline_stylesheet: nil,
    checkout_every_n_ms: nil,
    recorder_options: {},

    capture_web_vitals: false,
    analytics_max_scan_sessions: 5000,
    user_opt_out: false,
    opt_out_cookie_name: "sentiero_optout",
    respect_gpc: true,
    retention_period: nil,
    session_idle_timeout: DEFAULT_SESSION_IDLE_TIMEOUT,
    session_max_age: DEFAULT_SESSION_MAX_AGE,
    redaction: Sentiero::Redaction::Config.new,
    anonymize_ip: true,
    audit_log: nil,
    # Opt-in: a share file is a full session dump leaving the operator's
    # infrastructure, so export/import routes 404 until explicitly enabled.
    shareable_replays: false,
    basic_auth: nil,
    # The dashboard exposes recordings/analytics, so with neither basic_auth
    # nor auth_callback set it fails closed (403). Set true to opt into serving
    # it unauthenticated (e.g. behind a trusted proxy or in local dev).
    allow_insecure_dashboard: false
  }

  defaults.each { |name, value| instance_variable_set(:"@#{name}", value) }
end
|
|
159
|
+
|
|
160
|
+
# Builds the recorder options hash from three layers, later layers winning:
#
# 1. the free-form +recorder_options+ hash,
# 2. the first-class settings (masking, selectors, sampling and, when set,
#    inline_stylesheet / checkout_every_n_ms),
# 3. ENFORCED_PRIVACY, which overrides both. Where an enforced value is a
#    Hash it is merged over the existing Hash value (or over an empty Hash
#    when the existing value is not a Hash) instead of replacing it.
#
# Returns a new Hash; +recorder_options+ itself is not modified.
def effective_recorder_options
  options = recorder_options.merge(
    maskAllInputs: mask_all_inputs,
    maskInputOptions: mask_input_options,
    blockSelector: block_selector,
    maskTextSelector: mask_text_selector,
    ignoreSelector: ignore_selector,
    sampling: sampling
  )

  # Optional settings are only emitted when explicitly configured.
  options[:inlineStylesheet] = inline_stylesheet unless inline_stylesheet.nil?
  options[:checkoutEveryNms] = checkout_every_n_ms unless checkout_every_n_ms.nil?

  options.merge(ENFORCED_PRIVACY) do |_key, current, enforced|
    next enforced unless enforced.is_a?(Hash)

    base = current.is_a?(Hash) ? current : {}
    base.merge(enforced)
  end
end
|
|
183
|
+
end
|
|
184
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sentiero
  # Right-to-erasure helpers (GDPR Art. 17). Store-agnostic.
  # Erasure is destructive and irreversible; deleted sessions cannot be recovered.
  module Erasure
    # Batch size used by erase_where when the store's configured scan cap is
    # not a positive Integer. A zero or negative cap would otherwise never
    # produce a "short" batch and the paging loop would spin forever.
    FALLBACK_BATCH_SIZE = 1_000

    module_function

    # Deletes the given sessions from +store+.
    #
    # store - object responding to get_session(id) and delete_session(id).
    # ids   - a session id or a collection of ids. Duplicates are collapsed so
    #         each session is deleted, and counted, at most once.
    #
    # Every id is validated against Store::VALID_ID before anything is
    # deleted, so one malformed id aborts the call without side effects.
    #
    # Returns the number of sessions that were found and deleted.
    # Raises ArgumentError when an id does not match Store::VALID_ID.
    def erase_sessions(store, ids)
      unique_ids = Array(ids).uniq
      unique_ids.each do |id|
        raise ArgumentError, "Invalid ID: #{id.inspect}" unless Store::VALID_ID.match?(id.to_s)
      end
      present = unique_ids.select { |id| store.get_session(id) }
      present.each { |id| store.delete_session(id) }
      present.size
    end

    # Deletes every session that store.list_sessions returns for the given
    # bounds.
    #
    # At least one bound is required to guard against erasing everything; the
    # range is inclusive. Lists/deletes in capped batches (paging) until a scan
    # is short, so one call erases every match regardless of count.
    #
    # store      - object responding to limits, list_sessions and delete_session.
    # since      - optional lower bound.
    # until_time - optional upper bound.
    #
    # Returns the number of sessions deleted.
    # Raises ArgumentError when both bounds are nil or since is after until_time.
    def erase_where(store, since: nil, until_time: nil)
      raise ArgumentError, "provide since: and/or until_time:" if since.nil? && until_time.nil?
      if since && until_time && since.to_f > until_time.to_f
        raise ArgumentError, "since: must not be after until_time:"
      end

      # The batch size must be >= 1 for the "short batch" exit below to be
      # reachable, so an unusable configured cap is replaced.
      configured = store.limits.analytics_max_scan_sessions
      cap = (configured.is_a?(Integer) && configured.positive?) ? configured : FALLBACK_BATCH_SIZE
      erased = 0

      loop do
        ids = store.list_sessions(
          limit: cap,
          since: since,
          until_time: until_time
        ).map { |summary| summary[:session_id] }

        ids.each { |id| store.delete_session(id) }
        erased += ids.size

        # Each listed session matched and was deleted, so the set shrinks;
        # a short batch means the matches are exhausted.
        # NOTE(review): this relies on delete_session actually removing the
        # session from later list_sessions results — confirm for each store.
        break if ids.size < cap
      end

      erased
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
|
|
5
|
+
module Sentiero
  # Computes the grouping fingerprint for an exception occurrence. The
  # normalization regexes are deliberately linear-time (simple character classes
  # with a single quantifier, no nesting) so untrusted backtraces cannot trigger
  # catastrophic backtracking (ReDoS).
  module Fingerprint
    # Only the top frames drive grouping (deeper frames vary by call site).
    MAX_FRAMES = 5
    # Frames are cut to this many characters before normalization.
    MAX_FRAME_LENGTH = 1000

    module_function

    # Builds the fingerprint from the project, the exception class and the
    # first MAX_FRAMES normalized backtrace frames.
    #
    # exception_class - interpolated into the hashed input as-is.
    # backtrace       - frame or collection of frames (nil is treated as
    #                   empty); each frame is converted with to_s.
    # project         - interpolated into the hashed input as-is.
    #
    # Returns the first 40 hex characters of the SHA-256 digest.
    def compute(exception_class:, backtrace:, project:)
      frames = Array(backtrace).first(MAX_FRAMES).map { |frame| normalize_frame(frame.to_s) }
      input = "#{project}\n#{exception_class}\n#{frames.join("\n")}"
      Digest::SHA256.hexdigest(input)[0, 40]
    end

    # Strips per-occurrence noise (memory addresses, line numbers). Digits inside
    # identifiers (e.g. `step_1`, `V2::Api`) are preserved so distinct methods do
    # not collapse into one group.
    #
    # Invalid byte sequences are replaced via String#scrub before any
    # stripping or regex work: those operations raise ArgumentError on
    # strings with a broken encoding, which would otherwise let one malformed
    # frame abort fingerprinting. Well-formed frames pass through scrub
    # unchanged, so their fingerprints are unaffected.
    def normalize_frame(frame)
      frame[0, MAX_FRAME_LENGTH]
        .scrub
        .strip
        .gsub(/0x[0-9a-fA-F]+/, "0xHEX") # memory addresses
        .gsub(/:[0-9]+(?=:in )/, ":N")   # `path:LINE:in 'method'`
        .gsub(/:[0-9]+\z/, ":N")         # `path:LINE` (top-level frame, no method)
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ipaddr"
|
|
4
|
+
|
|
5
|
+
module Sentiero
  # Truncates client IP addresses: IPv4 is masked to /24 (last octet zeroed)
  # and IPv6 to /48 (last 80 bits zeroed). An IPv4-mapped IPv6 address is
  # first converted to its native IPv4 form and then masked as IPv4.
  #
  # Anonymization is one-way and best-effort, not a re-identification guarantee.
  module IpAnonymizer
    module_function

    # ip - candidate client address.
    #
    # Returns the masked address as a String, or nil when +ip+ is not a
    # String, is blank, contains a "/" (a client IP is a bare host, not a
    # CIDR), or cannot be parsed as an IP address.
    def anonymize(ip)
      candidate = ip.is_a?(String) ? ip.strip : ""
      return nil if candidate.empty? || candidate.include?("/")

      parsed = IPAddr.new(candidate)
      parsed = parsed.native if parsed.ipv4_mapped?
      prefix_bits = parsed.ipv4? ? 24 : 48
      parsed.mask(prefix_bits).to_s
    rescue IPAddr::Error
      nil
    end
  end
end
|