sentiero 1.0.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +7 -0
  3. data/README.md +679 -0
  4. data/lib/sentiero/analytics/analyzer.rb +91 -0
  5. data/lib/sentiero/analytics/bounded.rb +29 -0
  6. data/lib/sentiero/analytics/browser_event_discovery.rb +70 -0
  7. data/lib/sentiero/analytics/collectors/click_collector.rb +135 -0
  8. data/lib/sentiero/analytics/collectors/custom_tag_collector.rb +61 -0
  9. data/lib/sentiero/analytics/collectors/error_collector.rb +89 -0
  10. data/lib/sentiero/analytics/collectors/form_collector.rb +156 -0
  11. data/lib/sentiero/analytics/collectors/frustration_collector.rb +85 -0
  12. data/lib/sentiero/analytics/collectors/scroll_collector.rb +156 -0
  13. data/lib/sentiero/analytics/collectors/vitals_collector.rb +104 -0
  14. data/lib/sentiero/analytics/conversion_analyzer.rb +247 -0
  15. data/lib/sentiero/analytics/engagement_analyzer.rb +331 -0
  16. data/lib/sentiero/analytics/entry_attribution.rb +71 -0
  17. data/lib/sentiero/analytics/error_discovery.rb +118 -0
  18. data/lib/sentiero/analytics/events.rb +21 -0
  19. data/lib/sentiero/analytics/exporter.rb +242 -0
  20. data/lib/sentiero/analytics/form_analyzer.rb +153 -0
  21. data/lib/sentiero/analytics/frustration/detectors.rb +158 -0
  22. data/lib/sentiero/analytics/frustration_analyzer.rb +235 -0
  23. data/lib/sentiero/analytics/funnel_analyzer.rb +160 -0
  24. data/lib/sentiero/analytics/heatmap_analyzer.rb +93 -0
  25. data/lib/sentiero/analytics/page_report_analyzer.rb +198 -0
  26. data/lib/sentiero/analytics/problem_detail.rb +97 -0
  27. data/lib/sentiero/analytics/scroll_depth_analyzer.rb +30 -0
  28. data/lib/sentiero/analytics/segmenter.rb +133 -0
  29. data/lib/sentiero/analytics/server_event_metrics.rb +120 -0
  30. data/lib/sentiero/analytics/stats.rb +30 -0
  31. data/lib/sentiero/analytics/stats_aggregator/result_builder.rb +153 -0
  32. data/lib/sentiero/analytics/stats_aggregator.rb +346 -0
  33. data/lib/sentiero/analytics/web_vitals_analyzer.rb +57 -0
  34. data/lib/sentiero/configuration.rb +184 -0
  35. data/lib/sentiero/erasure.rb +48 -0
  36. data/lib/sentiero/fingerprint.rb +34 -0
  37. data/lib/sentiero/ip_anonymizer.rb +29 -0
  38. data/lib/sentiero/redaction/config.rb +61 -0
  39. data/lib/sentiero/redaction.rb +207 -0
  40. data/lib/sentiero/reporter/configuration.rb +50 -0
  41. data/lib/sentiero/reporter/context.rb +31 -0
  42. data/lib/sentiero/reporter/dispatcher.rb +91 -0
  43. data/lib/sentiero/reporter/http_transport.rb +57 -0
  44. data/lib/sentiero/reporter/log_transport.rb +26 -0
  45. data/lib/sentiero/reporter/middleware.rb +62 -0
  46. data/lib/sentiero/reporter/normalizer.rb +14 -0
  47. data/lib/sentiero/reporter/null_transport.rb +18 -0
  48. data/lib/sentiero/reporter/report_context.rb +29 -0
  49. data/lib/sentiero/reporter/scrubber.rb +47 -0
  50. data/lib/sentiero/reporter/test_helper.rb +32 -0
  51. data/lib/sentiero/reporter/test_transport.rb +28 -0
  52. data/lib/sentiero/reporter.rb +214 -0
  53. data/lib/sentiero/roda.rb +47 -0
  54. data/lib/sentiero/store/error_store.rb +220 -0
  55. data/lib/sentiero/store/limits.rb +31 -0
  56. data/lib/sentiero/store/session_store.rb +118 -0
  57. data/lib/sentiero/store.rb +72 -0
  58. data/lib/sentiero/stores/file.rb +566 -0
  59. data/lib/sentiero/stores/memory.rb +362 -0
  60. data/lib/sentiero/stores/redis/keys.rb +59 -0
  61. data/lib/sentiero/stores/redis/lua.rb +119 -0
  62. data/lib/sentiero/stores/redis.rb +665 -0
  63. data/lib/sentiero/stores/sqlite/schema.rb +79 -0
  64. data/lib/sentiero/stores/sqlite.rb +626 -0
  65. data/lib/sentiero/user_agent.rb +32 -0
  66. data/lib/sentiero/version.rb +5 -0
  67. data/lib/sentiero/web/analytics_app.rb +538 -0
  68. data/lib/sentiero/web/assets/analytics-RH24EOLD.js +1 -0
  69. data/lib/sentiero/web/assets/dashboard-JFYNHZZV.js +3 -0
  70. data/lib/sentiero/web/assets/heatmap-EBKFWSKN.js +1 -0
  71. data/lib/sentiero/web/assets/import-HIMBJJ4S.js +1 -0
  72. data/lib/sentiero/web/assets/manifest.json +11 -0
  73. data/lib/sentiero/web/assets/recorder-SLLXSUUX.js +71 -0
  74. data/lib/sentiero/web/assets/rrweb-player-cd435a95.js +126 -0
  75. data/lib/sentiero/web/assets/rrweb-player-css-ce5e9629.css +2 -0
  76. data/lib/sentiero/web/assets/sessions_index-2RAGTEZM.js +1 -0
  77. data/lib/sentiero/web/assets/style-d71e72fd.css +2 -0
  78. data/lib/sentiero/web/assets_app.rb +42 -0
  79. data/lib/sentiero/web/base_app.rb +319 -0
  80. data/lib/sentiero/web/basic_auth.rb +27 -0
  81. data/lib/sentiero/web/basic_auth_check.rb +41 -0
  82. data/lib/sentiero/web/body_reader.rb +44 -0
  83. data/lib/sentiero/web/csv_writer.rb +45 -0
  84. data/lib/sentiero/web/dashboard_app.rb +236 -0
  85. data/lib/sentiero/web/errors_app.rb +97 -0
  86. data/lib/sentiero/web/escaping.rb +37 -0
  87. data/lib/sentiero/web/events_app.rb +196 -0
  88. data/lib/sentiero/web/formatting.rb +43 -0
  89. data/lib/sentiero/web/ingest_app.rb +92 -0
  90. data/lib/sentiero/web/manifest.rb +43 -0
  91. data/lib/sentiero/web/monitoring_app.rb +316 -0
  92. data/lib/sentiero/web/script_tag.rb +57 -0
  93. data/lib/sentiero/web/shareable_replay.rb +88 -0
  94. data/lib/sentiero/web/templates/_analytics_nav.html.erb +22 -0
  95. data/lib/sentiero/web/templates/_brand.html.erb +18 -0
  96. data/lib/sentiero/web/templates/_date_range.html.erb +18 -0
  97. data/lib/sentiero/web/templates/_errors_client_filter.html.erb +25 -0
  98. data/lib/sentiero/web/templates/_errors_server_filter.html.erb +36 -0
  99. data/lib/sentiero/web/templates/_events_browser_filter.html.erb +18 -0
  100. data/lib/sentiero/web/templates/_events_server_filter.html.erb +39 -0
  101. data/lib/sentiero/web/templates/_pagination.html.erb +14 -0
  102. data/lib/sentiero/web/templates/_payload_metrics.html.erb +62 -0
  103. data/lib/sentiero/web/templates/_session_row.html.erb +42 -0
  104. data/lib/sentiero/web/templates/_sibling_tab_hint.html.erb +6 -0
  105. data/lib/sentiero/web/templates/_tabs.html.erb +10 -0
  106. data/lib/sentiero/web/templates/_truncation_warning.html.erb +19 -0
  107. data/lib/sentiero/web/templates/_window_tab.html.erb +5 -0
  108. data/lib/sentiero/web/templates/analytics_conversions.html.erb +94 -0
  109. data/lib/sentiero/web/templates/analytics_engagement.html.erb +101 -0
  110. data/lib/sentiero/web/templates/analytics_frustration.html.erb +135 -0
  111. data/lib/sentiero/web/templates/analytics_funnel.html.erb +103 -0
  112. data/lib/sentiero/web/templates/analytics_index.html.erb +380 -0
  113. data/lib/sentiero/web/templates/analytics_page.html.erb +287 -0
  114. data/lib/sentiero/web/templates/analytics_scroll.html.erb +94 -0
  115. data/lib/sentiero/web/templates/analytics_vitals.html.erb +91 -0
  116. data/lib/sentiero/web/templates/client_error_show.html.erb +73 -0
  117. data/lib/sentiero/web/templates/dashboard.html.erb +56 -0
  118. data/lib/sentiero/web/templates/errors_index.html.erb +149 -0
  119. data/lib/sentiero/web/templates/event_show.html.erb +52 -0
  120. data/lib/sentiero/web/templates/events_index.html.erb +177 -0
  121. data/lib/sentiero/web/templates/export_index.html.erb +69 -0
  122. data/lib/sentiero/web/templates/forms.html.erb +105 -0
  123. data/lib/sentiero/web/templates/heatmap.html.erb +76 -0
  124. data/lib/sentiero/web/templates/import.html.erb +39 -0
  125. data/lib/sentiero/web/templates/problem_show.html.erb +200 -0
  126. data/lib/sentiero/web/templates/segments.html.erb +114 -0
  127. data/lib/sentiero/web/templates/session_show.html.erb +195 -0
  128. data/lib/sentiero/web/templates/sessions_index.html.erb +97 -0
  129. data/lib/sentiero/web/track_app.rb +57 -0
  130. data/lib/sentiero/web/views/analytics_index_view.rb +86 -0
  131. data/lib/sentiero/web/views/analyzer_view.rb +27 -0
  132. data/lib/sentiero/web/views/base_view.rb +76 -0
  133. data/lib/sentiero/web/views/client_error_show_view.rb +29 -0
  134. data/lib/sentiero/web/views/conversions_view.rb +41 -0
  135. data/lib/sentiero/web/views/engagement_view.rb +67 -0
  136. data/lib/sentiero/web/views/errors_index_view.rb +37 -0
  137. data/lib/sentiero/web/views/event_show_view.rb +20 -0
  138. data/lib/sentiero/web/views/events_index_view.rb +56 -0
  139. data/lib/sentiero/web/views/export_view.rb +23 -0
  140. data/lib/sentiero/web/views/forms_view.rb +28 -0
  141. data/lib/sentiero/web/views/frustration_view.rb +15 -0
  142. data/lib/sentiero/web/views/funnel_view.rb +36 -0
  143. data/lib/sentiero/web/views/heatmap_view.rb +34 -0
  144. data/lib/sentiero/web/views/import_view.rb +13 -0
  145. data/lib/sentiero/web/views/page_report_view.rb +43 -0
  146. data/lib/sentiero/web/views/problem_show_view.rb +46 -0
  147. data/lib/sentiero/web/views/scroll_view.rb +23 -0
  148. data/lib/sentiero/web/views/segments_view.rb +28 -0
  149. data/lib/sentiero/web/views/session_show_view.rb +105 -0
  150. data/lib/sentiero/web/views/sessions_index_view.rb +28 -0
  151. data/lib/sentiero/web/views/vitals_view.rb +45 -0
  152. data/lib/sentiero/web/views.rb +24 -0
  153. data/lib/sentiero/window_ref.rb +6 -0
  154. data/lib/sentiero.rb +69 -0
  155. metadata +232 -0
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+
5
module Sentiero
  module Analytics
    # Aggregations for the problem detail page: facet distributions over the
    # already-fetched occurrences/session summaries, and the occurrence trend.
    # The trend's rolling counts query the store (count_occurrences); the facets
    # and the sparkline buckets are computed purely from the passed-in rows.
    class ProblemDetail
      # Rows shown per facet group on the problem detail page.
      FACET_LIMIT = 8

      # Day buckets in the problem-detail occurrence sparkline.
      TREND_DAYS = 30

      SECONDS_PER_DAY = 86_400
      private_constant :SECONDS_PER_DAY

      # @param store [#count_occurrences] only used by #trend, which calls
      #   count_occurrences(problem_id, after:) for the rolling header counts.
      def initialize(store)
        @store = store
      end

      # Builds the facet tables from rows the caller has already fetched.
      #
      # @param occurrences [Array<Hash>] string-keyed rows; "context" and
      #   "timestamp" are read. Rows whose "context" is not a Hash contribute
      #   to :sample_size only.
      # @param session_summaries [Array<Hash>] rows read via the :browser key.
      # @return [Hash] :paths, :environments and :browsers are arrays of
      #   [value, count]; :releases is an array of
      #   [release, {count:, first_seen:}]; each is capped at FACET_LIMIT and
      #   ordered by count descending, ties broken by value so the order is
      #   stable between renders. :sample_size is occurrences.size.
      def facets(occurrences, session_summaries)
        paths = Hash.new(0)
        environments = Hash.new(0)
        releases = {}
        browsers = Hash.new(0)

        occurrences.each do |occ|
          ctx = occ["context"]
          next unless ctx.is_a?(Hash)

          # Hash#dig raises TypeError when "request" is present but not a Hash
          # (e.g. a bare string), so check the shape explicitly.
          request = ctx["request"]
          tally_facet(paths, request["path"]) if request.is_a?(Hash)
          tally_facet(environments, ctx["environment"])
          tally_release(releases, ctx["release"], occ["timestamp"])
        end

        session_summaries.each { |s| tally_facet(browsers, s[:browser]) }

        {
          paths: top_facet(paths),
          environments: top_facet(environments),
          releases: releases.sort_by { |release, info| [-info[:count], release] }.first(FACET_LIMIT),
          browsers: top_facet(browsers),
          sample_size: occurrences.size
        }
      end

      # The 24h/7d/30d header counts are exact (count_occurrences after:); the
      # sparkline buckets the already-fetched occurrences by UTC day, labeled
      # with its sample size.
      #
      # @param problem_id [Object] forwarded untouched to the store.
      # @param occurrences [Array<Hash>] rows whose "timestamp" is read as
      #   epoch seconds; unusable timestamps are left out of the buckets.
      # @return [Hash] :series (TREND_DAYS entries of {date:, count:}, oldest
      #   first), :sample_size, :last_24h, :last_7d, :last_30d.
      def trend(problem_id, occurrences)
        # Read the clock once so the sparkline's end date and the rolling
        # cut-offs cannot land on different sides of a UTC midnight.
        now = Time.now
        per_day = Hash.new(0)
        occurrences.each do |occ|
          ts = usable_timestamp(occ["timestamp"])
          per_day[Time.at(ts).utc.to_date.to_s] += 1 if ts
        end

        end_date = now.getutc.to_date
        series = ((end_date - (TREND_DAYS - 1))..end_date).map do |date|
          {date: date.to_s, count: per_day[date.to_s]}
        end

        epoch_now = now.to_f
        {
          series: series,
          sample_size: occurrences.size,
          last_24h: occurrence_count_after(problem_id, epoch_now - SECONDS_PER_DAY),
          last_7d: occurrence_count_after(problem_id, epoch_now - 7 * SECONDS_PER_DAY),
          last_30d: occurrence_count_after(problem_id, epoch_now - TREND_DAYS * SECONDS_PER_DAY)
        }
      end

      private

      def occurrence_count_after(problem_id, after)
        @store.count_occurrences(problem_id, after: after)
      end

      # Epoch seconds as a positive, finite Float, or nil when the raw value
      # cannot be used (nil, zero/negative, NaN/Infinity, or no #to_f at all).
      def usable_timestamp(raw)
        return nil unless raw.respond_to?(:to_f)

        ts = raw.to_f
        ts if ts.finite? && ts > 0
      end

      # Counts only non-empty String values; anything else is ignored.
      def tally_facet(counts, value)
        counts[value] += 1 if value.is_a?(String) && !value.empty?
      end

      # Counts a release and tracks the earliest usable timestamp it was seen at.
      def tally_release(releases, release, timestamp)
        return unless release.is_a?(String) && !release.empty?

        info = releases[release] ||= {count: 0, first_seen: nil}
        info[:count] += 1
        ts = usable_timestamp(timestamp)
        info[:first_seen] = [info[:first_seen], ts].compact.min if ts
      end

      # Highest counts first; the value is the tie-breaker because sort_by is
      # not stable and equal counts would otherwise reorder arbitrarily.
      def top_facet(counts)
        counts.sort_by { |value, count| [-count, value] }.first(FACET_LIMIT)
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+ require_relative "collectors/scroll_collector"
5
+
6
module Sentiero
  module Analytics
    # Per-URL scroll-depth report across sessions (avg depth, fold lines,
    # distribution). This class only walks the sessions and feeds each page
    # segment to a ScrollCollector, which owns the depth math.
    class ScrollDepthAnalyzer < Analyzer
      # Passed to ScrollCollector as max_urls.
      MAX_URLS = 200

      # Scans sessions within the optional bounds and aggregates scroll depth
      # per URL.
      #
      # @param limit [Integer, nil] forwarded to scan_sessions.
      # @param since [Object, nil] forwarded to scan_sessions.
      # @param until_time [Object, nil] forwarded to scan_sessions.
      # @return [Hash] :pages (the collector's pages) and :was_truncated,
      #   truthy when either the collector or the session scan reported a cap.
      def analyze(limit: nil, since: nil, until_time: nil)
        collector = ScrollCollector.new(max_urls: MAX_URLS)
        bounds = {limit: limit, since: since, until_time: until_time}

        _session_count, scan_capped = scan_sessions(**bounds) do |_summary, _window_id, window_events|
          each_page_segment(window_events) do |page_url, page_events, _anchor|
            # Segments without a URL cannot be attributed to a page.
            next unless page_url

            collector.observe(page_url, page_events)
          end
          collector.flush_window
        end

        pages = collector.pages
        truncated = collector.capped || scan_capped
        {pages: pages, was_truncated: truncated}
      end
    end
  end
end
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer"
4
+ require_relative "../user_agent"
5
+
6
module Sentiero
  module Analytics
    # Read-time session filter. Every configured criterion (browser, device,
    # URL, metadata, has-errors, duration) must hold for a session to match.
    # At most the store's limits.analytics_max_scan_sessions sessions are
    # scanned per call.
    class Segmenter < Analyzer
      # String criteria are stripped; blank or non-String values disable the
      # corresponding filter. Any metadata_match other than "contains" is
      # treated as "exact".
      def initialize(
        store = Sentiero.store,
        browser: nil,
        device: nil,
        url_pattern: nil,
        metadata_key: nil,
        metadata_value: nil,
        metadata_match: "exact",
        has_errors: false,
        min_duration_ms: nil,
        max_duration_ms: nil,
        since: nil,
        until_time: nil
      )
        super(store)
        @browser = presence(browser)
        @device = presence(device)
        @url_pattern = presence(url_pattern)
        @metadata_key = presence(metadata_key)
        @metadata_value = presence(metadata_value)
        @metadata_match = (metadata_match == "contains") ? "contains" : "exact"
        @has_errors = has_errors
        @min_duration_ms = min_duration_ms
        @max_duration_ms = max_duration_ms
        @since = since
        @until_time = until_time
      end

      # One page of matching session summaries.
      #
      # @param limit [Integer] page size.
      # @param offset [Integer] number of matches to skip.
      # @return [Hash] :sessions (at most `limit` summaries), :has_next, and
      #   :was_truncated (true when the scan returned as many rows as the cap).
      def matching(limit: 20, offset: 0)
        scan_cap = store.limits.analytics_max_scan_sessions
        candidates = store.list_sessions(limit: scan_cap, offset: 0, since: @since, until_time: @until_time)
        hits = candidates.select { |summary| match?(summary) }

        # Take one row beyond the page so has_next needs no second pass.
        window = hits.slice(offset, limit + 1) || []

        {
          sessions: window.first(limit),
          has_next: window.size > limit,
          was_truncated: candidates.size >= scan_cap
        }
      end

      private

      # Stripped string, or nil for blank / non-String input.
      def presence(value)
        return nil unless value.is_a?(String)

        trimmed = value.strip
        trimmed unless trimmed.empty?
      end

      # Checks run in a fixed order and stop at the first failure.
      def match?(summary)
        metadata = summary[:metadata] || {}

        return false unless browser_match?(metadata)
        return false unless device_match?(metadata)
        return false unless url_match?(metadata)
        return false unless metadata_match?(metadata)
        return false unless has_errors_match?(metadata)

        duration_match?(summary)
      end

      def browser_match?(metadata)
        return true unless @browser

        @browser == UserAgent.browser(metadata["userAgent"])
      end

      def device_match?(metadata)
        return true unless @device

        @device == UserAgent.device(metadata["userAgent"])
      end

      # Patterns containing `*` or `?` are matched as case-insensitive globs
      # against the whole URL; anything else is a case-insensitive substring
      # test. A literal `?` in the pattern (e.g. a query string) therefore
      # also selects glob mode.
      def url_match?(metadata)
        return true unless @url_pattern

        url = metadata["url"]
        return false unless url.is_a?(String)

        if glob?(@url_pattern)
          glob_match?(url, @url_pattern)
        else
          url.downcase.include?(@url_pattern.downcase)
        end
      end

      def glob?(pattern)
        %w[* ?].any? { |wildcard| pattern.include?(wildcard) }
      end

      def glob_match?(url, pattern)
        File.fnmatch(pattern, url, File::FNM_CASEFOLD)
      end

      # With only a key configured, the key's presence is enough. With a value
      # too, "contains" compares case-insensitively and "exact" compares the
      # stringified value verbatim.
      def metadata_match?(metadata)
        return true unless @metadata_key
        return false unless metadata.key?(@metadata_key)
        return true unless @metadata_value

        actual = metadata[@metadata_key].to_s
        return actual == @metadata_value unless @metadata_match == "contains"

        actual.downcase.include?(@metadata_value.downcase)
      end

      def has_errors_match?(metadata)
        return true unless @has_errors

        metadata["has_errors"] == true
      end

      # Sessions without a computable duration never match a duration filter.
      def duration_match?(summary)
        return true unless @min_duration_ms || @max_duration_ms

        elapsed = duration_ms(summary)
        return false if elapsed.nil?

        too_short = @min_duration_ms && elapsed < @min_duration_ms
        return false if too_short

        too_long = @max_duration_ms && elapsed > @max_duration_ms
        !too_long
      end
    end
  end
end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+
5
module Sentiero
  module Analytics
    # Day-by-day roll-ups for the events-index strips (level mix and numeric
    # payload metrics). Everything is derived from the event rows handed to the
    # constructor; the store is never consulted, so the dashboard can fetch the
    # full pre-pagination list once and aggregate it here.
    class ServerEventMetrics
      # Most-recent day rows rendered in the events-index level-mix strip.
      LEVEL_MIX_MAX_DAYS = 30
      SERVER_EVENT_LEVELS = %w[debug info warn error].freeze

      # Cap on distinct payload keys offered in the metric_key dropdown.
      MAX_METRIC_KEYS = 50

      # Converts BrowserEventDiscovery rows (symbol keys, rrweb timestamps in
      # epoch MILLISECONDS) into the string-keyed, epoch-seconds rows the
      # instance methods read, so the browser tab can share them.
      def self.adapt_browser_rows(rows)
        rows.map do |row|
          millis = row[:timestamp]
          seconds = millis && (millis.to_f / 1000.0)
          {"name" => row[:name], "payload" => row[:payload], "timestamp" => seconds}
        end
      end

      # @param events [Array<Hash>] string-keyed rows; "name", "level",
      #   "payload" and "timestamp" (epoch seconds) are the keys read here.
      def initialize(events)
        @events = events
      end

      # Level counts grouped by UTC day.
      #
      # @return [Array] [[date, {level => count}], ...] in ascending date
      #   order, limited to the latest LEVEL_MIX_MAX_DAYS days that have data.
      #   Levels outside SERVER_EVENT_LEVELS are counted as "info".
      def level_mix_by_day
        tallies = Hash.new { |by_day, date| by_day[date] = Hash.new(0) }

        @events.each do |event|
          seconds = positive_timestamp(event)
          next unless seconds

          raw_level = event["level"]
          level = SERVER_EVENT_LEVELS.include?(raw_level) ? raw_level : "info"
          tallies[utc_day(seconds)][level] += 1
        end

        tallies.sort_by(&:first).last(LEVEL_MIX_MAX_DAYS)
      end

      # Template locals for the payload-metric strip. Metrics are only offered
      # when all rows share one event name; `requested_key` (the user's pick)
      # is accepted only if it is one of the numeric payload keys.
      #
      # @return [Hash] :single_name, :metric_keys, :metric_key, :metric_days.
      def payload_metric_locals(requested_key)
        shared_name = single_event_name
        available = shared_name ? numeric_payload_keys : []
        chosen = available.include?(requested_key) ? requested_key : nil
        daily = chosen ? payload_metrics_by_day(chosen) : []

        {
          single_name: shared_name,
          metric_keys: available,
          metric_key: chosen,
          metric_days: daily
        }
      end

      private

      # The row's timestamp as a Float when it is strictly positive, else nil.
      def positive_timestamp(event)
        seconds = event["timestamp"]&.to_f
        seconds if seconds && seconds > 0
      end

      # ISO date string of the UTC day containing `seconds`.
      def utc_day(seconds)
        Time.at(seconds).utc.to_date.to_s
      end

      # The one event name carried by every row, or nil when the list is
      # empty, the first name is blank/non-String, or the names differ.
      def single_event_name
        return nil if @events.empty?

        candidate = @events.first["name"]
        return nil unless candidate.is_a?(String) && !candidate.empty?

        candidate if @events.all? { |event| event["name"] == candidate }
      end

      # Sorted payload keys that held a Numeric value in at least one row.
      # Once MAX_METRIC_KEYS distinct keys have been seen, new keys are ignored.
      def numeric_payload_keys
        seen = {}

        @events.each do |event|
          payload = event["payload"]
          next unless payload.is_a?(Hash)

          payload.each do |key, value|
            next unless value.is_a?(Numeric)

            seen[key] = true if seen.key?(key) || seen.size < MAX_METRIC_KEYS
          end
        end

        seen.keys.sort
      end

      # count/sum/min/max of one payload key per UTC day, plus a tally of rows
      # where the key was present but not Numeric. Returns
      # [[date, {count:, sum:, min:, max:, non_numeric:}], ...] ascending,
      # limited to the latest LEVEL_MIX_MAX_DAYS days.
      def payload_metrics_by_day(key)
        buckets = Hash.new do |by_day, date|
          by_day[date] = {count: 0, sum: 0.0, min: nil, max: nil, non_numeric: 0}
        end

        @events.each do |event|
          seconds = positive_timestamp(event)
          next unless seconds

          payload = event["payload"]
          next unless payload.is_a?(Hash) && payload.key?(key)

          bucket = buckets[utc_day(seconds)]
          sample = payload[key]
          if sample.is_a?(Numeric)
            bucket[:count] += 1
            bucket[:sum] += sample
            bucket[:min] = [bucket[:min], sample].compact.min
            bucket[:max] = [bucket[:max], sample].compact.max
          else
            bucket[:non_numeric] += 1
          end
        end

        buckets.sort_by(&:first).last(LEVEL_MIX_MAX_DAYS)
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module Sentiero
  module Analytics
    # Small numeric helpers meant to be mixed into analyzers via `include`.
    module Stats
      # Nearest-rank percentile of a pre-sorted array.
      #
      # @param sorted [Array] values already in ascending order.
      # @param pct [Numeric] percentile in 0..100; values outside that range
      #   resolve to the first/last element.
      # @return [Object, nil] the element at the nearest rank, or nil when
      #   `sorted` is empty (clamp(1, 0) would otherwise raise ArgumentError).
      def percentile(sorted, pct)
        return nil if sorted.empty?

        rank = (pct / 100.0 * sorted.size).ceil
        sorted[rank.clamp(1, sorted.size) - 1]
      end

      # Arithmetic mean as a Float. An empty collection yields NaN (0.0 / 0),
      # so callers should check for emptiness first.
      def mean(values)
        values.sum.to_f / values.size
      end

      # Milliseconds from `anchor` to `timestamp`, floored at 0; 0 if either is nil.
      def offset_ms(anchor, timestamp)
        return 0 unless anchor && timestamp

        [timestamp - anchor, 0].max.round
      end

      # Top `limit` [key, count] pairs, highest count first, ties broken by key
      # so the ordering is deterministic.
      def top_counts(counts, limit:)
        counts.sort_by { |key, count| [-count, key] }.first(limit)
      end
    end
  end
end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
module Sentiero
  module Analytics
    class StatsAggregator
      # Pure presentation layer: turns a finished Accumulator into the
      # aggregate's public result hash. Needs the store only for the two
      # problem lookups (open_problems count and the top_problems list).
      class ResultBuilder
        include Events
        include Stats

        # @param store [#list_problems] queried once per #build call.
        def initialize(store)
          @store = store
        end

        # Assembles the result hash.
        #
        # @param acc [Object] the finished accumulator; only its readers are
        #   used (total_events, durations, browsers, per_day_* and so on).
        # @param sessions_scanned [Integer] sessions the scan visited; reported
        #   as both :total_sessions and :sessions_scanned.
        # @param scan_cap [Integer] scan limit; reaching it sets :was_truncated.
        # @param server_overlay_truncated [Boolean] passed through unchanged.
        # @return [Hash] the aggregate's public result.
        def build(acc, sessions_scanned, scan_cap, server_overlay_truncated)
          # Fetched once and reused for both the count and the top list.
          open_problems = store.list_problems(project: nil, limit: 10_000, status: "open")
          groups, series_bucket = bucket_groups(acc)
          series = groups.map { |label, dates| series_entry(acc, label, dates) }
          custom_event_tags = top_tags(acc.custom_tags.tags)

          {
            total_sessions: sessions_scanned,
            total_events: acc.total_events,
            sessions_scanned: sessions_scanned,
            avg_duration_ms: average(acc.durations),
            open_problems: open_problems.size,
            top_problems: top_problems(open_problems, acc.since),
            sessions_with_errors: acc.sessions_with_errors,
            event_type_breakdown: event_type_breakdown(acc.event_types),
            browser_distribution: sort_by_count(acc.browsers),
            device_distribution: sort_by_count(acc.devices),
            top_entry_pages: top_entry_pages(acc),
            top_referrers: top_list(acc.referrers, :referrer, StatsAggregator::TOP_LIST_LIMIT),
            session_duration_buckets: acc.duration_buckets,
            custom_event_tags: custom_event_tags,
            custom_event_tag_series: tag_series(acc, custom_event_tags, groups),
            browser_event_tags: acc.browser_tags,
            navigation: {
              internal: top_list(acc.nav_internal, :url, StatsAggregator::TOP_LIST_LIMIT),
              external: top_list(acc.nav_external, :url, StatsAggregator::TOP_LIST_LIMIT),
              top_texts: top_list(acc.nav_texts, :text, StatsAggregator::TOP_LIST_LIMIT)
            },
            metadata_distributions: metadata_distributions(acc),
            events_per_day_series: series,
            series_bucket: series_bucket,
            # True when a cap was hit: server_error_count values are then a lower bound.
            server_overlay_truncated: server_overlay_truncated,
            # Effective bounds (unbounded "until" resolves to now) for period-over-period.
            window_since: acc.since,
            window_until: acc.until_time || Time.now.to_f,
            was_truncated: sessions_scanned >= scan_cap
          }
        end

        private

        attr_reader :store

        # The TOP_PROBLEMS_LIMIT most frequent open problems. `new` flags
        # problems first seen at or after the window start. The index
        # tie-breaker keeps equal-count problems in the store's order, since
        # sort_by alone is not stable.
        def top_problems(open_problems, since)
          open_problems
            .sort_by.with_index { |problem, index| [-problem[:count].to_i, index] }
            .first(StatsAggregator::TOP_PROBLEMS_LIMIT)
            .map do |problem|
              {
                id: problem[:id],
                exception_class: problem[:exception_class],
                message: problem[:message],
                count: problem[:count],
                first_seen: problem[:first_seen],
                new: !!(problem[:first_seen] && problem[:first_seen] >= since)
              }
            end
        end

        def event_type_breakdown(types)
          {
            incremental: types[INCREMENTAL],
            meta: types[META],
            custom: types[CUSTOM]
          }
        end

        # Mean as a Float, or nil for an empty list (Stats#mean would give NaN).
        def average(values)
          return nil if values.empty?

          values.sum / values.size.to_f
        end

        # The whole distribution as a Hash ordered by count descending.
        def sort_by_count(counts)
          top_counts(counts, limit: counts.size).to_h
        end

        # Top `limit` entries as [{key => value, count: n}, ...].
        def top_list(counts, key, limit)
          top_counts(counts, limit: limit).map { |value, count| {key => value, :count => count} }
        end

        def top_tags(counts)
          top_counts(counts, limit: StatsAggregator::TOP_TAGS_LIMIT).to_h
        end

        # Most common metadata keys, each with its five most common values.
        # Goes through top_counts like every other list here so keys with equal
        # counts come out in a deterministic order.
        def metadata_distributions(acc)
          top_counts(acc.metadata_keys, limit: StatsAggregator::TOP_LIST_LIMIT).map do |key, count|
            {key: key, count: count, values: top_list(acc.metadata_values[key] || {}, :value, 5)}
          end
        end

        # Top entry pages, each annotated with the accumulator's error count
        # for that URL.
        def top_entry_pages(acc)
          top_list(acc.entry_pages, :url, StatsAggregator::TOP_LIST_LIMIT).map do |row|
            row.merge(error_count: acc.entry_page_errors[row[:url]])
          end
        end

        # One bucket per UTC day, or per ISO week past WEEK_BUCKET_THRESHOLD_DAYS.
        # Main and per-tag series share these groups so they align.
        # Returns [[[label, [Date, ...]], ...], "day" | "week"].
        def bucket_groups(acc)
          start_date = Time.at(acc.since).utc.to_date
          end_date = (acc.until_time ? Time.at(acc.until_time) : Time.now).utc.to_date
          # An inverted window collapses to the single start day.
          end_date = start_date if end_date < start_date
          days = (start_date..end_date).to_a

          if days.size > StatsAggregator::WEEK_BUCKET_THRESHOLD_DAYS
            [days.group_by { |date| date.strftime("%G-W%V") }.to_a, "week"]
          else
            [days.map { |date| [date.to_s, [date]] }, "day"]
          end
        end

        # Per-tag series over the shared buckets; tags the accumulator has no
        # per-day data for are omitted.
        def tag_series(acc, custom_event_tags, groups)
          custom_event_tags.keys.filter_map do |tag|
            per_day = acc.per_day_tags[tag]
            next unless per_day

            [tag, groups.map { |label, dates| {date: label, count: dates.sum { |date| per_day[date.to_s] }} }]
          end.to_h
        end

        # One row of the main series: the bucket's per-day counters summed.
        # NOTE(review): assumes the per_day_* hashes return 0 for missing days.
        def series_entry(acc, label, dates)
          {
            date: label,
            event_count: dates.sum { |date| acc.per_day_events[date.to_s] },
            session_count: dates.sum { |date| acc.per_day_sessions[date.to_s] },
            error_count: dates.sum { |date| acc.per_day_errors[date.to_s] },
            server_error_count: dates.sum { |date| acc.per_day_server_errors[date.to_s] }
          }
        end
      end
    end
  end
end