behavior_analytics 0.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/behavior_analytics.gemspec +3 -1
  3. data/db/migrate/002_enhance_behavior_events_v2.rb +46 -0
  4. data/lib/behavior_analytics/analytics/cohorts.rb +242 -0
  5. data/lib/behavior_analytics/analytics/engine.rb +15 -0
  6. data/lib/behavior_analytics/analytics/funnels.rb +176 -0
  7. data/lib/behavior_analytics/analytics/retention.rb +186 -0
  8. data/lib/behavior_analytics/debug/inspector.rb +82 -0
  9. data/lib/behavior_analytics/export/csv_exporter.rb +102 -0
  10. data/lib/behavior_analytics/export/json_exporter.rb +55 -0
  11. data/lib/behavior_analytics/hooks/callback.rb +50 -0
  12. data/lib/behavior_analytics/hooks/manager.rb +106 -0
  13. data/lib/behavior_analytics/hooks/webhook.rb +114 -0
  14. data/lib/behavior_analytics/integrations/rails/middleware.rb +99 -0
  15. data/lib/behavior_analytics/integrations/rails.rb +106 -0
  16. data/lib/behavior_analytics/jobs/active_event_job.rb +37 -0
  17. data/lib/behavior_analytics/jobs/delayed_event_job.rb +29 -0
  18. data/lib/behavior_analytics/jobs/sidekiq_event_job.rb +37 -0
  19. data/lib/behavior_analytics/observability/metrics.rb +112 -0
  20. data/lib/behavior_analytics/observability/tracer.rb +85 -0
  21. data/lib/behavior_analytics/processors/async_processor.rb +24 -0
  22. data/lib/behavior_analytics/processors/background_job_processor.rb +72 -0
  23. data/lib/behavior_analytics/query.rb +87 -2
  24. data/lib/behavior_analytics/replay/engine.rb +108 -0
  25. data/lib/behavior_analytics/replay/processor.rb +107 -0
  26. data/lib/behavior_analytics/reporting/generator.rb +125 -0
  27. data/lib/behavior_analytics/sampling/strategy.rb +54 -0
  28. data/lib/behavior_analytics/schema/definition.rb +71 -0
  29. data/lib/behavior_analytics/schema/validator.rb +113 -0
  30. data/lib/behavior_analytics/storage/active_record_adapter.rb +168 -8
  31. data/lib/behavior_analytics/storage/elasticsearch_adapter.rb +175 -0
  32. data/lib/behavior_analytics/storage/in_memory_adapter.rb +214 -2
  33. data/lib/behavior_analytics/storage/kafka_adapter.rb +112 -0
  34. data/lib/behavior_analytics/storage/redis_adapter.rb +175 -0
  35. data/lib/behavior_analytics/streaming/event_stream.rb +77 -0
  36. data/lib/behavior_analytics/throttling/limiter.rb +97 -0
  37. data/lib/behavior_analytics/tracker.rb +130 -4
  38. data/lib/behavior_analytics/version.rb +1 -1
  39. data/lib/behavior_analytics.rb +138 -2
  40. metadata +33 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3e4f126ff83e60165d7b93e152c0d3ebd20f354c0d25aad69aaa88503359ea8a
4
- data.tar.gz: 1222c310f8447a36c79566f5d5aeaea504c057eecdda81f4d2b569fa62c1bc99
3
+ metadata.gz: 8ab1248da55b16fed6161e2d716579fdca58301bebd9336f0a15c3ffa3b036a7
4
+ data.tar.gz: 60dfde4fc53044dcf04a6fda27ab6d9caf41bbe3b421e892051e17ada80fbf8c
5
5
  SHA512:
6
- metadata.gz: 3fe2f936597d8c9b5016e527aa5b3ed86fb3b82f2b7cf5ba41df66c56a5d557a05758069bbaf5051e4960e2f01ad9e41fb4c3251c1f7c7d1808691c90a3272ec
7
- data.tar.gz: 3e1c42b6e12f012333a44085395f070f7f66ea40851ed73d4f994dcda2acd7013cf0db22ad908461665cc9dd04977c1412e6d6bcdf7f8c334cc4889a22b848a6
6
+ metadata.gz: f967ac25119baf6458fd318c8685de334686f0c0577c4ef5956aeef9bec5791e1bcaf9ea6697fdf69aa3040468aa02a3d6afad176dae33ffa0eed958d3d571c1
7
+ data.tar.gz: 2b230bb9364e2afcf1f8465b37a287d10a6f48a6ecc5251a152f607eafa1f045eebe5a46f3fdfa4bd3874a4b27039d7d925f5aa42399b4e8535d5a77473e3757
@@ -13,6 +13,7 @@ Gem::Specification.new do |spec|
13
13
  "computing analytics (engagement scores, time-based trends, feature usage), " \
14
14
  "and supporting API calls, feature usage, and custom events."
15
15
  spec.homepage = "https://github.com/nerdawey/behavior_analytics"
16
+ spec.license = "MIT"
16
17
  spec.required_ruby_version = ">= 3.0.0"
17
18
 
18
19
  spec.metadata["homepage_uri"] = spec.homepage
@@ -24,7 +25,8 @@ Gem::Specification.new do |spec|
24
25
  spec.files = Dir.chdir(__dir__) do
25
26
  `git ls-files -z`.split("\x0").reject do |f|
26
27
  (File.expand_path(f) == __FILE__) ||
27
- f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor Gemfile])
28
+ f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor Gemfile]) ||
29
+ f.end_with?('.gem')
28
30
  end
29
31
  end
30
32
  spec.bindir = "exe"
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
# Adds request/correlation columns and supporting indexes to the
# `behavior_events` table.
#
# Every statement is guarded by an existence check so the migration can be run
# against a table that already has some of the columns or indexes. All steps use
# schema helpers (no raw SQL), which keeps `change` reversible.
#
# NOTE(review): `array: true` and `using: :gin` look PostgreSQL-specific, and the
# GIN index on `metadata` presumably expects a jsonb column — confirm the target
# database before running elsewhere.
# NOTE(review): a column named `method` may shadow `Object#method` on the model's
# instances — verify against the model that reads it.
class EnhanceBehaviorEventsV2 < ActiveRecord::Migration[7.0]
  TABLE = :behavior_events

  # Plain scalar columns, name => type.
  SCALAR_COLUMNS = {
    path: :string,
    method: :string,
    status_code: :integer,
    correlation_id: :string,
    parent_event_id: :string
  }.freeze

  # Composite indexes for common query patterns, index name => columns.
  COMPOSITE_INDEXES = {
    "index_behavior_events_on_tenant_path_created" => %i[tenant_id path created_at],
    "index_behavior_events_on_tenant_user_type_created" => %i[tenant_id user_type created_at],
    "index_behavior_events_on_tenant_event_type_created" => %i[tenant_id event_type created_at]
  }.freeze

  METADATA_INDEX_NAME = "index_behavior_events_on_metadata_gin"

  def change
    add_missing_columns
    add_single_column_indexes
    add_composite_indexes
    add_metadata_index
  end

  private

  # Adds each new column unless the table already has it.
  def add_missing_columns
    SCALAR_COLUMNS.each do |name, type|
      add_column TABLE, name, type unless column_exists?(TABLE, name)
    end

    add_column TABLE, :tags, :string, array: true, default: [] unless column_exists?(TABLE, :tags)
  end

  # Indexes every new column individually; `tags` gets a GIN index.
  def add_single_column_indexes
    SCALAR_COLUMNS.each_key do |name|
      add_index TABLE, name unless index_exists?(TABLE, name)
    end

    add_index TABLE, :tags, using: :gin unless index_exists?(TABLE, :tags)
  end

  # Adds the tenant-scoped composite indexes under their explicit names.
  def add_composite_indexes
    COMPOSITE_INDEXES.each do |index_name, columns|
      next if index_exists?(TABLE, columns, name: index_name)

      add_index TABLE, columns, name: index_name
    end
  end

  # GIN index on `metadata`, only when that column is present. Uses `add_index`
  # instead of raw `execute` so that rolling the migration back works.
  def add_metadata_index
    return unless column_exists?(TABLE, :metadata)
    return if index_exists?(TABLE, :metadata, name: METADATA_INDEX_NAME)

    add_index TABLE, :metadata, using: :gin, name: METADATA_INDEX_NAME
  end
end
46
+
@@ -0,0 +1,242 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
require "time"

module BehaviorAnalytics
  module Analytics
    # Groups tracked events into time-based cohorts and measures how many of a
    # cohort's users are still active in later periods.
    #
    # The storage adapter must respond to
    # `events_for_context(context, since:, until:)` and return an enumerable of
    # hash-like events. This class reads `:user_id`, `:created_at` and the
    # configurable cohort key from each event.
    class Cohorts
      # Supported period => unit name understood by Active Support's `advance`.
      PERIOD_UNITS = { day: :days, week: :weeks, month: :months, year: :years }.freeze

      # strftime patterns for cohort ids. Weeks pair the ISO week number (%V)
      # with the ISO week-based year (%G); pairing %V with the calendar year
      # (%Y) mislabels the days around New Year.
      COHORT_ID_FORMATS = {
        day: "%Y-%m-%d",
        week: "%G-W%V",
        month: "%Y-%m",
        year: "%Y"
      }.freeze

      def initialize(storage_adapter)
        @storage_adapter = storage_adapter
      end

      # Buckets the context's events into cohorts.
      #
      # @param context [#validate!] tenant/user context; validated first
      # @param cohort_definition [Hash] `:key` (event field holding the date,
      #   default `:created_at`) and `:period` (`:day`, `:week`, `:month`,
      #   `:year`; default `:month`, anything unrecognised is treated as `:day`)
      # @param options [Hash] `:date_range`, or `:since` / `:until`
      # @return [Array<Hash>] one hash per cohort with `:cohort_id`,
      #   `:cohort_date`, `:user_count` and `:event_count`
      def create_cohort(context, cohort_definition, options = {})
        context.validate!

        since, until_date = resolve_date_bounds(options)
        events = @storage_adapter.events_for_context(context, since: since, until: until_date)

        cohort_key = cohort_definition[:key] || :created_at
        cohort_period = supported_period(cohort_definition[:period] || :month) || :day
        id_format = cohort_format(cohort_period)

        buckets = {}
        events.each do |event|
          cohort_date = extract_cohort_date(event, cohort_key, cohort_period)
          # An event with no usable date cannot be placed in any cohort.
          next unless cohort_date

          cohort_id = cohort_date.strftime(id_format)
          bucket = (buckets[cohort_id] ||= { cohort_date: cohort_date, users: Set.new, event_count: 0 })
          bucket[:users] << event[:user_id] if event[:user_id]
          bucket[:event_count] += 1
        end

        buckets.map do |cohort_id, bucket|
          {
            cohort_id: cohort_id,
            cohort_date: bucket[:cohort_date],
            user_count: bucket[:users].size,
            event_count: bucket[:event_count]
          }
        end
      end

      # Computes a retention curve for each cohort.
      #
      # @param context [#validate!]
      # @param cohorts [Array<Hash>] hashes carrying `:cohort_id` and/or
      #   `:cohort_date` (for example the output of #create_cohort)
      # @param options [Hash] `:period` (default `:day`) and `:periods`
      #   (number of periods after the first one, default 30)
      # @return [Hash] cohort id => `{ cohort_id:, cohort_date:, cohort_size:,
      #   retention_curve: [...] }`
      # @raise [ArgumentError] when `:period` is not day/week/month/year
      def retention_analysis(context, cohorts, options = {})
        context.validate!

        # The period is matched against a fixed list rather than dispatched with
        # `send`, so caller input can never invoke arbitrary Integer methods.
        period = supported_period(options[:period] || :day)
        raise ArgumentError, "unsupported period: #{options[:period].inspect}" unless period

        periods_to_analyze = options[:periods] || 30

        cohorts.each_with_object({}) do |cohort, retention_data|
          cohort_id = cohort[:cohort_id] || cohort[:cohort_date]
          cohort_date = cohort[:cohort_date] || parse_cohort_date(cohort_id)
          cohort_users = get_cohort_users(context, cohort_date, period)

          retention_curve = (0..periods_to_analyze).map do |period_offset|
            period_date = advance(cohort_date, period, period_offset)
            active_count = get_active_users(context, cohort_users, period_date, period).size
            rate = cohort_users.empty? ? 0.0 : (active_count.to_f / cohort_users.size) * 100

            {
              period: period_offset,
              date: period_date,
              active_users: active_count,
              retention_rate: rate.round(2)
            }
          end

          retention_data[cohort_id] = {
            cohort_id: cohort_id,
            cohort_date: cohort_date,
            cohort_size: cohort_users.size,
            retention_curve: retention_curve
          }
        end
      end

      # Lines up the retention curves of several cohorts period by period.
      #
      # @param cohort_ids [Array] cohort ids as produced by #create_cohort
      # @param options [Hash] forwarded to #retention_analysis
      # @return [Hash] period offset => `{ period:, cohorts: { id => { retention_rate:, active_users: } } }`
      def compare_cohorts(context, cohort_ids, options = {})
        context.validate!

        cohorts_data = cohort_ids.map do |cohort_id|
          retention_analysis(context, [{ cohort_id: cohort_id }], options)
        end

        max_periods = cohorts_data.map { |data| data.values.first[:retention_curve].size }.max || 0

        (0...max_periods).each_with_object({}) do |period, comparison|
          per_cohort = {}

          cohorts_data.each do |cohort_data|
            cohort_id = cohort_data.keys.first
            point = cohort_data[cohort_id][:retention_curve][period]
            next unless point

            per_cohort[cohort_id] = {
              retention_rate: point[:retention_rate],
              active_users: point[:active_users]
            }
          end

          comparison[period] = { period: period, cohorts: per_cohort }
        end
      end

      private

      # Resolves the query window: an explicit `:date_range` wins, and each open
      # end falls back to `:since` / `:until`. No Range is built from the two
      # options, so bounds of different types do not raise.
      def resolve_date_bounds(options)
        range = options[:date_range]
        [range&.begin || options[:since], range&.end || options[:until]]
      end

      # Returns the Symbol for a recognised period, or nil.
      def supported_period(period)
        candidate = period.respond_to?(:to_sym) ? period.to_sym : nil
        PERIOD_UNITS.key?(candidate) ? candidate : nil
      end

      # Date at the start of the period the event belongs to, or nil when
      # neither the cohort key nor `:created_at` holds a usable date.
      def extract_cohort_date(event, key, period)
        day = coerce_to_date(event[key.to_sym] || event[key.to_s]) || coerce_to_date(event[:created_at])
        day && normalize_to_period(day, period)
      end

      # Converts a Time/Date-like object or a parseable String to a Date;
      # returns nil for anything else, including unparseable strings.
      def coerce_to_date(value)
        case value
        when nil then nil
        when String then Time.parse(value).to_date
        else value.respond_to?(:to_date) ? value.to_date : nil
        end
      rescue ArgumentError
        nil
      end

      # First day of the period containing `date`. Weeks start on Monday so
      # that they line up with the ISO week numbers used in cohort ids.
      def normalize_to_period(date, period)
        day = date.to_date
        case period
        when :week then day - ((day.wday - 1) % 7)
        when :month then Date.new(day.year, day.month, 1)
        when :year then Date.new(day.year, 1, 1)
        else day
        end
      end

      # strftime pattern for the period's cohort ids (daily for unknown periods).
      def cohort_format(period)
        COHORT_ID_FORMATS.fetch(period, COHORT_ID_FORMATS[:day])
      end

      # Turns a cohort id back into the cohort's start date. The week, month and
      # year ids emitted by #create_cohort are decoded explicitly; anything else
      # goes through Time.parse. Falls back to the current time (best effort)
      # when the id cannot be parsed.
      def parse_cohort_date(cohort_id)
        text = cohort_id.to_s

        if (week = text.match(/\A(\d{4})-W(\d{2})\z/))
          Date.commercial(week[1].to_i, week[2].to_i, 1)
        elsif (month = text.match(/\A(\d{4})-(\d{2})\z/))
          Date.new(month[1].to_i, month[2].to_i, 1)
        elsif (year = text.match(/\A(\d{4})\z/))
          Date.new(year[1].to_i, 1, 1)
        else
          Time.parse(text)
        end
      rescue StandardError
        Time.now
      end

      # Moves `moment` forward by `count` calendar periods. Objects that offer
      # Active Support's `advance` use it; plain Date/Time values fall back to
      # standard-library arithmetic (a Time keeps its clock time and UTC offset).
      def advance(moment, period, count)
        return moment.advance(PERIOD_UNITS.fetch(period) => count) if moment.respond_to?(:advance)

        case moment
        when Date
          shift_date(moment, period, count)
        when Time
          day = shift_date(moment.to_date, period, count)
          Time.new(day.year, day.month, day.day, moment.hour, moment.min,
                   moment.sec + moment.subsec, moment.utc_offset)
        else
          raise ArgumentError, "cannot advance #{moment.inspect}"
        end
      end

      # Calendar shift for a Date; `>>` clamps to the end of shorter months.
      def shift_date(day, period, count)
        case period
        when :week then day + (7 * count)
        when :month then day >> count
        when :year then day >> (12 * count)
        else day + count
        end
      end

      # Unique user ids with at least one event in the single period starting
      # at `since`. Unknown periods are treated as one day.
      def users_active_in(context, since, period)
        window = supported_period(period) || :day
        events = @storage_adapter.events_for_context(
          context,
          since: since,
          until: advance(since, window, 1)
        )

        events.map { |event| event[:user_id] }.compact.uniq
      end

      # Users with any event during the period that starts at `cohort_date`.
      # Note: this does not check that the event was the user's first one.
      def get_cohort_users(context, cohort_date, period)
        users_active_in(context, cohort_date, period)
      end

      # Members of `cohort_users` that were active in the period starting at
      # `period_date`, in cohort order.
      def get_active_users(context, cohort_users, period_date, period)
        cohort_users & users_active_in(context, period_date, period)
      end
    end
  end
end
242
+
@@ -10,6 +10,9 @@ module BehaviorAnalytics
10
10
  class Engine
11
11
  def initialize(storage_adapter)
12
12
  @storage_adapter = storage_adapter
13
+ @funnels = nil
14
+ @cohorts = nil
15
+ @retention = nil
13
16
  end
14
17
 
15
18
  def event_count(context, options = {})
@@ -124,6 +127,18 @@ module BehaviorAnalytics
124
127
  stats.sort_by { |_feature, count| -count }.first(limit).to_h
125
128
  end
126
129
 
130
# Funnel analyzer bound to this engine's storage adapter; built on first call
# and reused afterwards.
def funnels
  return @funnels if @funnels

  @funnels = Funnels.new(@storage_adapter)
end
133
+
134
# Cohort analyzer bound to this engine's storage adapter; built on first call
# and reused afterwards.
def cohorts
  return @cohorts if @cohorts

  @cohorts = Cohorts.new(@storage_adapter)
end
137
+
138
# Retention analyzer bound to this engine's storage adapter; built on first
# call and reused afterwards.
def retention
  return @retention if @retention

  @retention = Retention.new(@storage_adapter)
end
141
+
127
142
  private
128
143
 
129
144
  def normalize_context(context)
@@ -0,0 +1,176 @@
1
+ # frozen_string_literal: true
2
+
3
require "time"

module BehaviorAnalytics
  module Analytics
    # Funnel analysis over tracked events: how many users move through an
    # ordered list of steps, and how long a conversion between two events takes.
    #
    # The storage adapter must respond to
    # `events_for_context(context, since:, until:)` and return an enumerable of
    # hash-like events. This class reads `:user_id`, `:event_name` and
    # `:created_at` from each event.
    class Funnels
      def initialize(storage_adapter)
        @storage_adapter = storage_adapter
      end

      # Measures progression through `steps`.
      #
      # A user counts for a step only after completing every earlier step, in
      # chronological order, so user counts never grow from one step to the
      # next and conversion rates stay within 0..100.
      #
      # @param context [#validate!] tenant/user context; validated first
      # @param steps [Array] each step is an event name (String or Symbol), a
      #   Proc taking the event, or a Hash with `:name` and `:condition`
      #   (String, Symbol, Proc or attribute Hash; defaults to the name)
      # @param options [Hash] `:date_range`, or `:since` / `:until`
      # @return [Hash] `:steps` (per-step `:step`, `:step_index`, `:users`,
      #   `:events`, `:drop_off_rate`, `:conversion_rate`), `:total_users`,
      #   `:completed_users` and `:overall_conversion_rate`
      def analyze_funnel(context, steps, options = {})
        context.validate!

        since, until_date = resolve_date_bounds(options)
        all_events = @storage_adapter.events_for_context(context, since: since, until: until_date)

        conditions = steps.map { |step| step_condition(step) }
        users_reached = users_reached_per_step(group_events_by_user(all_events), conditions)

        funnel_results = steps.each_with_index.map do |step, index|
          {
            step: step.is_a?(Hash) ? step[:name] : step.to_s,
            step_index: index,
            users: users_reached[index],
            # Every matching event in the window, regardless of funnel order.
            events: all_events.count { |event| evaluate_condition(event, conditions[index]) }
          }
        end

        add_step_rates(funnel_results)

        {
          steps: funnel_results,
          # `dig` keeps an empty step list from raising on nil.
          total_users: funnel_results.dig(0, :users) || 0,
          completed_users: funnel_results.dig(-1, :users) || 0,
          overall_conversion_rate: calculate_overall_conversion(funnel_results)
        }
      end

      # Time between each user's first `start_event` and the first `end_event`
      # that comes strictly after it.
      #
      # @param start_event [String, Symbol, Hash, Proc] event matcher
      # @param end_event [String, Symbol, Hash, Proc] event matcher
      # @param options [Hash] `:date_range`, or `:since` / `:until`
      # @return [Hash] `:average_seconds`, `:median_seconds`, `:min_seconds`,
      #   `:max_seconds` and `:count`; empty when nobody converted
      def time_to_conversion(context, start_event, end_event, options = {})
        context.validate!

        since, until_date = resolve_date_bounds(options)
        all_events = @storage_adapter.events_for_context(context, since: since, until: until_date)

        conversion_times = []

        group_events_by_user(all_events).each_value do |events|
          ordered = chronological(events)

          start_index = ordered.index { |event| matches_event(event, start_event) }
          next unless start_index

          # Search after the start event so a single event cannot be both the
          # start and the end of a conversion.
          conversion = ordered.drop(start_index + 1).find { |event| matches_event(event, end_event) }
          next unless conversion

          conversion_times << (parse_time(conversion[:created_at]) - parse_time(ordered[start_index][:created_at]))
        end

        return {} if conversion_times.empty?

        {
          average_seconds: conversion_times.sum / conversion_times.size,
          median_seconds: median(conversion_times),
          min_seconds: conversion_times.min,
          max_seconds: conversion_times.max,
          count: conversion_times.size
        }
      end

      private

      # Resolves the query window: an explicit `:date_range` wins, and each open
      # end falls back to `:since` / `:until`. No Range is built from the two
      # options, so bounds of different types do not raise.
      def resolve_date_bounds(options)
        range = options[:date_range]
        [range&.begin || options[:since], range&.end || options[:until]]
      end

      # user_id => events, dropping events that carry no user id.
      def group_events_by_user(events)
        events.group_by { |event| event[:user_id] }.reject { |user_id, _| user_id.nil? }
      end

      # Matcher for a step: a Hash step uses its `:condition` (or its `:name`
      # when none is given); any other step is its own matcher.
      def step_condition(step)
        step.is_a?(Hash) ? (step[:condition] || step[:name]) : step
      end

      # For each step index, the number of users who completed that step after
      # completing all earlier ones.
      def users_reached_per_step(user_events, conditions)
        reached = Array.new(conditions.size, 0)

        user_events.each_value do |events|
          steps_completed(chronological(events), conditions).times { |index| reached[index] += 1 }
        end

        reached
      end

      # Walks the events in time order and counts how many consecutive steps
      # they satisfy; each event can satisfy at most one step.
      def steps_completed(ordered_events, conditions)
        position = 0

        ordered_events.each do |event|
          break if position >= conditions.size

          position += 1 if evaluate_condition(event, conditions[position])
        end

        position
      end

      # Adds `:drop_off_rate` and `:conversion_rate` (relative to the previous
      # step) to every step hash in place.
      def add_step_rates(funnel_results)
        funnel_results.each_with_index do |step_result, index|
          if index.zero?
            step_result[:drop_off_rate] = 0.0
            step_result[:conversion_rate] = 100.0
            next
          end

          previous_users = funnel_results[index - 1][:users]
          current_users = step_result[:users]

          if previous_users > 0
            step_result[:drop_off_rate] = ((previous_users - current_users).to_f / previous_users) * 100
            step_result[:conversion_rate] = (current_users.to_f / previous_users) * 100
          else
            step_result[:drop_off_rate] = 100.0
            step_result[:conversion_rate] = 0.0
          end
        end
      end

      # Events sorted by `:created_at`; the original position breaks ties so
      # the order is deterministic.
      def chronological(events)
        events.each_with_index
              .sort_by { |event, position| [parse_time(event[:created_at]), position] }
              .map(&:first)
      end

      # True when the event satisfies the matcher: a Proc is called with the
      # event, a String/Symbol is compared with `:event_name`, and a Hash must
      # match every listed attribute (symbol or string key).
      def evaluate_condition(event, condition)
        case condition
        when Proc
          condition.call(event)
        when String, Symbol
          event[:event_name] == condition.to_s
        when Hash
          condition.all? { |key, value| event[key.to_sym] == value || event[key.to_s] == value }
        else
          false
        end
      end

      # Same matching rules as #evaluate_condition; kept as a separate name for
      # the start/end event specs.
      def matches_event(event, event_spec)
        evaluate_condition(event, event_spec)
      end

      # Coerces a timestamp to Time. Values that cannot be interpreted fall
      # back to the current time, which sorts undated events last.
      def parse_time(time_value)
        case time_value
        when Time
          time_value
        when String
          Time.parse(time_value)
        else
          time_value.respond_to?(:to_time) ? time_value.to_time : Time.now
        end
      end

      # Median of a non-empty numeric array (mean of the two middle values for
      # an even length).
      def median(array)
        sorted = array.sort
        len = sorted.length
        (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
      end

      # Percentage of first-step users who completed the last step.
      def calculate_overall_conversion(funnel_results)
        return 0.0 if funnel_results.empty?

        first_step = funnel_results.first
        last_step = funnel_results.last

        return 0.0 if first_step[:users] == 0

        (last_step[:users].to_f / first_step[:users]) * 100
      end
    end
  end
end
176
+