behavior_analytics 0.1.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +146 -5
  3. data/behavior_analytics.gemspec +3 -1
  4. data/db/migrate/002_enhance_behavior_events_v2.rb +46 -0
  5. data/lib/behavior_analytics/analytics/cohorts.rb +242 -0
  6. data/lib/behavior_analytics/analytics/engine.rb +15 -0
  7. data/lib/behavior_analytics/analytics/funnels.rb +176 -0
  8. data/lib/behavior_analytics/analytics/retention.rb +186 -0
  9. data/lib/behavior_analytics/context.rb +38 -2
  10. data/lib/behavior_analytics/debug/inspector.rb +82 -0
  11. data/lib/behavior_analytics/event.rb +7 -1
  12. data/lib/behavior_analytics/export/csv_exporter.rb +102 -0
  13. data/lib/behavior_analytics/export/json_exporter.rb +55 -0
  14. data/lib/behavior_analytics/hooks/callback.rb +50 -0
  15. data/lib/behavior_analytics/hooks/manager.rb +106 -0
  16. data/lib/behavior_analytics/hooks/webhook.rb +114 -0
  17. data/lib/behavior_analytics/integrations/rails/middleware.rb +99 -0
  18. data/lib/behavior_analytics/integrations/rails.rb +123 -2
  19. data/lib/behavior_analytics/jobs/active_event_job.rb +37 -0
  20. data/lib/behavior_analytics/jobs/delayed_event_job.rb +29 -0
  21. data/lib/behavior_analytics/jobs/sidekiq_event_job.rb +37 -0
  22. data/lib/behavior_analytics/observability/metrics.rb +112 -0
  23. data/lib/behavior_analytics/observability/tracer.rb +85 -0
  24. data/lib/behavior_analytics/processors/async_processor.rb +24 -0
  25. data/lib/behavior_analytics/processors/background_job_processor.rb +72 -0
  26. data/lib/behavior_analytics/query.rb +89 -4
  27. data/lib/behavior_analytics/replay/engine.rb +108 -0
  28. data/lib/behavior_analytics/replay/processor.rb +107 -0
  29. data/lib/behavior_analytics/reporting/generator.rb +125 -0
  30. data/lib/behavior_analytics/sampling/strategy.rb +54 -0
  31. data/lib/behavior_analytics/schema/definition.rb +71 -0
  32. data/lib/behavior_analytics/schema/validator.rb +113 -0
  33. data/lib/behavior_analytics/storage/active_record_adapter.rb +183 -10
  34. data/lib/behavior_analytics/storage/elasticsearch_adapter.rb +185 -0
  35. data/lib/behavior_analytics/storage/in_memory_adapter.rb +234 -5
  36. data/lib/behavior_analytics/storage/kafka_adapter.rb +127 -0
  37. data/lib/behavior_analytics/storage/redis_adapter.rb +211 -0
  38. data/lib/behavior_analytics/streaming/event_stream.rb +77 -0
  39. data/lib/behavior_analytics/throttling/limiter.rb +97 -0
  40. data/lib/behavior_analytics/tracker.rb +130 -4
  41. data/lib/behavior_analytics/version.rb +1 -1
  42. data/lib/behavior_analytics.rb +139 -2
  43. metadata +33 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3e4f126ff83e60165d7b93e152c0d3ebd20f354c0d25aad69aaa88503359ea8a
4
- data.tar.gz: 1222c310f8447a36c79566f5d5aeaea504c057eecdda81f4d2b569fa62c1bc99
3
+ metadata.gz: 7a76aa6cdb7e21d5ac45f72e98f1e9b7bb678169eae8c75d0fdf821166986fa0
4
+ data.tar.gz: a80ea11cdfe8b429e4f793736fe756b583ffe3ffc7d8118e551ac68229f10b38
5
5
  SHA512:
6
- metadata.gz: 3fe2f936597d8c9b5016e527aa5b3ed86fb3b82f2b7cf5ba41df66c56a5d557a05758069bbaf5051e4960e2f01ad9e41fb4c3251c1f7c7d1808691c90a3272ec
7
- data.tar.gz: 3e1c42b6e12f012333a44085395f070f7f66ea40851ed73d4f994dcda2acd7013cf0db22ad908461665cc9dd04977c1412e6d6bcdf7f8c334cc4889a22b848a6
6
+ metadata.gz: 84183ce32e7e7ba31bad6f19ddbaa83b3f761c8546eff06b43a5824bc981b09bce7598d9da8df329be0e3cfec72831607cae3fe1cd1fa5e45292379aa5b00ac2
7
+ data.tar.gz: bb85ca2d9bc6ff08264eb98d0732b3d58d43401a253dbfdbb8b6fb52e68e832f01caf72699b1e71bcf6631bb6aaa450b3d9ee02bb2e95132362193ba0b42d9d8
data/README.md CHANGED
@@ -100,19 +100,97 @@ end
100
100
 
101
101
  ## Usage
102
102
 
103
+ ### Supported Business Cases
104
+
105
+ The gem is flexible and supports different business scenarios:
106
+
107
+ #### 1. Multi-Tenant Systems
108
+ Track events with tenant isolation for SaaS applications:
109
+
110
+ ```ruby
111
+ context = BehaviorAnalytics::Context.new(
112
+ tenant_id: "org_123",
113
+ user_id: "user_456",
114
+ user_type: "premium"
115
+ )
116
+ ```
117
+
118
+ #### 2. Single-Tenant Web Apps
119
+ Track events for regular web applications that have no tenant concept:
120
+
121
+ ```ruby
122
+ # Option A: Set default tenant (recommended)
123
+ BehaviorAnalytics.configure do |config|
124
+ config.default_tenant_id = "global"
125
+ end
126
+
127
+ context = BehaviorAnalytics::Context.new(
128
+ user_id: current_user.id,
129
+ user_type: "admin"
130
+ )
131
+
132
+ # Option B: Track without tenant_id (uses session_id or user_id as identifier)
133
+ context = BehaviorAnalytics::Context.new(
134
+ user_id: current_user.id
135
+ )
136
+ ```
137
+
138
+ #### 3. API-Only Tracking
139
+ Track API calls without user context (for monitoring, analytics, etc.):
140
+
141
+ ```ruby
142
+ # Track API calls directly without user context
143
+ tracker.track_api_call(
144
+ context: BehaviorAnalytics::Context.new, # Empty context - uses session_id from request
145
+ method: "POST",
146
+ path: "/api/endpoint",
147
+ status_code: 200,
148
+ duration_ms: 150
149
+ )
150
+
151
+ # Or with minimal context
152
+ context = BehaviorAnalytics::Context.new(
153
+ filters: { environment: "production", service: "api" }
154
+ )
155
+ ```
156
+
157
+ #### 4. Anonymous/Public Tracking
158
+ Track events for anonymous users or public pages:
159
+
160
+ ```ruby
161
+ context = BehaviorAnalytics::Context.new(
162
+ filters: { page: "homepage", referrer: request.referer }
163
+ )
164
+
165
+ tracker.track(
166
+ context: context,
167
+ event_name: "page_view",
168
+ metadata: { path: request.path }
169
+ )
170
+ ```
171
+
103
172
  ### Basic Tracking
104
173
 
105
174
  ```ruby
106
175
  # Create a tracker
107
176
  tracker = BehaviorAnalytics.create_tracker
108
177
 
109
- # Create a context
178
+ # Multi-tenant example
110
179
  context = BehaviorAnalytics::Context.new(
111
180
  tenant_id: "org_123",
112
181
  user_id: "user_456",
113
182
  user_type: "trial"
114
183
  )
115
184
 
185
+ # Single-tenant example (with default tenant)
186
+ context = BehaviorAnalytics::Context.new(
187
+ user_id: "user_456",
188
+ user_type: "trial"
189
+ )
190
+
191
+ # API-only example (no user context)
192
+ context = BehaviorAnalytics::Context.new
193
+
116
194
  # Track a custom event
117
195
  tracker.track(
118
196
  context: context,
@@ -215,6 +293,7 @@ tracker = BehaviorAnalytics.create_tracker(
215
293
  - `flush_interval`: Seconds between automatic flushes (default: 300)
216
294
  - `context_resolver`: Lambda/proc to resolve context from requests
217
295
  - `scoring_weights`: Hash of weights for engagement scoring
296
+ - `default_tenant_id`: Default tenant ID for single-tenant systems (default: "default")
218
297
 
219
298
  ## Event Types
220
299
 
@@ -224,12 +303,74 @@ tracker = BehaviorAnalytics.create_tracker(
224
303
 
225
304
  ## Context
226
305
 
227
- The `Context` class encapsulates tracking context:
306
+ The `Context` class encapsulates the tracking context and is flexible enough to support different business cases:
228
307
 
229
- - `tenant_id` (required) - Multi-tenant identifier
230
- - `user_id` (optional) - User identifier
308
+ - `tenant_id` (optional) - Multi-tenant identifier. Only required for multi-tenant systems
309
+ - `user_id` (optional) - User identifier. Useful for user-based analytics
231
310
  - `user_type` (optional) - User type (e.g., "trial", "premium", "admin")
232
- - `filters` (optional) - Hash of custom filter criteria
311
+ - `filters` (optional) - Hash of custom filter criteria for additional context
312
+
313
+ ### Context Validation
314
+
315
+ A context is valid if it has **at least one identifier**:
316
+ - `tenant_id` (for multi-tenant systems)
317
+ - `user_id` (for user-based tracking)
318
+ - `filters` with identifying information (for anonymous/public tracking)
319
+ - `session_id` (automatically added for API calls)
320
+
321
+ This allows the gem to support:
322
+ - ✅ Multi-tenant SaaS applications
323
+ - ✅ Single-tenant web applications
324
+ - ✅ API monitoring without user context
325
+ - ✅ Anonymous/public page tracking
326
+
327
+ ### Examples by Use Case
328
+
329
+ **Multi-Tenant SaaS:**
330
+ ```ruby
331
+ context = BehaviorAnalytics::Context.new(
332
+ tenant_id: "org_123", # Required
333
+ user_id: "user_456",
334
+ user_type: "premium"
335
+ )
336
+ ```
337
+
338
+ **Single-Tenant Web App:**
339
+ ```ruby
340
+ # Set default tenant (optional but recommended)
341
+ BehaviorAnalytics.configure do |config|
342
+ config.default_tenant_id = "global"
343
+ end
344
+
345
+ # Track with just user_id
346
+ context = BehaviorAnalytics::Context.new(
347
+ user_id: current_user.id,
348
+ user_type: current_user.role
349
+ )
350
+ ```
351
+
352
+ **API-Only Tracking:**
353
+ ```ruby
354
+ # Track API calls without user context
355
+ context = BehaviorAnalytics::Context.new # Empty context - session_id will be used
356
+ tracker.track_api_call(
357
+ context: context,
358
+ method: "POST",
359
+ path: "/api/endpoint",
360
+ status_code: 200
361
+ )
362
+ ```
363
+
364
+ **Anonymous/Public Tracking:**
365
+ ```ruby
366
+ context = BehaviorAnalytics::Context.new(
367
+ filters: {
368
+ page_type: "public",
369
+ referrer: request.referer
370
+ }
371
+ )
372
+ tracker.track(context: context, event_name: "page_view")
373
+ ```
233
374
 
234
375
  ## Development
235
376
 
@@ -13,6 +13,7 @@ Gem::Specification.new do |spec|
13
13
  "computing analytics (engagement scores, time-based trends, feature usage), " \
14
14
  "and supporting API calls, feature usage, and custom events."
15
15
  spec.homepage = "https://github.com/nerdawey/behavior_analytics"
16
+ spec.license = "MIT"
16
17
  spec.required_ruby_version = ">= 3.0.0"
17
18
 
18
19
  spec.metadata["homepage_uri"] = spec.homepage
@@ -24,7 +25,8 @@ Gem::Specification.new do |spec|
24
25
  spec.files = Dir.chdir(__dir__) do
25
26
  `git ls-files -z`.split("\x0").reject do |f|
26
27
  (File.expand_path(f) == __FILE__) ||
27
- f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor Gemfile])
28
+ f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor Gemfile]) ||
29
+ f.end_with?('.gem')
28
30
  end
29
31
  end
30
32
  spec.bindir = "exe"
@@ -0,0 +1,46 @@
# frozen_string_literal: true

# Adds request/correlation columns to `behavior_events` together with the
# single-column, composite and GIN indexes that support them.
#
# Every statement is guarded by an existence check so the migration can be
# run against a table that already has some of the columns or indexes.
#
# NOTE(review): `array: true`, `using: :gin` and the GIN index on `metadata`
# are PostgreSQL features — confirm that other adapters are out of scope.
# NOTE(review): because the guards are evaluated again while rolling back,
# the guarded `add_column`/`add_index` calls are presumably skipped (and so
# not reverted) when the columns exist — verify before relying on rollback.
# NOTE(review): a column named `method` may shadow `Object#method` on the
# model that maps this table — confirm this is intended.
class EnhanceBehaviorEventsV2 < ActiveRecord::Migration[7.0]
  def change
    # Add new columns for faster queries
    add_column :behavior_events, :path, :string unless column_exists?(:behavior_events, :path)
    add_column :behavior_events, :method, :string unless column_exists?(:behavior_events, :method)
    add_column :behavior_events, :status_code, :integer unless column_exists?(:behavior_events, :status_code)
    add_column :behavior_events, :correlation_id, :string unless column_exists?(:behavior_events, :correlation_id)
    add_column :behavior_events, :parent_event_id, :string unless column_exists?(:behavior_events, :parent_event_id)
    add_column :behavior_events, :tags, :string, array: true, default: [] unless column_exists?(:behavior_events, :tags)

    # Add indexes for new columns
    add_index :behavior_events, :path unless index_exists?(:behavior_events, :path)
    add_index :behavior_events, :method unless index_exists?(:behavior_events, :method)
    add_index :behavior_events, :status_code unless index_exists?(:behavior_events, :status_code)
    add_index :behavior_events, :correlation_id unless index_exists?(:behavior_events, :correlation_id)
    add_index :behavior_events, :parent_event_id unless index_exists?(:behavior_events, :parent_event_id)
    add_index :behavior_events, :tags, using: :gin unless index_exists?(:behavior_events, :tags)

    # Add composite indexes for common query patterns
    add_index :behavior_events, [:tenant_id, :path, :created_at],
              name: "index_behavior_events_on_tenant_path_created" unless
      index_exists?(:behavior_events, [:tenant_id, :path, :created_at],
                    name: "index_behavior_events_on_tenant_path_created")

    add_index :behavior_events, [:tenant_id, :user_type, :created_at],
              name: "index_behavior_events_on_tenant_user_type_created" unless
      index_exists?(:behavior_events, [:tenant_id, :user_type, :created_at],
                    name: "index_behavior_events_on_tenant_user_type_created")

    add_index :behavior_events, [:tenant_id, :event_type, :created_at],
              name: "index_behavior_events_on_tenant_event_type_created" unless
      index_exists?(:behavior_events, [:tenant_id, :event_type, :created_at],
                    name: "index_behavior_events_on_tenant_event_type_created")

    # Add GIN index on metadata JSONB for faster queries.
    # Raw SQL cannot be inverted automatically, so both directions are spelled
    # out; a bare `execute` inside `change` would make rollback raise
    # ActiveRecord::IrreversibleMigration.
    if column_exists?(:behavior_events, :metadata)
      reversible do |dir|
        dir.up do
          execute <<~SQL
            CREATE INDEX IF NOT EXISTS index_behavior_events_on_metadata_gin
            ON behavior_events USING gin (metadata);
          SQL
        end

        dir.down do
          execute <<~SQL
            DROP INDEX IF EXISTS index_behavior_events_on_metadata_gin;
          SQL
        end
      end
    end
  end
end
@@ -0,0 +1,242 @@
# frozen_string_literal: true

require "date"
require "set"
require "time"

module BehaviorAnalytics
  module Analytics
    # Cohort construction and retention analysis on top of a storage adapter.
    #
    # The adapter only needs to respond to
    # `events_for_context(context, since:, until:)` and return an enumerable of
    # event hashes; this class reads `:user_id`, `:created_at` and the
    # configured cohort key from each event.
    class Cohorts
      # Periods that have a dedicated bucket, label and window size.
      # Any other period is bucketed and windowed as a single day.
      PERIODS = %i[day week month year].freeze

      # strftime patterns used to label cohorts. The weekly label pairs the ISO
      # week number (%V) with the ISO week-based year (%G): with %Y the week
      # starting 2024-12-30 and the week starting 2024-01-01 would both be
      # labelled "2024-W01" and be merged into one cohort.
      COHORT_FORMATS = {
        day: "%Y-%m-%d",
        week: "%G-W%V",
        month: "%Y-%m",
        year: "%Y"
      }.freeze

      private_constant :PERIODS, :COHORT_FORMATS

      def initialize(storage_adapter)
        @storage_adapter = storage_adapter
      end

      # Groups the context's events into cohorts.
      #
      # cohort_definition - Hash with optional :key (event field holding the
      #                     date, default :created_at) and :period (:day, :week,
      #                     :month or :year, default :month).
      # options           - :date_range (Range) or :since / :until bounds that
      #                     are forwarded to the storage adapter.
      #
      # Returns an Array of hashes with :cohort_id, :cohort_date (a Date),
      # :user_count (distinct non-nil user ids) and :event_count.
      def create_cohort(context, cohort_definition, options = {})
        context.validate!

        since, until_date = resolve_window(options)
        events = @storage_adapter.events_for_context(context, since: since, until: until_date)

        cohort_key = cohort_definition[:key] || :created_at
        cohort_period = cohort_definition[:period] || :month
        label_format = cohort_format(cohort_period)

        buckets = {}
        events.each do |event|
          cohort_date = extract_cohort_date(event, cohort_key, cohort_period)
          cohort_id = cohort_date.strftime(label_format)

          bucket = (buckets[cohort_id] ||= {
            cohort_id: cohort_id,
            cohort_date: cohort_date,
            users: Set.new,
            event_count: 0
          })
          bucket[:users] << event[:user_id] if event[:user_id]
          bucket[:event_count] += 1
        end

        buckets.values.map do |bucket|
          {
            cohort_id: bucket[:cohort_id],
            cohort_date: bucket[:cohort_date],
            user_count: bucket[:users].size,
            event_count: bucket[:event_count]
          }
        end
      end

      # Computes a retention curve for each cohort.
      #
      # cohorts - Array of hashes carrying :cohort_date and/or :cohort_id; when
      #           only the id is given the date is parsed from it.
      # options - :period (default :day) and :periods (default 30). Offsets
      #           0..periods are analysed, i.e. periods + 1 points per cohort.
      #
      # Returns a Hash keyed by cohort id. Note that the adapter is queried
      # once per cohort plus once per analysed period.
      #
      # Raises ArgumentError when the period cannot be turned into a date
      # offset.
      def retention_analysis(context, cohorts, options = {})
        context.validate!

        period = options[:period] || :day
        periods_to_analyze = options[:periods] || 30

        cohorts.each_with_object({}) do |cohort, retention_data|
          cohort_id = cohort[:cohort_id] || cohort[:cohort_date]
          cohort_date = cohort[:cohort_date] || parse_cohort_date(cohort_id)
          cohort_users = get_cohort_users(context, cohort_date, period)

          retention_curve = (0..periods_to_analyze).map do |period_offset|
            period_date = advance(cohort_date, period, period_offset)
            active_users = get_active_users(context, cohort_users, period_date, period)
            retention_rate = cohort_users.empty? ? 0.0 : (active_users.size.to_f / cohort_users.size) * 100

            {
              period: period_offset,
              date: period_date,
              active_users: active_users.size,
              retention_rate: retention_rate.round(2)
            }
          end

          retention_data[cohort_id] = {
            cohort_id: cohort_id,
            cohort_date: cohort_date,
            cohort_size: cohort_users.size,
            retention_curve: retention_curve
          }
        end
      end

      # Lines up the retention curves of several cohorts period by period.
      #
      # Returns a Hash keyed by period offset; each value holds :period and a
      # :cohorts hash (cohort id => :retention_rate / :active_users).
      def compare_cohorts(context, cohort_ids, options = {})
        context.validate!

        curves = cohort_ids.each_with_object({}) do |cohort_id, collected|
          collected.merge!(retention_analysis(context, [{ cohort_id: cohort_id }], options))
        end

        max_periods = curves.values.map { |data| data[:retention_curve].size }.max || 0

        (0...max_periods).each_with_object({}) do |period, comparison|
          per_cohort = curves.each_with_object({}) do |(cohort_id, data), collected|
            point = data[:retention_curve][period]
            next unless point

            collected[cohort_id] = {
              retention_rate: point[:retention_rate],
              active_users: point[:active_users]
            }
          end

          comparison[period] = { period: period, cohorts: per_cohort }
        end
      end

      private

      # Resolves the [since, until] bounds from :date_range, falling back to
      # the individual :since / :until options for any missing end. No Range is
      # built from the loose options, so mixed types cannot raise here.
      def resolve_window(options)
        range = options[:date_range]
        [range&.begin || options[:since], range&.end || options[:until]]
      end

      # Reads the cohort date from the event (symbol or string key) and
      # truncates it to the start of its period. Values that are neither a
      # Time, Date nor String fall back to the event's :created_at.
      def extract_cohort_date(event, key, period)
        date_value = event[key.to_sym] || event[key.to_s]
        date = case date_value
               when Time, Date
                 date_value
               when String
                 Time.parse(date_value)
               else
                 Time.parse(event[:created_at].to_s)
               end

        normalize_to_period(date, period)
      end

      # Truncates to the first day of the period using only the standard
      # library. Weeks start on Monday so the bucket agrees with the ISO week
      # label produced by cohort_format.
      def normalize_to_period(date, period)
        day = date.to_date
        case period
        when :week
          day - (day.cwday - 1)
        when :month
          Date.new(day.year, day.month, 1)
        when :year
          Date.new(day.year, 1, 1)
        else
          day
        end
      end

      def cohort_format(period)
        COHORT_FORMATS.fetch(period, COHORT_FORMATS[:day])
      end

      # Parses a cohort id back into a Time. The week, month and year labels
      # produced by cohort_format are handled explicitly; anything else is
      # handed to Time.parse. Unparseable ids keep the historical best-effort
      # fallback of the current time.
      def parse_cohort_date(cohort_id)
        text = cohort_id.to_s
        case text
        when /\A(\d{4})-W(\d{2})\z/
          Date.commercial(Regexp.last_match(1).to_i, Regexp.last_match(2).to_i, 1).to_time
        when /\A(\d{4})-(\d{2})\z/
          Date.new(Regexp.last_match(1).to_i, Regexp.last_match(2).to_i, 1).to_time
        when /\A(\d{4})\z/
          Date.new(Regexp.last_match(1).to_i, 1, 1).to_time
        else
          Time.parse(text)
        end
      rescue ArgumentError
        Time.now
      end

      # Users with at least one event inside the cohort's period window.
      # (This is activity in the window, not "first ever event".)
      def get_cohort_users(context, cohort_date, period)
        users_in_window(context, cohort_date, period)
      end

      # Cohort members that have at least one event in the period starting at
      # period_date.
      def get_active_users(context, cohort_users, period_date, period)
        cohort_users & users_in_window(context, period_date, period)
      end

      # Distinct non-nil user ids with events in [start, start + one period).
      def users_in_window(context, start, period)
        unit = PERIODS.include?(period) ? period : :day

        events = @storage_adapter.events_for_context(
          context,
          since: start,
          until: advance(start, unit, 1)
        )

        events.map { |event| event[:user_id] }.compact.uniq
      end

      # Moves a Date or Time forward by `count` periods.
      #
      # When Integer publicly responds to the period name (ActiveSupport
      # durations such as `1.day`) that is used, which keeps the behaviour the
      # class has inside Rails. `public_send` behind `respond_to?` means a
      # caller-supplied period can never reach private Kernel methods such as
      # `exit`. Without such a method the standard-library fallback applies.
      def advance(date, period, count)
        return date + count.public_send(period) if count.respond_to?(period)

        advance_with_stdlib(date, period, count)
      end

      # Standard-library date arithmetic for the four supported periods.
      def advance_with_stdlib(date, period, count)
        case period
        when :day, :week
          days = period == :week ? count * 7 : count
          date.is_a?(Date) ? date + days : date + (days * 86_400)
        when :month, :year
          months = period == :year ? count * 12 : count
          return date >> months if date.is_a?(Date)

          shifted = date.to_date >> months
          Time.new(shifted.year, shifted.month, shifted.day, date.hour, date.min, date.sec, date.utc_offset)
        else
          raise ArgumentError, "unsupported period: #{period.inspect}"
        end
      end
    end
  end
end
@@ -10,6 +10,9 @@ module BehaviorAnalytics
10
10
  class Engine
11
11
  def initialize(storage_adapter)
12
12
  @storage_adapter = storage_adapter
13
+ @funnels = nil
14
+ @cohorts = nil
15
+ @retention = nil
13
16
  end
14
17
 
15
18
  def event_count(context, options = {})
@@ -124,6 +127,18 @@ module BehaviorAnalytics
124
127
  stats.sort_by { |_feature, count| -count }.first(limit).to_h
125
128
  end
126
129
 
130
+ def funnels
131
+ @funnels ||= Funnels.new(@storage_adapter)
132
+ end
133
+
134
+ def cohorts
135
+ @cohorts ||= Cohorts.new(@storage_adapter)
136
+ end
137
+
138
+ def retention
139
+ @retention ||= Retention.new(@storage_adapter)
140
+ end
141
+
127
142
  private
128
143
 
129
144
  def normalize_context(context)
@@ -0,0 +1,176 @@
# frozen_string_literal: true

require "time"

module BehaviorAnalytics
  module Analytics
    # Funnel analysis on top of a storage adapter.
    #
    # The adapter only needs to respond to
    # `events_for_context(context, since:, until:)` and return an enumerable of
    # event hashes; this class reads `:user_id`, `:event_name` and
    # `:created_at` from each event. Events without a user id are ignored for
    # per-user figures.
    class Funnels
      def initialize(storage_adapter)
        @storage_adapter = storage_adapter
      end

      # Counts users and events per funnel step.
      #
      # steps   - Array whose items are an event name (String or Symbol), a
      #           Proc taking the event, or a Hash with :name and :condition
      #           (event name, Proc, or Hash of field => value). A Hash step
      #           without :condition matches on its :name.
      # options - :date_range (Range) or :since / :until bounds that are
      #           forwarded to the storage adapter.
      #
      # Each step is evaluated independently of the others (a user is counted
      # at a step when any of their events matches it, regardless of order), so
      # a later step can report more users than an earlier one.
      #
      # Returns a Hash with :steps, :total_users, :completed_users and
      # :overall_conversion_rate; an empty step list yields zeroed totals.
      def analyze_funnel(context, steps, options = {})
        context.validate!

        if steps.empty?
          return { steps: [], total_users: 0, completed_users: 0, overall_conversion_rate: 0.0 }
        end

        since, until_date = resolve_window(options)
        all_events = @storage_adapter.events_for_context(context, since: since, until: until_date)
        user_events = group_events_by_user(all_events)

        funnel_results = steps.each_with_index.map do |step, index|
          step_name, step_condition = normalize_step(step)

          {
            step: step_name,
            step_index: index,
            users: user_events.count { |_user_id, events| events.any? { |e| evaluate_condition(e, step_condition) } },
            events: all_events.count { |e| evaluate_condition(e, step_condition) }
          }
        end

        funnel_results.each_with_index do |step_result, index|
          previous_users = index.zero? ? nil : funnel_results[index - 1][:users]
          step_result[:drop_off_rate], step_result[:conversion_rate] =
            step_rates(previous_users, step_result[:users])
        end

        {
          steps: funnel_results,
          total_users: funnel_results.first[:users],
          completed_users: funnel_results.last[:users],
          overall_conversion_rate: calculate_overall_conversion(funnel_results)
        }
      end

      # Measures, per user, the time between their first event matching
      # start_event and the first matching end_event at or after it.
      #
      # start_event / end_event accept an event name, a Hash of
      # field => value, or a Proc.
      #
      # Returns {} when no user converted, otherwise a Hash with
      # :average_seconds, :median_seconds, :min_seconds, :max_seconds, :count.
      def time_to_conversion(context, start_event, end_event, options = {})
        context.validate!

        since, until_date = resolve_window(options)
        all_events = @storage_adapter.events_for_context(context, since: since, until: until_date)

        conversion_times = group_events_by_user(all_events).filter_map do |_user_id, events|
          # Parse every timestamp once so sorting and the subtraction below use
          # the same value.
          timeline = events.map { |event| [parse_time(event[:created_at]), event] }.sort_by(&:first)

          start_index = timeline.index { |_at, event| evaluate_condition(event, start_event) }
          next unless start_index

          end_at, = timeline[start_index..].find { |_at, event| evaluate_condition(event, end_event) }
          next unless end_at

          end_at - timeline[start_index].first
        end

        return {} if conversion_times.empty?

        {
          average_seconds: conversion_times.sum / conversion_times.size,
          median_seconds: median(conversion_times),
          min_seconds: conversion_times.min,
          max_seconds: conversion_times.max,
          count: conversion_times.size
        }
      end

      private

      # Resolves the [since, until] bounds from :date_range, falling back to
      # the individual :since / :until options for any missing end. No Range is
      # built from the loose options, so mixed types cannot raise here.
      def resolve_window(options)
        range = options[:date_range]
        [range&.begin || options[:since], range&.end || options[:until]]
      end

      # Returns [display_name, condition] for one funnel step.
      def normalize_step(step)
        return [step[:name], step[:condition] || step[:name]] if step.is_a?(Hash)

        condition = case step
                    when String, Symbol, Proc
                      step
                    else
                      ->(event) { event[:event_name] == step }
                    end

        [step.to_s, condition]
      end

      # [drop_off_rate, conversion_rate] of a step relative to the previous
      # one; previous_users is nil for the first step.
      def step_rates(previous_users, current_users)
        return [0.0, 100.0] if previous_users.nil?
        return [100.0, 0.0] unless previous_users.positive?

        [
          ((previous_users - current_users).to_f / previous_users) * 100,
          (current_users.to_f / previous_users) * 100
        ]
      end

      def group_events_by_user(events)
        events.group_by { |e| e[:user_id] }.reject { |user_id, _| user_id.nil? }
      end

      # True when the event satisfies the condition. Names are compared as
      # strings so a Symbol condition matches a String event name (and the
      # other way round).
      def evaluate_condition(event, condition)
        case condition
        when Proc
          condition.call(event)
        when String, Symbol
          name = event[:event_name]
          !name.nil? && name.to_s == condition.to_s
        when Hash
          condition.all? { |key, value| event[key.to_sym] == value || event[key.to_s] == value }
        else
          false
        end
      end

      # Coerces a stored timestamp to Time. Values that cannot be interpreted
      # keep the historical fallback of the current time.
      def parse_time(time_value)
        case time_value
        when Time
          time_value
        when String
          Time.parse(time_value)
        else
          time_value.respond_to?(:to_time) ? time_value.to_time : Time.now
        end
      end

      def median(array)
        sorted = array.sort
        len = sorted.length
        (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
      end

      def calculate_overall_conversion(funnel_results)
        return 0.0 if funnel_results.empty?

        first_users = funnel_results.first[:users]
        return 0.0 if first_users.zero?

        (funnel_results.last[:users].to_f / first_users) * 100
      end
    end
  end
end