behavior_analytics 0.1.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +146 -5
- data/behavior_analytics.gemspec +3 -1
- data/db/migrate/002_enhance_behavior_events_v2.rb +46 -0
- data/lib/behavior_analytics/analytics/cohorts.rb +242 -0
- data/lib/behavior_analytics/analytics/engine.rb +15 -0
- data/lib/behavior_analytics/analytics/funnels.rb +176 -0
- data/lib/behavior_analytics/analytics/retention.rb +186 -0
- data/lib/behavior_analytics/context.rb +38 -2
- data/lib/behavior_analytics/debug/inspector.rb +82 -0
- data/lib/behavior_analytics/event.rb +7 -1
- data/lib/behavior_analytics/export/csv_exporter.rb +102 -0
- data/lib/behavior_analytics/export/json_exporter.rb +55 -0
- data/lib/behavior_analytics/hooks/callback.rb +50 -0
- data/lib/behavior_analytics/hooks/manager.rb +106 -0
- data/lib/behavior_analytics/hooks/webhook.rb +114 -0
- data/lib/behavior_analytics/integrations/rails/middleware.rb +99 -0
- data/lib/behavior_analytics/integrations/rails.rb +123 -2
- data/lib/behavior_analytics/jobs/active_event_job.rb +37 -0
- data/lib/behavior_analytics/jobs/delayed_event_job.rb +29 -0
- data/lib/behavior_analytics/jobs/sidekiq_event_job.rb +37 -0
- data/lib/behavior_analytics/observability/metrics.rb +112 -0
- data/lib/behavior_analytics/observability/tracer.rb +85 -0
- data/lib/behavior_analytics/processors/async_processor.rb +24 -0
- data/lib/behavior_analytics/processors/background_job_processor.rb +72 -0
- data/lib/behavior_analytics/query.rb +89 -4
- data/lib/behavior_analytics/replay/engine.rb +108 -0
- data/lib/behavior_analytics/replay/processor.rb +107 -0
- data/lib/behavior_analytics/reporting/generator.rb +125 -0
- data/lib/behavior_analytics/sampling/strategy.rb +54 -0
- data/lib/behavior_analytics/schema/definition.rb +71 -0
- data/lib/behavior_analytics/schema/validator.rb +113 -0
- data/lib/behavior_analytics/storage/active_record_adapter.rb +183 -10
- data/lib/behavior_analytics/storage/elasticsearch_adapter.rb +185 -0
- data/lib/behavior_analytics/storage/in_memory_adapter.rb +234 -5
- data/lib/behavior_analytics/storage/kafka_adapter.rb +127 -0
- data/lib/behavior_analytics/storage/redis_adapter.rb +211 -0
- data/lib/behavior_analytics/streaming/event_stream.rb +77 -0
- data/lib/behavior_analytics/throttling/limiter.rb +97 -0
- data/lib/behavior_analytics/tracker.rb +130 -4
- data/lib/behavior_analytics/version.rb +1 -1
- data/lib/behavior_analytics.rb +139 -2
- metadata +33 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7a76aa6cdb7e21d5ac45f72e98f1e9b7bb678169eae8c75d0fdf821166986fa0
|
|
4
|
+
data.tar.gz: a80ea11cdfe8b429e4f793736fe756b583ffe3ffc7d8118e551ac68229f10b38
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 84183ce32e7e7ba31bad6f19ddbaa83b3f761c8546eff06b43a5824bc981b09bce7598d9da8df329be0e3cfec72831607cae3fe1cd1fa5e45292379aa5b00ac2
|
|
7
|
+
data.tar.gz: bb85ca2d9bc6ff08264eb98d0732b3d58d43401a253dbfdbb8b6fb52e68e832f01caf72699b1e71bcf6631bb6aaa450b3d9ee02bb2e95132362193ba0b42d9d8
|
data/README.md
CHANGED
|
@@ -100,19 +100,97 @@ end
|
|
|
100
100
|
|
|
101
101
|
## Usage
|
|
102
102
|
|
|
103
|
+
### Supported Business Cases
|
|
104
|
+
|
|
105
|
+
The gem is flexible and supports different business scenarios:
|
|
106
|
+
|
|
107
|
+
#### 1. Multi-Tenant Systems
|
|
108
|
+
Track events with tenant isolation for SaaS applications:
|
|
109
|
+
|
|
110
|
+
```ruby
|
|
111
|
+
context = BehaviorAnalytics::Context.new(
|
|
112
|
+
tenant_id: "org_123",
|
|
113
|
+
user_id: "user_456",
|
|
114
|
+
user_type: "premium"
|
|
115
|
+
)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
#### 2. Single-Tenant Web Apps
|
|
119
|
+
Track events for regular web applications that have no concept of tenants:
|
|
120
|
+
|
|
121
|
+
```ruby
|
|
122
|
+
# Option A: Set default tenant (recommended)
|
|
123
|
+
BehaviorAnalytics.configure do |config|
|
|
124
|
+
config.default_tenant_id = "global"
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
context = BehaviorAnalytics::Context.new(
|
|
128
|
+
user_id: current_user.id,
|
|
129
|
+
user_type: "admin"
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
# Option B: Track without tenant_id (uses session_id or user_id as identifier)
|
|
133
|
+
context = BehaviorAnalytics::Context.new(
|
|
134
|
+
user_id: current_user.id
|
|
135
|
+
)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
#### 3. API-Only Tracking
|
|
139
|
+
Track API calls without user context (for monitoring, analytics, etc.):
|
|
140
|
+
|
|
141
|
+
```ruby
|
|
142
|
+
# Track API calls directly without user context
|
|
143
|
+
tracker.track_api_call(
|
|
144
|
+
context: BehaviorAnalytics::Context.new, # Empty context - uses session_id from request
|
|
145
|
+
method: "POST",
|
|
146
|
+
path: "/api/endpoint",
|
|
147
|
+
status_code: 200,
|
|
148
|
+
duration_ms: 150
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
# Or with minimal context
|
|
152
|
+
context = BehaviorAnalytics::Context.new(
|
|
153
|
+
filters: { environment: "production", service: "api" }
|
|
154
|
+
)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
#### 4. Anonymous/Public Tracking
|
|
158
|
+
Track events for anonymous users or public pages:
|
|
159
|
+
|
|
160
|
+
```ruby
|
|
161
|
+
context = BehaviorAnalytics::Context.new(
|
|
162
|
+
filters: { page: "homepage", referrer: request.referer }
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
tracker.track(
|
|
166
|
+
context: context,
|
|
167
|
+
event_name: "page_view",
|
|
168
|
+
metadata: { path: request.path }
|
|
169
|
+
)
|
|
170
|
+
```
|
|
171
|
+
|
|
103
172
|
### Basic Tracking
|
|
104
173
|
|
|
105
174
|
```ruby
|
|
106
175
|
# Create a tracker
|
|
107
176
|
tracker = BehaviorAnalytics.create_tracker
|
|
108
177
|
|
|
109
|
-
#
|
|
178
|
+
# Multi-tenant example
|
|
110
179
|
context = BehaviorAnalytics::Context.new(
|
|
111
180
|
tenant_id: "org_123",
|
|
112
181
|
user_id: "user_456",
|
|
113
182
|
user_type: "trial"
|
|
114
183
|
)
|
|
115
184
|
|
|
185
|
+
# Single-tenant example (with default tenant)
|
|
186
|
+
context = BehaviorAnalytics::Context.new(
|
|
187
|
+
user_id: "user_456",
|
|
188
|
+
user_type: "trial"
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
# API-only example (no user context)
|
|
192
|
+
context = BehaviorAnalytics::Context.new
|
|
193
|
+
|
|
116
194
|
# Track a custom event
|
|
117
195
|
tracker.track(
|
|
118
196
|
context: context,
|
|
@@ -215,6 +293,7 @@ tracker = BehaviorAnalytics.create_tracker(
|
|
|
215
293
|
- `flush_interval`: Seconds between automatic flushes (default: 300)
|
|
216
294
|
- `context_resolver`: Lambda/proc to resolve context from requests
|
|
217
295
|
- `scoring_weights`: Hash of weights for engagement scoring
|
|
296
|
+
- `default_tenant_id`: Default tenant ID for single-tenant systems (default: "default")
|
|
218
297
|
|
|
219
298
|
## Event Types
|
|
220
299
|
|
|
@@ -224,12 +303,74 @@ tracker = BehaviorAnalytics.create_tracker(
|
|
|
224
303
|
|
|
225
304
|
## Context
|
|
226
305
|
|
|
227
|
-
The `Context` class encapsulates tracking context:
|
|
306
|
+
The `Context` class encapsulates the tracking context and is flexible enough to support different business cases:
|
|
228
307
|
|
|
229
|
-
- `tenant_id` (
|
|
230
|
-
- `user_id` (optional) - User identifier
|
|
308
|
+
- `tenant_id` (optional) - Multi-tenant identifier. Only required for multi-tenant systems
|
|
309
|
+
- `user_id` (optional) - User identifier. Useful for user-based analytics
|
|
231
310
|
- `user_type` (optional) - User type (e.g., "trial", "premium", "admin")
|
|
232
|
-
- `filters` (optional) - Hash of custom filter criteria
|
|
311
|
+
- `filters` (optional) - Hash of custom filter criteria for additional context
|
|
312
|
+
|
|
313
|
+
### Context Validation
|
|
314
|
+
|
|
315
|
+
A context is valid if it has **at least one identifier**:
|
|
316
|
+
- `tenant_id` (for multi-tenant systems)
|
|
317
|
+
- `user_id` (for user-based tracking)
|
|
318
|
+
- `filters` with identifying information (for anonymous/public tracking)
|
|
319
|
+
- `session_id` (automatically added for API calls)
|
|
320
|
+
|
|
321
|
+
This allows the gem to support:
|
|
322
|
+
- ✅ Multi-tenant SaaS applications
|
|
323
|
+
- ✅ Single-tenant web applications
|
|
324
|
+
- ✅ API monitoring without user context
|
|
325
|
+
- ✅ Anonymous/public page tracking
|
|
326
|
+
|
|
327
|
+
### Examples by Use Case
|
|
328
|
+
|
|
329
|
+
**Multi-Tenant SaaS:**
|
|
330
|
+
```ruby
|
|
331
|
+
context = BehaviorAnalytics::Context.new(
|
|
332
|
+
tenant_id: "org_123", # Required
|
|
333
|
+
user_id: "user_456",
|
|
334
|
+
user_type: "premium"
|
|
335
|
+
)
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
**Single-Tenant Web App:**
|
|
339
|
+
```ruby
|
|
340
|
+
# Set default tenant (optional but recommended)
|
|
341
|
+
BehaviorAnalytics.configure do |config|
|
|
342
|
+
config.default_tenant_id = "global"
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
# Track with just user_id
|
|
346
|
+
context = BehaviorAnalytics::Context.new(
|
|
347
|
+
user_id: current_user.id,
|
|
348
|
+
user_type: current_user.role
|
|
349
|
+
)
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
**API-Only Tracking:**
|
|
353
|
+
```ruby
|
|
354
|
+
# Track API calls without user context
|
|
355
|
+
context = BehaviorAnalytics::Context.new # Empty context - session_id will be used
|
|
356
|
+
tracker.track_api_call(
|
|
357
|
+
context: context,
|
|
358
|
+
method: "POST",
|
|
359
|
+
path: "/api/endpoint",
|
|
360
|
+
status_code: 200
|
|
361
|
+
)
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
**Anonymous/Public Tracking:**
|
|
365
|
+
```ruby
|
|
366
|
+
context = BehaviorAnalytics::Context.new(
|
|
367
|
+
filters: {
|
|
368
|
+
page_type: "public",
|
|
369
|
+
referrer: request.referer
|
|
370
|
+
}
|
|
371
|
+
)
|
|
372
|
+
tracker.track(context: context, event_name: "page_view")
|
|
373
|
+
```
|
|
233
374
|
|
|
234
375
|
## Development
|
|
235
376
|
|
data/behavior_analytics.gemspec
CHANGED
|
@@ -13,6 +13,7 @@ Gem::Specification.new do |spec|
|
|
|
13
13
|
"computing analytics (engagement scores, time-based trends, feature usage), " \
|
|
14
14
|
"and supporting API calls, feature usage, and custom events."
|
|
15
15
|
spec.homepage = "https://github.com/nerdawey/behavior_analytics"
|
|
16
|
+
spec.license = "MIT"
|
|
16
17
|
spec.required_ruby_version = ">= 3.0.0"
|
|
17
18
|
|
|
18
19
|
spec.metadata["homepage_uri"] = spec.homepage
|
|
@@ -24,7 +25,8 @@ Gem::Specification.new do |spec|
|
|
|
24
25
|
spec.files = Dir.chdir(__dir__) do
|
|
25
26
|
`git ls-files -z`.split("\x0").reject do |f|
|
|
26
27
|
(File.expand_path(f) == __FILE__) ||
|
|
27
|
-
f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor Gemfile])
|
|
28
|
+
f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor Gemfile]) ||
|
|
29
|
+
f.end_with?('.gem')
|
|
28
30
|
end
|
|
29
31
|
end
|
|
30
32
|
spec.bindir = "exe"
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Adds request/correlation columns and supporting indexes to +behavior_events+.
#
# Written as explicit +up+/+down+ instead of +change+:
# * conditional guards such as +unless column_exists?+ are also evaluated while
#   rolling back, so the guarded statements would never be recorded for
#   inversion, and
# * a raw +execute+ cannot be reversed automatically.
#
# Every statement is idempotent (+if_not_exists+ / +if_exists+), so the
# migration can be re-run against a partially migrated table.
#
# NOTE(review): the array column and the GIN indexes look PostgreSQL-specific,
# and the GIN index on +metadata+ presumably expects a jsonb column — confirm
# against the supported adapters.
# NOTE(review): +down+ drops every column/index listed here, including ones
# that may have existed before this migration ran — confirm that is acceptable.
class EnhanceBehaviorEventsV2 < ActiveRecord::Migration[7.0]
  TABLE = :behavior_events

  # Plain columns added for faster queries; each one also gets its own index.
  SCALAR_COLUMNS = {
    path: :string,
    method: :string,
    status_code: :integer,
    correlation_id: :string,
    parent_event_id: :string
  }.freeze

  # Composite indexes for common query patterns, keyed by index name.
  COMPOSITE_INDEXES = {
    "index_behavior_events_on_tenant_path_created" => %i[tenant_id path created_at],
    "index_behavior_events_on_tenant_user_type_created" => %i[tenant_id user_type created_at],
    "index_behavior_events_on_tenant_event_type_created" => %i[tenant_id event_type created_at]
  }.freeze

  METADATA_INDEX = "index_behavior_events_on_metadata_gin"

  def up
    SCALAR_COLUMNS.each do |name, type|
      add_column TABLE, name, type, if_not_exists: true
    end
    add_column TABLE, :tags, :string, array: true, default: [], if_not_exists: true

    SCALAR_COLUMNS.each_key do |name|
      add_index TABLE, name, if_not_exists: true
    end
    add_index TABLE, :tags, using: :gin, if_not_exists: true

    COMPOSITE_INDEXES.each do |index_name, columns|
      add_index TABLE, columns, name: index_name, if_not_exists: true
    end

    # The metadata column belongs to an earlier migration; only index it when present.
    return unless column_exists?(TABLE, :metadata)

    add_index TABLE, :metadata, using: :gin, name: METADATA_INDEX, if_not_exists: true
  end

  def down
    remove_index TABLE, name: METADATA_INDEX, if_exists: true

    COMPOSITE_INDEXES.each_key do |index_name|
      remove_index TABLE, name: index_name, if_exists: true
    end

    remove_index TABLE, :tags, if_exists: true
    SCALAR_COLUMNS.each_key do |name|
      remove_index TABLE, name, if_exists: true
    end

    remove_column TABLE, :tags, if_exists: true
    SCALAR_COLUMNS.each_key do |name|
      remove_column TABLE, name, if_exists: true
    end
  end
end
|
|
46
|
+
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
require "time" # Time.parse and Time#to_date are used below

module BehaviorAnalytics
  module Analytics
    # Buckets tracked events into time-based cohorts and derives retention
    # curves from them.
    #
    # Events are fetched through the storage adapter's
    # +events_for_context(context, since:, until:)+ and are read as hashes
    # (+event[:user_id]+, +event[:created_at]+).
    #
    # NOTE(review): period arithmetic (+1.month+, +beginning_of_week+,
    # +2.public_send(:day)+) relies on ActiveSupport core extensions being
    # loaded by the host application; only the +:day+ cohort period of
    # #create_cohort works with the standard library alone — confirm.
    class Cohorts
      # @param storage_adapter [#events_for_context] event source
      def initialize(storage_adapter)
        @storage_adapter = storage_adapter
      end

      # Buckets every event of the context by the period its date falls into.
      #
      # @param context [#validate!] tracking context; validated before use
      # @param cohort_definition [Hash] +:key+ (event field holding the date,
      #   default +:created_at+) and +:period+ (+:day+, +:week+, +:month+,
      #   +:year+; default +:month+)
      # @param options [Hash] +:date_range+ (Range) or +:since+ / +:until+
      # @return [Array<Hash>] one hash per cohort with +:cohort_id+,
      #   +:cohort_date+, +:user_count+ (distinct non-nil user ids) and
      #   +:event_count+
      def create_cohort(context, cohort_definition, options = {})
        context.validate!

        since, until_date = resolve_window(options)
        all_events = @storage_adapter.events_for_context(
          context,
          since: since,
          until: until_date
        )

        cohort_key = cohort_definition[:key] || :created_at
        cohort_period = cohort_definition[:period] || :month
        id_format = cohort_format(cohort_period)

        cohorts = {}

        all_events.each do |event|
          cohort_date = extract_cohort_date(event, cohort_key, cohort_period)
          cohort_id = cohort_date.strftime(id_format)

          bucket = (cohorts[cohort_id] ||= {
            cohort_id: cohort_id,
            cohort_date: cohort_date,
            users: Set.new,
            event_count: 0
          })

          bucket[:users] << event[:user_id] if event[:user_id]
          bucket[:event_count] += 1
        end

        cohorts.values.map do |bucket|
          {
            cohort_id: bucket[:cohort_id],
            cohort_date: bucket[:cohort_date],
            user_count: bucket[:users].size,
            event_count: bucket[:event_count]
          }
        end
      end

      # Computes a retention curve for each given cohort.
      #
      # @param context [#validate!] tracking context; validated before use
      # @param cohorts [Array<Hash>] hashes carrying +:cohort_id+ and/or
      #   +:cohort_date+; a missing date is parsed from the id
      # @param options [Hash] +:period+ (default +:day+) and +:periods+
      #   (default 30); the curve covers offsets 0..periods inclusive
      # @return [Hash] cohort id => +{cohort_id:, cohort_date:, cohort_size:,
      #   retention_curve:}+, where each curve entry holds +:period+, +:date+,
      #   +:active_users+ and +:retention_rate+ (percentage, 2 decimals)
      def retention_analysis(context, cohorts, options = {})
        context.validate!

        period = options[:period] || :day
        periods_to_analyze = options[:periods] || 30

        retention_data = {}

        cohorts.each do |cohort|
          cohort_id = cohort[:cohort_id] || cohort[:cohort_date]
          cohort_date = cohort[:cohort_date] || parse_cohort_date(cohort_id)

          cohort_users = get_cohort_users(context, cohort_date, period)

          retention_curve = (0..periods_to_analyze).map do |period_offset|
            # public_send keeps a caller-supplied period name from reaching
            # private Kernel methods (exit, system, ...).
            period_date = cohort_date + period_offset.public_send(period)

            active_users = get_active_users(context, cohort_users, period_date, period)
            retention_rate = cohort_users.empty? ? 0.0 : (active_users.size.to_f / cohort_users.size) * 100

            {
              period: period_offset,
              date: period_date,
              active_users: active_users.size,
              retention_rate: retention_rate.round(2)
            }
          end

          retention_data[cohort_id] = {
            cohort_id: cohort_id,
            cohort_date: cohort_date,
            cohort_size: cohort_users.size,
            retention_curve: retention_curve
          }
        end

        retention_data
      end

      # Lines up the retention curves of several cohorts period by period.
      #
      # @param context [#validate!] tracking context; validated before use
      # @param cohort_ids [Array] cohort ids; each is analysed on its own via
      #   #retention_analysis
      # @param options [Hash] forwarded to #retention_analysis
      # @return [Hash] period offset => +{period:, cohorts: {cohort_id =>
      #   {retention_rate:, active_users:}}}+
      def compare_cohorts(context, cohort_ids, options = {})
        context.validate!

        cohorts_data = cohort_ids.map do |cohort_id|
          retention_analysis(context, [{ cohort_id: cohort_id }], options)
        end

        max_periods = cohorts_data.map { |c| c.values.first[:retention_curve].size }.max || 0

        comparison = {}

        (0...max_periods).each do |period|
          period_comparison = { period: period, cohorts: {} }

          cohorts_data.each do |cohort_data|
            cohort_id = cohort_data.keys.first
            entry = cohort_data[cohort_id][:retention_curve][period]
            next unless entry

            period_comparison[:cohorts][cohort_id] = {
              retention_rate: entry[:retention_rate],
              active_users: entry[:active_users]
            }
          end

          comparison[period] = period_comparison
        end

        comparison
      end

      private

      # Resolves the query window: an explicit +:date_range+ wins, with
      # +:since+ / +:until+ filling whichever bound the range leaves open.
      def resolve_window(options)
        range = options[:date_range]
        [range&.begin || options[:since], range&.end || options[:until]]
      end

      # Reads the event's date from +key+ (symbol or string form), falling back
      # to +event[:created_at]+, and snaps it to the start of +period+.
      def extract_cohort_date(event, key, period)
        date_value = event[key.to_sym] || event[key.to_s]
        date = case date_value
               when Time then date_value
               when String then Time.parse(date_value)
               else Time.parse(event[:created_at].to_s)
               end

        normalize_to_period(date, period)
      end

      # Snaps +date+ to the first day of its period; unknown periods behave
      # like +:day+.
      def normalize_to_period(date, period)
        day = date.to_date

        case period
        when :week then day.beginning_of_week
        when :month then day.beginning_of_month
        when :year then day.beginning_of_year
        else day
        end
      end

      # strftime pattern used as the cohort id for +period+.
      def cohort_format(period)
        case period
        when :week
          # %G is the ISO week-based year that belongs with %V. Using %Y here
          # would label 2024-12-30 as "2024-W01" and merge it with the first
          # week of January 2024.
          "%G-W%V"
        when :month then "%Y-%m"
        when :year then "%Y"
        else "%Y-%m-%d"
        end
      end

      # Best-effort parse of a cohort id; ids that cannot be parsed fall back
      # to the current time.
      def parse_cohort_date(cohort_id)
        Time.parse(cohort_id.to_s)
      rescue StandardError
        Time.now
      end

      # End of the period that starts at +start+; unknown periods behave like
      # +:day+.
      def period_end(start, period)
        case period
        when :week then start + 1.week
        when :month then start + 1.month
        when :year then start + 1.year
        else start + 1.day
        end
      end

      # Distinct non-nil user ids with at least one event in the window.
      def user_ids_between(context, since, until_date)
        events = @storage_adapter.events_for_context(
          context,
          since: since,
          until: until_date
        )

        events.map { |e| e[:user_id] }.compact.uniq
      end

      # Users with at least one event in the period starting at +cohort_date+.
      # This is not restricted to users whose *first* event falls in that
      # period.
      def get_cohort_users(context, cohort_date, period)
        user_ids_between(context, cohort_date, period_end(cohort_date, period))
      end

      # Members of +cohort_users+ with at least one event in the period
      # starting at +period_date+.
      def get_active_users(context, cohort_users, period_date, period)
        cohort_users & user_ids_between(context, period_date, period_end(period_date, period))
      end
    end
  end
end
|
|
242
|
+
|
|
@@ -10,6 +10,9 @@ module BehaviorAnalytics
|
|
|
10
10
|
class Engine
|
|
11
11
|
# Keeps the storage adapter and starts with no sub-analyzers built; the
# funnels, cohorts and retention helpers are created on first access.
def initialize(storage_adapter)
  @storage_adapter = storage_adapter
  @funnels = @cohorts = @retention = nil
end
|
|
14
17
|
|
|
15
18
|
def event_count(context, options = {})
|
|
@@ -124,6 +127,18 @@ module BehaviorAnalytics
|
|
|
124
127
|
stats.sort_by { |_feature, count| -count }.first(limit).to_h
|
|
125
128
|
end
|
|
126
129
|
|
|
130
|
+
# Funnel analyzer bound to this engine's storage adapter; built on first
# call and reused afterwards.
def funnels
  return @funnels if @funnels

  @funnels = Funnels.new(@storage_adapter)
end
|
|
133
|
+
|
|
134
|
+
# Cohort analyzer bound to this engine's storage adapter; built on first
# call and reused afterwards.
def cohorts
  return @cohorts if @cohorts

  @cohorts = Cohorts.new(@storage_adapter)
end
|
|
137
|
+
|
|
138
|
+
# Retention analyzer bound to this engine's storage adapter; built on first
# call and reused afterwards.
def retention
  return @retention if @retention

  @retention = Retention.new(@storage_adapter)
end
|
|
141
|
+
|
|
127
142
|
private
|
|
128
143
|
|
|
129
144
|
def normalize_context(context)
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time" # Time.parse is used by parse_time

module BehaviorAnalytics
  module Analytics
    # Funnel and time-to-conversion analysis over tracked events.
    #
    # Events are fetched through the storage adapter's
    # +events_for_context(context, since:, until:)+ and are read as hashes
    # (+event[:user_id]+, +event[:event_name]+, +event[:created_at]+). Events
    # without a +user_id+ are ignored for per-user figures.
    class Funnels
      # @param storage_adapter [#events_for_context] event source
      def initialize(storage_adapter)
        @storage_adapter = storage_adapter
      end

      # Counts, for each step, the users and events matching that step and the
      # step-to-step conversion.
      #
      # Steps are evaluated independently of each other: a user counts for a
      # step whenever any of their events matches it, whether or not they also
      # matched the earlier steps. A later step can therefore report more
      # users than the one before it (conversion above 100%, negative
      # drop-off).
      #
      # @param context [#validate!] tracking context; validated before use
      # @param steps [Array<String, Symbol, Proc, Hash>] a String/Symbol is
      #   matched against the event name, a Proc is called with the event, and
      #   a Hash supplies +:name+ plus an optional +:condition+ (String,
      #   Symbol, Proc or Hash of field => value); without a +:condition+ the
      #   +:name+ is matched against the event name
      # @param options [Hash] +:date_range+ (Range) or +:since+ / +:until+
      # @return [Hash] +:steps+ (per-step +:step+, +:step_index+, +:users+,
      #   +:events+, +:drop_off_rate+, +:conversion_rate+), +:total_users+,
      #   +:completed_users+ and +:overall_conversion_rate+; an empty +steps+
      #   list yields zeros
      def analyze_funnel(context, steps, options = {})
        context.validate!

        all_events = fetch_events(context, options)
        user_events = group_events_by_user(all_events)

        funnel_results = steps.map.with_index do |step, index|
          condition = step_condition(step)

          {
            step: step.is_a?(Hash) ? step[:name] : step.to_s,
            step_index: index,
            users: user_events.count { |_user_id, events| events.any? { |e| evaluate_condition(e, condition) } },
            events: all_events.count { |e| evaluate_condition(e, condition) }
          }
        end

        add_conversion_rates(funnel_results)

        {
          steps: funnel_results,
          # An empty step list has no first/last step to read from.
          total_users: funnel_results.empty? ? 0 : funnel_results.first[:users],
          completed_users: funnel_results.empty? ? 0 : funnel_results.last[:users],
          overall_conversion_rate: calculate_overall_conversion(funnel_results)
        }
      end

      # Measures, per user, the time from their first +start_event+ to the
      # first +end_event+ at or after it.
      #
      # @param context [#validate!] tracking context; validated before use
      # @param start_event [String, Symbol, Hash, Proc] start event matcher
      # @param end_event [String, Symbol, Hash, Proc] end event matcher
      # @param options [Hash] +:date_range+ (Range) or +:since+ / +:until+
      # @return [Hash] +:average_seconds+, +:median_seconds+, +:min_seconds+,
      #   +:max_seconds+ and +:count+; an empty hash when no user converted
      def time_to_conversion(context, start_event, end_event, options = {})
        context.validate!

        user_events = group_events_by_user(fetch_events(context, options))

        conversion_times = []

        user_events.each_value do |events|
          sorted_events = events.sort_by { |e| parse_time(e[:created_at]) }

          start_index = sorted_events.index { |e| matches_event(e, start_event) }
          next unless start_index

          # The search includes the start event itself, so the index found is
          # relative to start_index.
          end_offset = sorted_events[start_index..].index { |e| matches_event(e, end_event) }
          next unless end_offset

          start_time = parse_time(sorted_events[start_index][:created_at])
          end_time = parse_time(sorted_events[start_index + end_offset][:created_at])

          conversion_times << (end_time - start_time)
        end

        return {} if conversion_times.empty?

        {
          average_seconds: conversion_times.sum / conversion_times.size,
          median_seconds: median(conversion_times),
          min_seconds: conversion_times.min,
          max_seconds: conversion_times.max,
          count: conversion_times.size
        }
      end

      private

      # Loads the context's events for the window described by +options+. An
      # explicit +:date_range+ wins, with +:since+ / +:until+ filling
      # whichever bound the range leaves open.
      def fetch_events(context, options)
        range = options[:date_range]

        @storage_adapter.events_for_context(
          context,
          since: range&.begin || options[:since],
          until: range&.end || options[:until]
        )
      end

      # Matcher for a funnel step. Bare steps are handed to
      # #evaluate_condition as they are, so Symbols match String event names.
      def step_condition(step)
        return step unless step.is_a?(Hash)

        step[:condition] || step[:name]
      end

      # Adds +:drop_off_rate+ and +:conversion_rate+ (percentages relative to
      # the previous step) to every step result, in place.
      def add_conversion_rates(funnel_results)
        funnel_results.each_with_index do |step_result, index|
          if index.zero?
            step_result[:drop_off_rate] = 0.0
            step_result[:conversion_rate] = 100.0
            next
          end

          previous_users = funnel_results[index - 1][:users]
          current_users = step_result[:users]

          if previous_users > 0
            step_result[:drop_off_rate] = ((previous_users - current_users).to_f / previous_users) * 100
            step_result[:conversion_rate] = (current_users.to_f / previous_users) * 100
          else
            step_result[:drop_off_rate] = 100.0
            step_result[:conversion_rate] = 0.0
          end
        end
      end

      # Events grouped by +user_id+; events without a user id are dropped.
      def group_events_by_user(events)
        events.group_by { |e| e[:user_id] }.reject { |user_id, _| user_id.nil? }
      end

      # True when +event+ satisfies +condition+ (Proc, event name, or Hash of
      # field => value); any other condition type matches nothing.
      def evaluate_condition(event, condition)
        case condition
        when Proc
          condition.call(event)
        when String, Symbol
          event[:event_name] == condition.to_s
        when Hash
          condition.all? { |key, value| event[key.to_sym] == value || event[key.to_s] == value }
        else
          false
        end
      end

      # Same matching rules as #evaluate_condition; kept as a separate name
      # for the start/end event matchers.
      def matches_event(event, event_spec)
        evaluate_condition(event, event_spec)
      end

      # Coerces a timestamp to Time. Strings are parsed, objects responding to
      # +to_time+ (Date, DateTime) are converted, and anything else falls back
      # to the current time.
      def parse_time(time_value)
        case time_value
        when Time then time_value
        when String then Time.parse(time_value)
        else
          time_value.respond_to?(:to_time) ? time_value.to_time : Time.now
        end
      end

      # Median of a non-empty numeric array (mean of the two middle values
      # for even sizes).
      def median(array)
        sorted = array.sort
        len = sorted.length
        (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
      end

      # Users at the last step as a percentage of users at the first step.
      def calculate_overall_conversion(funnel_results)
        return 0.0 if funnel_results.empty?

        first_users = funnel_results.first[:users]
        return 0.0 if first_users == 0

        (funnel_results.last[:users].to_f / first_users) * 100
      end
    end
  end
end
|
|
176
|
+
|