behavior_analytics 0.1.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/README.md +146 -5
  3. data/behavior_analytics.gemspec +3 -1
  4. data/db/migrate/002_enhance_behavior_events_v2.rb +46 -0
  5. data/lib/behavior_analytics/analytics/cohorts.rb +242 -0
  6. data/lib/behavior_analytics/analytics/engine.rb +15 -0
  7. data/lib/behavior_analytics/analytics/funnels.rb +176 -0
  8. data/lib/behavior_analytics/analytics/retention.rb +186 -0
  9. data/lib/behavior_analytics/context.rb +38 -2
  10. data/lib/behavior_analytics/debug/inspector.rb +82 -0
  11. data/lib/behavior_analytics/event.rb +7 -1
  12. data/lib/behavior_analytics/export/csv_exporter.rb +102 -0
  13. data/lib/behavior_analytics/export/json_exporter.rb +55 -0
  14. data/lib/behavior_analytics/hooks/callback.rb +50 -0
  15. data/lib/behavior_analytics/hooks/manager.rb +106 -0
  16. data/lib/behavior_analytics/hooks/webhook.rb +114 -0
  17. data/lib/behavior_analytics/integrations/rails/middleware.rb +99 -0
  18. data/lib/behavior_analytics/integrations/rails.rb +123 -2
  19. data/lib/behavior_analytics/jobs/active_event_job.rb +37 -0
  20. data/lib/behavior_analytics/jobs/delayed_event_job.rb +29 -0
  21. data/lib/behavior_analytics/jobs/sidekiq_event_job.rb +37 -0
  22. data/lib/behavior_analytics/observability/metrics.rb +112 -0
  23. data/lib/behavior_analytics/observability/tracer.rb +85 -0
  24. data/lib/behavior_analytics/processors/async_processor.rb +24 -0
  25. data/lib/behavior_analytics/processors/background_job_processor.rb +72 -0
  26. data/lib/behavior_analytics/query.rb +89 -4
  27. data/lib/behavior_analytics/replay/engine.rb +108 -0
  28. data/lib/behavior_analytics/replay/processor.rb +107 -0
  29. data/lib/behavior_analytics/reporting/generator.rb +125 -0
  30. data/lib/behavior_analytics/sampling/strategy.rb +54 -0
  31. data/lib/behavior_analytics/schema/definition.rb +71 -0
  32. data/lib/behavior_analytics/schema/validator.rb +113 -0
  33. data/lib/behavior_analytics/storage/active_record_adapter.rb +183 -10
  34. data/lib/behavior_analytics/storage/elasticsearch_adapter.rb +185 -0
  35. data/lib/behavior_analytics/storage/in_memory_adapter.rb +234 -5
  36. data/lib/behavior_analytics/storage/kafka_adapter.rb +127 -0
  37. data/lib/behavior_analytics/storage/redis_adapter.rb +211 -0
  38. data/lib/behavior_analytics/streaming/event_stream.rb +77 -0
  39. data/lib/behavior_analytics/throttling/limiter.rb +97 -0
  40. data/lib/behavior_analytics/tracker.rb +130 -4
  41. data/lib/behavior_analytics/version.rb +1 -1
  42. data/lib/behavior_analytics.rb +139 -2
  43. metadata +33 -3

data/lib/behavior_analytics/analytics/retention.rb
@@ -0,0 +1,186 @@
+ # frozen_string_literal: true
+
+ module BehaviorAnalytics
+   module Analytics
+     class Retention
+       def initialize(storage_adapter)
+         @storage_adapter = storage_adapter
+       end
+
+       def calculate_retention(context, options = {})
+         context.validate!
+
+         period = options[:period] || :day
+         periods = options[:periods] || 30
+
+         # Get all events for the context
+         date_range = options[:date_range] || (options[:since]..options[:until])
+         since = date_range.begin || options[:since]
+         until_date = date_range.end || options[:until]
+
+         all_events = @storage_adapter.events_for_context(
+           context,
+           since: since,
+           until: until_date
+         )
+
+         # Group events by user and calculate first activity
+         user_first_activity = {}
+         user_activity_by_period = {}
+
+         all_events.each do |event|
+           user_id = event[:user_id]
+           next unless user_id
+
+           event_time = parse_time(event[:created_at])
+           period_key = period_key_for_time(event_time, period)
+
+           # Track first activity
+           unless user_first_activity[user_id]
+             user_first_activity[user_id] = {
+               first_period: period_key,
+               first_date: event_time
+             }
+           end
+
+           # Track activity by period
+           user_activity_by_period[user_id] ||= Set.new
+           user_activity_by_period[user_id] << period_key
+         end
+
+         # Calculate retention
+         retention_by_period = {}
+
+         user_first_activity.each do |user_id, first_activity|
+           first_period = first_activity[:first_period]
+           user_periods = user_activity_by_period[user_id] || Set.new
+
+           (0..periods).each do |offset|
+             target_period = offset_period(first_period, offset, period)
+             is_active = user_periods.include?(target_period)
+
+             retention_by_period[offset] ||= {
+               period: offset,
+               total_users: 0,
+               active_users: 0,
+               retention_rate: 0.0
+             }
+
+             retention_by_period[offset][:total_users] += 1
+             retention_by_period[offset][:active_users] += 1 if is_active
+           end
+         end
+
+         # Calculate retention rates
+         retention_by_period.values.each do |data|
+           if data[:total_users] > 0
+             data[:retention_rate] = (data[:active_users].to_f / data[:total_users]) * 100
+           end
+         end
+
+         {
+           retention_curve: retention_by_period.values.sort_by { |d| d[:period] },
+           total_cohort_size: user_first_activity.size,
+           period_type: period
+         }
+       end
+
+       def calculate_churn(context, options = {})
+         context.validate!
+
+         period = options[:period] || :day
+         lookback_periods = options[:lookback_periods] || 7
+
+         # Get recent events
+         since = options[:since] || (Time.now - lookback_periods.send(period))
+         until_date = options[:until] || Time.now
+
+         all_events = @storage_adapter.events_for_context(
+           context,
+           since: since,
+           until: until_date
+         )
+
+         # Group by user and period
+         user_periods = {}
+
+         all_events.each do |event|
+           user_id = event[:user_id]
+           next unless user_id
+
+           event_time = parse_time(event[:created_at])
+           period_key = period_key_for_time(event_time, period)
+
+           user_periods[user_id] ||= Set.new
+           user_periods[user_id] << period_key
+         end
+
+         # Calculate churn (users who were active but stopped)
+         current_period = period_key_for_time(until_date, period)
+         previous_period = offset_period(current_period, -1, period)
+
+         active_in_previous = user_periods.select { |_, periods| periods.include?(previous_period) }.keys
+         active_in_current = user_periods.select { |_, periods| periods.include?(current_period) }.keys
+
+         churned_users = active_in_previous - active_in_current
+
+         {
+           churned_users: churned_users.size,
+           churned_user_ids: churned_users,
+           previous_period_active: active_in_previous.size,
+           current_period_active: active_in_current.size,
+           churn_rate: active_in_previous.empty? ? 0.0 : (churned_users.size.to_f / active_in_previous.size) * 100
+         }
+       end
+
+       private
+
+       def parse_time(time_value)
+         case time_value
+         when Time
+           time_value
+         when String
+           Time.parse(time_value)
+         else
+           Time.now
+         end
+       end
+
+       def period_key_for_time(time, period)
+         case period
+         when :day
+           time.to_date.strftime("%Y-%m-%d")
+         when :week
+           time.to_date.beginning_of_week.strftime("%Y-W%V")
+         when :month
+           time.to_date.beginning_of_month.strftime("%Y-%m")
+         when :year
+           time.to_date.beginning_of_year.strftime("%Y")
+         else
+           time.to_date.strftime("%Y-%m-%d")
+         end
+       end
+
+       def offset_period(period_key, offset, period)
+         # Parse period key and add offset
+         base_date = case period
+                     when :day
+                       Date.parse(period_key)
+                     when :week
+                       year, week = period_key.match(/(\d{4})-W(\d{2})/).captures
+                       Date.commercial(year.to_i, week.to_i, 1)
+                     when :month
+                       Date.parse("#{period_key}-01")
+                     when :year
+                       Date.parse("#{period_key}-01-01")
+                     else
+                       Date.parse(period_key)
+                     end
+
+         offset_date = base_date + offset.send(period)
+         period_key_for_time(offset_date.to_time, period)
+       end
+     end
+   end
+ end
+
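A minimal usage sketch of the new Retention calculator. The adapter and context construction below are assumptions for illustration (only the Retention API itself appears in this diff), and note that the period helpers lean on ActiveSupport extensions such as beginning_of_week and Integer#day, plus Set and Time.parse being loaded, so a Rails-like environment is assumed.

    adapter   = BehaviorAnalytics::Storage::InMemoryAdapter.new        # assumed constructor
    context   = BehaviorAnalytics::Context.new(tenant_id: "acme")
    retention = BehaviorAnalytics::Analytics::Retention.new(adapter)

    # Weekly retention over 8 periods; result exposes :retention_curve, :total_cohort_size, :period_type
    report = retention.calculate_retention(context, period: :week, periods: 8)
    report[:retention_curve].each do |row|
      puts "week +#{row[:period]}: #{row[:retention_rate].round(1)}% of #{row[:total_users]} users"
    end

    # Day-over-day churn across the last 7 days
    churn = retention.calculate_churn(context, period: :day, lookback_periods: 7)
    puts "churn rate: #{churn[:churn_rate].round(1)}%"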

data/lib/behavior_analytics/context.rb
@@ -5,7 +5,11 @@ module BehaviorAnalytics
      attr_accessor :tenant_id, :user_id, :user_type, :filters
 
      def initialize(attributes = {})
+       # Only use default_tenant_id if explicitly configured and no tenant_id provided
+       # This allows tracking without tenant_id for non-multi-tenant systems
        @tenant_id = attributes[:tenant_id] || attributes[:tenant]
+       @tenant_id ||= default_tenant_id if use_default_tenant?
+
        @user_id = attributes[:user_id] || attributes[:user]
        @user_type = attributes[:user_type]
        @filters = attributes[:filters] || {}
@@ -21,11 +25,43 @@ module BehaviorAnalytics
      end
 
      def valid?
-       !tenant_id.nil? && !tenant_id.empty?
+       # Context is valid if it has at least one identifier (tenant_id, user_id, or both)
+       # This supports different business cases:
+       # - Multi-tenant: tenant_id required
+       # - Single-tenant: user_id sufficient
+       # - API-only tracking: tenant_id or user_id optional
+       has_tenant? || has_user? || has_any_identifier?
+     end
+
+     def has_tenant?
+       !tenant_id.nil? && !tenant_id.to_s.empty?
+     end
+
+     def has_user?
+       !user_id.nil? && !user_id.to_s.empty?
+     end
+
+     def has_any_identifier?
+       # Check if filters contain any identifying information
+       filters.is_a?(Hash) && !filters.empty?
      end
 
      def validate!
-       raise Error, "tenant_id is required in context" unless valid?
+       unless valid?
+         raise Error, "Context must have at least one identifier (tenant_id, user_id, or filters). " \
+                      "For single-tenant systems, set default_tenant_id in configuration or provide user_id."
+       end
+     end
+
+     private
+
+     def default_tenant_id
+       BehaviorAnalytics.configuration.default_tenant_id
+     end
+
+     def use_default_tenant?
+       # Only use default tenant if it's explicitly set (not nil)
+       default_tenant_id && !default_tenant_id.to_s.empty?
      end
    end
  end
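The practical effect of the relaxed Context validation, sketched below. The configure block is an assumption (this hunk only shows BehaviorAnalytics.configuration.default_tenant_id being read); the rest follows directly from the new valid?/validate! logic.

    # A user-only context is now valid (0.1.0 raised "tenant_id is required in context")
    BehaviorAnalytics::Context.new(user_id: "42").valid?     # => true

    # A context with no identifiers at all still fails
    BehaviorAnalytics::Context.new.validate!                 # raises BehaviorAnalytics::Error

    # Single-tenant apps can fall back to a configured default tenant (configure block assumed)
    BehaviorAnalytics.configure { |c| c.default_tenant_id = "default" }
    BehaviorAnalytics::Context.new(user_id: "42").tenant_id  # => "default"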

data/lib/behavior_analytics/debug/inspector.rb
@@ -0,0 +1,82 @@
+ # frozen_string_literal: true
+
+ require "set"
+
+ module BehaviorAnalytics
+   module Debug
+     class Inspector
+       def initialize(tracker)
+         @tracker = tracker
+       end
+
+       def inspect_event(event_id, context)
+         context.validate!
+
+         events = @tracker.query
+                          .for_tenant(context.tenant_id)
+                          .where(id: event_id)
+                          .execute
+
+         return nil if events.empty?
+
+         event = events.first
+         {
+           event: event,
+           context: context.to_h,
+           metadata: event[:metadata] || {},
+           related_events: find_related_events(event, context)
+         }
+       end
+
+       def inspect_context(context, options = {})
+         context.validate!
+
+         {
+           context: context.to_h,
+           event_count: @tracker.analytics.event_count(context, options),
+           unique_users: @tracker.analytics.unique_users(context, options),
+           active_days: @tracker.analytics.active_days(context, options),
+           recent_events: @tracker.query
+                                  .for_tenant(context.tenant_id)
+                                  .limit(10)
+                                  .execute
+         }
+       end
+
+       def inspect_buffer
+         {
+           buffer_size: @tracker.instance_variable_get(:@buffer)&.size || 0,
+           batch_size: @tracker.batch_size,
+           flush_interval: @tracker.flush_interval
+         }
+       end
+
+       private
+
+       def find_related_events(event, context)
+         related = []
+
+         # Find events with same session
+         if event[:session_id]
+           related.concat(@tracker.query
+                                  .for_tenant(context.tenant_id)
+                                  .where(session_id: event[:session_id])
+                                  .limit(10)
+                                  .execute)
+         end
+
+         # Find events with same correlation_id
+         if event[:correlation_id]
+           related.concat(@tracker.query
+                                  .for_tenant(context.tenant_id)
+                                  .where(correlation_id: event[:correlation_id])
+                                  .limit(10)
+                                  .execute)
+         end
+
+         related.uniq { |e| e[:id] }
+       end
+     end
+   end
+ end
+
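A rough console sketch of the inspector. How the tracker instance is obtained is an assumption; only the Inspector methods and the tracker calls they make (query, analytics, batch_size, flush_interval) come from this hunk, and the returned values shown are illustrative.

    inspector = BehaviorAnalytics::Debug::Inspector.new(tracker)   # tracker from your app's setup
    context   = BehaviorAnalytics::Context.new(tenant_id: "acme")

    inspector.inspect_buffer
    # => { buffer_size: 12, batch_size: 100, flush_interval: 5 }

    inspector.inspect_event("evt_123", context)   # nil when the id is unknown, otherwise the event
                                                  # plus related events sharing its session/correlation id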

data/lib/behavior_analytics/event.rb
@@ -46,7 +46,13 @@ module BehaviorAnalytics
      private
 
      def validate!
-       raise Error, "tenant_id is required" if tenant_id.nil? || tenant_id.empty?
+       # tenant_id is optional - events can be tracked without tenant for non-multi-tenant systems
+       # At least one identifier should be present (tenant_id, user_id, or session_id)
+       has_identifier = (!tenant_id.nil? && !tenant_id.to_s.empty?) ||
+                        (!user_id.nil? && !user_id.to_s.empty?) ||
+                        (!session_id.nil? && !session_id.to_s.empty?)
+
+       raise Error, "Event must have at least one identifier (tenant_id, user_id, or session_id)" unless has_identifier
        raise Error, "event_name is required" if event_name.nil? || event_name.empty?
        raise Error, "event_type must be one of: #{EVENT_TYPES.join(', ')}" unless EVENT_TYPES.include?(event_type)
      end
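In effect, an event now validates when any one of tenant_id, user_id, or session_id is present. The same predicate, extracted into a standalone snippet for illustration (the Event constructor itself is not shown in this hunk, so it is not used here):

    present = ->(value) { !value.nil? && !value.to_s.empty? }
    has_identifier = ->(tenant_id: nil, user_id: nil, session_id: nil) do
      present.(tenant_id) || present.(user_id) || present.(session_id)
    end

    has_identifier.(tenant_id: "acme")        # => true
    has_identifier.(user_id: "42")            # => true   (rejected in 0.1.0)
    has_identifier.(session_id: "sess_9")     # => true   (anonymous session)
    has_identifier.(tenant_id: "")            # => false  (Event#validate! would raise)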

data/lib/behavior_analytics/export/csv_exporter.rb
@@ -0,0 +1,102 @@
+ # frozen_string_literal: true
+
+ require "csv"
+ require "set"
+
+ module BehaviorAnalytics
+   module Export
+     class CsvExporter
+       def initialize(storage_adapter)
+         @storage_adapter = storage_adapter
+       end
+
+       def export(context, options = {})
+         context.validate!
+
+         events = @storage_adapter.events_for_context(context, options)
+         return "" if events.empty?
+
+         # Determine columns
+         columns = options[:columns] || extract_columns(events)
+
+         CSV.generate(headers: true) do |csv|
+           csv << columns
+
+           events.each do |event|
+             row = columns.map do |column|
+               get_value(event, column)
+             end
+             csv << row
+           end
+         end
+       end
+
+       def export_to_file(context, file_path, options = {})
+         csv_content = export(context, options)
+         File.write(file_path, csv_content)
+         file_path
+       end
+
+       def stream_export(context, options = {}, &block)
+         context.validate!
+
+         batch_size = options[:batch_size] || 1000
+         offset = 0
+         first_batch = true
+
+         loop do
+           batch_options = options.merge(limit: batch_size, offset: offset)
+           events = @storage_adapter.events_for_context(context, batch_options)
+
+           break if events.empty?
+
+           if first_batch
+             columns = options[:columns] || extract_columns(events)
+             yield CSV.generate_line(columns)
+             first_batch = false
+           end
+
+           events.each do |event|
+             columns = options[:columns] || extract_columns([event])
+             row = columns.map { |col| get_value(event, col) }
+             yield CSV.generate_line(row)
+           end
+
+           offset += batch_size
+           break if events.size < batch_size
+         end
+       end
+
+       private
+
+       def extract_columns(events)
+         return [] if events.empty?
+
+         columns = Set.new
+         events.each do |event|
+           event.keys.each { |key| columns << key.to_s }
+           if event[:metadata] && event[:metadata].is_a?(Hash)
+             event[:metadata].keys.each { |key| columns << "metadata.#{key}" }
+           end
+         end
+
+         columns.sort
+       end
+
+       def get_value(event, column)
+         if column.include?(".")
+           parts = column.split(".", 2)
+           if parts[0] == "metadata"
+             metadata = event[:metadata] || event["metadata"] || {}
+             metadata[parts[1].to_sym] || metadata[parts[1].to_s] || metadata[parts[1]] || ""
+           else
+             event[parts[0].to_sym] || event[parts[0].to_s] || ""
+           end
+         else
+           event[column.to_sym] || event[column.to_s] || ""
+         end
+       end
+     end
+   end
+ end
+
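A short sketch of the CSV exporter. The adapter constructor and the event shape are assumptions; the "metadata.<key>" column flattening comes straight from extract_columns/get_value above.

    adapter  = BehaviorAnalytics::Storage::InMemoryAdapter.new   # assumed constructor
    context  = BehaviorAnalytics::Context.new(tenant_id: "acme")
    exporter = BehaviorAnalytics::Export::CsvExporter.new(adapter)

    # One-shot export; nested metadata keys surface as "metadata.<key>" columns
    csv = exporter.export(context, columns: %w[user_id event_name metadata.plan])

    # Streaming export for large result sets, one CSV line per yield
    File.open("events.csv", "w") do |f|
      exporter.stream_export(context, batch_size: 500) { |line| f.write(line) }
    end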

data/lib/behavior_analytics/export/json_exporter.rb
@@ -0,0 +1,55 @@
+ # frozen_string_literal: true
+
+ require "json"
+
+ module BehaviorAnalytics
+   module Export
+     class JsonExporter
+       def initialize(storage_adapter)
+         @storage_adapter = storage_adapter
+       end
+
+       def export(context, options = {})
+         context.validate!
+
+         events = @storage_adapter.events_for_context(context, options)
+         JSON.pretty_generate(events)
+       end
+
+       def export_to_file(context, file_path, options = {})
+         json_content = export(context, options)
+         File.write(file_path, json_content)
+         file_path
+       end
+
+       def stream_export(context, options = {}, &block)
+         context.validate!
+
+         batch_size = options[:batch_size] || 1000
+         offset = 0
+         first_item = true
+
+         yield "["
+
+         loop do
+           batch_options = options.merge(limit: batch_size, offset: offset)
+           events = @storage_adapter.events_for_context(context, batch_options)
+
+           break if events.empty?
+
+           events.each_with_index do |event, index|
+             yield "," unless first_item && index == 0
+             yield JSON.generate(event)
+             first_item = false
+           end
+
+           offset += batch_size
+           break if events.size < batch_size
+         end
+
+         yield "]"
+       end
+     end
+   end
+ end
+
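The JSON exporter mirrors the CSV one; stream_export emits a single JSON array in chunks, so large exports never have to be built in memory. A sketch, reusing the assumed adapter and context from the CSV example above:

    exporter = BehaviorAnalytics::Export::JsonExporter.new(adapter)

    File.open("events.json", "w") do |f|
      exporter.stream_export(context, batch_size: 1000) { |chunk| f.write(chunk) }
    end
    # events.json now contains one JSON array of event objects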

data/lib/behavior_analytics/hooks/callback.rb
@@ -0,0 +1,50 @@
+ # frozen_string_literal: true
+
+ module BehaviorAnalytics
+   module Hooks
+     class Callback
+       attr_reader :name, :block, :condition
+
+       def initialize(name, condition: nil, &block)
+         @name = name
+         @block = block
+         @condition = condition
+       end
+
+       def call(*args)
+         return unless should_execute?(*args)
+         @block.call(*args)
+       end
+
+       private
+
+       def should_execute?(*args)
+         return true unless @condition
+
+         case @condition
+         when Proc
+           @condition.call(*args)
+         when Hash
+           event = args[0]
+           @condition.all? { |key, value| matches?(event, key, value) }
+         when Symbol, String
+           event = args[0]
+           event[:event_type] == @condition || event[:event_type].to_s == @condition.to_s
+         else
+           true
+         end
+       end
+
+       def matches?(event, key, value)
+         event_value = event[key.to_sym] || event[key.to_s] || get_metadata_value(event, key.to_s)
+         event_value == value || event_value.to_s == value.to_s
+       end
+
+       def get_metadata_value(event, key)
+         metadata = event[:metadata] || event["metadata"] || {}
+         metadata[key.to_sym] || metadata[key.to_s] || metadata[key]
+       end
+     end
+   end
+ end
+
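Callback is self-contained, so its condition matching can be exercised directly; the event hashes below are made up for illustration.

    audit = BehaviorAnalytics::Hooks::Callback.new(:audit, condition: { event_type: :page_view }) do |event|
      puts "audited #{event[:event_name]}"
    end

    audit.call(event_type: :page_view, event_name: "home")   # prints "audited home"
    audit.call(event_type: :click, event_name: "cta")        # condition not met, block skipped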

data/lib/behavior_analytics/hooks/manager.rb
@@ -0,0 +1,106 @@
+ # frozen_string_literal: true
+
+ module BehaviorAnalytics
+   module Hooks
+     class Manager
+       attr_reader :before_track_hooks, :after_track_hooks, :on_error_hooks
+
+       def initialize
+         @before_track_hooks = []
+         @after_track_hooks = []
+         @on_error_hooks = []
+         @mutex = Mutex.new
+       end
+
+       def before_track(condition: nil, &block)
+         @mutex.synchronize do
+           @before_track_hooks << { condition: condition, callback: block }
+         end
+         self
+       end
+
+       def after_track(condition: nil, &block)
+         @mutex.synchronize do
+           @after_track_hooks << { condition: condition, callback: block }
+         end
+         self
+       end
+
+       def on_error(condition: nil, &block)
+         @mutex.synchronize do
+           @on_error_hooks << { condition: condition, callback: block }
+         end
+         self
+       end
+
+       def execute_before_track(event, context)
+         execute_hooks(@before_track_hooks, event, context)
+       end
+
+       def execute_after_track(event, context)
+         execute_hooks(@after_track_hooks, event, context)
+       end
+
+       def execute_on_error(error, event, context)
+         execute_hooks(@on_error_hooks, error, event, context)
+       end
+
+       def clear_all
+         @mutex.synchronize do
+           @before_track_hooks.clear
+           @after_track_hooks.clear
+           @on_error_hooks.clear
+         end
+       end
+
+       private
+
+       def execute_hooks(hooks, *args)
+         hooks.each do |hook|
+           next if hook[:condition] && !evaluate_condition(hook[:condition], *args)
+
+           begin
+             hook[:callback].call(*args)
+           rescue StandardError => e
+             handle_hook_error(e, hook)
+           end
+         end
+       end
+
+       def evaluate_condition(condition, *args)
+         case condition
+         when Proc
+           condition.call(*args)
+         when Hash
+           # For event/context matching
+           event = args[0]
+           condition.all? { |key, value| matches?(event, key, value) }
+         when Symbol, String
+           # Match event type
+           event = args[0]
+           event[:event_type] == condition || event[:event_type].to_s == condition.to_s
+         else
+           true
+         end
+       end
+
+       def matches?(event, key, value)
+         event_value = event[key.to_sym] || event[key.to_s] || get_metadata_value(event, key.to_s)
+         event_value == value || event_value.to_s == value.to_s
+       end
+
+       def get_metadata_value(event, key)
+         metadata = event[:metadata] || event["metadata"] || {}
+         metadata[key.to_sym] || metadata[key.to_s] || metadata[key]
+       end
+
+       def handle_hook_error(error, hook)
+         if defined?(Rails) && Rails.logger
+           Rails.logger.error("BehaviorAnalytics: Hook error: #{error.message}")
+         end
+         # Don't re-raise - allow other hooks to execute
+       end
+     end
+   end
+ end
+
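Finally, a sketch of registering hooks on the manager. How the tracker obtains and invokes its Manager instance is not shown in this diff, so only the Manager API itself is exercised here.

    hooks = BehaviorAnalytics::Hooks::Manager.new

    # Enrich every event before it is persisted
    hooks.before_track do |event, _context|
      event[:metadata] ||= {}
      event[:metadata][:environment] = "production"
    end

    # Run only for purchase events (Symbol/String conditions match event_type)
    hooks.after_track(condition: :purchase) do |event, _context|
      puts "purchase tracked: #{event[:event_name]}"
    end

    event = { event_type: :purchase, event_name: "checkout", metadata: {} }
    hooks.execute_before_track(event, nil)
    hooks.execute_after_track(event, nil)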