behavior_analytics 0.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/behavior_analytics.gemspec +3 -1
  3. data/db/migrate/002_enhance_behavior_events_v2.rb +46 -0
  4. data/lib/behavior_analytics/analytics/cohorts.rb +242 -0
  5. data/lib/behavior_analytics/analytics/engine.rb +15 -0
  6. data/lib/behavior_analytics/analytics/funnels.rb +176 -0
  7. data/lib/behavior_analytics/analytics/retention.rb +186 -0
  8. data/lib/behavior_analytics/debug/inspector.rb +82 -0
  9. data/lib/behavior_analytics/export/csv_exporter.rb +102 -0
  10. data/lib/behavior_analytics/export/json_exporter.rb +55 -0
  11. data/lib/behavior_analytics/hooks/callback.rb +50 -0
  12. data/lib/behavior_analytics/hooks/manager.rb +106 -0
  13. data/lib/behavior_analytics/hooks/webhook.rb +114 -0
  14. data/lib/behavior_analytics/integrations/rails/middleware.rb +99 -0
  15. data/lib/behavior_analytics/integrations/rails.rb +106 -0
  16. data/lib/behavior_analytics/jobs/active_event_job.rb +37 -0
  17. data/lib/behavior_analytics/jobs/delayed_event_job.rb +29 -0
  18. data/lib/behavior_analytics/jobs/sidekiq_event_job.rb +37 -0
  19. data/lib/behavior_analytics/observability/metrics.rb +112 -0
  20. data/lib/behavior_analytics/observability/tracer.rb +85 -0
  21. data/lib/behavior_analytics/processors/async_processor.rb +24 -0
  22. data/lib/behavior_analytics/processors/background_job_processor.rb +72 -0
  23. data/lib/behavior_analytics/query.rb +87 -2
  24. data/lib/behavior_analytics/replay/engine.rb +108 -0
  25. data/lib/behavior_analytics/replay/processor.rb +107 -0
  26. data/lib/behavior_analytics/reporting/generator.rb +125 -0
  27. data/lib/behavior_analytics/sampling/strategy.rb +54 -0
  28. data/lib/behavior_analytics/schema/definition.rb +71 -0
  29. data/lib/behavior_analytics/schema/validator.rb +113 -0
  30. data/lib/behavior_analytics/storage/active_record_adapter.rb +168 -8
  31. data/lib/behavior_analytics/storage/elasticsearch_adapter.rb +175 -0
  32. data/lib/behavior_analytics/storage/in_memory_adapter.rb +214 -2
  33. data/lib/behavior_analytics/storage/kafka_adapter.rb +112 -0
  34. data/lib/behavior_analytics/storage/redis_adapter.rb +175 -0
  35. data/lib/behavior_analytics/streaming/event_stream.rb +77 -0
  36. data/lib/behavior_analytics/throttling/limiter.rb +97 -0
  37. data/lib/behavior_analytics/tracker.rb +130 -4
  38. data/lib/behavior_analytics/version.rb +1 -1
  39. data/lib/behavior_analytics.rb +138 -2
  40. metadata +33 -3

data/lib/behavior_analytics/analytics/retention.rb
@@ -0,0 +1,186 @@
+ # frozen_string_literal: true
+
+ require "date"
+ require "set"
+ require "time"
+
+ # NOTE: beginning_of_week/_month/_year and Integer#day/#week/#month/#year rely on ActiveSupport core extensions.
+ module BehaviorAnalytics
+   module Analytics
+     class Retention
+       def initialize(storage_adapter)
+         @storage_adapter = storage_adapter
+       end
+
+       def calculate_retention(context, options = {})
+         context.validate!
+
+         period = options[:period] || :day
+         periods = options[:periods] || 30
+
+         # Get all events for the context
+         date_range = options[:date_range] || (options[:since]..options[:until])
+         since = date_range.begin || options[:since]
+         until_date = date_range.end || options[:until]
+
+         all_events = @storage_adapter.events_for_context(
+           context,
+           since: since,
+           until: until_date
+         )
+
+         # Group events by user and calculate first activity
+         user_first_activity = {}
+         user_activity_by_period = {}
+
+         all_events.each do |event|
+           user_id = event[:user_id]
+           next unless user_id
+
+           event_time = parse_time(event[:created_at])
+           period_key = period_key_for_time(event_time, period)
+
+           # Track first activity
+           unless user_first_activity[user_id]
+             user_first_activity[user_id] = {
+               first_period: period_key,
+               first_date: event_time
+             }
+           end
+
+           # Track activity by period
+           user_activity_by_period[user_id] ||= Set.new
+           user_activity_by_period[user_id] << period_key
+         end
+
+         # Calculate retention
+         retention_by_period = {}
+
+         user_first_activity.each do |user_id, first_activity|
+           first_period = first_activity[:first_period]
+           user_periods = user_activity_by_period[user_id] || Set.new
+
+           (0..periods).each do |offset|
+             target_period = offset_period(first_period, offset, period)
+             is_active = user_periods.include?(target_period)
+
+             retention_by_period[offset] ||= {
+               period: offset,
+               total_users: 0,
+               active_users: 0,
+               retention_rate: 0.0
+             }
+
+             retention_by_period[offset][:total_users] += 1
+             retention_by_period[offset][:active_users] += 1 if is_active
+           end
+         end
+
+         # Calculate retention rates
+         retention_by_period.values.each do |data|
+           if data[:total_users] > 0
+             data[:retention_rate] = (data[:active_users].to_f / data[:total_users]) * 100
+           end
+         end
+
+         {
+           retention_curve: retention_by_period.values.sort_by { |d| d[:period] },
+           total_cohort_size: user_first_activity.size,
+           period_type: period
+         }
+       end
+
+       def calculate_churn(context, options = {})
+         context.validate!
+
+         period = options[:period] || :day
+         lookback_periods = options[:lookback_periods] || 7
+
+         # Get recent events
+         since = options[:since] || (Time.now - lookback_periods.send(period))
+         until_date = options[:until] || Time.now
+
+         all_events = @storage_adapter.events_for_context(
+           context,
+           since: since,
+           until: until_date
+         )
+
+         # Group by user and period
+         user_periods = {}
+
+         all_events.each do |event|
+           user_id = event[:user_id]
+           next unless user_id
+
+           event_time = parse_time(event[:created_at])
+           period_key = period_key_for_time(event_time, period)
+
+           user_periods[user_id] ||= Set.new
+           user_periods[user_id] << period_key
+         end
+
+         # Calculate churn (users who were active but stopped)
+         current_period = period_key_for_time(until_date, period)
+         previous_period = offset_period(current_period, -1, period)
+
+         active_in_previous = user_periods.select { |_, periods| periods.include?(previous_period) }.keys
+         active_in_current = user_periods.select { |_, periods| periods.include?(current_period) }.keys
+
+         churned_users = active_in_previous - active_in_current
+
+         {
+           churned_users: churned_users.size,
+           churned_user_ids: churned_users,
+           previous_period_active: active_in_previous.size,
+           current_period_active: active_in_current.size,
+           churn_rate: active_in_previous.empty? ? 0.0 : (churned_users.size.to_f / active_in_previous.size) * 100
+         }
+       end
+
+       private
+
+       def parse_time(time_value)
+         case time_value
+         when Time
+           time_value
+         when String
+           Time.parse(time_value)
+         else
+           Time.now
+         end
+       end
+
+       def period_key_for_time(time, period)
+         case period
+         when :day
+           time.to_date.strftime("%Y-%m-%d")
+         when :week
+           # %G (ISO week-based year) keeps week keys consistent across year boundaries.
+           time.to_date.beginning_of_week.strftime("%G-W%V")
+         when :month
+           time.to_date.beginning_of_month.strftime("%Y-%m")
+         when :year
+           time.to_date.beginning_of_year.strftime("%Y")
+         else
+           time.to_date.strftime("%Y-%m-%d")
+         end
+       end
+
+       def offset_period(period_key, offset, period)
+         # Parse period key and add offset
+         base_date = case period
+                     when :day
+                       Date.parse(period_key)
+                     when :week
+                       year, week = period_key.match(/(\d{4})-W(\d{2})/).captures
+                       Date.commercial(year.to_i, week.to_i, 1)
+                     when :month
+                       Date.parse("#{period_key}-01")
+                     when :year
+                       Date.parse("#{period_key}-01-01")
+                     else
+                       Date.parse(period_key)
+                     end
+
+         offset_date = base_date + offset.send(period)
+         period_key_for_time(offset_date.to_time, period)
+       end
+     end
+   end
+ end
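
For orientation, a minimal usage sketch of the retention API above. The `adapter` and `context` objects are placeholders for a storage adapter responding to `events_for_context` and a context responding to `validate!`, as used throughout the gem; the printed figures are illustrative.

    retention = BehaviorAnalytics::Analytics::Retention.new(adapter)

    # Weekly retention over a 12-week horizon for one cohort.
    report = retention.calculate_retention(context, period: :week, periods: 12)
    report[:retention_curve].each do |row|
      puts "week +#{row[:period]}: #{row[:retention_rate].round(1)}% of #{row[:total_users]} users"
    end

    # Day-over-day churn across the last 7 days.
    churn = retention.calculate_churn(context, period: :day, lookback_periods: 7)
    puts "churn rate: #{churn[:churn_rate].round(1)}% (#{churn[:churned_users]} users)"
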

data/lib/behavior_analytics/debug/inspector.rb
@@ -0,0 +1,82 @@
+ # frozen_string_literal: true
+
+ require "set"
+
+ module BehaviorAnalytics
+   module Debug
+     class Inspector
+       def initialize(tracker)
+         @tracker = tracker
+       end
+
+       def inspect_event(event_id, context)
+         context.validate!
+
+         events = @tracker.query
+                          .for_tenant(context.tenant_id)
+                          .where(id: event_id)
+                          .execute
+
+         return nil if events.empty?
+
+         event = events.first
+         {
+           event: event,
+           context: context.to_h,
+           metadata: event[:metadata] || {},
+           related_events: find_related_events(event, context)
+         }
+       end
+
+       def inspect_context(context, options = {})
+         context.validate!
+
+         {
+           context: context.to_h,
+           event_count: @tracker.analytics.event_count(context, options),
+           unique_users: @tracker.analytics.unique_users(context, options),
+           active_days: @tracker.analytics.active_days(context, options),
+           recent_events: @tracker.query
+                                  .for_tenant(context.tenant_id)
+                                  .limit(10)
+                                  .execute
+         }
+       end
+
+       def inspect_buffer
+         {
+           buffer_size: @tracker.instance_variable_get(:@buffer)&.size || 0,
+           batch_size: @tracker.batch_size,
+           flush_interval: @tracker.flush_interval
+         }
+       end
+
+       private
+
+       def find_related_events(event, context)
+         related = []
+
+         # Find events with same session
+         if event[:session_id]
+           related.concat(@tracker.query
+                                  .for_tenant(context.tenant_id)
+                                  .where(session_id: event[:session_id])
+                                  .limit(10)
+                                  .execute)
+         end
+
+         # Find events with same correlation_id
+         if event[:correlation_id]
+           related.concat(@tracker.query
+                                  .for_tenant(context.tenant_id)
+                                  .where(correlation_id: event[:correlation_id])
+                                  .limit(10)
+                                  .execute)
+         end
+
+         related.uniq { |e| e[:id] }
+       end
+     end
+   end
+ end
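
A sketch of how the inspector might be used in a console session; `tracker` stands in for a configured BehaviorAnalytics tracker, `context` for a validated tenant context, and the event id is hypothetical.

    inspector = BehaviorAnalytics::Debug::Inspector.new(tracker)

    # Buffer health: current in-memory buffer size plus the tracker's batching settings.
    inspector.inspect_buffer

    # Drill into one event and pull events sharing its session or correlation_id.
    detail = inspector.inspect_event("evt_123", context)
    detail && detail[:related_events].map { |e| e[:event_type] }

    # Tenant-level summary: counts, unique users, active days, and the 10 most recent events.
    inspector.inspect_context(context)
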

data/lib/behavior_analytics/export/csv_exporter.rb
@@ -0,0 +1,102 @@
+ # frozen_string_literal: true
+
+ require "csv"
+ require "set"
+
+ module BehaviorAnalytics
+   module Export
+     class CsvExporter
+       def initialize(storage_adapter)
+         @storage_adapter = storage_adapter
+       end
+
+       def export(context, options = {})
+         context.validate!
+
+         events = @storage_adapter.events_for_context(context, options)
+         return "" if events.empty?
+
+         # Determine columns
+         columns = options[:columns] || extract_columns(events)
+
+         CSV.generate(headers: true) do |csv|
+           csv << columns
+
+           events.each do |event|
+             row = columns.map do |column|
+               get_value(event, column)
+             end
+             csv << row
+           end
+         end
+       end
+
+       def export_to_file(context, file_path, options = {})
+         csv_content = export(context, options)
+         File.write(file_path, csv_content)
+         file_path
+       end
+
+       def stream_export(context, options = {}, &block)
+         context.validate!
+
+         batch_size = options[:batch_size] || 1000
+         offset = 0
+         columns = options[:columns]
+         first_batch = true
+
+         loop do
+           batch_options = options.merge(limit: batch_size, offset: offset)
+           events = @storage_adapter.events_for_context(context, batch_options)
+
+           break if events.empty?
+
+           if first_batch
+             # Fix the column set on the first batch so every row lines up with the header.
+             columns ||= extract_columns(events)
+             yield CSV.generate_line(columns)
+             first_batch = false
+           end
+
+           events.each do |event|
+             row = columns.map { |col| get_value(event, col) }
+             yield CSV.generate_line(row)
+           end
+
+           offset += batch_size
+           break if events.size < batch_size
+         end
+       end
+
+       private
+
+       def extract_columns(events)
+         return [] if events.empty?
+
+         columns = Set.new
+         events.each do |event|
+           event.keys.each { |key| columns << key.to_s }
+           if event[:metadata] && event[:metadata].is_a?(Hash)
+             event[:metadata].keys.each { |key| columns << "metadata.#{key}" }
+           end
+         end
+
+         columns.sort
+       end
+
+       def get_value(event, column)
+         if column.include?(".")
+           parts = column.split(".", 2)
+           if parts[0] == "metadata"
+             metadata = event[:metadata] || event["metadata"] || {}
+             metadata[parts[1].to_sym] || metadata[parts[1]] || ""
+           else
+             event[parts[0].to_sym] || event[parts[0].to_s] || ""
+           end
+         else
+           event[column.to_sym] || event[column.to_s] || ""
+         end
+       end
+     end
+   end
+ end
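
A usage sketch for the CSV exporter, reusing the same placeholder `adapter` and `context`; the column list and file paths are illustrative. Metadata keys are flattened into `metadata.<key>` columns by `extract_columns`.

    exporter = BehaviorAnalytics::Export::CsvExporter.new(adapter)

    # One-shot export with an explicit column order.
    exporter.export_to_file(context, "events.csv",
                            columns: %w[id user_id event_type created_at metadata.plan])

    # Streaming export writes the file batch by batch instead of building the whole CSV in memory.
    File.open("events_stream.csv", "w") do |file|
      exporter.stream_export(context, batch_size: 500) { |line| file.write(line) }
    end
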

data/lib/behavior_analytics/export/json_exporter.rb
@@ -0,0 +1,55 @@
+ # frozen_string_literal: true
+
+ require "json"
+
+ module BehaviorAnalytics
+   module Export
+     class JsonExporter
+       def initialize(storage_adapter)
+         @storage_adapter = storage_adapter
+       end
+
+       def export(context, options = {})
+         context.validate!
+
+         events = @storage_adapter.events_for_context(context, options)
+         JSON.pretty_generate(events)
+       end
+
+       def export_to_file(context, file_path, options = {})
+         json_content = export(context, options)
+         File.write(file_path, json_content)
+         file_path
+       end
+
+       def stream_export(context, options = {}, &block)
+         context.validate!
+
+         batch_size = options[:batch_size] || 1000
+         offset = 0
+         first_item = true
+
+         yield "["
+
+         loop do
+           batch_options = options.merge(limit: batch_size, offset: offset)
+           events = @storage_adapter.events_for_context(context, batch_options)
+
+           break if events.empty?
+
+           events.each_with_index do |event, index|
+             yield "," unless first_item && index == 0
+             yield JSON.generate(event)
+             first_item = false
+           end
+
+           offset += batch_size
+           break if events.size < batch_size
+         end
+
+         yield "]"
+       end
+     end
+   end
+ end
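
And the JSON counterpart, again with placeholder objects and paths; `stream_export` yields the array brackets, commas, and individual events as fragments, so a large export never sits fully in memory.

    exporter = BehaviorAnalytics::Export::JsonExporter.new(adapter)

    File.open("events.json", "w") do |file|
      exporter.stream_export(context, batch_size: 1000) { |chunk| file.write(chunk) }
    end
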

data/lib/behavior_analytics/hooks/callback.rb
@@ -0,0 +1,50 @@
+ # frozen_string_literal: true
+
+ module BehaviorAnalytics
+   module Hooks
+     class Callback
+       attr_reader :name, :block, :condition
+
+       def initialize(name, condition: nil, &block)
+         @name = name
+         @block = block
+         @condition = condition
+       end
+
+       def call(*args)
+         return unless should_execute?(*args)
+         @block.call(*args)
+       end
+
+       private
+
+       def should_execute?(*args)
+         return true unless @condition
+
+         case @condition
+         when Proc
+           @condition.call(*args)
+         when Hash
+           event = args[0]
+           @condition.all? { |key, value| matches?(event, key, value) }
+         when Symbol, String
+           event = args[0]
+           event[:event_type] == @condition || event[:event_type].to_s == @condition.to_s
+         else
+           true
+         end
+       end
+
+       def matches?(event, key, value)
+         event_value = event[key.to_sym] || event[key.to_s] || get_metadata_value(event, key.to_s)
+         event_value == value || event_value.to_s == value.to_s
+       end
+
+       def get_metadata_value(event, key)
+         metadata = event[:metadata] || event["metadata"] || {}
+         metadata[key.to_sym] || metadata[key.to_s] || metadata[key]
+       end
+     end
+   end
+ end
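
A sketch of the three condition forms `Callback` accepts: a Proc, a Hash matched against top-level event fields (falling back to metadata keys), and an event-type Symbol or String. The events and plan values here are made up.

    log_all = BehaviorAnalytics::Hooks::Callback.new(:log_all) do |event, _context|
      puts "tracked #{event[:event_type]}"
    end

    pro_only = BehaviorAnalytics::Hooks::Callback.new(
      :pro_only,
      condition: { plan: "pro" }  # matches event[:plan] or, failing that, metadata[:plan]
    ) { |event, _context| puts "pro-plan event #{event[:id]}" }

    signup_only = BehaviorAnalytics::Hooks::Callback.new(:signup_only, condition: :signup) do |event, _context|
      puts "signup recorded"
    end

    log_all.call({ event_type: :page_view, metadata: {} }, nil)
    pro_only.call({ event_type: :purchase, id: 42, metadata: { plan: "pro" } }, nil)
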

data/lib/behavior_analytics/hooks/manager.rb
@@ -0,0 +1,106 @@
+ # frozen_string_literal: true
+
+ module BehaviorAnalytics
+   module Hooks
+     class Manager
+       attr_reader :before_track_hooks, :after_track_hooks, :on_error_hooks
+
+       def initialize
+         @before_track_hooks = []
+         @after_track_hooks = []
+         @on_error_hooks = []
+         @mutex = Mutex.new
+       end
+
+       def before_track(condition: nil, &block)
+         @mutex.synchronize do
+           @before_track_hooks << { condition: condition, callback: block }
+         end
+         self
+       end
+
+       def after_track(condition: nil, &block)
+         @mutex.synchronize do
+           @after_track_hooks << { condition: condition, callback: block }
+         end
+         self
+       end
+
+       def on_error(condition: nil, &block)
+         @mutex.synchronize do
+           @on_error_hooks << { condition: condition, callback: block }
+         end
+         self
+       end
+
+       def execute_before_track(event, context)
+         execute_hooks(@before_track_hooks, event, context)
+       end
+
+       def execute_after_track(event, context)
+         execute_hooks(@after_track_hooks, event, context)
+       end
+
+       def execute_on_error(error, event, context)
+         execute_hooks(@on_error_hooks, error, event, context)
+       end
+
+       def clear_all
+         @mutex.synchronize do
+           @before_track_hooks.clear
+           @after_track_hooks.clear
+           @on_error_hooks.clear
+         end
+       end
+
+       private
+
+       def execute_hooks(hooks, *args)
+         hooks.each do |hook|
+           next if hook[:condition] && !evaluate_condition(hook[:condition], *args)
+
+           begin
+             hook[:callback].call(*args)
+           rescue StandardError => e
+             handle_hook_error(e, hook)
+           end
+         end
+       end
+
+       def evaluate_condition(condition, *args)
+         case condition
+         when Proc
+           condition.call(*args)
+         when Hash
+           # For event/context matching
+           event = args[0]
+           condition.all? { |key, value| matches?(event, key, value) }
+         when Symbol, String
+           # Match event type
+           event = args[0]
+           event[:event_type] == condition || event[:event_type].to_s == condition.to_s
+         else
+           true
+         end
+       end
+
+       def matches?(event, key, value)
+         event_value = event[key.to_sym] || event[key.to_s] || get_metadata_value(event, key.to_s)
+         event_value == value || event_value.to_s == value.to_s
+       end
+
+       def get_metadata_value(event, key)
+         metadata = event[:metadata] || event["metadata"] || {}
+         metadata[key.to_sym] || metadata[key.to_s] || metadata[key]
+       end
+
+       def handle_hook_error(error, hook)
+         if defined?(Rails) && Rails.logger
+           Rails.logger.error("BehaviorAnalytics: Hook error: #{error.message}")
+         end
+         # Don't re-raise - allow other hooks to execute
+       end
+     end
+   end
+ end
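
A sketch of wiring lifecycle hooks through the manager; the events are illustrative. Hook errors are logged (when Rails is present) and swallowed, so one failing hook does not block tracking or the other hooks.

    hooks = BehaviorAnalytics::Hooks::Manager.new

    hooks.before_track { |event, _context| event[:metadata] ||= {} }
    hooks.after_track(condition: :purchase) { |event, _context| puts "purchase #{event[:id]} tracked" }
    hooks.on_error { |error, event, _context| warn "tracking #{event[:id]} failed: #{error.message}" }

    event = { event_type: :purchase, id: 1 }
    hooks.execute_before_track(event, nil)
    hooks.execute_after_track(event, nil)
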

data/lib/behavior_analytics/hooks/webhook.rb
@@ -0,0 +1,114 @@
+ # frozen_string_literal: true
+
+ require "net/http"
+ require "uri"
+ require "json"
+ require "time"
+
+ module BehaviorAnalytics
+   module Hooks
+     class Webhook
+       attr_reader :url, :secret, :filter, :retry_count, :timeout
+
+       def initialize(url:, secret: nil, filter: nil, retry_count: 3, timeout: 5)
+         @url = URI(url)
+         @secret = secret
+         @filter = filter
+         @retry_count = retry_count
+         @timeout = timeout
+         @mutex = Mutex.new
+       end
+
+       def deliver(event, context = nil)
+         return unless should_deliver?(event)
+
+         payload = build_payload(event, context)
+         signature = generate_signature(payload) if @secret
+
+         headers = {
+           "Content-Type" => "application/json",
+           "User-Agent" => "BehaviorAnalytics/2.0"
+         }
+         headers["X-Webhook-Signature"] = signature if signature
+
+         deliver_with_retry(payload, headers)
+       end
+
+       private
+
+       def should_deliver?(event)
+         return true unless @filter
+
+         case @filter
+         when Proc
+           @filter.call(event)
+         when Hash
+           @filter.all? { |key, value| matches?(event, key, value) }
+         when Symbol, String
+           event[:event_type] == @filter || event[:event_type].to_s == @filter.to_s
+         else
+           true
+         end
+       end
+
+       def matches?(event, key, value)
+         event_value = event[key.to_sym] || event[key.to_s] || get_metadata_value(event, key.to_s)
+         event_value == value || event_value.to_s == value.to_s
+       end
+
+       def get_metadata_value(event, key)
+         metadata = event[:metadata] || event["metadata"] || {}
+         metadata[key.to_sym] || metadata[key.to_s] || metadata[key]
+       end
+
+       def build_payload(event, context)
+         {
+           event: event.is_a?(Hash) ? event : event.to_h,
+           context: context ? (context.is_a?(Hash) ? context : context.to_h) : nil,
+           timestamp: Time.now.iso8601
+         }
+       end
+
+       def generate_signature(payload)
+         require "openssl" unless defined?(OpenSSL)
+         payload_json = JSON.generate(payload)
+         OpenSSL::HMAC.hexdigest("SHA256", @secret, payload_json)
+       end
+
+       def deliver_with_retry(payload, headers)
+         last_error = nil
+
+         (@retry_count + 1).times do |attempt|
+           begin
+             http = Net::HTTP.new(@url.host, @url.port)
+             http.use_ssl = @url.scheme == "https"
+             http.read_timeout = @timeout
+             http.open_timeout = @timeout
+
+             # request_uri preserves any query string and falls back to "/" when the path is empty.
+             request = Net::HTTP::Post.new(@url.request_uri)
+             headers.each { |key, value| request[key] = value }
+             request.body = JSON.generate(payload)
+
+             response = http.request(request)
+
+             if response.code.to_i >= 200 && response.code.to_i < 300
+               return { success: true, response_code: response.code.to_i }
+             else
+               last_error = "HTTP #{response.code}: #{response.message}"
+             end
+           rescue StandardError => e
+             last_error = e.message
+           end
+
+           # Back off before retrying, whether the failure was an exception or a non-2xx response.
+           sleep(calculate_backoff(attempt)) if attempt < @retry_count
+         end
+
+         { success: false, error: last_error }
+       end
+
+       def calculate_backoff(attempt)
+         # Exponential backoff: 1s, 2s, 4s, etc.
+         2 ** attempt
+       end
+     end
+   end
+ end
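
Finally, a delivery sketch for the webhook hook; the endpoint and secret are placeholders. With a secret configured, the JSON payload is signed with HMAC-SHA256 and the signature sent in the X-Webhook-Signature header; delivery retries with exponential backoff and returns a success/error hash rather than raising.

    webhook = BehaviorAnalytics::Hooks::Webhook.new(
      url: "https://hooks.example.com/analytics",    # placeholder endpoint
      secret: ENV["ANALYTICS_WEBHOOK_SECRET"],       # placeholder secret
      filter: :purchase,                             # only purchase events are delivered
      retry_count: 3,
      timeout: 5
    )

    result = webhook.deliver({ event_type: :purchase, metadata: { amount_cents: 4999 } })
    result[:success] || warn("webhook failed: #{result[:error]}")
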