behavior_analytics 0.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/behavior_analytics.gemspec +3 -1
  3. data/db/migrate/002_enhance_behavior_events_v2.rb +46 -0
  4. data/lib/behavior_analytics/analytics/cohorts.rb +242 -0
  5. data/lib/behavior_analytics/analytics/engine.rb +15 -0
  6. data/lib/behavior_analytics/analytics/funnels.rb +176 -0
  7. data/lib/behavior_analytics/analytics/retention.rb +186 -0
  8. data/lib/behavior_analytics/debug/inspector.rb +82 -0
  9. data/lib/behavior_analytics/export/csv_exporter.rb +102 -0
  10. data/lib/behavior_analytics/export/json_exporter.rb +55 -0
  11. data/lib/behavior_analytics/hooks/callback.rb +50 -0
  12. data/lib/behavior_analytics/hooks/manager.rb +106 -0
  13. data/lib/behavior_analytics/hooks/webhook.rb +114 -0
  14. data/lib/behavior_analytics/integrations/rails/middleware.rb +99 -0
  15. data/lib/behavior_analytics/integrations/rails.rb +106 -0
  16. data/lib/behavior_analytics/jobs/active_event_job.rb +37 -0
  17. data/lib/behavior_analytics/jobs/delayed_event_job.rb +29 -0
  18. data/lib/behavior_analytics/jobs/sidekiq_event_job.rb +37 -0
  19. data/lib/behavior_analytics/observability/metrics.rb +112 -0
  20. data/lib/behavior_analytics/observability/tracer.rb +85 -0
  21. data/lib/behavior_analytics/processors/async_processor.rb +24 -0
  22. data/lib/behavior_analytics/processors/background_job_processor.rb +72 -0
  23. data/lib/behavior_analytics/query.rb +87 -2
  24. data/lib/behavior_analytics/replay/engine.rb +108 -0
  25. data/lib/behavior_analytics/replay/processor.rb +107 -0
  26. data/lib/behavior_analytics/reporting/generator.rb +125 -0
  27. data/lib/behavior_analytics/sampling/strategy.rb +54 -0
  28. data/lib/behavior_analytics/schema/definition.rb +71 -0
  29. data/lib/behavior_analytics/schema/validator.rb +113 -0
  30. data/lib/behavior_analytics/storage/active_record_adapter.rb +168 -8
  31. data/lib/behavior_analytics/storage/elasticsearch_adapter.rb +175 -0
  32. data/lib/behavior_analytics/storage/in_memory_adapter.rb +214 -2
  33. data/lib/behavior_analytics/storage/kafka_adapter.rb +112 -0
  34. data/lib/behavior_analytics/storage/redis_adapter.rb +175 -0
  35. data/lib/behavior_analytics/streaming/event_stream.rb +77 -0
  36. data/lib/behavior_analytics/throttling/limiter.rb +97 -0
  37. data/lib/behavior_analytics/tracker.rb +130 -4
  38. data/lib/behavior_analytics/version.rb +1 -1
  39. data/lib/behavior_analytics.rb +138 -2
  40. metadata +33 -3
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
module BehaviorAnalytics
  module Integrations
    module Rails
      # Rack middleware that records each handled request as an API-call event.
      #
      # The wrapped application is always called first and its response triplet
      # is returned unchanged. Tracking is best-effort: any error raised while
      # deciding whether to track, or while tracking, is logged and swallowed so
      # it can never break the request.
      class Middleware
        # @param app [#call] the next Rack application in the stack
        def initialize(app)
          @app = app
        end

        # Rack entry point.
        #
        # @param env [Hash] the Rack environment
        # @return [Array] the downstream [status, headers, body] triplet
        def call(env)
          started_at = monotonic_now
          status, headers, response = @app.call(env)

          begin
            track_request(env, status, started_at) if should_track_request?(env)
          rescue StandardError => e
            log_tracking_error(e)
          end

          [status, headers, response]
        end

        private

        # Tracking requires a configured storage adapter, the
        # `track_middleware_requests` switch, and a path that passes the
        # blacklist and whitelist.
        #
        # @param env [Hash] the Rack environment
        # @return [Boolean]
        def should_track_request?(env)
          config = BehaviorAnalytics.configuration
          return false unless config.storage_adapter
          return false unless config.track_middleware_requests

          # PATH_INFO may be absent from a hand-built env; treat it as "".
          path = env["PATH_INFO"].to_s
          !path_blacklisted?(path) && !path_not_whitelisted?(path)
        end

        # @param path [String]
        # @return [Boolean] true when any blacklist pattern matches +path+
        def path_blacklisted?(path)
          blacklist = BehaviorAnalytics.configuration.tracking_blacklist || []
          blacklist.any? { |pattern| matches_pattern?(path, pattern) }
        end

        # @param path [String]
        # @return [Boolean] true when a non-empty whitelist exists and no
        #   pattern in it matches +path+
        def path_not_whitelisted?(path)
          whitelist = BehaviorAnalytics.configuration.tracking_whitelist
          return false unless whitelist && !whitelist.empty?

          whitelist.none? { |pattern| matches_pattern?(path, pattern) }
        end

        # Regexp patterns are matched as-is; String patterns match either as a
        # substring or as a File.fnmatch? glob. Any other type never matches.
        #
        # @param path [String]
        # @param pattern [Regexp, String, Object]
        # @return [Boolean]
        def matches_pattern?(path, pattern)
          case pattern
          when Regexp
            pattern.match?(path)
          when String
            path.include?(pattern) || File.fnmatch?(pattern, path)
          else
            false
          end
        end

        # Sends one API-call event to a fresh tracker. Does nothing unless a
        # valid context can be resolved for the request.
        #
        # @param env [Hash] the Rack environment
        # @param status [Integer] response status returned by the app
        # @param start_time [Float] monotonic timestamp taken before the app ran
        # @return [void]
        def track_request(env, status, start_time)
          duration_ms = ((monotonic_now - start_time) * 1000).to_i

          context = extract_context_from_env(env)
          return unless context&.valid?

          BehaviorAnalytics.create_tracker.track_api_call(
            context: context,
            method: env["REQUEST_METHOD"],
            path: env["PATH_INFO"],
            status_code: status,
            duration_ms: duration_ms,
            ip: env["REMOTE_ADDR"],
            user_agent: env["HTTP_USER_AGENT"]
          )
        end

        # Placeholder for application-specific context resolution. It always
        # returns nil, so no request is tracked until this is customised.
        #
        # @param _env [Hash] the Rack environment (currently unused)
        # @return [nil]
        def extract_context_from_env(_env)
          nil
        end

        # Logs a tracking failure to the application logger, if there is one.
        # The constant is written as ::Rails because a bare `Rails` in this
        # file resolves to BehaviorAnalytics::Integrations::Rails.
        #
        # @param error [StandardError]
        # @return [void]
        def log_tracking_error(error)
          app_logger = ::Rails.logger if defined?(::Rails) && ::Rails.respond_to?(:logger)
          app_logger&.error("BehaviorAnalytics: Middleware tracking error: #{error.message}")
        end

        # Monotonic clock reading in seconds; unaffected by wall-clock changes.
        #
        # @return [Float]
        def monotonic_now
          Process.clock_gettime(Process::CLOCK_MONOTONIC)
        end
      end
    end
  end
end
99
+
@@ -79,12 +79,118 @@ module BehaviorAnalytics
79
79
  context = resolve_tracking_context
80
80
  return false unless context&.valid?
81
81
 
82
+ # Check path whitelist/blacklist
83
+ return false if path_blacklisted?
84
+ return false if path_not_whitelisted?
85
+
86
+ # Check user agent filtering
87
+ return false if bot_user_agent?
88
+
89
+ # Check controller/action filtering
90
+ return false if controller_action_filtered?
91
+
82
92
  true
83
93
  end
84
94
 
95
# Whether the current request path matches any configured blacklist pattern.
# A missing or empty blacklist never excludes a request.
#
# @return [Boolean]
def path_blacklisted?
  denied_patterns = BehaviorAnalytics.configuration.tracking_blacklist
  return false if !denied_patterns || denied_patterns.empty?

  denied_patterns.any? do |denied|
    matches_pattern?(request.path, denied)
  end
end
101
+
102
# Whether a whitelist is configured and the current request path matches none
# of its patterns. A missing or empty whitelist lets every path through.
#
# @return [Boolean]
def path_not_whitelisted?
  allowed_patterns = BehaviorAnalytics.configuration.tracking_whitelist
  return false if !allowed_patterns || allowed_patterns.empty?

  allowed_patterns.none? do |allowed|
    matches_pattern?(request.path, allowed)
  end
end
108
+
109
# Whether the request should be skipped as bot traffic. Only applies when the
# `skip_bots` setting is on; the user agent is compared case-insensitively
# against a fixed list of substrings.
#
# @return [Boolean]
def bot_user_agent?
  if BehaviorAnalytics.configuration.skip_bots
    agent = request.user_agent.to_s.downcase
    %w[bot crawler spider crawlerbot googlebot bingbot yandex].any? do |marker|
      agent.include?(marker)
    end
  else
    false
  end
end
116
+
117
# Whether the current controller or "controller#action" pair is excluded by
# the `controller_action_filters` setting.
#
# The setting is read as a Hash with optional :controllers and :actions
# entries. Entries are compared as strings, so Symbols (e.g. `:health`) work
# as well as Strings, and a single value is accepted in place of an Array.
#
# @return [Boolean]
def controller_action_filtered?
  filters = BehaviorAnalytics.configuration.controller_action_filters || {}
  return false if filters.empty?

  excluded_controllers = Array(filters[:controllers]).map(&:to_s)
  excluded_actions = Array(filters[:actions]).map(&:to_s)

  excluded_controllers.include?(controller_name.to_s) ||
    excluded_actions.include?("#{controller_name}##{action_name}")
end
129
+
130
# Tests a path against one filter pattern.
#
# A Regexp is matched directly. A String matches when it is a substring of the
# path or when it matches the path as a File.fnmatch? glob. Any other pattern
# type never matches.
#
# @param path [String]
# @param pattern [Regexp, String, Object]
# @return [Boolean]
def matches_pattern?(path, pattern)
  return pattern.match?(path) if pattern.is_a?(Regexp)
  return false unless pattern.is_a?(String)

  path.include?(pattern) || File.fnmatch?(pattern, path)
end
140
+
85
141
  def behavior_analytics_enabled?
86
142
  BehaviorAnalytics.configuration.storage_adapter.present?
87
143
  end
144
+
145
# Runs the given block and then records the request as an API-call event.
#
# A StandardError raised by the block is re-raised unchanged; the event's
# metadata then carries `error: true` and the error message (the message key
# is dropped by `compact` when there was no error).
#
# Tracking is best-effort: anything raised while recording the event is logged
# and swallowed, so it can neither replace the block's own exception nor fail
# an otherwise successful request.
#
# NOTE(review): when the block raises, `response.status` is read before any
# error handling has set it — confirm what status ends up on those events.
#
# @yield the work to measure
# @return [Object] the block's return value
def track_behavior_analytics
  # Monotonic clock: elapsed time must not be affected by wall-clock changes.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  error_occurred = false
  error_message = nil

  yield
rescue StandardError => e
  error_occurred = true
  error_message = e.message
  raise
ensure
  begin
    if should_track?
      context = resolve_tracking_context
      if context&.valid?
        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
        duration_ms = (elapsed * 1000).to_i

        # Report requests slower than the configured threshold, if any.
        threshold = BehaviorAnalytics.configuration.slow_query_threshold
        log_slow_query(duration_ms, request.path) if threshold && duration_ms > threshold

        behavior_tracker.track_api_call(
          context: context,
          method: request.method,
          path: request.path,
          status_code: response.status,
          duration_ms: duration_ms,
          ip: request.remote_ip,
          user_agent: request.user_agent,
          session_id: session.id,
          metadata: {
            controller: controller_name,
            action: action_name,
            format: request.format.to_s,
            error: error_occurred,
            error_message: error_message
          }.compact
        )
      end
    end
  rescue StandardError => tracking_error
    # ::Rails is spelled out so a namespaced `Rails` module cannot shadow it.
    app_logger = ::Rails.logger if defined?(::Rails) && ::Rails.respond_to?(:logger)
    app_logger&.error("BehaviorAnalytics: Request tracking error: #{tracking_error.message}")
  end
end
188
+
189
# Writes a warning about a slow request to the application logger, when one is
# available; otherwise does nothing.
#
# NOTE(review): the constant is written as ::Rails because this method appears
# to live under BehaviorAnalytics::Integrations::Rails, where a bare `Rails`
# would resolve to that module instead of the framework — confirm against the
# file header.
#
# @param duration_ms [Integer] elapsed time in milliseconds
# @param path [String] request path that was slow
# @return [void]
def log_slow_query(duration_ms, path)
  return unless defined?(::Rails) && ::Rails.respond_to?(:logger)

  ::Rails.logger&.warn("BehaviorAnalytics: Slow query detected: #{path} took #{duration_ms}ms")
end
88
194
  end
89
195
  end
90
196
  end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "active_job"
5
+ rescue LoadError
6
+ raise LoadError, "ActiveJob is required for ActiveEventJob. Please add 'activejob' to your Gemfile."
7
+ end
8
+
9
module BehaviorAnalytics
  module Jobs
    # Active Job that rebuilds events from their serialized form and saves
    # them through a storage adapter. Failures are logged and re-raised so
    # that `retry_on` can reschedule the job (up to 3 attempts).
    class ActiveEventJob < ActiveJob::Base
      queue_as :default

      # Newer Active Job names this backoff `:polynomially_longer` and
      # deprecates the old `:exponentially_longer` spelling; pick whichever
      # the loaded version understands.
      # NOTE(review): 7.1 is taken as the version that introduced the new
      # name — confirm against the Active Job changelog.
      RETRY_WAIT =
        if ActiveJob.respond_to?(:gem_version) && ActiveJob.gem_version >= Gem::Version.new("7.1")
          :polynomially_longer
        else
          :exponentially_longer
        end

      retry_on StandardError, wait: RETRY_WAIT, attempts: 3

      # @param events_data [Array<Hash>, nil] serialized events; each element
      #   is passed to Event.new
      # @param storage_adapter_class [String, Class, nil] adapter to
      #   instantiate with no arguments; nil uses the configured adapter
      # @raise [StandardError] whatever the adapter or Event.new raised
      def perform(events_data, storage_adapter_class = nil)
        storage_adapter = resolve_storage_adapter(storage_adapter_class)
        events = Array(events_data).map { |data| Event.new(data) }
        storage_adapter.save_events(events)
      rescue StandardError => e
        # The logger may be unset even when Rails is loaded; never let
        # logging hide the original failure.
        rails_logger = ::Rails.logger if defined?(::Rails) && ::Rails.respond_to?(:logger)
        rails_logger&.error("BehaviorAnalytics: Failed to process events: #{e.message}")
        raise
      end

      private

      # @param storage_adapter_class [String, Class, nil]
      # @return [Object] a storage adapter
      def resolve_storage_adapter(storage_adapter_class)
        case storage_adapter_class
        when nil, false
          BehaviorAnalytics.configuration.storage_adapter
        when Class
          storage_adapter_class.new
        else
          storage_adapter_class.to_s.constantize.new
        end
      end
    end
  end
end
37
+
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "delayed_job"
5
+ rescue LoadError
6
+ raise LoadError, "DelayedJob is required for DelayedEventJob. Please add 'delayed_job' to your Gemfile."
7
+ end
8
+
9
module BehaviorAnalytics
  module Jobs
    # Delayed Job payload that rebuilds events from their serialized form and
    # saves them through a storage adapter.
    class DelayedEventJob
      attr_reader :events_data, :storage_adapter

      # @param events_data [Array<Hash>, nil] serialized events; each element
      #   is passed to Event.new
      # @param storage_adapter [#save_events, nil] adapter to write to; nil
      #   uses the configured adapter
      def initialize(events_data, storage_adapter = nil)
        @events_data = events_data
        @storage_adapter = storage_adapter || BehaviorAnalytics.configuration.storage_adapter
      end

      # Builds the events and saves them. Any failure is logged (when a worker
      # logger exists) and re-raised so the job is reported as failed.
      #
      # @raise [StandardError] whatever the adapter or Event.new raised
      def perform
        events = Array(@events_data).map { |data| Event.new(data) }
        @storage_adapter.save_events(events)
      rescue StandardError => e
        log_failure(e)
        raise
      end

      private

      # The worker logger may be nil; logging must never replace the original
      # error with a NoMethodError.
      #
      # @param error [StandardError]
      # @return [void]
      def log_failure(error)
        return unless defined?(Delayed::Worker)

        Delayed::Worker.logger&.error("BehaviorAnalytics: Failed to process events: #{error.message}")
      end
    end
  end
end
29
+
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "sidekiq"
5
+ rescue LoadError
6
+ raise LoadError, "Sidekiq is required for SidekiqEventJob. Please add 'sidekiq' to your Gemfile."
7
+ end
8
+
9
module BehaviorAnalytics
  module Jobs
    # Sidekiq job that rebuilds events from their serialized form and saves
    # them through a storage adapter. Failures are logged and re-raised so
    # Sidekiq can retry (up to 3 times, per `sidekiq_options`).
    #
    # NOTE(review): Sidekiq passes arguments through JSON, so hashes in
    # `events_data` presumably arrive with String keys — confirm that
    # Event.new accepts that shape.
    class SidekiqEventJob
      # Sidekiq::Job is not defined by older Sidekiq releases; fall back to
      # Sidekiq::Worker there.
      include(defined?(Sidekiq::Job) ? Sidekiq::Job : Sidekiq::Worker)

      sidekiq_options retry: 3, backtrace: true

      # @param events_data [Array<Hash>, nil] serialized events; each element
      #   is passed to Event.new
      # @param storage_adapter_class [String, nil] name of an adapter class to
      #   instantiate with no arguments; nil uses the configured adapter
      # @raise [StandardError] whatever the adapter or Event.new raised
      def perform(events_data, storage_adapter_class = nil)
        storage_adapter = resolve_storage_adapter(storage_adapter_class)
        events = Array(events_data).map { |data| Event.new(data) }
        storage_adapter.save_events(events)
      rescue StandardError => e
        Sidekiq.logger.error("BehaviorAnalytics: Failed to process events: #{e.message}")
        raise
      end

      private

      # Looks the class up with Object.const_get rather than
      # String#constantize, because ActiveSupport is not necessarily loaded in
      # a Sidekiq process.
      #
      # @param storage_adapter_class [String, nil]
      # @return [Object] a storage adapter
      def resolve_storage_adapter(storage_adapter_class)
        if storage_adapter_class
          Object.const_get(storage_adapter_class.to_s).new
        else
          BehaviorAnalytics.configuration.storage_adapter
        end
      end
    end
  end
end
37
+
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
module BehaviorAnalytics
  module Observability
    # In-memory registry of counters, gauges and histograms.
    #
    # Each series is identified by a name plus optional tags, flattened into a
    # String key such as "latency[route:/users,verb:GET]". Tags are ordered by
    # key when the key is built, so the same tag set always addresses the same
    # series regardless of the order in which the caller wrote it.
    #
    # Every read and write takes the instance mutex.
    class Metrics
      # Maximum number of samples retained per histogram series.
      HISTOGRAM_WINDOW = 1000

      def initialize
        @counters = {}
        @gauges = {}
        @histograms = {}
        @mutex = Mutex.new
      end

      # Adds +value+ to a counter, creating it at 0 if needed.
      #
      # @param name [#to_s]
      # @param value [Numeric] amount to add
      # @param tags [Hash, nil]
      # @return [Numeric] the counter's new value
      def increment_counter(name, value: 1, tags: {})
        key = metric_key(name, tags)
        @mutex.synchronize { @counters[key] = @counters.fetch(key, 0) + value }
      end

      # Sets a gauge to +value+.
      #
      # @param name [#to_s]
      # @param value [Object]
      # @param tags [Hash, nil]
      # @return [Object] the value that was stored
      def set_gauge(name, value, tags: {})
        key = metric_key(name, tags)
        @mutex.synchronize { @gauges[key] = value }
      end

      # Appends a sample to a histogram, keeping only the most recent
      # HISTOGRAM_WINDOW samples.
      #
      # @param name [#to_s]
      # @param value [Numeric]
      # @param tags [Hash, nil]
      # @return [nil]
      def record_histogram(name, value, tags: {})
        key = metric_key(name, tags)
        @mutex.synchronize do
          samples = (@histograms[key] ||= [])
          samples << value
          overflow = samples.size - HISTOGRAM_WINDOW
          samples.shift(overflow) if overflow.positive?
        end
        nil
      end

      # @param name [#to_s]
      # @param tags [Hash, nil]
      # @return [Numeric] the counter's value, 0 when it was never incremented
      def get_counter(name, tags: {})
        key = metric_key(name, tags)
        @mutex.synchronize { @counters[key] || 0 }
      end

      # @param name [#to_s]
      # @param tags [Hash, nil]
      # @return [Object, nil] the gauge's value, nil when it was never set
      def get_gauge(name, tags: {})
        key = metric_key(name, tags)
        @mutex.synchronize { @gauges[key] }
      end

      # Summary statistics for one histogram series.
      #
      # @param name [#to_s]
      # @param tags [Hash, nil]
      # @return [Hash] :count, :min, :max, :sum, :avg, :p50, :p95, :p99 —
      #   or an empty Hash when the series has no samples
      def get_histogram_stats(name, tags: {})
        key = metric_key(name, tags)
        samples = @mutex.synchronize { @histograms.fetch(key, []).dup }
        summarize(samples)
      end

      # Snapshot of every series. Histograms are summarised straight from
      # their stored samples; keys are never parsed back into name and tags,
      # so names and tag values may contain ":", "," or "[".
      #
      # @return [Hash] { counters: Hash, gauges: Hash, histograms: Hash }
      def all_metrics
        counters, gauges, samples_by_key = @mutex.synchronize do
          [@counters.dup, @gauges.dup, @histograms.transform_values(&:dup)]
        end

        {
          counters: counters,
          gauges: gauges,
          histograms: samples_by_key.transform_values { |samples| summarize(samples) }
        }
      end

      # Removes every counter, gauge and histogram.
      #
      # @return [void]
      def reset
        @mutex.synchronize do
          @counters.clear
          @gauges.clear
          @histograms.clear
        end
      end

      private

      # Builds the storage key for a series.
      #
      # @param name [#to_s]
      # @param tags [Hash, nil]
      # @return [String] the bare name, or "name[k:v,...]" with tags sorted
      #   by key
      def metric_key(name, tags)
        return name.to_s if tags.nil? || tags.empty?

        tag_str = tags.sort_by { |tag, _| tag.to_s }.map { |tag, val| "#{tag}:#{val}" }.join(",")
        "#{name}[#{tag_str}]"
      end

      # @param samples [Array<Numeric>]
      # @return [Hash] summary statistics, empty when there are no samples
      def summarize(samples)
        return {} if samples.empty?

        sorted = samples.sort
        total = samples.sum
        {
          count: samples.size,
          min: sorted.first,
          max: sorted.last,
          sum: total,
          avg: total.to_f / samples.size,
          p50: percentile(sorted, 50),
          p95: percentile(sorted, 95),
          p99: percentile(sorted, 99)
        }
      end

      # Picks the element at the floored rank (percentile / 100) * (size - 1);
      # no interpolation between neighbours.
      #
      # @param sorted_array [Array<Numeric>] samples in ascending order
      # @param percentile [Numeric] 0..100
      # @return [Numeric, nil] nil for an empty array
      def percentile(sorted_array, percentile)
        return nil if sorted_array.empty?

        index = (percentile / 100.0) * (sorted_array.size - 1)
        sorted_array[index.floor]
      end
    end
  end
end
112
+
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "securerandom"
4
+
5
module BehaviorAnalytics
  module Observability
    # Collects timing spans that share one correlation id.
    #
    # A span is a Hash with :id, :name, :start_time, :tags and
    # :correlation_id; finishing it adds :end_time and :duration_ms. The list
    # of spans is guarded by a mutex, and the readers hand back copies of that
    # list (the span Hashes themselves are shared, not copied).
    class Tracer
      attr_reader :correlation_id

      # @param correlation_id [String, nil] id to attach to every span; a new
      #   one is generated when omitted
      def initialize(correlation_id: nil)
        @correlation_id = correlation_id || generate_correlation_id
        @spans = []
        @mutex = Mutex.new
      end

      # Opens a span and registers it with this tracer.
      #
      # @param name [Object] span name
      # @param tags [Hash, nil] initial tags; copied, so later tag merges
      #   never touch (or fail on) the caller's Hash
      # @return [Hash] the span; pass its :id (or the span itself) to
      #   #finish_span / #add_tags_to_span
      def start_span(name, tags: {})
        span = {
          id: SecureRandom.uuid,
          name: name,
          start_time: Time.now,
          tags: (tags || {}).dup,
          correlation_id: @correlation_id
        }

        @mutex.synchronize { @spans << span }
        span
      end

      # Marks a span finished: records :end_time and :duration_ms and merges
      # +tags+ into the span's tags.
      #
      # @param span_id [String, Hash] the span's :id, or the span Hash
      #   returned by #start_span
      # @param tags [Hash] extra tags to merge in
      # @return [Hash, nil] the span's tags, or nil when no such span exists
      def finish_span(span_id, tags: {})
        with_span(span_id) do |span|
          span[:end_time] = Time.now
          span[:duration_ms] = ((span[:end_time] - span[:start_time]) * 1000).to_i
          span[:tags].merge!(tags)
        end
      end

      # Merges +tags+ into an existing span's tags.
      #
      # @param span_id [String, Hash] the span's :id, or the span Hash
      # @param tags [Hash]
      # @return [Hash, nil] the span's tags, or nil when no such span exists
      def add_tags_to_span(span_id, tags)
        with_span(span_id) { |span| span[:tags].merge!(tags) }
      end

      # @return [Array<Hash>] a copy of the span list
      def get_spans
        @mutex.synchronize { @spans.dup }
      end

      # @return [Hash] :correlation_id, :spans (a copy of the span list) and
      #   :total_duration_ms
      def get_trace
        spans = get_spans
        {
          correlation_id: @correlation_id,
          spans: spans,
          total_duration_ms: calculate_total_duration(spans)
        }
      end

      private

      # Finds a span under the lock and yields it; does nothing when the span
      # is unknown.
      #
      # @param span_or_id [String, Hash]
      # @return [Object, nil] the block's value, or nil when not found
      def with_span(span_or_id)
        wanted = span_or_id.is_a?(Hash) ? span_or_id[:id] : span_or_id

        @mutex.synchronize do
          span = @spans.find { |candidate| candidate[:id] == wanted }
          yield span if span
        end
      end

      # @return [String] "<unix seconds>-<16 hex chars>"
      def generate_correlation_id
        "#{Time.now.to_i}-#{SecureRandom.hex(8)}"
      end

      # Milliseconds from the earliest span start to the latest span end.
      # Spans that have not finished count as ending now.
      #
      # @param spans [Array<Hash>]
      # @return [Integer] 0 when there are no spans
      def calculate_total_duration(spans)
        return 0 if spans.empty?

        now = Time.now
        earliest_start = spans.map { |span| span[:start_time] }.min
        latest_end = spans.map { |span| span[:end_time] || now }.max
        ((latest_end - earliest_start) * 1000).to_i
      end
    end
  end
end
85
+
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
module BehaviorAnalytics
  module Processors
    # Base class for processors that hand events to a storage adapter.
    # Subclasses provide #process_async; #process_sync saves immediately.
    class AsyncProcessor
      attr_reader :storage_adapter, :queue_name, :priority

      # @param storage_adapter [#save_events] destination for events
      # @param queue_name [String] queue used by asynchronous subclasses
      # @param priority [Integer] priority kept for asynchronous subclasses
      def initialize(storage_adapter:, queue_name: "default", priority: 0)
        @storage_adapter, @queue_name, @priority = storage_adapter, queue_name, priority
      end

      # Abstract hook; the base class always raises.
      #
      # @param _events [Array] events to process
      # @raise [NotImplementedError] always
      def process_async(_events)
        message = "#{self.class} must implement #process_async"
        raise NotImplementedError, message
      end

      # Saves the events right away through the storage adapter.
      #
      # @param events [Array] events to save
      # @return [Object] whatever the adapter's save_events returns
      def process_sync(events)
        adapter = @storage_adapter
        adapter.save_events(events)
      end
    end
  end
end
24
+
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
module BehaviorAnalytics
  module Processors
    # Hands events to a background-job backend (Sidekiq, Delayed Job or
    # Active Job) in batches.
    #
    # Only the Delayed Job path carries this processor's own storage adapter
    # into the job. The Sidekiq and Active Job paths enqueue the event data
    # alone, so those jobs save through the globally configured adapter.
    #
    # NOTE(review): `priority` is accepted by the constructor but is not
    # applied when enqueuing — confirm whether it should be passed on.
    class BackgroundJobProcessor < AsyncProcessor
      JOB_CLASSES = {
        sidekiq: "BehaviorAnalytics::Jobs::SidekiqEventJob",
        delayed_job: "BehaviorAnalytics::Jobs::DelayedEventJob",
        active_job: "BehaviorAnalytics::Jobs::ActiveEventJob"
      }.freeze

      # Require path of the file defining each job class. Each of those files
      # requires its own backend library and raises a descriptive LoadError
      # when the library is missing.
      JOB_FILES = {
        sidekiq: "behavior_analytics/jobs/sidekiq_event_job",
        delayed_job: "behavior_analytics/jobs/delayed_event_job",
        active_job: "behavior_analytics/jobs/active_event_job"
      }.freeze

      # Number of events placed in one job.
      BATCH_SIZE = 100

      # @param storage_adapter [#save_events]
      # @param queue_name [String]
      # @param priority [Integer]
      # @param adapter [Symbol, String] :sidekiq, :delayed_job or :active_job
      # @raise [Error] when +adapter+ is not one of the supported backends
      # @raise [LoadError] when the backend library is not installed
      def initialize(storage_adapter:, queue_name: "default", priority: 0, adapter: :active_job)
        super(storage_adapter: storage_adapter, queue_name: queue_name, priority: priority)
        @adapter = adapter.to_sym
        @job_class = resolve_job_class
      end

      # Enqueues the events in batches of BATCH_SIZE.
      #
      # @param events [Array<#to_h>, nil] events to enqueue; nil is treated
      #   as no events
      def process_async(events)
        Array(events).each_slice(BATCH_SIZE) do |batch|
          enqueue_batch(batch)
        end
      end

      private

      # Returns the job class for the selected backend, loading its file
      # first when the constant is not defined yet.
      #
      # @return [Class]
      def resolve_job_class
        class_name = JOB_CLASSES.fetch(@adapter) do
          raise Error, "Unsupported job adapter: #{@adapter}"
        end

        require JOB_FILES.fetch(@adapter) unless Object.const_defined?(class_name)
        Object.const_get(class_name)
      end

      # Serializes one batch with #to_h and enqueues it on the backend.
      #
      # @param batch [Array<#to_h>]
      def enqueue_batch(batch)
        events_data = batch.map(&:to_h)

        case @adapter
        when :sidekiq
          @job_class.set(queue: @queue_name).perform_async(events_data)
        when :delayed_job
          @job_class.new(events_data, @storage_adapter).delay(queue: @queue_name).perform
        when :active_job
          # The job's optional second argument is a class name it
          # instantiates itself. An adapter instance is neither a
          # serializable job argument nor something the job could
          # `constantize`, so none is sent and the job uses the configured
          # adapter.
          @job_class.set(queue: @queue_name).perform_later(events_data)
        end
      end
    end
  end
end
72
+