behavior_analytics 0.1.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/README.md +146 -5
  3. data/behavior_analytics.gemspec +3 -1
  4. data/db/migrate/002_enhance_behavior_events_v2.rb +46 -0
  5. data/lib/behavior_analytics/analytics/cohorts.rb +242 -0
  6. data/lib/behavior_analytics/analytics/engine.rb +15 -0
  7. data/lib/behavior_analytics/analytics/funnels.rb +176 -0
  8. data/lib/behavior_analytics/analytics/retention.rb +186 -0
  9. data/lib/behavior_analytics/context.rb +38 -2
  10. data/lib/behavior_analytics/debug/inspector.rb +82 -0
  11. data/lib/behavior_analytics/event.rb +7 -1
  12. data/lib/behavior_analytics/export/csv_exporter.rb +102 -0
  13. data/lib/behavior_analytics/export/json_exporter.rb +55 -0
  14. data/lib/behavior_analytics/hooks/callback.rb +50 -0
  15. data/lib/behavior_analytics/hooks/manager.rb +106 -0
  16. data/lib/behavior_analytics/hooks/webhook.rb +114 -0
  17. data/lib/behavior_analytics/integrations/rails/middleware.rb +99 -0
  18. data/lib/behavior_analytics/integrations/rails.rb +123 -2
  19. data/lib/behavior_analytics/jobs/active_event_job.rb +37 -0
  20. data/lib/behavior_analytics/jobs/delayed_event_job.rb +29 -0
  21. data/lib/behavior_analytics/jobs/sidekiq_event_job.rb +37 -0
  22. data/lib/behavior_analytics/observability/metrics.rb +112 -0
  23. data/lib/behavior_analytics/observability/tracer.rb +85 -0
  24. data/lib/behavior_analytics/processors/async_processor.rb +24 -0
  25. data/lib/behavior_analytics/processors/background_job_processor.rb +72 -0
  26. data/lib/behavior_analytics/query.rb +89 -4
  27. data/lib/behavior_analytics/replay/engine.rb +108 -0
  28. data/lib/behavior_analytics/replay/processor.rb +107 -0
  29. data/lib/behavior_analytics/reporting/generator.rb +125 -0
  30. data/lib/behavior_analytics/sampling/strategy.rb +54 -0
  31. data/lib/behavior_analytics/schema/definition.rb +71 -0
  32. data/lib/behavior_analytics/schema/validator.rb +113 -0
  33. data/lib/behavior_analytics/storage/active_record_adapter.rb +183 -10
  34. data/lib/behavior_analytics/storage/elasticsearch_adapter.rb +185 -0
  35. data/lib/behavior_analytics/storage/in_memory_adapter.rb +234 -5
  36. data/lib/behavior_analytics/storage/kafka_adapter.rb +127 -0
  37. data/lib/behavior_analytics/storage/redis_adapter.rb +211 -0
  38. data/lib/behavior_analytics/streaming/event_stream.rb +77 -0
  39. data/lib/behavior_analytics/throttling/limiter.rb +97 -0
  40. data/lib/behavior_analytics/tracker.rb +130 -4
  41. data/lib/behavior_analytics/version.rb +1 -1
  42. data/lib/behavior_analytics.rb +139 -2
  43. metadata +33 -3
@@ -0,0 +1,72 @@
1
# frozen_string_literal: true

module BehaviorAnalytics
  module Processors
    # Enqueues tracked events onto a background-job framework instead of
    # persisting them inline. Supports Sidekiq, Delayed::Job and ActiveJob.
    class BackgroundJobProcessor < AsyncProcessor
      # Fully-qualified job class name per supported adapter.
      JOB_CLASSES = {
        sidekiq: "BehaviorAnalytics::Jobs::SidekiqEventJob",
        delayed_job: "BehaviorAnalytics::Jobs::DelayedEventJob",
        active_job: "BehaviorAnalytics::Jobs::ActiveEventJob"
      }.freeze

      # Events enqueued per job.
      BATCH_SIZE = 100

      # adapter: one of :sidekiq, :delayed_job, :active_job.
      # Raises Error for an unsupported adapter.
      def initialize(storage_adapter:, queue_name: "default", priority: 0, adapter: :active_job)
        super(storage_adapter: storage_adapter, queue_name: queue_name, priority: priority)
        @adapter = adapter.to_sym
        @job_class = resolve_job_class
      end

      # Splits +events+ into batches and enqueues each batch as one job.
      def process_async(events)
        events.each_slice(BATCH_SIZE) { |batch| enqueue_batch(batch) }
      end

      private

      # Loads the job framework for the configured adapter and returns the
      # corresponding job class.
      #
      # NOTE(review): the previous implementation did
      # `return class_name.constantize if defined?(class_name.constantize)`,
      # which only tests whether String responds to #constantize (i.e. whether
      # ActiveSupport is loaded) — not whether the job class exists — and
      # skipped the framework require entirely on that path.
      def resolve_job_class
        class_name = JOB_CLASSES[@adapter]
        raise Error, "Unsupported job adapter: #{@adapter}" unless class_name

        case @adapter
        when :sidekiq     then require_sidekiq
        when :delayed_job then require_delayed_job
        when :active_job  then require_active_job
        end

        Object.const_get(class_name)
      end

      # Serializes the batch and enqueues it via the adapter-specific API.
      def enqueue_batch(batch)
        events_data = batch.map(&:to_h)

        case @adapter
        when :sidekiq
          @job_class.set(queue: @queue_name).perform_async(events_data)
        when :delayed_job
          @job_class.new(events_data, @storage_adapter).delay(queue: @queue_name).perform
        when :active_job
          @job_class.set(queue: @queue_name).perform_later(events_data, @storage_adapter)
        end
      end

      def require_sidekiq
        require "sidekiq" unless defined?(Sidekiq)
      end

      def require_delayed_job
        require "delayed_job" unless defined?(Delayed::Job)
      end

      def require_active_job
        require "active_job" unless defined?(ActiveJob)
      end
    end
  end
end
72
+
@@ -6,6 +6,11 @@ module BehaviorAnalytics
6
6
  @storage_adapter = storage_adapter
7
7
  @context = nil
8
8
  @options = {}
9
+ @metadata_filters = {}
10
+ @aggregations = []
11
+ @group_by_fields = []
12
+ @where_conditions = []
13
+ @having_conditions = []
9
14
  end
10
15
 
11
16
  def for_tenant(tenant_id)
@@ -60,14 +65,94 @@ module BehaviorAnalytics
60
65
  self
61
66
  end
62
67
 
68
# Metadata querying methods

# Restrict results to events whose metadata contains key => value.
# Keys are normalized to strings. Returns self for chaining.
def with_metadata(key, value)
  @metadata_filters.store(key.to_s, value)
  self
end

# Filter by exact request path.
def with_path(path)
  @options.store(:path, path)
  self
end

# Filter by a path pattern (matching semantics are adapter-defined).
def with_path_pattern(pattern)
  @options.store(:path_pattern, pattern)
  self
end

# Filter by HTTP method, normalized to upper case (e.g. :get -> "GET").
def with_method(method)
  @options.store(:method, method.to_s.upcase)
  self
end

# Filter by HTTP response status code.
def with_status_code(code)
  @options.store(:status_code, code)
  self
end
93
+
94
+ # Aggregation methods
95
+ def group_by(field)
96
+ @group_by_fields << field.to_s
97
+ self
98
+ end
99
+
100
+ def aggregate(function, field)
101
+ @aggregations << { function: function.to_s.downcase, field: field.to_s }
102
+ self
103
+ end
104
+
105
+ def distinct(field)
106
+ @options[:distinct] = field.to_s
107
+ self
108
+ end
109
+
110
+ # Advanced filtering
111
+ def where(conditions)
112
+ if conditions.is_a?(Hash)
113
+ @where_conditions << conditions
114
+ else
115
+ @where_conditions << { raw: conditions }
116
+ end
117
+ self
118
+ end
119
+
120
+ def having(conditions)
121
+ if conditions.is_a?(Hash)
122
+ @having_conditions << conditions
123
+ else
124
+ @having_conditions << { raw: conditions }
125
+ end
126
+ self
127
+ end
128
+
129
+ def join(relation)
130
+ @options[:join] = relation
131
+ self
132
+ end
133
+
63
134
# Run the query through the storage adapter and return matching events.
# Raises Error when no valid context has been set.
def execute
  raise Error, "Context must be valid (have at least tenant_id, user_id, or filters)" unless @context&.valid?

  # Fold the accumulated builder state into the adapter options, skipping
  # anything the caller never populated.
  extras = {
    metadata_filters: @metadata_filters,
    aggregations: @aggregations,
    group_by: @group_by_fields,
    where_conditions: @where_conditions,
    having_conditions: @having_conditions
  }.reject { |_key, collected| collected.empty? }

  @storage_adapter.events_for_context(@context, @options.merge(extras))
end

# Count matching events. Note: only metadata and where filters are
# forwarded here — aggregations/group_by/having do not affect the count.
def count
  raise Error, "Context must be valid (have at least tenant_id, user_id, or filters)" unless @context&.valid?

  extras = {
    metadata_filters: @metadata_filters,
    where_conditions: @where_conditions
  }.reject { |_key, collected| collected.empty? }

  @storage_adapter.event_count(@context, @options.merge(extras))
end
72
157
 
73
158
  private
@@ -0,0 +1,108 @@
1
# frozen_string_literal: true

module BehaviorAnalytics
  module Replay
    # Copies (and optionally transforms) historical events from a source
    # storage adapter into a target adapter.
    class Engine
      def initialize(source_adapter, target_adapter = nil)
        @source_adapter = source_adapter
        # Replays in place when no separate target is given.
        @target_adapter = target_adapter || source_adapter
      end

      # Replays events for +context+.
      # options:
      #   :events         - pre-fetched events to replay (skips the source query)
      #   :since/:until   - time window
      #   :event_types    - whitelist of event types
      #   :event_names    - whitelist of event names
      #   :transformation - Proc, Hash (merged in), or method name applied per event
      # Returns a summary hash with :replayed_count and adapter names.
      def replay(context, options = {})
        context.validate!

        event_types = options[:event_types]
        event_names = options[:event_names]

        # BUG FIX: honor a caller-supplied event batch. Previously :events was
        # ignored, so replay_with_batch re-fetched — and re-replayed — the
        # full event set on every batch iteration.
        events = options[:events] || @source_adapter.events_for_context(context, {
          since: options[:since],
          until: options[:until],
          event_type: event_types&.first,
          event_name: event_names&.first
        })

        # Apply the full whitelist filters (the fetch only narrows by the
        # first type/name).
        events = filter_events(events, event_types, event_names)

        if (transformation = options[:transformation])
          events = events.map { |event| apply_transformation(event, transformation) }
        end

        # Only write when replaying into a different store.
        if @target_adapter != @source_adapter
          @target_adapter.save_events(events.map { |e| Event.new(e) })
        end

        {
          replayed_count: events.size,
          source_adapter: @source_adapter.class.name,
          target_adapter: @target_adapter.class.name
        }
      end

      # Replays in fixed-size batches to bound memory usage.
      def replay_with_batch(context, options = {})
        context.validate!

        batch_size = options[:batch_size] || 1000
        results = []
        offset = 0

        loop do
          batch = @source_adapter.events_for_context(
            context, options.merge(limit: batch_size, offset: offset)
          )
          break if batch.empty?

          results << replay(context, options.merge(events: batch))
          offset += batch_size
          break if batch.size < batch_size
        end

        {
          total_batches: results.size,
          total_replayed: results.sum { |r| r[:replayed_count] },
          batches: results
        }
      end

      private

      # Keeps only events matching the given type/name whitelists; nil or
      # empty whitelists pass everything through.
      def filter_events(events, event_types, event_names)
        events.select do |event|
          matches = true
          matches &&= event_types.include?(event[:event_type]) if event_types && !event_types.empty?
          matches &&= event_names.include?(event[:event_name]) if event_names && !event_names.empty?
          matches
        end
      end

      # Applies one transformation to a single event.
      def apply_transformation(event, transformation)
        case transformation
        when Proc then transformation.call(event)
        when Hash then event.merge(transformation)
        when Symbol, String
          # Treated as a method name on the event; no-op when unsupported.
          event.respond_to?(transformation) ? event.send(transformation) : event
        else
          event
        end
      end
    end
  end
end
108
+
@@ -0,0 +1,107 @@
1
# frozen_string_literal: true

module BehaviorAnalytics
  module Replay
    # Re-runs stored events through transformation pipelines or enriches
    # them with additional attributes.
    class Processor
      def initialize(storage_adapter)
        @storage_adapter = storage_adapter
      end

      # Fetches the context's events, runs them through each pipeline step
      # in order, and optionally persists the processed results when
      # options[:save_results] is truthy. Returns a count summary.
      def reprocess(context, options = {})
        context.validate!

        steps = options[:pipeline] || []
        events = fetch_events(context, options)

        processed = steps.reduce(events) { |acc, step| apply_pipeline_step(acc, step) }

        persist(processed) if options[:save_results]

        {
          original_count: events.size,
          processed_count: processed.size,
          pipeline_steps: steps.size
        }
      end

      # Adds +enrichment_data+ attributes to every event in the window.
      # Proc values are called with the original event; Hash values are
      # merged one level deep; anything else is assigned verbatim.
      def enrich_events(context, enrichment_data, options = {})
        context.validate!

        enriched = fetch_events(context, options).map do |event|
          copy = event.dup
          enrichment_data.each do |key, value|
            copy[key] =
              case value
              when Proc then value.call(event)
              when Hash then (copy[key] || {}).merge(value)
              else value
              end
          end
          copy
        end

        persist(enriched) if options[:save_results]

        { enriched_count: enriched.size }
      end

      private

      # Loads events for the context within the optional :since/:until window.
      def fetch_events(context, options)
        @storage_adapter.events_for_context(
          context,
          { since: options[:since], until: options[:until] }
        )
      end

      # Wraps raw hashes in Event objects and saves them back.
      def persist(raw_events)
        @storage_adapter.save_events(raw_events.map { |e| Event.new(e) })
      end

      # Applies a single pipeline step to the whole event list.
      def apply_pipeline_step(events, step)
        case step
        when Proc
          # Per-event map; a step returning nil drops that event.
          events.map { |e| step.call(e) }.compact
        when Hash
          # Each key/value pair becomes a per-event merge; Proc values are
          # computed from the event being merged.
          step.reduce(events) do |result, (key, value)|
            result.map do |event|
              merged_value = value.is_a?(Proc) ? value.call(event) : value
              event.merge(key => merged_value)
            end
          end
        when Symbol, String
          # Treated as a predicate on events that respond to it; events that
          # don't respond are kept.
          events.select { |e| e.respond_to?(step) ? e.send(step) : true }
        else
          events
        end
      end
    end
  end
end
107
+
@@ -0,0 +1,125 @@
1
# frozen_string_literal: true

module BehaviorAnalytics
  module Reporting
    # Builds analytics reports (summary, activity, engagement, feature
    # usage) on top of Analytics::Engine.
    class Generator
      def initialize(storage_adapter)
        @storage_adapter = storage_adapter
        @analytics = Analytics::Engine.new(storage_adapter)
      end

      # Dispatches to the builder for +report_type+ (:summary, :activity,
      # :engagement, :feature_usage). Raises Error for unknown types.
      def generate_report(context, report_type, options = {})
        context.validate!

        case report_type.to_sym
        when :summary       then generate_summary_report(context, options)
        when :activity      then generate_activity_report(context, options)
        when :engagement    then generate_engagement_report(context, options)
        when :feature_usage then generate_feature_usage_report(context, options)
        else
          raise Error, "Unknown report type: #{report_type}"
        end
      end

      # Placeholder for scheduler integration: returns the configuration a
      # job scheduler would persist.
      def schedule_report(context, report_type, schedule, options = {})
        {
          context: context.to_h,
          report_type: report_type,
          schedule: schedule,
          options: options,
          created_at: Time.now
        }
      end

      private

      # Normalizes :date_range / :since / :until options into
      # { since:, until: } bounds. This logic was duplicated in all four
      # report builders.
      def range_bounds(options)
        range = options[:date_range] || (options[:since]..options[:until])
        { since: range.begin, until: range.end }
      end

      # Common report envelope shared by every builder.
      def base_report(type, context, bounds)
        {
          report_type: type,
          generated_at: Time.now,
          context: context.to_h,
          date_range: bounds
        }
      end

      def generate_summary_report(context, options)
        bounds = range_bounds(options)
        base_report("summary", context, bounds).merge(
          metrics: {
            total_events: @analytics.event_count(context, **bounds),
            unique_users: @analytics.unique_users(context, **bounds),
            active_days: @analytics.active_days(context, **bounds),
            engagement_score: @analytics.engagement_score(context, **bounds)
          }
        )
      end

      def generate_activity_report(context, options)
        bounds = range_bounds(options)
        base_report("activity", context, bounds).merge(
          activity_timeline: @analytics.activity_timeline(
            context, bounds.merge(period: options[:period] || :daily)
          )
        )
      end

      def generate_engagement_report(context, options)
        bounds = range_bounds(options)
        base_report("engagement", context, bounds).merge(
          engagement_score: @analytics.engagement_score(context, **bounds),
          breakdown: {
            total_events: @analytics.event_count(context, **bounds),
            unique_users: @analytics.unique_users(context, **bounds),
            active_days: @analytics.active_days(context, **bounds),
            feature_diversity: @analytics.top_features(context, **bounds).keys.size
          }
        )
      end

      def generate_feature_usage_report(context, options)
        bounds = range_bounds(options)
        base_report("feature_usage", context, bounds).merge(
          feature_stats: @analytics.feature_usage_stats(context, **bounds),
          top_features: @analytics.top_features(
            context, bounds.merge(limit: options[:limit] || 10)
          )
        )
      end
    end
  end
end
125
+
@@ -0,0 +1,54 @@
1
# frozen_string_literal: true

require "digest"

module BehaviorAnalytics
  module Sampling
    # Decides whether an event should be recorded, according to a
    # configurable sampling strategy (:random, :deterministic, :adaptive).
    class Strategy
      # rate: fraction of events to keep, 0.0..1.0.
      def initialize(type:, rate: 1.0, options: {})
        @type = type.to_sym
        @rate = rate.to_f
        @options = options
      end

      # Returns true when the event should be kept.
      # Unknown strategy types sample everything.
      def should_sample?(event, context = nil)
        case @type
        when :random        then random_sampling?
        when :deterministic then deterministic_sampling?(event, context)
        when :adaptive      then adaptive_sampling?(event, context)
        else true
        end
      end

      private

      def random_sampling?
        rand < @rate
      end

      # Stable per-key decision: the same tenant/user always gets the same
      # answer. Uses an MD5 digest bucket because Object#hash is seeded per
      # Ruby process, so the previous `key.hash`-based implementation
      # silently changed its sample set on every restart — the opposite of
      # "deterministic".
      def deterministic_sampling?(event, context)
        key = context&.tenant_id || event[:tenant_id] || event[:id]
        bucket = Digest::MD5.hexdigest(key.to_s).to_i(16) % 100
        bucket < (@rate * 100)
      end

      # Simplified adaptive sampling: scales the base rate by a configured
      # volume multiplier, clamped to [0.0, 1.0]. In production the
      # multiplier would be derived from observed event volume.
      def adaptive_sampling?(event, context)
        multiplier = @options[:volume_multiplier] || 1.0
        rand < (@rate * multiplier).clamp(0.0, 1.0)
      end
    end
  end
end
54
+
@@ -0,0 +1,71 @@
1
# frozen_string_literal: true

module BehaviorAnalytics
  module Schema
    # Declarative description of an event schema, built via a small DSL:
    #
    #   Definition.new("page_view", version: "2.0") do
    #     required :tenant_id, :path
    #     field :path, type: :string
    #     validate { |event| ... }
    #   end
    class Definition
      attr_reader :name, :version, :required_fields, :field_types, :custom_rules

      def initialize(name, version: "1.0", &block)
        @name = name
        @version = version
        @required_fields = []
        @field_types = {}
        @custom_rules = []

        # Run the DSL block in this instance's scope.
        instance_eval(&block) if block_given?
      end

      # Declares one or more required field names (stored as strings).
      def required(*fields)
        @required_fields.push(*fields.map(&:to_s))
      end

      # Declares the expected type of a field.
      def field(field_name, type:)
        @field_types[field_name.to_s] = type
      end

      # Registers a custom validation block.
      def validate(&block)
        @custom_rules << block
      end

      # Serializable summary of the schema (rules are counted, not dumped).
      def to_h
        {
          name: @name,
          version: @version,
          required_fields: @required_fields,
          field_types: @field_types,
          custom_rules_count: @custom_rules.size
        }
      end
    end

    # Thread-safe in-memory store of schema definitions keyed by
    # "name@version".
    class Registry
      def initialize
        @schemas = {}
        @mutex = Mutex.new
      end

      def register(schema_definition)
        @mutex.synchronize do
          @schemas[key_for(schema_definition.name, schema_definition.version)] = schema_definition
        end
      end

      # Returns the definition for name/version, or nil when unregistered.
      def get(name, version: "1.0")
        @schemas[key_for(name, version)]
      end

      # Summaries of every registered schema.
      def list
        @schemas.values.map(&:to_h)
      end

      def clear
        @mutex.synchronize { @schemas.clear }
      end

      private

      # Composite lookup key.
      def key_for(name, version)
        "#{name}@#{version}"
      end
    end
  end
end
71
+