behavior_analytics 0.1.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +146 -5
  3. data/behavior_analytics.gemspec +3 -1
  4. data/db/migrate/002_enhance_behavior_events_v2.rb +46 -0
  5. data/lib/behavior_analytics/analytics/cohorts.rb +242 -0
  6. data/lib/behavior_analytics/analytics/engine.rb +15 -0
  7. data/lib/behavior_analytics/analytics/funnels.rb +176 -0
  8. data/lib/behavior_analytics/analytics/retention.rb +186 -0
  9. data/lib/behavior_analytics/context.rb +38 -2
  10. data/lib/behavior_analytics/debug/inspector.rb +82 -0
  11. data/lib/behavior_analytics/event.rb +7 -1
  12. data/lib/behavior_analytics/export/csv_exporter.rb +102 -0
  13. data/lib/behavior_analytics/export/json_exporter.rb +55 -0
  14. data/lib/behavior_analytics/hooks/callback.rb +50 -0
  15. data/lib/behavior_analytics/hooks/manager.rb +106 -0
  16. data/lib/behavior_analytics/hooks/webhook.rb +114 -0
  17. data/lib/behavior_analytics/integrations/rails/middleware.rb +99 -0
  18. data/lib/behavior_analytics/integrations/rails.rb +123 -2
  19. data/lib/behavior_analytics/jobs/active_event_job.rb +37 -0
  20. data/lib/behavior_analytics/jobs/delayed_event_job.rb +29 -0
  21. data/lib/behavior_analytics/jobs/sidekiq_event_job.rb +37 -0
  22. data/lib/behavior_analytics/observability/metrics.rb +112 -0
  23. data/lib/behavior_analytics/observability/tracer.rb +85 -0
  24. data/lib/behavior_analytics/processors/async_processor.rb +24 -0
  25. data/lib/behavior_analytics/processors/background_job_processor.rb +72 -0
  26. data/lib/behavior_analytics/query.rb +89 -4
  27. data/lib/behavior_analytics/replay/engine.rb +108 -0
  28. data/lib/behavior_analytics/replay/processor.rb +107 -0
  29. data/lib/behavior_analytics/reporting/generator.rb +125 -0
  30. data/lib/behavior_analytics/sampling/strategy.rb +54 -0
  31. data/lib/behavior_analytics/schema/definition.rb +71 -0
  32. data/lib/behavior_analytics/schema/validator.rb +113 -0
  33. data/lib/behavior_analytics/storage/active_record_adapter.rb +183 -10
  34. data/lib/behavior_analytics/storage/elasticsearch_adapter.rb +185 -0
  35. data/lib/behavior_analytics/storage/in_memory_adapter.rb +234 -5
  36. data/lib/behavior_analytics/storage/kafka_adapter.rb +127 -0
  37. data/lib/behavior_analytics/storage/redis_adapter.rb +211 -0
  38. data/lib/behavior_analytics/streaming/event_stream.rb +77 -0
  39. data/lib/behavior_analytics/throttling/limiter.rb +97 -0
  40. data/lib/behavior_analytics/tracker.rb +130 -4
  41. data/lib/behavior_analytics/version.rb +1 -1
  42. data/lib/behavior_analytics.rb +139 -2
  43. metadata +33 -3
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
module BehaviorAnalytics
  module Schema
    # Validates event hashes against a schema definition.
    #
    # The schema object must respond to +required_fields+, +field_types+ and
    # +custom_rules+. Each of them may return nil, in which case that
    # validation stage is skipped.
    #
    # Custom rules are either callables or hashes:
    #
    # * A Proc receives the event. It may return a Hash (used as-is, expected
    #   to carry +:valid+ and optionally +:error+) or any truthy/falsy value.
    # * A Hash maps field names to conditions. A condition is a literal
    #   (compared with ==), a Proc (called with the field value), or a Hash of
    #   operators: +eq+, +ne+/+not+, +gt+, +gte+, +lt+, +lte+, +in+, +not_in+
    #   and +matches+. All operators of a condition must hold.
    class Validator
      # Operator names that map directly onto Ruby comparison methods.
      ORDERING_OPERATORS = { gt: :>, gte: :>=, lt: :<, lte: :<= }.freeze

      # @param schema_definition [#required_fields, #field_types, #custom_rules]
      def initialize(schema_definition)
        @schema = schema_definition
      end

      # Runs every validation stage against +event+.
      #
      # @param event [Hash] event data; fields may use symbol or string keys
      # @return [Hash] +{ valid: Boolean, errors: Array<String> }+
      def validate(event)
        errors = missing_field_errors(event) + type_errors(event) + rule_errors(event)
        { valid: errors.empty?, errors: errors }
      end

      private

      # Messages for required fields that are absent under both key styles.
      def missing_field_errors(event)
        (@schema.required_fields || [])
          .reject { |field| event.key?(field.to_sym) || event.key?(field.to_s) }
          .map { |field| "Missing required field: #{field}" }
      end

      # Messages for present (non-nil) fields whose value has the wrong type.
      def type_errors(event)
        (@schema.field_types || {}).each_with_object([]) do |(field, expected_type), errors|
          value = field_value(event, field)
          next if value.nil? # Optional fields can be nil
          next if matches_type?(value, expected_type)

          errors << "Field #{field} has wrong type. Expected #{expected_type}, got #{value.class}"
        end
      end

      # Messages for custom rules that did not pass.
      def rule_errors(event)
        (@schema.custom_rules || []).each_with_object([]) do |rule, errors|
          result = evaluate_rule(rule, event)
          next if result[:valid]

          # Parenthesised on purpose: `errors << a || b` would parse as
          # `(errors << a) || b` and push nil when no error text is given.
          errors << (result[:error] || "Validation failed for rule: #{rule}")
        end
      end

      # Reads +field+ from +event+, preferring the symbol key and falling back
      # to the string key only when the symbol lookup yields nil. A literal
      # +false+ stored under the symbol key is therefore preserved.
      def field_value(event, field)
        value = event[field.to_sym]
        value.nil? ? event[field.to_s] : value
      end

      # Unknown type specifiers are accepted (no check is performed).
      def matches_type?(value, expected_type)
        case expected_type
        when :string
          value.is_a?(String)
        when :integer
          value.is_a?(Integer)
        when :float, :number
          value.is_a?(Numeric)
        when :boolean
          value.is_a?(TrueClass) || value.is_a?(FalseClass)
        when :hash, :object
          value.is_a?(Hash)
        when :array
          value.is_a?(Array)
        when Class
          value.is_a?(expected_type)
        else
          true
        end
      end

      # Evaluates one custom rule and normalises the outcome to a Hash with a
      # +:valid+ key. Any StandardError raised while evaluating the rule is
      # reported as a validation failure carrying the exception message.
      def evaluate_rule(rule, event)
        case rule
        when Proc
          outcome = rule.call(event)
          if outcome.is_a?(Hash)
            outcome
          elsif outcome
            { valid: true }
          else
            { valid: false, error: "Rule validation failed" }
          end
        when Hash
          # Rule format: { field: condition } or { field: { operator: value } }
          failed = rule.reject do |field, condition|
            evaluate_condition(field_value(event, field), condition)
          end.keys

          if failed.empty?
            { valid: true }
          else
            { valid: false, error: "Rule validation failed for field(s): #{failed.join(', ')}" }
          end
        else
          { valid: true }
        end
      rescue StandardError => e
        { valid: false, error: e.message }
      end

      # Checks a single field value against a condition (see class docs).
      def evaluate_condition(value, condition)
        case condition
        when Hash
          condition.all? { |operator, expected| apply_operator(value, operator, expected) }
        when Proc
          condition.call(value)
        else
          value == condition
        end
      end

      # Applies one named operator.
      #
      # @raise [ArgumentError] when the operator name is not recognised
      def apply_operator(value, operator, expected)
        name = operator.to_sym

        case name
        when :eq then value == expected
        when :ne, :not then value != expected
        when :in then expected.include?(value)
        when :not_in then !expected.include?(value)
        when :matches then !value.nil? && value.to_s.match?(expected)
        else
          comparison = ORDERING_OPERATORS.fetch(name) do
            raise ArgumentError, "Unknown condition operator: #{operator}"
          end
          compare(value, comparison, expected)
        end
      end

      # Ordering comparison that treats nil and incomparable operands as a
      # failed condition instead of raising.
      def compare(value, comparison, expected)
        return false if value.nil?

        value.public_send(comparison, expected) ? true : false
      rescue ArgumentError, NoMethodError, TypeError
        false
      end
    end
  end
end
113
+
@@ -39,19 +39,103 @@ module BehaviorAnalytics
39
39
  def events_for_context(context, options = {})
40
40
  context.validate!
41
41
 
42
- query = @model_class.where(tenant_id: context.tenant_id)
43
- query = query.where(user_id: context.user_id) if context.user_id
44
- query = query.where(user_type: context.user_type) if context.user_type
42
+ query = build_base_query(context, options)
43
+
44
+ # If no tenant_id, query all events (for non-multi-tenant systems)
45
+ # This allows tracking without tenant isolation
46
+
47
+ # Apply metadata filters
48
+ if options[:metadata_filters]
49
+ options[:metadata_filters].each do |key, value|
50
+ query = query.where("metadata->>? = ?", key, value.to_s)
51
+ end
52
+ end
45
53
 
46
- query = query.where("created_at >= ?", options[:since]) if options[:since]
47
- query = query.where("created_at <= ?", options[:until]) if options[:until]
48
- query = query.where(event_name: options[:event_name]) if options[:event_name]
49
- query = query.where(event_type: options[:event_type].to_s) if options[:event_type]
54
+ # Apply path filtering
55
+ if options[:path]
56
+ if column_exists?(:path)
57
+ query = query.where(path: options[:path])
58
+ else
59
+ query = query.where("metadata->>'path' = ?", options[:path])
60
+ end
61
+ end
62
+
63
+ if options[:path_pattern]
64
+ if column_exists?(:path)
65
+ query = query.where("path LIKE ?", options[:path_pattern])
66
+ else
67
+ query = query.where("metadata->>'path' LIKE ?", options[:path_pattern])
68
+ end
69
+ end
70
+
71
+ # Apply method filtering
72
+ if options[:method]
73
+ if column_exists?(:method)
74
+ query = query.where(method: options[:method])
75
+ else
76
+ query = query.where("metadata->>'method' = ?", options[:method])
77
+ end
78
+ end
79
+
80
+ # Apply status code filtering
81
+ if options[:status_code]
82
+ if column_exists?(:status_code)
83
+ query = query.where(status_code: options[:status_code])
84
+ else
85
+ query = query.where("metadata->>'status_code' = ?", options[:status_code].to_s)
86
+ end
87
+ end
88
+
89
+ # Apply where conditions
90
+ if options[:where_conditions]
91
+ options[:where_conditions].each do |condition|
92
+ if condition[:raw]
93
+ query = query.where(condition[:raw])
94
+ else
95
+ condition.each do |key, value|
96
+ next if key == :raw
97
+ query = query.where(key => value)
98
+ end
99
+ end
100
+ end
101
+ end
102
+
103
+ # Apply aggregations and group by
104
+ if options[:group_by] && !options[:group_by].empty?
105
+ query = apply_group_by(query, options[:group_by], options[:aggregations])
106
+ elsif options[:aggregations] && !options[:aggregations].empty?
107
+ query = apply_aggregations(query, options[:aggregations])
108
+ end
109
+
110
+ # Apply having conditions (after aggregation)
111
+ if options[:having_conditions] && (options[:group_by] || options[:aggregations])
112
+ options[:having_conditions].each do |condition|
113
+ if condition[:raw]
114
+ query = query.having(condition[:raw])
115
+ else
116
+ condition.each do |key, value|
117
+ next if key == :raw
118
+ query = query.having(key => value)
119
+ end
120
+ end
121
+ end
122
+ end
123
+
124
+ # Apply distinct
125
+ if options[:distinct]
126
+ query = query.distinct(options[:distinct])
127
+ end
50
128
 
51
129
  query = apply_order_by(query, options[:order_by]) if options[:order_by]
52
130
  query = query.limit(options[:limit]) if options[:limit]
53
131
 
54
- query.map(&:to_h)
132
+ # Handle aggregations - return hash instead of array
133
+ if options[:aggregations] && !options[:aggregations].empty? && options[:group_by].nil?
134
+ result = query.first
135
+ result ? result.attributes.symbolize_keys : {}
136
+ else
137
+ query.map(&:to_h)
138
+ end
55
139
  end
56
140
 
57
141
  def delete_old_events(before_date)
@@ -59,7 +143,55 @@ module BehaviorAnalytics
59
143
  end
60
144
 
61
145
  def event_count(context, options = {})
146
+ context.validate!
62
147
  query = build_base_query(context, options)
148
+
149
+ # Apply metadata filters
150
+ if options[:metadata_filters]
151
+ options[:metadata_filters].each do |key, value|
152
+ query = query.where("metadata->>? = ?", key, value.to_s)
153
+ end
154
+ end
155
+
156
+ # Apply path/method/status_code filters
157
+ if options[:path]
158
+ if column_exists?(:path)
159
+ query = query.where(path: options[:path])
160
+ else
161
+ query = query.where("metadata->>'path' = ?", options[:path])
162
+ end
163
+ end
164
+
165
+ if options[:method]
166
+ if column_exists?(:method)
167
+ query = query.where(method: options[:method])
168
+ else
169
+ query = query.where("metadata->>'method' = ?", options[:method])
170
+ end
171
+ end
172
+
173
+ if options[:status_code]
174
+ if column_exists?(:status_code)
175
+ query = query.where(status_code: options[:status_code])
176
+ else
177
+ query = query.where("metadata->>'status_code' = ?", options[:status_code].to_s)
178
+ end
179
+ end
180
+
181
+ # Apply where conditions
182
+ if options[:where_conditions]
183
+ options[:where_conditions].each do |condition|
184
+ if condition[:raw]
185
+ query = query.where(condition[:raw])
186
+ else
187
+ condition.each do |key, value|
188
+ next if key == :raw
189
+ query = query.where(key => value)
190
+ end
191
+ end
192
+ end
193
+ end
194
+
63
195
  query.count
64
196
  end
65
197
 
@@ -83,8 +215,17 @@ module BehaviorAnalytics
83
215
  def build_base_query(context, options)
84
216
  context.validate!
85
217
 
86
- query = @model_class.where(tenant_id: context.tenant_id)
87
- query = query.where(user_id: context.user_id) if context.user_id
218
+ # Support different business cases:
219
+ # - Multi-tenant: filter by tenant_id
220
+ # - Single-tenant: filter by user_id (tenant_id may be nil)
221
+ # - API-only: no filters required
222
+ query = @model_class.all
223
+
224
+ if context.has_tenant?
225
+ query = query.where(tenant_id: context.tenant_id)
226
+ end
227
+
228
+ query = query.where(user_id: context.user_id) if context.has_user?
88
229
  query = query.where(user_type: context.user_type) if context.user_type
89
230
 
90
231
  query = query.where("created_at >= ?", options[:since]) if options[:since]
@@ -105,6 +246,38 @@ module BehaviorAnalytics
105
246
  query.order(created_at: direction)
106
247
  end
107
248
  end
249
+
250
+ def apply_group_by(query, group_by_fields, aggregations = [])
251
+ query = query.group(group_by_fields.map(&:to_sym))
252
+
253
+ if aggregations && !aggregations.empty?
254
+ select_clause = group_by_fields.map { |f| "#{f} as #{f}" }
255
+ aggregations.each do |agg|
256
+ field = agg[:field]
257
+ func = agg[:function]
258
+ select_clause << "#{func.upcase}(#{field}) as #{func}_#{field}"
259
+ end
260
+ query = query.select(select_clause.join(", "))
261
+ else
262
+ query = query.select(group_by_fields.map { |f| "#{f} as #{f}" }.join(", "))
263
+ end
264
+
265
+ query
266
+ end
267
+
268
+ def apply_aggregations(query, aggregations)
269
+ select_clause = []
270
+ aggregations.each do |agg|
271
+ field = agg[:field]
272
+ func = agg[:function]
273
+ select_clause << "#{func.upcase}(#{field}) as #{func}_#{field}"
274
+ end
275
+ query.select(select_clause.join(", "))
276
+ end
277
+
278
+ def column_exists?(column_name)
279
+ @model_class.column_names.include?(column_name.to_s)
280
+ end
108
281
  end
109
282
  end
110
283
  end
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
require "securerandom"

begin
  require "elasticsearch"
rescue LoadError
  raise LoadError, "Elasticsearch gem is required for ElasticsearchAdapter. Please add 'elasticsearch' to your Gemfile."
end
8
+
9
module BehaviorAnalytics
  module Storage
    # Storage adapter that writes behavior events to, and queries them from,
    # a single Elasticsearch index.
    #
    # The index is created on initialization (if missing) with typeless
    # mappings, so documents are indexed without a mapping type.
    class ElasticsearchAdapter < Adapter
      # Page size used by #events_for_context when no :limit is given.
      DEFAULT_PAGE_SIZE = 100

      # @param client [Object, nil] Elasticsearch client; a default
      #   +Elasticsearch::Client+ is built when omitted
      # @param index_name [String] index that stores the events
      def initialize(client: nil, index_name: "behavior_events")
        @client = client || Elasticsearch::Client.new
        @index_name = index_name
        ensure_index_exists
      end

      # Indexes +events+ with one bulk request.
      #
      # Each event is used as-is when it is a Hash, otherwise converted with
      # +to_h+. The document id is the event's +:id+ or a generated UUID.
      #
      # @param events [Array, nil] events to store; nil or empty is a no-op
      # @return the bulk response, or nil when there was nothing to save
      # @raise [Error] when the request fails or reports item-level failures
      def save_events(events)
        return if events.nil? || events.empty?

        operations = events.map do |event|
          document = event.is_a?(Hash) ? event : event.to_h
          {
            index: {
              _index: @index_name,
              _id: document[:id] || SecureRandom.uuid,
              data: document
            }
          }
        end

        response = @client.bulk(body: operations)

        # A bulk call can succeed at the HTTP level while individual items
        # are rejected; the response then carries "errors" => true.
        if response.respond_to?(:[]) && response["errors"]
          raise "bulk request reported item-level failures"
        end

        response
      rescue StandardError => e
        raise Error, "Failed to save events to Elasticsearch: #{e.message}"
      end

      # Searches events matching +context+ and +options+.
      #
      # @return [Array<Hash>] document sources with top-level keys symbolized;
      #   at most +options[:limit]+ (default DEFAULT_PAGE_SIZE) entries
      def events_for_context(context, options = {})
        context.validate!

        response = @client.search(
          index: @index_name,
          body: {
            query: build_query(context, options),
            sort: build_sort(options[:order_by]),
            size: options[:limit] || DEFAULT_PAGE_SIZE
          }
        )

        response["hits"]["hits"].map { |hit| hit["_source"].transform_keys(&:to_sym) }
      end

      # Deletes every event whose +created_at+ is earlier than +before_date+.
      #
      # @param before_date [#iso8601] cut-off timestamp (exclusive)
      def delete_old_events(before_date)
        @client.delete_by_query(
          index: @index_name,
          body: {
            query: {
              range: {
                created_at: { lt: before_date.iso8601 }
              }
            }
          }
        )
      end

      # @return the "count" value reported for events matching the filters
      def event_count(context, options = {})
        context.validate!

        response = @client.count(
          index: @index_name,
          body: { query: build_query(context, options) }
        )

        response["count"]
      end

      # Returns the value of a cardinality aggregation over +user_id+ for
      # the events matching the filters.
      def unique_users(context, options = {})
        context.validate!

        response = @client.search(
          index: @index_name,
          body: {
            query: build_query(context, options),
            aggs: {
              unique_users: {
                cardinality: { field: "user_id" }
              }
            },
            size: 0 # only the aggregation is needed, not the hits
          }
        )

        response["aggregations"]["unique_users"]["value"]
      end

      private

      # Creates the index with explicit mappings unless it already exists.
      def ensure_index_exists
        return if @client.indices.exists?(index: @index_name)

        @client.indices.create(
          index: @index_name,
          body: {
            mappings: {
              properties: {
                tenant_id: { type: "keyword" },
                user_id: { type: "keyword" },
                user_type: { type: "keyword" },
                event_name: { type: "keyword" },
                event_type: { type: "keyword" },
                metadata: { type: "object" },
                created_at: { type: "date" }
              }
            }
          }
        )
      end

      # Builds a bool/must query from the context and filter options.
      #
      # Supports different business cases:
      # - Multi-tenant: filter by tenant_id
      # - Single-tenant: filter by user_id (tenant_id may be nil)
      # - API-only: no strict filters required
      def build_query(context, options)
        must_clauses = []

        must_clauses << { term: { tenant_id: context.tenant_id } } if context.has_tenant?
        must_clauses << { term: { user_id: context.user_id } } if context.has_user?
        must_clauses << { term: { user_type: context.user_type } } if context.user_type
        must_clauses << { term: { event_name: options[:event_name] } } if options[:event_name]
        must_clauses << { term: { event_type: options[:event_type].to_s } } if options[:event_type]

        if options[:since] || options[:until]
          range_clause = {}
          range_clause[:gte] = options[:since].iso8601 if options[:since]
          range_clause[:lte] = options[:until].iso8601 if options[:until]
          must_clauses << { range: { created_at: range_clause } }
        end

        (options[:metadata_filters] || {}).each do |key, value|
          must_clauses << { term: { "metadata.#{key}" => value } }
        end

        # Request attributes are looked up inside the metadata object.
        %i[path method status_code].each do |attribute|
          next unless options[attribute]

          must_clauses << { term: { "metadata.#{attribute}" => options[attribute] } }
        end

        { bool: { must: must_clauses } }
      end

      # Builds the sort clause. Falls back to +created_at+ when no field is
      # given and to descending order when no direction is given.
      def build_sort(order_by)
        field = (order_by && order_by[:field]) || :created_at
        direction = (order_by && order_by[:direction]) || :desc
        [{ field => { order: direction.to_s } }]
      end
    end
  end
end
185
+