behavior_analytics 0.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/behavior_analytics.gemspec +3 -1
  3. data/db/migrate/002_enhance_behavior_events_v2.rb +46 -0
  4. data/lib/behavior_analytics/analytics/cohorts.rb +242 -0
  5. data/lib/behavior_analytics/analytics/engine.rb +15 -0
  6. data/lib/behavior_analytics/analytics/funnels.rb +176 -0
  7. data/lib/behavior_analytics/analytics/retention.rb +186 -0
  8. data/lib/behavior_analytics/debug/inspector.rb +82 -0
  9. data/lib/behavior_analytics/export/csv_exporter.rb +102 -0
  10. data/lib/behavior_analytics/export/json_exporter.rb +55 -0
  11. data/lib/behavior_analytics/hooks/callback.rb +50 -0
  12. data/lib/behavior_analytics/hooks/manager.rb +106 -0
  13. data/lib/behavior_analytics/hooks/webhook.rb +114 -0
  14. data/lib/behavior_analytics/integrations/rails/middleware.rb +99 -0
  15. data/lib/behavior_analytics/integrations/rails.rb +106 -0
  16. data/lib/behavior_analytics/jobs/active_event_job.rb +37 -0
  17. data/lib/behavior_analytics/jobs/delayed_event_job.rb +29 -0
  18. data/lib/behavior_analytics/jobs/sidekiq_event_job.rb +37 -0
  19. data/lib/behavior_analytics/observability/metrics.rb +112 -0
  20. data/lib/behavior_analytics/observability/tracer.rb +85 -0
  21. data/lib/behavior_analytics/processors/async_processor.rb +24 -0
  22. data/lib/behavior_analytics/processors/background_job_processor.rb +72 -0
  23. data/lib/behavior_analytics/query.rb +87 -2
  24. data/lib/behavior_analytics/replay/engine.rb +108 -0
  25. data/lib/behavior_analytics/replay/processor.rb +107 -0
  26. data/lib/behavior_analytics/reporting/generator.rb +125 -0
  27. data/lib/behavior_analytics/sampling/strategy.rb +54 -0
  28. data/lib/behavior_analytics/schema/definition.rb +71 -0
  29. data/lib/behavior_analytics/schema/validator.rb +113 -0
  30. data/lib/behavior_analytics/storage/active_record_adapter.rb +168 -8
  31. data/lib/behavior_analytics/storage/elasticsearch_adapter.rb +175 -0
  32. data/lib/behavior_analytics/storage/in_memory_adapter.rb +214 -2
  33. data/lib/behavior_analytics/storage/kafka_adapter.rb +112 -0
  34. data/lib/behavior_analytics/storage/redis_adapter.rb +175 -0
  35. data/lib/behavior_analytics/streaming/event_stream.rb +77 -0
  36. data/lib/behavior_analytics/throttling/limiter.rb +97 -0
  37. data/lib/behavior_analytics/tracker.rb +130 -4
  38. data/lib/behavior_analytics/version.rb +1 -1
  39. data/lib/behavior_analytics.rb +138 -2
  40. metadata +33 -3
@@ -39,19 +39,100 @@ module BehaviorAnalytics
39
39
  def events_for_context(context, options = {})
40
40
  context.validate!
41
41
 
42
- query = @model_class.where(tenant_id: context.tenant_id)
43
- query = query.where(user_id: context.user_id) if context.user_id
44
- query = query.where(user_type: context.user_type) if context.user_type
42
+ query = build_base_query(context, options)
43
+
44
+ # Apply metadata filters
45
+ if options[:metadata_filters]
46
+ options[:metadata_filters].each do |key, value|
47
+ query = query.where("metadata->>? = ?", key, value.to_s)
48
+ end
49
+ end
45
50
 
46
- query = query.where("created_at >= ?", options[:since]) if options[:since]
47
- query = query.where("created_at <= ?", options[:until]) if options[:until]
48
- query = query.where(event_name: options[:event_name]) if options[:event_name]
49
- query = query.where(event_type: options[:event_type].to_s) if options[:event_type]
51
+ # Apply path filtering
52
+ if options[:path]
53
+ if column_exists?(:path)
54
+ query = query.where(path: options[:path])
55
+ else
56
+ query = query.where("metadata->>'path' = ?", options[:path])
57
+ end
58
+ end
59
+
60
+ if options[:path_pattern]
61
+ if column_exists?(:path)
62
+ query = query.where("path LIKE ?", options[:path_pattern])
63
+ else
64
+ query = query.where("metadata->>'path' LIKE ?", options[:path_pattern])
65
+ end
66
+ end
67
+
68
+ # Apply method filtering
69
+ if options[:method]
70
+ if column_exists?(:method)
71
+ query = query.where(method: options[:method])
72
+ else
73
+ query = query.where("metadata->>'method' = ?", options[:method])
74
+ end
75
+ end
76
+
77
+ # Apply status code filtering
78
+ if options[:status_code]
79
+ if column_exists?(:status_code)
80
+ query = query.where(status_code: options[:status_code])
81
+ else
82
+ query = query.where("metadata->>'status_code' = ?", options[:status_code].to_s)
83
+ end
84
+ end
85
+
86
+ # Apply where conditions
87
+ if options[:where_conditions]
88
+ options[:where_conditions].each do |condition|
89
+ if condition[:raw]
90
+ query = query.where(condition[:raw])
91
+ else
92
+ condition.each do |key, value|
93
+ next if key == :raw
94
+ query = query.where(key => value)
95
+ end
96
+ end
97
+ end
98
+ end
99
+
100
+ # Apply aggregations and group by
101
+ if options[:group_by] && !options[:group_by].empty?
102
+ query = apply_group_by(query, options[:group_by], options[:aggregations])
103
+ elsif options[:aggregations] && !options[:aggregations].empty?
104
+ query = apply_aggregations(query, options[:aggregations])
105
+ end
106
+
107
+ # Apply having conditions (after aggregation)
108
+ if options[:having_conditions] && (options[:group_by] || options[:aggregations])
109
+ options[:having_conditions].each do |condition|
110
+ if condition[:raw]
111
+ query = query.having(condition[:raw])
112
+ else
113
+ condition.each do |key, value|
114
+ next if key == :raw
115
+ query = query.having(key => value)
116
+ end
117
+ end
118
+ end
119
+ end
120
+
121
+ # Apply distinct
122
+ if options[:distinct]
123
+ query = query.distinct(options[:distinct])
124
+ end
50
125
 
51
126
  query = apply_order_by(query, options[:order_by]) if options[:order_by]
52
127
  query = query.limit(options[:limit]) if options[:limit]
53
128
 
54
- query.map(&:to_h)
129
+ # Handle aggregations - return hash instead of array
130
+ if options[:aggregations] && !options[:aggregations].empty? && options[:group_by].nil?
131
+ result = query.first
132
+ result ? result.attributes.symbolize_keys : {}
133
+ else
134
+ query.map(&:to_h)
135
+ end
55
136
  end
56
137
 
57
138
  def delete_old_events(before_date)
@@ -60,6 +141,53 @@ module BehaviorAnalytics
60
141
 
61
142
  def event_count(context, options = {})
62
143
  query = build_base_query(context, options)
144
+
145
+ # Apply metadata filters
146
+ if options[:metadata_filters]
147
+ options[:metadata_filters].each do |key, value|
148
+ query = query.where("metadata->>? = ?", key, value.to_s)
149
+ end
150
+ end
151
+
152
+ # Apply path/method/status_code filters
153
+ if options[:path]
154
+ if column_exists?(:path)
155
+ query = query.where(path: options[:path])
156
+ else
157
+ query = query.where("metadata->>'path' = ?", options[:path])
158
+ end
159
+ end
160
+
161
+ if options[:method]
162
+ if column_exists?(:method)
163
+ query = query.where(method: options[:method])
164
+ else
165
+ query = query.where("metadata->>'method' = ?", options[:method])
166
+ end
167
+ end
168
+
169
+ if options[:status_code]
170
+ if column_exists?(:status_code)
171
+ query = query.where(status_code: options[:status_code])
172
+ else
173
+ query = query.where("metadata->>'status_code' = ?", options[:status_code].to_s)
174
+ end
175
+ end
176
+
177
+ # Apply where conditions
178
+ if options[:where_conditions]
179
+ options[:where_conditions].each do |condition|
180
+ if condition[:raw]
181
+ query = query.where(condition[:raw])
182
+ else
183
+ condition.each do |key, value|
184
+ next if key == :raw
185
+ query = query.where(key => value)
186
+ end
187
+ end
188
+ end
189
+ end
190
+
63
191
  query.count
64
192
  end
65
193
 
@@ -105,6 +233,38 @@ module BehaviorAnalytics
105
233
  query.order(created_at: direction)
106
234
  end
107
235
  end
236
+
237
# Groups +query+ by +group_by_fields+ and selects each grouped column
# (aliased to its own name) followed by one aliased aggregate per entry of
# +aggregations+.
#
# query           - relation-like object responding to #group and #select.
# group_by_fields - Array of column names (String or Symbol).
# aggregations    - Array of Hashes with :field and :function keys; may be
#                   nil or empty, in which case only the grouped columns are
#                   selected.
#
# Returns the relation produced by #group followed by #select.
# Raises ArgumentError when a column or function name is not a plain SQL
# identifier, because these names are spliced into the SELECT text.
def apply_group_by(query, group_by_fields, aggregations = [])
  select_clause = group_by_fields.map do |f|
    column = safe_sql_identifier(f)
    "#{column} as #{column}"
  end
  if aggregations && !aggregations.empty?
    select_clause.concat(aggregate_select_expressions(aggregations))
  end

  query.group(group_by_fields.map(&:to_sym)).select(select_clause.join(", "))
end

# Selects one aliased aggregate per entry of +aggregations+ on +query+,
# without grouping.
#
# Returns the relation produced by #select.
# Raises ArgumentError for names that are not plain SQL identifiers.
def apply_aggregations(query, aggregations)
  query.select(aggregate_select_expressions(aggregations).join(", "))
end

# Builds "FUNC(field) as func_field" fragments for +aggregations+.
# The field "*" is passed through (e.g. COUNT(*)) and aliased with the suffix
# "all", since "*" cannot appear in an alias.
def aggregate_select_expressions(aggregations)
  aggregations.map do |agg|
    func = safe_sql_identifier(agg[:function])
    raw_field = agg[:field].to_s
    star = raw_field == "*"
    field = star ? "*" : safe_sql_identifier(raw_field)
    "#{func.upcase}(#{field}) as #{func}_#{star ? 'all' : field}"
  end
end

# Returns +name+ as a String when it consists only of letters, digits and
# underscores (not starting with a digit); raises ArgumentError otherwise.
# Valid names are returned unchanged, so the generated SQL is the same as
# plain interpolation for every legitimate column or function name.
def safe_sql_identifier(name)
  identifier = name.to_s
  return identifier if identifier.match?(/\A[A-Za-z_][A-Za-z0-9_]*\z/)

  raise ArgumentError, "Invalid SQL identifier: #{name.inspect}"
end
264
+
265
# Reports whether the model class lists a column called +column_name+
# (String or Symbol; compared by its string form).
def column_exists?(column_name)
  wanted = column_name.to_s
  @model_class.column_names.any? { |existing| existing == wanted }
end
108
268
  end
109
269
  end
110
270
  end
@@ -0,0 +1,175 @@
1
+ # frozen_string_literal: true
2
+
3
require "securerandom"

begin
  require "elasticsearch"
rescue LoadError
  raise LoadError, "Elasticsearch gem is required for ElasticsearchAdapter. Please add 'elasticsearch' to your Gemfile."
end
8
+
9
+ module BehaviorAnalytics
10
+ module Storage
11
+ class ElasticsearchAdapter < Adapter
12
# Sets up the adapter and makes sure the target index exists.
#
# client     - object used for all Elasticsearch calls; when nil (or false) a
#              default Elasticsearch::Client is created.
# index_name - name of the index events are written to and read from.
def initialize(client: nil, index_name: "behavior_events")
  @index_name = index_name
  @client = client
  @client ||= Elasticsearch::Client.new
  ensure_index_exists
end
17
+
18
# Writes +events+ to the index with a single bulk request.
#
# events - Array of Hashes, or of objects responding to #to_h. An event's
#          :id (or "id") becomes the document id; events without one get a
#          random UUID.
#
# Returns nil when +events+ is empty, otherwise the client's bulk response.
# Raises Error when the request fails or when the bulk response reports that
# some documents were rejected (the bulk API reports per-item failures in the
# response body rather than as a request failure).
def save_events(events)
  return if events.empty?

  body = events.map do |event|
    event_hash = event.is_a?(Hash) ? event : event.to_h
    {
      index: {
        _index: @index_name,
        _id: event_hash[:id] || event_hash["id"] || SecureRandom.uuid,
        # NOTE(review): `_type` is rejected by recent Elasticsearch releases
        # (mapping types were removed) - confirm the supported server range
        # before keeping or dropping it.
        _type: "_doc",
        data: event_hash
      }
    }
  end

  response = @client.bulk(body: body)

  # Only inspect responses that behave like a Hash; anything else is returned
  # untouched, exactly as before.
  if response.respond_to?(:key?) && response["errors"]
    failed = Array(response["items"]).count do |item|
      outcome = item.values.first
      outcome.is_a?(Hash) && outcome["error"]
    end
    raise Error, "#{failed} of #{events.size} events were rejected by the bulk API"
  end

  response
rescue StandardError => e
  raise Error, "Failed to save events to Elasticsearch: #{e.message}"
end
37
+
38
# Searches the index for events matching +context+ and +options+.
#
# The query comes from build_query and the sort from build_sort (fed with
# options[:order_by]); options[:limit] caps the number of hits and defaults
# to 100.
#
# Returns an Array with each hit's "_source" document, top-level keys
# converted to Symbols (nested Hashes keep their String keys).
def events_for_context(context, options = {})
  context.validate!

  search_body = {
    query: build_query(context, options),
    sort: build_sort(options[:order_by]),
    size: options[:limit] || 100
  }
  response = @client.search(index: @index_name, body: search_body)

  response["hits"]["hits"].map do |hit|
    # Plain-Ruby key conversion: Hash#symbolize_keys comes from ActiveSupport,
    # which this file never requires.
    hit["_source"].each_with_object({}) do |(key, value), document|
      document[key.to_sym] = value
    end
  end
end
54
+
55
# Issues a delete-by-query for every document whose created_at is strictly
# earlier than +before_date+ (sent in ISO 8601 form).
#
# Returns the client's response.
def delete_old_events(before_date)
  cutoff = { lt: before_date.iso8601 }
  older_than_cutoff = { range: { created_at: cutoff } }

  @client.delete_by_query(index: @index_name, body: { query: older_than_cutoff })
end
69
+
70
# Asks Elasticsearch how many documents match +context+ and +options+
# (as translated by build_query) and returns the "count" field of the reply.
def event_count(context, options = {})
  context.validate!

  count_body = { query: build_query(context, options) }
  reply = @client.count(index: @index_name, body: count_body)
  reply["count"]
end
81
+
82
# Returns the value of a cardinality aggregation over "user_id" for the
# documents matching +context+ and +options+. No hits are requested
# (size 0); only the aggregation result is read.
def unique_users(context, options = {})
  context.validate!

  cardinality_agg = { unique_users: { cardinality: { field: "user_id" } } }
  search_body = {
    query: build_query(context, options),
    aggs: cardinality_agg,
    size: 0
  }

  reply = @client.search(index: @index_name, body: search_body)
  aggregation = reply["aggregations"]["unique_users"]
  aggregation["value"]
end
103
+
104
+ private
105
+
106
# Creates the events index with explicit field mappings unless the client
# reports that it already exists. Returns nil when nothing had to be created,
# otherwise the client's response to the create call.
def ensure_index_exists
  indices = @client.indices
  return if indices.exists?(index: @index_name)

  keyword = { type: "keyword" }
  field_mappings = {
    tenant_id: keyword,
    user_id: keyword,
    user_type: keyword,
    event_name: keyword,
    event_type: keyword,
    metadata: { type: "object" },
    created_at: { type: "date" }
  }

  @client.indices.create(
    index: @index_name,
    body: { mappings: { properties: field_mappings } }
  )
end
126
+
127
# Translates +context+ and +options+ into an Elasticsearch bool query whose
# "must" list holds one clause per active filter.
#
# Always filters on the context's tenant_id; adds user_id / user_type from the
# context and event_name / event_type (as a string) / since..until /
# metadata_filters / path / method / status_code from +options+ when present.
# Path, method and status code are looked up under "metadata.*".
#
# NOTE(review): term clauses match exact values; the index mapping declares
# metadata only as an object, so presumably string values under metadata.*
# need a keyword mapping for these clauses to match - confirm.
def build_query(context, options)
  clauses = [{ term: { tenant_id: context.tenant_id } }]

  clauses.push({ term: { user_id: context.user_id } }) if context.user_id
  clauses.push({ term: { user_type: context.user_type } }) if context.user_type
  clauses.push({ term: { event_name: options[:event_name] } }) if options[:event_name]
  clauses.push({ term: { event_type: options[:event_type].to_s } }) if options[:event_type]

  lower = options[:since]
  upper = options[:until]
  if lower || upper
    bounds = {}
    bounds[:gte] = lower.iso8601 if lower
    bounds[:lte] = upper.iso8601 if upper
    clauses.push({ range: { created_at: bounds } })
  end

  if options[:metadata_filters]
    options[:metadata_filters].each do |key, value|
      clauses.push({ term: { "metadata.#{key}" => value } })
    end
  end

  # Request attributes live inside the metadata object in the index.
  %i[path method status_code].each do |name|
    clauses.push({ term: { "metadata.#{name}" => options[name] } }) if options[name]
  end

  { bool: { must: clauses } }
end
164
+
165
# Builds the Elasticsearch sort specification for +order_by+.
#
# order_by - nil, or a Hash with optional :field and :direction keys.
#
# Returns a one-element Array. Without +order_by+ the sort is created_at
# descending. A missing :field falls back to created_at (instead of emitting
# a nil sort key) and a missing :direction to "desc"; the direction is
# lower-cased so :DESC / "DESC" are accepted too.
def build_sort(order_by)
  return [{ created_at: { order: "desc" } }] unless order_by

  field = order_by[:field] || :created_at
  direction = (order_by[:direction] || :desc).to_s.downcase
  [{ field => { order: direction } }]
end
172
+ end
173
+ end
174
+ end
175
+
@@ -21,8 +21,70 @@ module BehaviorAnalytics
21
21
  events = filter_by_date_range(events, options[:since], options[:until]) if options[:since] || options[:until]
22
22
  events = filter_by_event_name(events, options[:event_name]) if options[:event_name]
23
23
  events = filter_by_event_type(events, options[:event_type]) if options[:event_type]
24
+
25
+ # Apply metadata filters
26
+ if options[:metadata_filters]
27
+ events = filter_by_metadata(events, options[:metadata_filters])
28
+ end
29
+
30
+ # Apply path filtering
31
+ if options[:path]
32
+ events = events.select { |e| get_metadata_value(e, "path") == options[:path] }
33
+ end
34
+
35
+ if options[:path_pattern]
36
+ pattern = Regexp.new(options[:path_pattern].gsub('%', '.*'))
37
+ events = events.select { |e| path = get_metadata_value(e, "path"); path && pattern.match?(path) }
38
+ end
39
+
40
+ # Apply method filtering
41
+ if options[:method]
42
+ events = events.select { |e| get_metadata_value(e, "method")&.upcase == options[:method].upcase }
43
+ end
24
44
 
25
- events = events.sort_by { |e| e[:created_at] }.reverse
45
+ # Apply status code filtering
46
+ if options[:status_code]
47
+ events = events.select { |e| get_metadata_value(e, "status_code")&.to_s == options[:status_code].to_s }
48
+ end
49
+
50
+ # Apply where conditions
51
+ if options[:where_conditions]
52
+ events = apply_where_conditions(events, options[:where_conditions])
53
+ end
54
+
55
+ # Apply aggregations and group by
56
+ if options[:group_by] && !options[:group_by].empty?
57
+ return apply_group_by(events, options[:group_by], options[:aggregations])
58
+ elsif options[:aggregations] && !options[:aggregations].empty?
59
+ return apply_aggregations(events, options[:aggregations])
60
+ end
61
+
62
+ # Apply having conditions (after aggregation - handled in group_by/aggregations)
63
+ # Note: In-memory adapter applies having before returning grouped results
64
+
65
+ # Apply distinct
66
+ if options[:distinct]
67
+ seen = {}
68
+ events = events.select do |e|
69
+ value = get_field_value(e, options[:distinct])
70
+ key = value.to_s
71
+ if seen[key]
72
+ false
73
+ else
74
+ seen[key] = true
75
+ true
76
+ end
77
+ end
78
+ end
79
+
80
+ # Apply order by
81
+ if options[:order_by]
82
+ events = apply_order_by(events, options[:order_by])
83
+ else
84
+ events = events.sort_by { |e| e[:created_at] || Time.at(0) }.reverse
85
+ end
86
+
87
+ # Apply limit
26
88
  events = events.first(options[:limit]) if options[:limit]
27
89
 
28
90
  events
@@ -35,7 +97,35 @@ module BehaviorAnalytics
35
97
  end
36
98
 
37
99
  def event_count(context, options = {})
38
- events_for_context(context, options).count
100
+ # For count, we don't need aggregations/group_by, so use simplified version
101
+ context.validate!
102
+ events = filter_by_context(@events, context)
103
+
104
+ events = filter_by_date_range(events, options[:since], options[:until]) if options[:since] || options[:until]
105
+ events = filter_by_event_name(events, options[:event_name]) if options[:event_name]
106
+ events = filter_by_event_type(events, options[:event_type]) if options[:event_type]
107
+
108
+ if options[:metadata_filters]
109
+ events = filter_by_metadata(events, options[:metadata_filters])
110
+ end
111
+
112
+ if options[:path]
113
+ events = events.select { |e| get_metadata_value(e, "path") == options[:path] }
114
+ end
115
+
116
+ if options[:method]
117
+ events = events.select { |e| get_metadata_value(e, "method")&.upcase == options[:method].upcase }
118
+ end
119
+
120
+ if options[:status_code]
121
+ events = events.select { |e| get_metadata_value(e, "status_code")&.to_s == options[:status_code].to_s }
122
+ end
123
+
124
+ if options[:where_conditions]
125
+ events = apply_where_conditions(events, options[:where_conditions])
126
+ end
127
+
128
+ events.count
39
129
  end
40
130
 
41
131
  def unique_users(context, options = {})
@@ -76,6 +166,128 @@ module BehaviorAnalytics
76
166
  event_type_sym = event_type.is_a?(Symbol) ? event_type : event_type.to_sym
77
167
  events.select { |e| e[:event_type] == event_type_sym || e[:event_type].to_sym == event_type_sym }
78
168
  end
169
+
170
# Keeps the events whose metadata satisfies every key => value pair in
# +metadata_filters+. A pair is satisfied when the stored value equals the
# expected one directly or after both are converted to strings.
def filter_by_metadata(events, metadata_filters)
  events.reject do |event|
    metadata_filters.any? do |key, expected|
      actual = get_metadata_value(event, key)
      actual != expected && actual.to_s != expected.to_s
    end
  end
end
177
+
178
# Looks up +key+ in the event's metadata, which may be stored under :metadata
# or "metadata" and keyed by Symbols or Strings.
#
# Tries the Symbol form of +key+, then the String form, then +key+ itself and
# returns the first value that is not nil, so a stored +false+ is returned as
# +false+ rather than being mistaken for "missing". Returns nil when no form
# of the key holds a value.
def get_metadata_value(event, key)
  metadata = event[:metadata] || event["metadata"] || {}

  [key.to_sym, key.to_s, key].each do |candidate|
    value = metadata[candidate]
    return value unless value.nil?
  end

  nil
end
182
+
183
# Reads +field+ from +event+, trying the Symbol form of the name, then the
# String form, then +field+ itself.
#
# Returns the first value that is not nil, so a stored +false+ is returned as
# +false+ rather than being mistaken for "missing"; returns nil when no form
# of the name holds a value.
def get_field_value(event, field)
  [field.to_sym, field.to_s, field].each do |candidate|
    value = event[candidate]
    return value unless value.nil?
  end

  nil
end
186
+
187
# Narrows +events+ by each Hash in +where_conditions+ in turn.
#
# Every key => value pair of a condition must equal the event's field value.
# Conditions carrying a truthy :raw entry are not evaluated in memory and
# leave the events untouched.
def apply_where_conditions(events, where_conditions)
  remaining = events

  where_conditions.each do |condition|
    # Raw (SQL-style) conditions would need an expression evaluator; they are
    # skipped here, so they never remove events.
    next if condition[:raw]

    condition.each do |key, value|
      next if key == :raw

      remaining = remaining.select { |e| get_field_value(e, key) == value }
    end
  end

  remaining
end
201
+
202
# Groups +events+ by the values of +group_by_fields+ and returns one Hash per
# group.
#
# Each Hash holds the group's field values (keyed by the field name as a
# Symbol) plus either one "<function>_<field>" entry per aggregation or, when
# no aggregations are given, a :count of the events in the group.
def apply_group_by(events, group_by_fields, aggregations = [])
  grouped = events.group_by do |event|
    group_by_fields.map { |field| get_field_value(event, field) }
  end
  aggregate = aggregations && !aggregations.empty?

  grouped.map do |keys, group_events|
    row = {}
    group_by_fields.each_with_index { |field, idx| row[field.to_sym] = keys[idx] }

    if aggregate
      row.merge!(aggregate_row(group_events, aggregations))
    else
      row[:count] = group_events.size
    end
    row
  end
end

# Computes every aggregation over all of +events+.
#
# Returns a one-element Array holding a Hash keyed "<function>_<field>".
# NOTE(review): the Array wrapper differs from the ActiveRecord adapter, which
# returns a bare Hash for ungrouped aggregations - confirm which shape callers
# expect before changing it.
def apply_aggregations(events, aggregations)
  [aggregate_row(events, aggregations)]
end

# Builds the { "<function>_<field>": value } Hash for +aggregations+ over
# +events+. Nil field values are ignored.
def aggregate_row(events, aggregations)
  aggregations.each_with_object({}) do |agg, row|
    field = agg[:field]
    func = agg[:function]
    values = events.map { |e| get_field_value(e, field) }.compact
    row["#{func}_#{field}".to_sym] = compute_aggregate(func, values)
  end
end

# Applies the aggregate named +func+ to +values+.
#
# The name is matched case-insensitively and may be a String or Symbol, so
# :sum and "SUM" behave like "sum". Unknown names fall back to counting the
# values. Sum and average only consider numeric values; the average divides
# by the number of numeric values so non-numeric entries cannot skew it.
def compute_aggregate(func, values)
  numeric = values.grep(Numeric)

  case func.to_s.downcase
  when "sum"
    numeric.sum
  when "avg", "average"
    numeric.empty? ? 0 : numeric.sum.to_f / numeric.size
  when "min"
    values.min
  when "max"
    values.max
  else
    # "count" and any unrecognised function
    values.size
  end
end
269
+
270
# Returns +events+ sorted by the field named in order_by[:field].
#
# order_by[:direction] may be a Symbol or String in any case; anything other
# than "desc" sorts ascending, and a missing direction means descending.
# Events whose field is nil compare as greater than any value (so they come
# last ascending and first descending). Values that cannot be compared
# directly (e.g. an Integer and a String) are ordered by their string forms
# instead of raising.
def apply_order_by(events, order_by)
  field = order_by[:field]
  descending = (order_by[:direction] || :desc).to_s.downcase == "desc"

  events.sort do |a, b|
    a_val = get_field_value(a, field)
    b_val = get_field_value(b, field)

    comparison = if a_val.nil? && b_val.nil?
                   0
                 elsif a_val.nil?
                   1
                 elsif b_val.nil?
                   -1
                 else
                   # <=> yields nil for incomparable operands.
                   (a_val <=> b_val) || (a_val.to_s <=> b_val.to_s)
                 end

    descending ? -comparison : comparison
  end
end
79
291
  end
80
292
  end
81
293
  end