userpattern 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'openssl'
4
+
5
module UserPattern
  # Produces a one-way anonymous session identifier that:
  # - Is consistent within a single session/token lifetime (for per-session stats)
  # - Rotates daily via a date-scoped salt (prevents cross-day correlation)
  # - Cannot be reversed to recover user identity or session ID
  class Anonymizer
    DIGEST = 'SHA256'
    TRUNCATE_LENGTH = 16

    # Computes an HMAC of the request's session fingerprint, keyed with the
    # configured salt joined to today's ISO-8601 date, and keeps only the
    # first TRUNCATE_LENGTH hex characters.
    #
    # @param request [Object] request object; depending on the configured
    #   detection mode it is asked for +headers+, +session+ and/or +remote_ip+
    # @return [String] truncated hexadecimal digest
    # @raise [ArgumentError] if +session_detection+ is neither a known symbol,
    #   nil, nor an object responding to +call+
    def self.anonymize(request)
      raw = session_fingerprint(request)
      daily_salt = "#{UserPattern.configuration.anonymous_salt}:#{Date.current.iso8601}"
      OpenSSL::HMAC.hexdigest(DIGEST, daily_salt, raw)[0, TRUNCATE_LENGTH]
    end

    class << self
      private

      # Picks the raw string to hash according to +session_detection+.
      def session_fingerprint(request)
        detection = UserPattern.configuration.session_detection

        case detection
        when :auto, nil then auto_detect(request)
        when :session then session_based(request)
        when :header then header_based(request)
        else custom_fingerprint(detection, request)
        end
      end

      # Handles user-supplied strategies. Any callable is accepted (Proc,
      # lambda, Method, or an object defining #call); anything else is a
      # configuration mistake and is reported explicitly instead of letting
      # a nil fingerprint reach the HMAC call.
      def custom_fingerprint(detection, request)
        return detection.call(request).to_s if detection.respond_to?(:call)

        raise ArgumentError,
              "[UserPattern] unsupported session_detection: #{detection.inspect} " \
              '(expected :auto, :session, :header or a callable)'
      end

      # Prefers the Authorization header, then the session id, then the
      # remote IP as a last resort.
      def auto_detect(request)
        if request.headers['Authorization'].present?
          header_based(request)
        elsif request.respond_to?(:session) && request.session.respond_to?(:id) && request.session.id.present?
          session_based(request)
        else
          request.remote_ip.to_s
        end
      end

      def session_based(request)
        request.session.id.to_s
      end

      def header_based(request)
        request.headers['Authorization'].to_s
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent'
4
+
5
module UserPattern
  # Thread-safe in-memory buffer that batches request events before flushing
  # to the database. Minimizes per-request overhead to a single array push.
  class Buffer
    # Upper bound on the number of events written by one persist call.
    MAX_DRAIN = 1_000

    def initialize
      @queue = Concurrent::Array.new
      @flushing = Concurrent::AtomicBoolean.new(false)
      start_timer
    end

    # Appends an event and triggers a background flush once the queue has
    # reached the configured +buffer_size+.
    #
    # @param event [Hash] attributes of one request event
    def push(event)
      @queue << event
      flush_async if @queue.size >= UserPattern.configuration.buffer_size
    end

    # Persists up to MAX_DRAIN queued events unless the queue is empty or a
    # flush is already in progress.
    #
    # The flag is released only by the caller that acquired it. A method-level
    # +ensure+ would also run on the early returns and let a bystander clear
    # the flag while another thread is still flushing.
    def flush
      return if @queue.empty?
      return unless @flushing.make_true

      begin
        persist_events
      ensure
        @flushing.make_false
      end
    end

    # Stops the timer and writes out everything still queued, batch by batch,
    # so that more than MAX_DRAIN pending events are not dropped.
    def shutdown
      @timer&.shutdown
      until @queue.empty?
        flush
        # Yield in case another thread currently owns the flush flag.
        Thread.pass
      end
    end

    # @return [Integer] number of events currently queued
    def size
      @queue.size
    end

    private

    # Drains one batch, stamps it with a shared created_at, and bulk-inserts
    # it. Failures are logged rather than raised; the drained batch is not
    # re-queued.
    def persist_events
      events = drain_queue
      return if events.empty?

      now = Time.current
      rows = events.map { |e| e.merge(created_at: now) }
      UserPattern::RequestEvent.insert_all(rows)
    rescue StandardError => e
      Rails.logger.error("[UserPattern] Flush error: #{e.message}")
    end

    # Removes and returns at most MAX_DRAIN events from the front of the queue.
    def drain_queue
      events = []
      events << @queue.shift until @queue.empty? || events.size >= MAX_DRAIN
      events
    end

    # Flushes on a separate thread; skipped when a flush is already running
    # so that pushes above the threshold do not each spawn a thread.
    def flush_async
      Thread.new { flush } unless @flushing.true?
    end

    def start_timer
      @timer = Concurrent::TimerTask.new(
        execution_interval: UserPattern.configuration.flush_interval
      ) { flush }
      @timer.execute
    end
  end
end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
module UserPattern
  # Holds every user-tunable setting of the gem together with its default.
  class Configuration
    attr_reader :tracked_models

    attr_accessor :flush_interval, :buffer_size,
                  :retention_period, :anonymous_salt,
                  :session_detection, :enabled, :ignored_paths,
                  :mode, :threshold_multiplier, :threshold_refresh_interval,
                  :block_unknown_endpoints, :on_threshold_exceeded,
                  :violation_actions, :logout_method, :rate_limiter_store

    attr_writer :dashboard_auth

    def initialize
      @tracked_models = [{ name: 'User', current_method: :current_user }]
      @flush_interval = 30
      @buffer_size = 100
      @retention_period = 30
      @dashboard_auth = nil
      @anonymous_salt = nil
      @session_detection = :auto
      @enabled = true
      @ignored_paths = []
      initialize_alert_defaults
    end

    # @return [Boolean] true when +mode+ is :alert
    def alert_mode?
      @mode == :alert
    end

    # @return [Proc] the explicitly configured auth callable, or a default
    #   derived from the USERPATTERN_DASHBOARD_* environment variables
    def dashboard_auth
      @dashboard_auth || default_dashboard_auth
    end

    # Normalizes each entry to +{ name: String, current_method: Symbol }+,
    # defaulting the method to +current_<underscored name>+.
    #
    # @param list [Array<Hash>] entries with :name and optional :current_method
    def tracked_models=(list)
      @tracked_models = list.map do |entry|
        name = entry[:name].to_s
        method = entry[:current_method] || :"current_#{name.underscore}"
        { name: name, current_method: method }
      end
    end

    # @param path [String] request path to test
    # @return [Boolean] true if any ignored pattern matches: Regexp patterns
    #   via +match?+, String patterns by exact equality; other pattern types
    #   never match
    def ignored?(path)
      ignored_paths.any? do |pattern|
        case pattern
        when Regexp then pattern.match?(path)
        when String then pattern == path
        end
      end
    end

    private

    def initialize_alert_defaults
      @mode = :collection
      @threshold_multiplier = 1.5
      @threshold_refresh_interval = 300
      @block_unknown_endpoints = false
      @on_threshold_exceeded = nil
      @violation_actions = [:raise]
      @logout_method = nil
      @rate_limiter_store = nil
    end

    # Builds HTTP basic auth from the environment. The dashboard stays locked
    # unless BOTH variables hold a non-blank value: an exported-but-empty
    # variable must not enable authentication with empty credentials.
    def default_dashboard_auth
      user = ENV.fetch('USERPATTERN_DASHBOARD_USER', nil)
      pass = ENV.fetch('USERPATTERN_DASHBOARD_PASSWORD', nil)
      return locked_dashboard_auth if blank_credential?(user) || blank_credential?(pass)

      basic_auth_lambda(user, pass)
    end

    def blank_credential?(value)
      value.nil? || value.strip.empty?
    end

    # Lambda that renders a 403 explaining how to unlock the dashboard. It
    # calls +render+ on whatever object it is evaluated against.
    def locked_dashboard_auth
      lambda {
        render plain: 'Dashboard locked. Set USERPATTERN_DASHBOARD_USER and ' \
                      'USERPATTERN_DASHBOARD_PASSWORD environment variables, ' \
                      'or configure a custom dashboard_auth.',
               status: :forbidden
      }
    end

    # Lambda performing HTTP basic auth. The single `&` (not `&&`) is
    # deliberate: both comparisons always run, so a wrong user name does not
    # short-circuit the password check.
    def basic_auth_lambda(user, pass)
      lambda {
        authenticate_or_request_with_http_basic('UserPattern') do |provided_user, provided_pass|
          ActiveSupport::SecurityUtils.secure_compare(provided_user, user) &
            ActiveSupport::SecurityUtils.secure_compare(provided_pass, pass)
        end
      }
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'userpattern/anonymizer'
4
+ require 'userpattern/path_normalizer'
5
+
6
module UserPattern
  # Controller concern: when included it registers a before_action that
  # enforces rate limits and an after_action that buffers one request event
  # per tracked model that resolves to a user.
  module ControllerTracking
    extend ActiveSupport::Concern

    included do
      before_action :_userpattern_check_rate_limit
      after_action :_userpattern_track_request
    end

    private

    # before_action: enforces limits and routes any ThresholdExceeded through
    # the configured violation actions.
    def _userpattern_check_rate_limit
      return unless _userpattern_should_check_rate_limit?

      _userpattern_enforce_limits
    rescue UserPattern::ThresholdExceeded => e
      _userpattern_handle_violation(e)
    end

    def _userpattern_should_check_rate_limit?
      UserPattern.enabled? &&
        UserPattern.configuration.alert_mode? &&
        !_userpattern_internal_request? &&
        !_userpattern_ignored_path? &&
        UserPattern.rate_limiter
    end

    def _userpattern_enforce_limits
      endpoint = _userpattern_endpoint

      UserPattern.configuration.tracked_models.each do |model_config|
        user = _userpattern_resolve(model_config[:current_method])
        next unless user

        UserPattern.rate_limiter.check_and_increment!(user.id, model_config[:name], endpoint)
      end
    end

    # Runs the configured reactions to a violation. +Array()+ makes a nil or
    # single-symbol +violation_actions+ setting behave like a list instead of
    # raising NoMethodError on +include?+.
    def _userpattern_handle_violation(violation)
      actions = Array(UserPattern.configuration.violation_actions)

      _userpattern_log_violation(violation) if actions.include?(:log)
      _userpattern_record_violation(violation) if actions.include?(:record)
      UserPattern.configuration.on_threshold_exceeded&.call(violation)
      UserPattern.configuration.logout_method&.call(self) if actions.include?(:logout)

      raise violation if actions.include?(:raise)
    end

    def _userpattern_log_violation(violation)
      Rails.logger.warn("[UserPattern] #{violation.message}")
    end

    def _userpattern_record_violation(violation)
      # Loaded lazily: only needed when the :record action is enabled.
      require 'userpattern/violation_recorder'
      UserPattern::ViolationRecorder.record!(violation)
    end

    # after_action: buffers the request unless tracking is disabled, the
    # controller belongs to this gem, or the path is ignored.
    def _userpattern_track_request
      return unless UserPattern.enabled?
      return if _userpattern_internal_request?
      return if _userpattern_ignored_path?

      _userpattern_record_matching_models
    end

    # Pushes one event per tracked model whose current-user method returns a
    # value. The request-level attributes are identical for every model, so
    # they are computed once, and only when at least one model matched.
    def _userpattern_record_matching_models
      shared = nil

      UserPattern.configuration.tracked_models.each do |model_config|
        next unless _userpattern_resolve(model_config[:current_method])

        shared ||= {
          endpoint: _userpattern_endpoint,
          anonymous_session_id: UserPattern::Anonymizer.anonymize(request),
          recorded_at: Time.current
        }
        UserPattern.buffer.push({ model_type: model_config[:name] }.merge(shared))
      end
    end

    # "VERB normalized-path" string used both for limits and recorded events.
    def _userpattern_endpoint
      "#{request.method} #{UserPattern::PathNormalizer.normalize(request.fullpath)}"
    end

    # Calls the controller's current-user method if it exists (private
    # methods included). Any error raised by it is treated as "no user".
    def _userpattern_resolve(method_name)
      return nil unless respond_to?(method_name, true)

      send(method_name)
    rescue StandardError
      nil
    end

    def _userpattern_internal_request?
      self.class.name.to_s.start_with?('UserPattern::')
    end

    def _userpattern_ignored_path?
      UserPattern.configuration.ignored?(request.path)
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module UserPattern
  # Rails engine wiring: mixes request tracking into controllers and runs the
  # gem's post-boot setup (salt default, alert mode, rake task loading).
  class Engine < ::Rails::Engine
    isolate_namespace UserPattern

    initializer 'userpattern.controller_tracking' do
      # Same hook body for both controller base classes.
      %i[action_controller_base action_controller_api].each do |load_hook|
        ActiveSupport.on_load(load_hook) do
          require 'userpattern/controller_tracking'
          include UserPattern::ControllerTracking
        end
      end
    end

    # :nocov:
    initializer 'userpattern.default_salt' do
      config.after_initialize do
        settings = UserPattern.configuration
        unless settings.anonymous_salt
          # Prefer a slice of the app secret; otherwise use a random value.
          from_secret = Rails.application.secret_key_base&.byteslice(0, 32)
          settings.anonymous_salt = from_secret || SecureRandom.hex(16)
        end
      end
    end

    initializer 'userpattern.alert_mode' do
      config.after_initialize do
        alerting = UserPattern.configuration.alert_mode?
        UserPattern.start_alert_mode! if alerting
      end
    end

    initializer 'userpattern.cleanup_task' do
      config.after_initialize do
        if defined?(Rake)
          rake_file = File.expand_path('../tasks/userpattern.rake', __dir__)
          load rake_file
        end
      end
    end
    # :nocov:
  end
end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
module UserPattern
  # Normalizes request paths so that URLs differing only by dynamic segments
  # (numeric IDs, UUIDs) are aggregated into a single endpoint pattern.
  # Also redacts identifiable values from query strings.
  module PathNormalizer
    NUMERIC_ID = /\A\d+\z/
    UUID = /\A[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\z/i
    HEX_TOKEN = /\A[0-9a-f]{16,}\z/i

    # Every pattern that marks a path segment or query value as dynamic.
    DYNAMIC_PATTERNS = [NUMERIC_ID, UUID, HEX_TOKEN].freeze

    ID_PLACEHOLDER = ':id'
    REDACTED_VALUE = ':xxx'

    class << self
      # Replaces dynamic path segments with ID_PLACEHOLDER and dynamic query
      # values with REDACTED_VALUE; query pairs are sorted so parameter order
      # does not create distinct endpoints.
      #
      # @param path [String, nil] path, optionally followed by "?query"
      # @return [String] normalized path; "" for nil or empty input
      def normalize(path)
        # to_s on both sides: ''.split returns [], which would otherwise
        # hand nil to normalize_path.
        uri_path, query = path.to_s.split('?', 2)
        normalized = normalize_path(uri_path.to_s)
        normalized = "#{normalized}?#{normalize_query(query)}" if query
        normalized
      end

      private

      def normalize_path(path)
        return path if path == '/'

        segments = path.split('/')
        segments.map { |seg| dynamic?(seg) ? ID_PLACEHOLDER : seg }.join('/')
      end

      def normalize_query(query)
        query.split('&').map { |pair| redact_pair(pair) }.sort.join('&')
      end

      # Leaves "key" (no '=') and non-dynamic values untouched.
      def redact_pair(pair)
        key, value = pair.split('=', 2)
        return pair unless value

        dynamic?(value) ? "#{key}=#{REDACTED_VALUE}" : pair
      end

      # True when the non-empty text is entirely a numeric id, a UUID, or a
      # hex token of 16+ characters.
      def dynamic?(text)
        return false if text.empty?

        DYNAMIC_PATTERNS.any? { |pattern| text.match?(pattern) }
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'userpattern/threshold_exceeded'
4
+
5
module UserPattern
  # Checks per-user request rates against the limits from ThresholdCache.
  # Counters are stored in an ActiveSupport::Cache::Store (same interface
  # as Rack::Attack), giving multi-process support via Redis/Memcached.
  class RateLimiter
    # Bucket key format and counter TTL (seconds) for each tracked period.
    PERIODS = {
      minute: { format: '%Y-%m-%dT%H:%M', ttl: 120 },
      hour: { format: '%Y-%m-%dT%H', ttl: 7_200 },
      day: { format: '%Y-%m-%d', ttl: 172_800 }
    }.freeze

    # @param store [#increment, #write] counter store
    # @param threshold_cache [#limits_for] source of per-endpoint limits
    def initialize(store:, threshold_cache:)
      @store = store
      @threshold_cache = threshold_cache
    end

    # Increments the caller's counters for every period and raises as soon as
    # one exceeds its limit. Endpoints without known limits are allowed
    # unless +block_unknown_endpoints+ is set.
    #
    # @param user_id [Object] identifier of the acting record
    # @param model_type [String] tracked model name
    # @param endpoint [String] "VERB /normalized/path"
    # @raise [ThresholdExceeded] when a limit is exceeded, or the endpoint is
    #   unknown and unknown endpoints are blocked
    def check_and_increment!(user_id, model_type, endpoint)
      limits = @threshold_cache.limits_for(model_type, endpoint)

      if limits.nil?
        return unless UserPattern.configuration.block_unknown_endpoints

        raise_threshold_exceeded(endpoint, user_id, model_type, 'unknown', 1, 0)
      end

      # Sample the clock once so all three buckets describe the same instant.
      now = Time.current
      PERIODS.each do |period, config|
        check_period!(user_id, model_type, endpoint, period, config, limits, now)
      end
    end

    private

    # Skips periods whose limit is missing or not positive.
    def check_period!(user_id, model_type, endpoint, period, config, limits, now)
      limit = limits[:"per_#{period}"]
      return unless limit&.positive?

      key = cache_key(user_id, model_type, endpoint, period, now.strftime(config[:format]))
      count = increment_counter(key, config[:ttl])

      return unless count > limit

      raise_threshold_exceeded(endpoint, user_id, model_type, period.to_s, count, limit)
    end

    def raise_threshold_exceeded(endpoint, user_id, model_type, period, count, limit)
      raise ThresholdExceeded.new(
        endpoint: endpoint, user_id: user_id, model_type: model_type,
        period: period, count: count, limit: limit
      )
    end

    # Falls back to writing 1 when the store's increment returns a falsy
    # value (taken here to mean the key did not exist yet).
    def increment_counter(key, ttl)
      count = @store.increment(key, 1, expires_in: ttl)
      return count if count

      @store.write(key, 1, expires_in: ttl)
      1
    end

    # model_type is part of the key so that records of different tracked
    # models sharing the same id do not share counters.
    def cache_key(user_id, model_type, endpoint, period, bucket)
      "userpattern:#{model_type}:#{user_id}:#{endpoint}:#{period}:#{bucket}"
    end
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
module UserPattern
  # Deletes request events older than the configured retention period.
  module RequestEventCleanup
    # Removes every RequestEvent whose +recorded_at+ is earlier than
    # +retention_period+ days ago.
    #
    # @return [Integer] value returned by +delete_all+, or 0 when
    #   +retention_period+ is nil (treated as "no cleanup" instead of
    #   raising NoMethodError)
    def self.run!
      retention = UserPattern.configuration.retention_period
      return 0 if retention.nil?

      cutoff = retention.days.ago
      UserPattern::RequestEvent.where('recorded_at < ?', cutoff).delete_all
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
module UserPattern
  # Aggregates recorded request events into one statistics hash per
  # (model_type, endpoint) pair.
  class StatsCalculator
    # @return [Array<Hash>] see #compute_all
    def self.compute_all
      new.compute_all
    end

    # @return [Array<Hash>] one hash per non-ignored (model_type, endpoint)
    #   group with totals, averages, per-bucket maxima and first/last seen
    def compute_all
      load_groups
        .reject { |row| ignored_endpoint?(row[1]) }
        .map { |row| build_stat(row) }
    end

    private

    # endpoint format: "VERB /path[?query]". Only the path is matched, with
    # the query string dropped, mirroring the controller which passes
    # +request.path+ to +ignored?+.
    def ignored_endpoint?(endpoint)
      target = endpoint.split(' ', 2).last.to_s
      path = target.split('?', 2).first.to_s
      UserPattern.configuration.ignored?(path)
    end

    def load_groups
      RequestEvent
        .group(:model_type, :endpoint)
        .pluck(
          :model_type,
          :endpoint,
          Arel.sql('COUNT(*)'),
          Arel.sql('COUNT(DISTINCT anonymous_session_id)'),
          Arel.sql('MIN(recorded_at)'),
          Arel.sql('MAX(recorded_at)')
        )
    end

    def build_stat(row)
      model_type, endpoint, total, sessions, first_seen, last_seen = row
      span = time_span_seconds(first_seen, last_seen)

      {
        model_type: model_type,
        endpoint: endpoint,
        total_requests: total,
        total_sessions: sessions,
        avg_per_session: safe_divide(total, sessions),
        avg_per_minute: avg_rate(total, span, 60),
        max_per_minute: max_per_bucket(model_type, endpoint, :minute),
        max_per_hour: max_per_bucket(model_type, endpoint, :hour),
        max_per_day: max_per_bucket(model_type, endpoint, :day),
        first_seen_at: first_seen,
        last_seen_at: last_seen
      }
    end

    # Largest number of events that fell into a single time bucket of the
    # given period; 0 when there are none. Issues one query per call.
    def max_per_bucket(model_type, endpoint, period)
      RequestEvent
        .where(model_type: model_type, endpoint: endpoint)
        .group(Arel.sql(bucket_expression(period)))
        .count
        .values
        .max || 0
    end

    # SQL expression truncating recorded_at to the period, per adapter; any
    # adapter other than postgres/mysql gets the strftime form.
    def bucket_expression(period)
      case connection_adapter
      when /postgres/
        pg_period = { minute: 'minute', hour: 'hour', day: 'day' }[period]
        "date_trunc('#{pg_period}', recorded_at)"
      when /mysql/
        fmt = { minute: '%Y-%m-%d %H:%i', hour: '%Y-%m-%d %H', day: '%Y-%m-%d' }[period]
        "DATE_FORMAT(recorded_at, '#{fmt}')"
      else
        fmt = { minute: '%Y-%m-%d %H:%M', hour: '%Y-%m-%d %H', day: '%Y-%m-%d' }[period]
        "strftime('#{fmt}', recorded_at)"
      end
    end

    # Adapter of the connection the events are actually queried on.
    def connection_adapter
      RequestEvent.connection.adapter_name.downcase
    end

    # Seconds between first and last event; 1.0 when unknown or not positive
    # so callers can divide by it.
    def time_span_seconds(first, last)
      return 1.0 if first.nil? || last.nil?

      span = (last.to_time - first.to_time).to_f
      span.positive? ? span : 1.0
    end

    def safe_divide(numerator, denominator)
      return 0.0 if denominator.nil? || denominator.zero?

      (numerator.to_f / denominator).round(2)
    end

    # Average events per period; when the span is shorter than one period
    # the raw total is reported instead of extrapolating.
    def avg_rate(total, span_seconds, period_seconds)
      periods = span_seconds / period_seconds.to_f
      return total.to_f.round(2) if periods < 1

      (total / periods).round(2)
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent'
4
+ require 'userpattern/stats_calculator'
5
+
6
module UserPattern
  # Keeps an in-memory table of rate limits, keyed by [model_type, endpoint],
  # derived from the observed maxima reported by StatsCalculator and scaled
  # by the configured multiplier. The table is rebuilt on a timer.
  #
  # A Hash (not a Set) backs the table because each key carries its limits.
  class ThresholdCache
    def initialize
      @mutex = Mutex.new
      @limits = {}
      safe_refresh
      start_refresh_timer
    end

    # @return [Hash, nil] { per_minute:, per_hour:, per_day: } or nil when
    #   the pair has no entry
    def limits_for(model_type, endpoint)
      lookup = [model_type, endpoint]
      @limits[lookup]
    end

    # @return [Boolean] whether the pair has an entry
    def known_endpoint?(model_type, endpoint)
      lookup = [model_type, endpoint]
      @limits.key?(lookup)
    end

    # @return [Hash] shallow copy of the whole table
    def all_limits
      @limits.dup
    end

    # Rebuilds the table, then swaps it in under the mutex.
    def refresh!
      rebuilt = build_limits
      @mutex.synchronize do
        @limits = rebuilt
      end
    end

    def shutdown
      @timer&.shutdown
    end

    private

    # Each limit is ceil(observed max * multiplier).
    def build_limits
      factor = UserPattern.configuration.threshold_multiplier
      table = {}

      StatsCalculator.compute_all.each do |stat|
        scaled = {}
        %i[minute hour day].each do |period|
          scaled[:"per_#{period}"] = (stat[:"max_per_#{period}"] * factor).ceil
        end
        table[[stat[:model_type], stat[:endpoint]]] = scaled
      end

      table
    end

    # refresh! that logs instead of raising.
    def safe_refresh
      begin
        refresh!
      rescue StandardError => e
        # :nocov:
        Rails.logger&.error("[UserPattern] Threshold refresh error: #{e.message}")
        # :nocov:
      end
    end

    # :nocov:
    def start_refresh_timer
      interval = UserPattern.configuration.threshold_refresh_interval
      @timer = Concurrent::TimerTask.new(execution_interval: interval) { safe_refresh }
      @timer.execute
    end
    # :nocov:
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module UserPattern
  # Error describing a rate-limit violation: which endpoint, by which record,
  # over which period, and the observed count versus the allowed limit.
  class ThresholdExceeded < StandardError
    attr_reader :endpoint, :user_id, :model_type, :period, :count, :limit

    # All attributes are required keywords; the exception message is derived
    # from them.
    def initialize(endpoint:, user_id:, model_type:, period:, count:, limit:)
      @limit = limit
      @count = count
      @period = period
      @model_type = model_type
      @user_id = user_id
      @endpoint = endpoint

      summary = [
        "Rate limit exceeded: #{endpoint} — ",
        "#{count}/#{period} (max: #{limit}) ",
        "by #{model_type}##{user_id}"
      ].join
      super(summary)
    end
  end
end