spyglasses 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,197 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+
5
+ module Spyglasses
6
+ class Middleware
7
+ include Spyglasses::Types
8
+
9
# Wraps the downstream Rack application with Spyglasses detection.
#
# @param app [#call] the next application in the Rack stack
# @param options [Hash] per-instance settings, handed to setup_configuration
def initialize(app, options = {})
  @app = app
  @configuration = setup_configuration(options)
  @client = Client.new(@configuration)

  # Configured exclusions are listed first, followed by the built-in defaults.
  configured_paths = @configuration.exclude_paths
  @exclude_paths = configured_paths + default_exclude_paths

  log_debug("Spyglasses middleware initialized with configuration: #{@configuration.to_h}")
end
17
+
18
# Rack entry point: classifies the request, optionally blocks it, and
# reports detected traffic.
#
# Flow:
# 1. Excluded paths are passed straight to the wrapped app.
# 2. The user agent and referrer are handed to the client for detection.
# 3. A detection flagged with should_block is logged with status 403 and
#    answered with forbidden_response; the wrapped app is never called.
# 4. Otherwise the wrapped app is called. When something was detected the
#    request is logged with the app's status, or with 500 if the app raised
#    a StandardError (which is then re-raised unchanged).
#
# Durations use the monotonic clock so wall-clock adjustments cannot produce
# negative or inflated response times. Only the call into the wrapped app is
# rescued, so an error raised while logging a successful response is not
# mistaken for an application failure and logged a second time as a 500.
#
# @param env [Hash] the Rack environment
# @return [Array] the Rack response triplet [status, headers, body]
def call(env)
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  elapsed = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at }
  request = Rack::Request.new(env)

  # Skip excluded paths
  if should_exclude_path?(request.path)
    log_debug("Excluding path: #{request.path}")
    return @app.call(env)
  end

  # Extract request information
  user_agent = request.user_agent || ''
  referrer = request.referrer || ''

  log_debug("Processing request to #{request.path}")
  log_debug("User-Agent: #{user_agent[0..99]}#{user_agent.length > 100 ? '...' : ''}")
  log_debug("Referrer: #{referrer}") unless referrer.empty?

  # Detect bot or AI referrer
  detection_result = @client.detect(user_agent, referrer)
  detected = detection_result.source_type != 'none'

  if detected
    log_debug("Detection result: #{detection_result.to_h}")

    if detection_result.should_block
      log_debug("Blocking request from #{detection_result.source_type}: #{detection_result.matched_pattern}")
      log_request_async(detection_result, request, 403, elapsed.call)
      return forbidden_response
    end
  end

  begin
    status, headers, response = @app.call(env)
  rescue StandardError
    # Record the failed request, then let the original exception propagate.
    log_request_async(detection_result, request, 500, elapsed.call) if detected
    raise
  end

  log_request_async(detection_result, request, status, elapsed.call) if detected

  [status, headers, response]
end
75
+
76
+ private
77
+
78
# Builds a fresh Configuration and applies the recognised options to it.
#
# Keys may be strings or symbols. Unrecognised keys are ignored.
# :cache_ttl is coerced with to_i and :exclude_paths is wrapped with Array().
#
# @param options [Hash] option name => value
# @return [Configuration] the populated configuration object
def setup_configuration(options)
  Configuration.new.tap do |config|
    options.each do |name, setting|
      case name.to_sym
      when :api_key, :debug, :collect_endpoint, :patterns_endpoint, :auto_sync, :platform_type
        # These values are stored exactly as given.
        config.public_send("#{name}=", setting)
      when :cache_ttl
        config.cache_ttl = setting.to_i
      when :exclude_paths
        config.exclude_paths = Array(setting)
      end
    end
  end
end
105
+
106
# Built-in path patterns that are never inspected: static assets, Rails
# internals, common well-known files and health-check endpoints.
#
# The patterns are anchored with \A and \z rather than ^ and $. In Ruby the
# latter match at every line boundary, so a path containing a newline could
# otherwise match an exclusion and skip detection.
#
# @return [Array<Regexp>] patterns tested against the request path
def default_exclude_paths
  [
    # Static assets
    /\.(css|js|png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|eot)\z/i,
    # Rails specific
    %r{\A/rails/},
    %r{\A/assets/},
    # Common paths
    %r{\A/favicon\.ico},
    %r{\A/robots\.txt},
    %r{\A/sitemap\.xml},
    # Health checks
    %r{\A/health},
    %r{\A/status},
    %r{\A/ping}
  ]
end
123
+
124
# Tells whether the request path matches any entry in @exclude_paths.
#
# String entries match when they occur anywhere in the path; Regexp entries
# match via Regexp#match?. Entries of any other type never match.
#
# @param path [String] the request path
# @return [Boolean]
def should_exclude_path?(path)
  @exclude_paths.each do |rule|
    return true if rule.is_a?(String) && path.include?(rule)
    return true if rule.is_a?(Regexp) && rule.match?(path)
  end

  false
end
136
+
137
# Assembles a description of the request and hands it to the client for
# reporting. Does nothing when the configuration reports no API key.
#
# This method itself calls @client.log_request directly; whether the
# delivery is actually asynchronous depends on the client, which is not
# visible here.
#
# @param detection_result [#source_type, #matched_pattern] detection outcome
# @param request [Rack::Request] the current request
# @param status [Integer] response status to report
# @param response_time [Numeric] elapsed time in seconds
def log_request_async(detection_result, request, status, response_time)
  return unless @configuration.api_key_present?

  query = request.query_string
  elapsed_ms = (response_time * 1000).round

  payload = {
    url: request.url,
    user_agent: request.user_agent || '',
    ip_address: extract_client_ip(request),
    request_method: request.request_method,
    request_path: request.path,
    # An empty query string is reported as nil.
    request_query: (query unless query.empty?),
    referrer: request.referrer,
    response_status: status,
    response_time_ms: elapsed_ms,
    headers: extract_headers(request)
  }

  @client.log_request(detection_result, payload)

  log_debug("Logging #{detection_result.source_type} visit: #{detection_result.matched_pattern}")
end
157
+
158
# Picks the most likely client address for the request.
#
# Forwarding headers are consulted in priority order before REMOTE_ADDR.
# X-Forwarded-For may carry a comma-separated chain ("client, proxy1, ..."),
# so every header value is split on commas and trimmed; the first entry that
# is neither empty nor "127.0.0.1" wins. When nothing qualifies, request.ip
# is returned.
#
# NOTE: these headers are supplied by the client or by intermediaries and
# can be spoofed; the result is suitable for reporting, not for access
# control.
#
# @param request [#env, #ip] the current request
# @return [String, nil] the selected address
def extract_client_ip(request)
  env = request.env
  header_values = [
    env['HTTP_X_FORWARDED_FOR'],
    env['HTTP_X_REAL_IP'],
    env['HTTP_CF_CONNECTING_IP'], # Cloudflare
    env['HTTP_X_CLIENT_IP'],
    env['REMOTE_ADDR']
  ]

  candidate = header_values.compact
                           .flat_map { |value| value.split(',') }
                           .map(&:strip)
                           .find { |ip| !ip.empty? && ip != '127.0.0.1' }

  candidate || request.ip
end
168
+
169
# Collects the HTTP request headers from the Rack environment.
#
# Every env entry whose key starts with "HTTP_" and whose value is a String
# is kept. The prefix is removed and the remainder is lower-cased with
# underscores turned into hyphens (HTTP_USER_AGENT => "user-agent").
#
# NOTE(review): nothing is filtered here, so Cookie or Authorization values
# present in the env are included in the result.
#
# @param request [#env] the current request
# @return [Hash{String => String}] header name => value
def extract_headers(request)
  request.env.each_with_object({}) do |(env_key, env_value), collected|
    next unless env_key.start_with?('HTTP_') && env_value.is_a?(String)

    collected[env_key.delete_prefix('HTTP_').downcase.tr('_', '-')] = env_value
  end
end
179
+
180
# Rack response returned for blocked requests: a plain-text 403 whose body
# is "Access Denied" (13 bytes, matching the Content-Length header).
#
# @return [Array] the Rack triplet [status, headers, body]
def forbidden_response
  headers = { 'Content-Type' => 'text/plain', 'Content-Length' => '13' }

  [403, headers, ['Access Denied']]
end
190
+
191
# Prints a "[Spyglasses]"-prefixed diagnostic line to standard output, but
# only when the configuration has debugging enabled.
#
# @param message [String] text to print
# @return [nil]
def log_debug(message)
  puts "[Spyglasses] #{message}" if @configuration.debug?
end
196
+ end
197
+ end
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spyglasses
4
+ module Types
5
+ # Base detection result
6
# Outcome of inspecting one request.
#
# The defaults describe "nothing detected": not a bot, not blocked, a
# source_type of 'none', and no matched pattern or extra info.
class DetectionResult
  attr_accessor :is_bot, :should_block, :source_type, :matched_pattern, :info

  # @param is_bot [Boolean]
  # @param should_block [Boolean]
  # @param source_type [String] 'none' when nothing was detected
  # @param matched_pattern [Object, nil]
  # @param info [#to_h, nil] additional details about the match
  def initialize(is_bot: false, should_block: false, source_type: 'none', matched_pattern: nil, info: nil)
    @is_bot = is_bot
    @should_block = should_block
    @source_type = source_type
    @matched_pattern = matched_pattern
    @info = info
  end

  # @return [Hash] symbol-keyed snapshot; :info is converted with to_h
  #   unless it is nil
  def to_h
    details = @info.nil? ? nil : @info.to_h

    {
      is_bot: @is_bot,
      should_block: @should_block,
      source_type: @source_type,
      matched_pattern: @matched_pattern,
      info: details
    }
  end
end
27
+
28
+ # Bot pattern from API
29
# A bot pattern record built from a hash.
#
# Each attribute is read from the string key first and then the symbol key;
# the two boolean flags additionally accept their camelCase string spelling.
# The first non-nil value wins, so an explicit +false+ (for example
# 'is_compliant' => false) is kept instead of being collapsed to nil.
class BotPattern
  attr_accessor :pattern, :url, :type, :category, :subcategory, :company,
                :is_compliant, :is_ai_model_trainer, :intent, :instances

  # @param data [Hash] attributes keyed by String or Symbol
  def initialize(data = {})
    @pattern = pick(data, 'pattern', :pattern)
    @url = pick(data, 'url', :url)
    @type = pick(data, 'type', :type)
    @category = pick(data, 'category', :category)
    @subcategory = pick(data, 'subcategory', :subcategory)
    @company = pick(data, 'company', :company)
    @is_compliant = pick(data, 'is_compliant', :is_compliant, 'isCompliant')
    @is_ai_model_trainer = pick(data, 'is_ai_model_trainer', :is_ai_model_trainer, 'isAiModelTrainer')
    @intent = pick(data, 'intent', :intent)
    @instances = pick(data, 'instances', :instances) || []
  end

  # @return [Hash] symbol-keyed copy of all attributes
  def to_h
    {
      pattern: @pattern,
      url: @url,
      type: @type,
      category: @category,
      subcategory: @subcategory,
      company: @company,
      is_compliant: @is_compliant,
      is_ai_model_trainer: @is_ai_model_trainer,
      intent: @intent,
      instances: @instances
    }
  end

  private

  # Returns the first non-nil value found under +keys+, or nil.
  def pick(data, *keys)
    keys.each do |key|
      value = data[key]
      return value unless value.nil?
    end
    nil
  end
end
61
+
62
+ # Bot info for detection results
63
# Bot details attached to a detection result.
#
# Attributes are read from the string key first and then the symbol key.
# The two boolean flags also accept the camelCase string spellings
# ('isCompliant', 'isAiModelTrainer') and default to false when absent.
class BotInfo
  attr_accessor :pattern, :type, :category, :subcategory, :company,
                :is_compliant, :is_ai_model_trainer, :intent, :url

  # @param data [Hash] attributes keyed by String or Symbol
  def initialize(data = {})
    @pattern = data['pattern'] || data[:pattern]
    @type = data['type'] || data[:type]
    @category = data['category'] || data[:category]
    @subcategory = data['subcategory'] || data[:subcategory]
    @company = data['company'] || data[:company]
    @is_compliant = data['is_compliant'] || data[:is_compliant] || data['isCompliant'] || false
    @is_ai_model_trainer = data['is_ai_model_trainer'] || data[:is_ai_model_trainer] ||
                           data['isAiModelTrainer'] || false
    @intent = data['intent'] || data[:intent]
    @url = data['url'] || data[:url]
  end

  # @return [Hash] symbol-keyed copy of all attributes
  def to_h
    {
      pattern: @pattern,
      type: @type,
      category: @category,
      subcategory: @subcategory,
      company: @company,
      is_compliant: @is_compliant,
      is_ai_model_trainer: @is_ai_model_trainer,
      intent: @intent,
      url: @url
    }
  end
end
93
+
94
+ # AI referrer info
95
# Describes an AI referrer entry built from a hash.
#
# Attributes are looked up under the string key first and then the symbol
# key; logo_url additionally accepts the 'logoUrl' string key. patterns
# falls back to an empty array.
class AiReferrerInfo
  FIELD_NAMES = %i[id name company url patterns description logo_url].freeze
  private_constant :FIELD_NAMES

  attr_accessor :id, :name, :company, :url, :patterns, :description, :logo_url

  # @param data [Hash] attributes keyed by String or Symbol
  def initialize(data = {})
    # Same as chaining data[k1] || data[k2] || ... across the given keys.
    lookup = ->(*keys) { keys.reduce(nil) { |found, key| found || data[key] } }

    @id = lookup.call('id', :id)
    @name = lookup.call('name', :name)
    @company = lookup.call('company', :company)
    @url = lookup.call('url', :url)
    @patterns = lookup.call('patterns', :patterns) || []
    @description = lookup.call('description', :description)
    @logo_url = lookup.call('logo_url', :logo_url, 'logoUrl')
  end

  # @return [Hash] symbol-keyed copy of all attributes
  def to_h
    FIELD_NAMES.each_with_object({}) do |field, snapshot|
      snapshot[field] = instance_variable_get("@#{field}")
    end
  end
end
120
+
121
+ # API pattern response
122
# Wraps a patterns payload: a version, a list of bot patterns, a list of AI
# referrers and the property settings.
#
# Keys are accepted as strings or symbols; ai_referrers and
# property_settings also accept their camelCase string spellings. Missing
# lists default to empty and missing settings to an empty hash.
class ApiPatternResponse
  attr_accessor :version, :patterns, :ai_referrers, :property_settings

  # @param data [Hash] the decoded payload
  def initialize(data = {})
    # Same as chaining data[k1] || data[k2] || ... across the given keys.
    lookup = ->(*keys) { keys.reduce(nil) { |found, key| found || data[key] } }

    @version = lookup.call('version', :version)

    raw_patterns = lookup.call('patterns', :patterns) || []
    @patterns = raw_patterns.map { |entry| BotPattern.new(entry) }

    raw_referrers = lookup.call('ai_referrers', :ai_referrers, 'aiReferrers') || []
    @ai_referrers = raw_referrers.map { |entry| AiReferrerInfo.new(entry) }

    raw_settings = lookup.call('property_settings', :property_settings, 'propertySettings') || {}
    @property_settings = PropertySettings.new(raw_settings)
  end

  # @return [Hash] symbol-keyed copy with every nested object converted
  #   through its own to_h
  def to_h
    {
      version: @version,
      patterns: @patterns.map { |pattern| pattern.to_h },
      ai_referrers: @ai_referrers.map { |referrer| referrer.to_h },
      property_settings: @property_settings.to_h
    }
  end
end
143
+
144
+ # Property settings from API
145
# Per-property settings built from a hash.
#
# Each setting is read from its snake_case string key, then the snake_case
# symbol key, then the camelCase string key. block_ai_model_trainers
# defaults to false; the two lists default to empty arrays.
class PropertySettings
  attr_accessor :block_ai_model_trainers, :custom_blocks, :custom_allows

  # @param data [Hash] settings keyed by String or Symbol
  def initialize(data = {})
    # Same as chaining data[k1] || data[k2] || ... across the given keys.
    lookup = ->(*keys) { keys.reduce(nil) { |found, key| found || data[key] } }

    @block_ai_model_trainers =
      lookup.call('block_ai_model_trainers', :block_ai_model_trainers, 'blockAiModelTrainers') || false
    @custom_blocks = lookup.call('custom_blocks', :custom_blocks, 'customBlocks') || []
    @custom_allows = lookup.call('custom_allows', :custom_allows, 'customAllows') || []
  end

  # @return [Hash] symbol-keyed copy of the three settings
  def to_h
    settings = {}
    settings[:block_ai_model_trainers] = @block_ai_model_trainers
    settings[:custom_blocks] = @custom_blocks
    settings[:custom_allows] = @custom_allows
    settings
  end
end
162
+
163
+ # Collector payload
164
# Request record sent to the collector.
#
# Values are read from symbol keys only. headers and metadata default to
# empty hashes. timestamp defaults to the current UTC time formatted as
# "YYYY-MM-DDTHH:MM:SSZ"; it is produced with core Time#strftime so the
# class does not depend on the 'time' standard library being loaded
# (Time#iso8601 is unavailable without it on Rubies before 3.4).
class CollectorPayload
  attr_accessor :url, :user_agent, :ip_address, :request_method, :request_path,
                :request_query, :request_body, :referrer, :response_status,
                :response_time_ms, :headers, :timestamp, :platform_type, :metadata

  # @param data [Hash{Symbol => Object}] payload attributes
  def initialize(data = {})
    @url = data[:url]
    @user_agent = data[:user_agent]
    @ip_address = data[:ip_address]
    @request_method = data[:request_method]
    @request_path = data[:request_path]
    @request_query = data[:request_query]
    @request_body = data[:request_body]
    @referrer = data[:referrer]
    @response_status = data[:response_status]
    @response_time_ms = data[:response_time_ms]
    @headers = data[:headers] || {}
    @timestamp = data[:timestamp] || Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
    @platform_type = data[:platform_type]
    @metadata = data[:metadata] || {}
  end

  # @return [Hash] symbol-keyed copy of all attributes
  def to_h
    {
      url: @url,
      user_agent: @user_agent,
      ip_address: @ip_address,
      request_method: @request_method,
      request_path: @request_path,
      request_query: @request_query,
      request_body: @request_body,
      referrer: @referrer,
      response_status: @response_status,
      response_time_ms: @response_time_ms,
      headers: @headers,
      timestamp: @timestamp,
      platform_type: @platform_type,
      metadata: @metadata
    }
  end

  # Serialises #to_h as JSON. Relies on Hash#to_json, which is provided by
  # the json library; this block does not load it itself.
  #
  # @return [String]
  def to_json(*args)
    to_h.to_json(*args)
  end
end
209
+ end
210
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the Spyglasses gem.
module Spyglasses
  # Version string of this gem release.
  VERSION = '1.0.0'
end
data/lib/spyglasses.rb ADDED
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'spyglasses/version'
4
+ require_relative 'spyglasses/types'
5
+ require_relative 'spyglasses/configuration'
6
+ require_relative 'spyglasses/client'
7
+ require_relative 'spyglasses/middleware'
8
+
9
# Top-level namespace: the gem's error hierarchy and a process-wide
# configuration object.
module Spyglasses
  # Root of the gem's error hierarchy.
  class Error < StandardError
  end

  # Error class for configuration problems.
  class ConfigurationError < Error
  end

  # Error class for API problems.
  class ApiError < Error
  end

  # Yields the global configuration to the given block.
  #
  # @yieldparam config [Configuration]
  def self.configure
    yield(configuration)
  end

  # The global configuration, created on first access and reused afterwards.
  #
  # @return [Configuration]
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Discards the global configuration; the next call to .configuration
  # builds a new one.
  #
  # @return [nil]
  def self.reset_configuration!
    @configuration = nil
  end
end
metadata ADDED
@@ -0,0 +1,203 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spyglasses
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Orchestra AI, Inc.
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2025-06-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rack
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '2.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '2.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '2.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '2.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '13.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '13.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: webmock
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rack-test
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '2.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '2.0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rubocop
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '1.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '1.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: simplecov
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '0.21'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '0.21'
139
+ - !ruby/object:Gem::Dependency
140
+ name: yard
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '0.9'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '0.9'
153
+ description: Spyglasses provides comprehensive AI agent detection and management capabilities
154
+ for Ruby web applications, including Rails, Sinatra, and other Rack-based frameworks.
155
+ email:
156
+ - support@spyglasses.io
157
+ executables: []
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - ".rspec"
162
+ - ".rspec_status"
163
+ - CHANGELOG.md
164
+ - DEVELOPMENT.md
165
+ - Gemfile
166
+ - Gemfile.lock
167
+ - LICENSE
168
+ - README.md
169
+ - Rakefile
170
+ - lib/spyglasses.rb
171
+ - lib/spyglasses/client.rb
172
+ - lib/spyglasses/configuration.rb
173
+ - lib/spyglasses/middleware.rb
174
+ - lib/spyglasses/types.rb
175
+ - lib/spyglasses/version.rb
176
+ homepage: https://www.spyglasses.io
177
+ licenses:
178
+ - MIT
179
+ metadata:
180
+ homepage_uri: https://www.spyglasses.io
181
+ source_code_uri: https://github.com/spyglasses/spyglasses-ruby
182
+ documentation_uri: https://docs.spyglasses.io/ruby
183
+ changelog_uri: https://github.com/spyglasses/spyglasses-ruby/blob/main/CHANGELOG.md
184
+ post_install_message:
185
+ rdoc_options: []
186
+ require_paths:
187
+ - lib
188
+ required_ruby_version: !ruby/object:Gem::Requirement
189
+ requirements:
190
+ - - ">="
191
+ - !ruby/object:Gem::Version
192
+ version: 2.7.0
193
+ required_rubygems_version: !ruby/object:Gem::Requirement
194
+ requirements:
195
+ - - ">="
196
+ - !ruby/object:Gem::Version
197
+ version: '0'
198
+ requirements: []
199
+ rubygems_version: 3.3.3
200
+ signing_key:
201
+ specification_version: 4
202
+ summary: AI Agent Detection and Management for Ruby web applications
203
+ test_files: []