spyglasses 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rspec_status +54 -0
- data/CHANGELOG.md +41 -0
- data/DEVELOPMENT.md +215 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +143 -0
- data/LICENSE +21 -0
- data/README.md +335 -0
- data/Rakefile +37 -0
- data/lib/spyglasses/client.rb +491 -0
- data/lib/spyglasses/configuration.rb +79 -0
- data/lib/spyglasses/middleware.rb +197 -0
- data/lib/spyglasses/types.rb +210 -0
- data/lib/spyglasses/version.rb +5 -0
- data/lib/spyglasses.rb +28 -0
- metadata +203 -0
@@ -0,0 +1,491 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'net/http'
|
4
|
+
require 'uri'
|
5
|
+
require 'json'
|
6
|
+
require 'thread'
|
7
|
+
|
8
|
+
module Spyglasses
|
9
|
+
class Client
|
10
|
+
include Spyglasses::Types
|
11
|
+
|
12
|
+
attr_reader :configuration, :patterns, :ai_referrers, :pattern_version, :last_pattern_sync
|
13
|
+
|
14
|
+
# Builds a client around +config+ (a fresh Configuration when none is given),
# seeds the built-in bot patterns and AI referrers, and — when auto-sync is on
# and an API key is present — starts a background thread that calls
# sync_patterns. Errors raised inside that thread are only logged via log_debug.
#
# @param config [Configuration, nil] settings object; nil builds Configuration.new
def initialize(config = nil)
  @configuration = config || Configuration.new

  # Detection data and its bookkeeping
  @patterns = []
  @ai_referrers = []
  @pattern_regex_cache = {}
  @pattern_version = '1.0.0'
  @last_pattern_sync = 0
  @mutex = Mutex.new

  # Property settings; sync_patterns overwrites these with values from the API
  @block_ai_model_trainers = false
  @custom_blocks = []
  @custom_allows = []

  load_default_patterns

  # Nothing more to do unless a background sync is both wanted and possible
  return unless @configuration.auto_sync? && @configuration.api_key_present?

  Thread.new do
    begin
      sync_patterns
    rescue => e
      log_debug("Error syncing patterns: #{e.message}")
    end
  end
end
|
41
|
+
|
42
|
+
# Sync patterns from the API
|
43
|
+
# Fetches patterns, AI referrers and property settings from the configured
# patterns endpoint and swaps them into this client under @mutex.
#
# @return [ApiPatternResponse, String] the parsed response on success; a
#   human-readable message when no API key is set, the HTTP status is not a
#   success, or any StandardError is raised along the way
def sync_patterns
  unless @configuration.api_key_present?
    missing_key = 'No API key set for pattern sync'
    log_debug(missing_key)
    return missing_key
  end

  begin
    endpoint = URI(@configuration.patterns_endpoint)

    connection = Net::HTTP.new(endpoint.host, endpoint.port)
    connection.use_ssl = endpoint.scheme == 'https'
    connection.read_timeout = 30
    connection.open_timeout = 10

    get = Net::HTTP::Get.new(endpoint)
    get['Content-Type'] = 'application/json'
    get['x-api-key'] = @configuration.api_key

    response = connection.request(get)

    unless response.is_a?(Net::HTTPSuccess)
      http_failure = "Pattern sync HTTP error #{response.code}: #{response.message}"
      log_debug(http_failure)
      return http_failure
    end

    api_response = ApiPatternResponse.new(JSON.parse(response.body))

    # Replace all synced state in one critical section
    @mutex.synchronize do
      @patterns = api_response.patterns
      @ai_referrers = api_response.ai_referrers
      @pattern_version = api_response.version
      @last_pattern_sync = Time.now.to_i

      settings = api_response.property_settings
      @block_ai_model_trainers = settings.block_ai_model_trainers
      @custom_blocks = settings.custom_blocks
      @custom_allows = settings.custom_allows

      # Compiled regexes belong to the previous pattern set
      @pattern_regex_cache.clear
    end

    log_debug("Synced #{@patterns.length} patterns and #{@ai_referrers.length} AI referrers")
    log_debug("Property settings: block_ai_model_trainers=#{@block_ai_model_trainers}, custom_blocks=#{@custom_blocks.length}, custom_allows=#{@custom_allows.length}")

    api_response
  rescue => e
    failure = "Error syncing patterns: #{e.message}"
    log_debug(failure)
    failure
  end
end
|
98
|
+
|
99
|
+
# Detect if a user agent is a bot
|
100
|
+
# Checks a User-Agent string against every loaded bot pattern, in order, and
# reports the first pattern whose case-insensitive regex matches.
#
# A pattern that raises while being compiled or tested is logged and skipped.
#
# @param user_agent [String, nil] raw User-Agent header value
# @return [DetectionResult] a bot result (is_bot true, source_type 'bot') for
#   the first match; a default DetectionResult for nil/empty input or no match
def detect_bot(user_agent)
  return DetectionResult.new if !user_agent || user_agent.empty?

  log_debug("Checking user agent: \"#{user_agent[0..149]}#{user_agent.length > 150 ? '...' : ''}\"")
  log_debug("Testing against #{@patterns.length} bot patterns")

  @patterns.each do |candidate|
    begin
      matcher = get_regex_for_pattern(candidate.pattern)
      log_debug("Testing pattern: \"#{candidate.pattern}\" (#{candidate.type || 'unknown'} - #{candidate.company || 'unknown company'})")

      next unless matcher.match?(user_agent)

      should_block = should_block_pattern?(candidate)

      log_debug("✅ BOT DETECTED! Pattern matched: \"#{candidate.pattern}\"")
      log_debug("Bot details: type=#{candidate.type}, category=#{candidate.category}, subcategory=#{candidate.subcategory}, company=#{candidate.company}, is_ai_model_trainer=#{candidate.is_ai_model_trainer}, should_block=#{should_block}")

      # Missing classification fields fall back to fixed placeholders
      details = BotInfo.new(
        pattern: candidate.pattern,
        type: candidate.type || 'unknown',
        category: candidate.category || 'Unknown',
        subcategory: candidate.subcategory || 'Unclassified',
        company: candidate.company,
        is_compliant: candidate.is_compliant || false,
        is_ai_model_trainer: candidate.is_ai_model_trainer || false,
        intent: candidate.intent || 'unknown',
        url: candidate.url
      )

      return DetectionResult.new(
        is_bot: true,
        should_block: should_block,
        source_type: 'bot',
        matched_pattern: candidate.pattern,
        info: details
      )
    rescue => e
      log_debug("Error with pattern #{candidate.pattern}: #{e.message}")
    end
  end

  log_debug('No bot patterns matched user agent')
  DetectionResult.new
end
|
145
|
+
|
146
|
+
# Detect if a referrer is from an AI platform
|
147
|
+
# Detects whether a referrer points at a known AI platform.
#
# Every pattern is first tested as a substring of the lower-cased hostname
# returned by extract_hostname. A pattern that carries a path (for example
# 'bing.com/chat') can never occur inside a bare hostname, so it is also split
# at its first '/': the host part must occur in the hostname and the
# lower-cased referrer path must start with "/<path part>".
#
# @param referrer [String, nil] raw Referer header value
# @return [DetectionResult] on a match: is_bot false, should_block false,
#   source_type 'ai_referrer', matched_pattern set to the pattern and info set
#   to the matching referrer entry; otherwise a default DetectionResult
def detect_ai_referrer(referrer)
  return DetectionResult.new unless referrer && !referrer.empty?

  log_debug("Checking referrer: \"#{referrer}\"")

  # Extract hostname from referrer
  hostname = extract_hostname(referrer)
  log_debug("Extracted hostname: \"#{hostname}\"")

  # Path component, needed only for path-qualified patterns; an unparseable
  # referrer simply has no path to compare against
  referrer_path = begin
    URI.parse(referrer).path.to_s.downcase
  rescue URI::InvalidURIError
    ''
  end

  @ai_referrers.each do |ai_referrer|
    log_debug("Testing AI referrer: \"#{ai_referrer.name}\" (#{ai_referrer.company}) with patterns: #{ai_referrer.patterns.join(', ')}")

    ai_referrer.patterns.each do |pattern|
      log_debug("Testing AI referrer pattern: \"#{pattern}\" against hostname: \"#{hostname}\"")

      matched = hostname.include?(pattern)
      unless matched
        # 'host/path' pattern: compare host and path separately
        host_part, path_part = pattern.split('/', 2)
        matched = !path_part.nil? &&
                  hostname.include?(host_part) &&
                  referrer_path.start_with?("/#{path_part}")
      end
      next unless matched

      log_debug("✅ AI REFERRER DETECTED! Pattern matched: \"#{pattern}\"")
      log_debug("AI referrer details: name=#{ai_referrer.name}, company=#{ai_referrer.company}, id=#{ai_referrer.id}")

      return DetectionResult.new(
        is_bot: false,
        should_block: false,
        source_type: 'ai_referrer',
        matched_pattern: pattern,
        info: ai_referrer
      )
    end
  end

  DetectionResult.new
end
|
179
|
+
|
180
|
+
# Combined detection for both bot and AI referrer
|
181
|
+
# Runs bot detection first and, only when no bot matched and a referrer was
# supplied, AI-referrer detection.
#
# @param user_agent [String, nil] raw User-Agent header value
# @param referrer [String, nil] raw Referer header value
# @return [DetectionResult] the bot result, else the AI-referrer result, else
#   a default DetectionResult
def detect(user_agent, referrer = nil)
  # Only the first 100 characters of the user agent are logged
  ua_preview =
    if user_agent
      "\"#{user_agent[0..99]}#{user_agent.length > 100 ? '...' : ''}\""
    else
      'nil'
    end
  log_debug("detect() called with user_agent: #{ua_preview}, referrer: #{referrer || 'nil'}")

  bot_result = detect_bot(user_agent)
  if bot_result.is_bot
    log_debug('🤖 Final result: BOT detected, returning bot result')
    return bot_result
  end

  unless referrer
    log_debug('No referrer provided, skipping AI referrer detection')
    return DetectionResult.new
  end

  log_debug('No bot detected, starting AI referrer detection...')
  referrer_result = detect_ai_referrer(referrer)
  return DetectionResult.new unless referrer_result.source_type == 'ai_referrer'

  log_debug('🧠 Final result: AI REFERRER detected, returning referrer result')
  referrer_result
end
|
205
|
+
|
206
|
+
# Log a request to the collector
|
207
|
+
# Reports a detected request to the collector endpoint from a background
# thread so the caller is not blocked by the HTTP round trip.
#
# Nothing is sent when no API key is configured or the detection's source_type
# is 'none'.
#
# @param detection_result [DetectionResult] outcome of detect / detect_bot /
#   detect_ai_referrer
# @param request_info [Hash] request details keyed by :url, :user_agent,
#   :ip_address, :request_method, :request_path, :request_query, :referrer,
#   :response_status, :response_time_ms and :headers
# @return [Thread, nil] the sender thread, or nil when the request is skipped
def log_request(detection_result, request_info)
  log_debug("log_request() called for source_type: #{detection_result.source_type}")

  return if !@configuration.api_key_present? || detection_result.source_type == 'none'

  log_debug("Preparing to log #{detection_result.source_type} event to collector")

  # Prepare metadata; source-specific fields are appended below
  metadata = { was_blocked: detection_result.should_block }

  if detection_result.source_type == 'bot' && detection_result.info
    bot = detection_result.info
    metadata.merge!(
      agent_type: bot.type,
      agent_category: bot.category,
      agent_subcategory: bot.subcategory,
      company: bot.company,
      is_compliant: bot.is_compliant,
      intent: bot.intent,
      confidence: 0.9,
      detection_method: 'pattern_match'
    )
  elsif detection_result.source_type == 'ai_referrer' && detection_result.info
    source = detection_result.info
    metadata.merge!(
      source_type: 'ai_referrer',
      referrer_id: source.id,
      referrer_name: source.name,
      company: source.company
    )
  end

  # A missing response status is derived from the block decision
  fallback_status = detection_result.should_block ? 403 : 200

  payload = CollectorPayload.new(
    url: request_info[:url],
    user_agent: request_info[:user_agent],
    ip_address: request_info[:ip_address],
    request_method: request_info[:request_method],
    request_path: request_info[:request_path],
    request_query: request_info[:request_query],
    referrer: request_info[:referrer],
    response_status: request_info[:response_status] || fallback_status,
    response_time_ms: request_info[:response_time_ms] || 0,
    headers: request_info[:headers] || {},
    platform_type: @configuration.platform_type,
    metadata: metadata
  )

  Thread.new { send_collector_request(payload, detection_result.source_type) }
end
|
259
|
+
|
260
|
+
private
|
261
|
+
|
262
|
+
# Seeds @patterns and @ai_referrers with the built-in defaults that are used
# until (or unless) sync_patterns replaces them.
#
# The defaults are kept as compact rows and expanded into BotPattern /
# AiReferrerInfo objects; assistants come first, then model-training crawlers.
def load_default_patterns
  anthropic_docs = 'https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler'

  # Rows are [pattern, url, type, company]
  assistant_rows = [
    ['ChatGPT-User\/[0-9]', 'https://platform.openai.com/docs/bots', 'chatgpt-user', 'OpenAI'],
    ['Perplexity-User\/[0-9]', 'https://docs.perplexity.ai/guides/bots', 'perplexity-user', 'Perplexity AI'],
    ['Gemini-User\/[0-9]', 'https://ai.google.dev/gemini-api/docs/bots', 'gemini-user', 'Google'],
    ['Claude-User\/[0-9]', anthropic_docs, 'claude-user', 'Anthropic']
  ]

  trainer_rows = [
    ['CCBot\/[0-9]', 'https://commoncrawl.org/ccbot', 'ccbot', 'Common Crawl'],
    ['ClaudeBot\/[0-9]', anthropic_docs, 'claude-bot', 'Anthropic'],
    ['GPTBot\/[0-9]', 'https://platform.openai.com/docs/gptbot', 'gptbot', 'OpenAI'],
    ['meta-externalagent\/[0-9]', 'https://developers.facebook.com/docs/sharing/webmasters/crawler', 'meta-externalagent', 'Meta'],
    ['Applebot-Extended\/[0-9]', 'https://support.apple.com/en-us/119829', 'applebot-extended', 'Apple']
  ]

  # AI Assistants
  assistant_patterns = assistant_rows.map do |regex_source, docs_url, bot_type, owner|
    BotPattern.new(
      pattern: regex_source,
      url: docs_url,
      type: bot_type,
      category: 'AI Agent',
      subcategory: 'AI Assistants',
      company: owner,
      is_compliant: true,
      is_ai_model_trainer: false,
      intent: 'UserQuery'
    )
  end

  # AI Model Training Crawlers
  trainer_patterns = trainer_rows.map do |regex_source, docs_url, bot_type, owner|
    BotPattern.new(
      pattern: regex_source,
      url: docs_url,
      type: bot_type,
      category: 'AI Crawler',
      subcategory: 'Model Training Crawlers',
      company: owner,
      is_compliant: true,
      is_ai_model_trainer: true,
      intent: 'DataCollection'
    )
  end

  @patterns = assistant_patterns + trainer_patterns

  # Rows are [id, name, company, url, patterns, description]
  referrer_rows = [
    ['chatgpt', 'ChatGPT', 'OpenAI', 'https://chat.openai.com',
     ['chat.openai.com', 'chatgpt.com'], 'Traffic from ChatGPT users clicking on links'],
    ['claude', 'Claude', 'Anthropic', 'https://claude.ai',
     ['claude.ai'], 'Traffic from Claude users clicking on links'],
    ['perplexity', 'Perplexity', 'Perplexity AI', 'https://perplexity.ai',
     ['perplexity.ai'], 'Traffic from Perplexity users clicking on links'],
    ['gemini', 'Gemini', 'Google', 'https://gemini.google.com',
     ['gemini.google.com', 'bard.google.com'], 'Traffic from Gemini users clicking on links'],
    ['copilot', 'Microsoft Copilot', 'Microsoft', 'https://copilot.microsoft.com/',
     ['copilot.microsoft.com', 'bing.com/chat'], 'Traffic from Microsoft Copilot users clicking on links']
  ]

  # Default AI referrers
  @ai_referrers = referrer_rows.map do |ref_id, label, owner, home_url, host_patterns, blurb|
    AiReferrerInfo.new(
      id: ref_id,
      name: label,
      company: owner,
      url: home_url,
      patterns: host_patterns,
      description: blurb
    )
  end
end
|
413
|
+
|
414
|
+
# Returns the case-insensitive Regexp for a pattern source string, compiling
# it on first use and reusing the stored instance afterwards.
#
# @param pattern [String] regular-expression source
# @return [Regexp] compiled, case-insensitive regex
# @raise [RegexpError] when the source is not a valid regular expression
def get_regex_for_pattern(pattern)
  @pattern_regex_cache.fetch(pattern) do
    @pattern_regex_cache[pattern] = Regexp.new(pattern, Regexp::IGNORECASE)
  end
end
|
419
|
+
|
420
|
+
# Decides whether a matched pattern should be blocked.
#
# Precedence: any matching entry in @custom_allows wins (never block), then
# any matching entry in @custom_blocks (block), then the global
# block-AI-model-trainers switch; the default is not to block. Rules are
# addressed as "pattern:<p>", "category:<c>", "subcategory:<c>:<s>" and
# "type:<c>:<s>:<t>".
#
# @param pattern_data [#pattern, #category, #subcategory, #type, #is_ai_model_trainer]
# @return [Boolean] true when the request should be blocked
def should_block_pattern?(pattern_data)
  # Missing classification falls back to fixed placeholder names
  category = pattern_data.category || 'Unknown'
  subcategory = pattern_data.subcategory || 'Unclassified'
  type = pattern_data.type || 'unknown'

  rule_keys = [
    "pattern:#{pattern_data.pattern}",
    "category:#{category}",
    "subcategory:#{category}:#{subcategory}",
    "type:#{category}:#{subcategory}:#{type}"
  ]

  return false if rule_keys.any? { |key| @custom_allows.include?(key) }
  return true if rule_keys.any? { |key| @custom_blocks.include?(key) }

  @block_ai_model_trainers && pattern_data.is_ai_model_trainer ? true : false
end
|
447
|
+
|
448
|
+
# Returns the lower-cased hostname of a referrer URL. When the referrer has no
# hostname component, or cannot be parsed as a URI at all, the whole referrer
# is returned lower-cased instead.
#
# @param referrer [String] raw Referer header value
# @return [String] lower-cased hostname or lower-cased referrer
def extract_hostname(referrer)
  host = URI.parse(referrer).hostname
  return referrer.downcase unless host

  host.downcase
rescue URI::InvalidURIError
  referrer.downcase
end
|
454
|
+
|
455
|
+
# POSTs a collector payload as JSON to the configured collect endpoint.
#
# Best effort: the outcome is only reported through log_debug, and any
# StandardError raised while building or sending the request is logged rather
# than propagated.
#
# @param payload [#to_json] event to send
# @param source_type [String] detection source, used only in log messages
def send_collector_request(payload, source_type)
  endpoint = URI(@configuration.collect_endpoint)

  connection = Net::HTTP.new(endpoint.host, endpoint.port)
  connection.use_ssl = endpoint.scheme == 'https'
  connection.read_timeout = 10
  connection.open_timeout = 5

  post = Net::HTTP::Post.new(endpoint)
  post['Content-Type'] = 'application/json'
  post['x-api-key'] = @configuration.api_key
  post.body = payload.to_json

  log_debug("Making POST request to #{@configuration.collect_endpoint}")
  log_debug("Payload size: #{post.body.bytesize} bytes")

  response = connection.request(post)

  log_debug("Collector response status: #{response.code} #{response.message}")

  outcome =
    if response.is_a?(Net::HTTPSuccess)
      "✅ Successfully logged #{source_type} event"
    else
      "❌ Failed to log #{source_type} event"
    end
  log_debug(outcome)
rescue => e
  log_debug("❌ Exception during collector request for #{source_type}: #{e.message}")
end
|
484
|
+
|
485
|
+
# Prints a "[Spyglasses]"-prefixed line to standard output, but only when the
# configuration has debug mode switched on.
#
# @param message [String] text to print
# @return [nil]
def log_debug(message)
  puts "[Spyglasses] #{message}" if @configuration.debug?
end
|
490
|
+
end
|
491
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Spyglasses
|
4
|
+
# Settings for the Spyglasses client. Every value is initialised from a
# SPYGLASSES_* environment variable (falling back to the DEFAULT_* constants)
# and can be overridden afterwards through the accessors.
class Configuration
  DEFAULT_COLLECT_ENDPOINT = 'https://www.spyglasses.io/api/collect'
  DEFAULT_PATTERNS_ENDPOINT = 'https://www.spyglasses.io/api/patterns'
  DEFAULT_CACHE_TTL = 24 * 60 * 60 # 24 hours in seconds
  DEFAULT_PLATFORM_TYPE = 'ruby'

  # Number of leading API-key characters that to_h is allowed to reveal
  API_KEY_PREVIEW_LENGTH = 8

  attr_accessor :api_key, :debug, :collect_endpoint, :patterns_endpoint,
                :auto_sync, :platform_type, :cache_ttl, :exclude_paths

  # Loads every setting from the environment.
  def initialize
    @api_key = ENV['SPYGLASSES_API_KEY']
    @debug = ENV['SPYGLASSES_DEBUG'] == 'true'
    @collect_endpoint = ENV['SPYGLASSES_COLLECT_ENDPOINT'] || DEFAULT_COLLECT_ENDPOINT
    @patterns_endpoint = ENV['SPYGLASSES_PATTERNS_ENDPOINT'] || DEFAULT_PATTERNS_ENDPOINT
    @auto_sync = ENV['SPYGLASSES_AUTO_SYNC'] != 'false' # Default to true
    @platform_type = ENV['SPYGLASSES_PLATFORM_TYPE'] || DEFAULT_PLATFORM_TYPE
    @cache_ttl = (ENV['SPYGLASSES_CACHE_TTL'] || DEFAULT_CACHE_TTL).to_i
    @exclude_paths = []
  end

  # @return [Boolean] true when an API key is set and not empty
  def api_key_present?
    !@api_key.nil? && !@api_key.empty?
  end

  # @return [Boolean] whether debug logging is enabled
  def debug?
    @debug
  end

  # @return [Boolean] whether patterns are synced automatically
  def auto_sync?
    @auto_sync
  end

  # Checks that the configuration is usable.
  #
  # @raise [ConfigurationError] when the API key is missing, an endpoint is
  #   not an http(s) URL with a host, or the cache TTL is negative
  def validate!
    unless api_key_present?
      raise ConfigurationError, 'API key is required. Set SPYGLASSES_API_KEY environment variable or configure via Spyglasses.configure'
    end

    unless valid_url?(@collect_endpoint)
      raise ConfigurationError, "Invalid collect endpoint: #{@collect_endpoint}"
    end

    unless valid_url?(@patterns_endpoint)
      raise ConfigurationError, "Invalid patterns endpoint: #{@patterns_endpoint}"
    end

    if @cache_ttl < 0
      raise ConfigurationError, "Cache TTL must be non-negative: #{@cache_ttl}"
    end
  end

  # Snapshot of the settings with the API key masked, suitable for logging.
  #
  # @return [Hash{Symbol => Object}]
  def to_h
    {
      api_key: masked_api_key,
      debug: @debug,
      collect_endpoint: @collect_endpoint,
      patterns_endpoint: @patterns_endpoint,
      auto_sync: @auto_sync,
      platform_type: @platform_type,
      cache_ttl: @cache_ttl,
      exclude_paths: @exclude_paths
    }
  end

  private

  # Returns the key reduced to a short preview followed by "...". Keys no
  # longer than the preview length are hidden entirely, because showing their
  # "first 8 characters" would print the whole secret.
  def masked_api_key
    return nil unless @api_key

    key = @api_key.to_s
    return '...' if key.length <= API_KEY_PREVIEW_LENGTH

    "#{key[0, API_KEY_PREVIEW_LENGTH]}..."
  end

  # True only for http/https URLs that name a host; a host-less URL parses
  # fine but cannot be connected to. URI::HTTPS is a subclass of URI::HTTP, so
  # one type check covers both schemes.
  def valid_url?(url)
    return false if url.nil? || url.empty?

    uri = URI.parse(url)
    uri.is_a?(URI::HTTP) && !uri.host.nil? && !uri.host.empty?
  rescue URI::InvalidURIError
    false
  end
end
|
79
|
+
end
|