mathpix 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +52 -0
- data/LICENSE +21 -0
- data/README.md +171 -0
- data/SECURITY.md +137 -0
- data/lib/mathpix/balanced_ternary.rb +86 -0
- data/lib/mathpix/batch.rb +155 -0
- data/lib/mathpix/capture_builder.rb +142 -0
- data/lib/mathpix/chemistry.rb +69 -0
- data/lib/mathpix/client.rb +439 -0
- data/lib/mathpix/configuration.rb +187 -0
- data/lib/mathpix/configuration.rb.backup +125 -0
- data/lib/mathpix/conversion.rb +257 -0
- data/lib/mathpix/document.rb +320 -0
- data/lib/mathpix/errors.rb +78 -0
- data/lib/mathpix/mcp/auth/oauth_provider.rb +346 -0
- data/lib/mathpix/mcp/auth/token_manager.rb +31 -0
- data/lib/mathpix/mcp/auth.rb +18 -0
- data/lib/mathpix/mcp/base_tool.rb +117 -0
- data/lib/mathpix/mcp/elicitations/ambiguity_elicitation.rb +162 -0
- data/lib/mathpix/mcp/elicitations/base_elicitation.rb +141 -0
- data/lib/mathpix/mcp/elicitations/confidence_elicitation.rb +162 -0
- data/lib/mathpix/mcp/elicitations.rb +78 -0
- data/lib/mathpix/mcp/middleware/cors_middleware.rb +94 -0
- data/lib/mathpix/mcp/middleware/oauth_middleware.rb +72 -0
- data/lib/mathpix/mcp/middleware/rate_limiting_middleware.rb +140 -0
- data/lib/mathpix/mcp/middleware.rb +13 -0
- data/lib/mathpix/mcp/resources/formats_list_resource.rb +113 -0
- data/lib/mathpix/mcp/resources/hierarchical_router.rb +237 -0
- data/lib/mathpix/mcp/resources/latest_snip_resource.rb +60 -0
- data/lib/mathpix/mcp/resources/recent_snips_resource.rb +75 -0
- data/lib/mathpix/mcp/resources/snip_stats_resource.rb +78 -0
- data/lib/mathpix/mcp/resources.rb +15 -0
- data/lib/mathpix/mcp/server.rb +174 -0
- data/lib/mathpix/mcp/tools/batch_convert_tool.rb +106 -0
- data/lib/mathpix/mcp/tools/check_document_status_tool.rb +66 -0
- data/lib/mathpix/mcp/tools/convert_document_tool.rb +90 -0
- data/lib/mathpix/mcp/tools/convert_image_tool.rb +91 -0
- data/lib/mathpix/mcp/tools/convert_strokes_tool.rb +82 -0
- data/lib/mathpix/mcp/tools/get_account_info_tool.rb +57 -0
- data/lib/mathpix/mcp/tools/get_usage_tool.rb +62 -0
- data/lib/mathpix/mcp/tools/list_formats_tool.rb +81 -0
- data/lib/mathpix/mcp/tools/search_results_tool.rb +111 -0
- data/lib/mathpix/mcp/transports/http_streaming_transport.rb +622 -0
- data/lib/mathpix/mcp/transports/sse_stream_handler.rb +236 -0
- data/lib/mathpix/mcp/transports.rb +12 -0
- data/lib/mathpix/mcp.rb +52 -0
- data/lib/mathpix/result.rb +364 -0
- data/lib/mathpix/version.rb +22 -0
- data/lib/mathpix.rb +229 -0
- metadata +283 -0
@@ -0,0 +1,141 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'securerandom'
|
4
|
+
|
5
|
+
module Mathpix
|
6
|
+
module MCP
|
7
|
+
module Elicitations
|
8
|
+
# Base class for all MCP elicitations
|
9
|
+
#
|
10
|
+
# Implements MCP elicitation protocol for interactive client prompts
|
11
|
+
# Uses seed 1069 for deterministic ID generation
|
12
|
+
#
|
13
|
+
# @example Text elicitation
|
14
|
+
# elicitation = TextElicitation.new(
|
15
|
+
# prompt: "Enter your API key",
|
16
|
+
# field_name: "api_key"
|
17
|
+
# )
|
18
|
+
# request = elicitation.to_request
|
19
|
+
#
|
20
|
+
class BaseElicitation
  # Default seed mixed into ID generation.
  SEED_1069 = 1069

  # Pool of balanced-ternary digits the ID coordinate is sampled from.
  TERNARY_BASIS = [1, 1, 0, -1, 1, 1, 1].freeze

  attr_reader :id, :type, :prompt, :field_name, :optional, :default_value

  # Initialize elicitation
  #
  # @param type [String] elicitation type (text, password, select, etc)
  # @param prompt [String] prompt text to display
  # @param field_name [String] field name for response
  # @param optional [Boolean] whether field is optional
  # @param default_value [Object] default value if any
  # @param seed [Integer] seed for deterministic IDs (default 1069)
  def initialize(type:, prompt:, field_name:, optional: false, default_value: nil, seed: SEED_1069)
    @type = type
    @prompt = prompt
    @field_name = field_name
    @optional = optional
    @default_value = default_value
    @seed = seed
    @id = generate_id
    @response = nil
    @validated = false
  end

  # Generate elicitation request payload
  #
  # The :default key is included whenever a default was supplied, including
  # an explicit `false`; only a nil default is omitted.
  #
  # @return [Hash] MCP elicitation request
  def to_request
    request = {
      id: @id,
      type: @type,
      prompt: @prompt,
      field_name: @field_name,
      optional: @optional
    }
    request[:default] = @default_value unless @default_value.nil?
    request.merge!(type_specific_fields)
  end

  # Set response value
  #
  # @param value [Object] response value
  # @return [Boolean] whether response is valid
  def set_response(value)
    @response = value
    @validated = validate_response(value)
  end

  # Get response value
  #
  # @return [Object] response value (may legitimately be `false`)
  # @raise [RuntimeError] if no response set
  def response
    raise "No response set for elicitation #{@id}" if @response.nil?

    @response
  end

  # Check if elicitation has valid response
  #
  # @return [Boolean]
  def responded?
    !@response.nil? && @validated
  end

  # Validate response (override in subclasses)
  #
  # @param value [Object] response value
  # @return [Boolean] whether response is valid
  def validate_response(value)
    # Optional fields may be left empty; required fields need any non-nil value.
    return true if @optional && value.nil?

    !value.nil?
  end

  # Store in CoBlackboard context
  #
  # @param coblackboard [Object] object responding to #record_elicitation
  def store_in_context(coblackboard)
    coblackboard.record_elicitation(
      id: @id,
      type: @type,
      prompt: @prompt,
      response: @response,
      timestamp: Time.now
    )
  end

  protected

  # Generate deterministic ID from the seed and the field name
  #
  # The RNG seed is derived from a process-independent digest of the field
  # name. String#hash must not be used here: Ruby randomizes it per process,
  # which would make the ID differ between runs.
  #
  # @return [String] elicitation ID ("elicit_<field>_<coord>_<4 digits>")
  def generate_id
    rng = Random.new(@seed + stable_field_digest)

    # Sample first, then draw the numeric suffix, so both come from one stream.
    trits = TERNARY_BASIS.sample(3, random: rng)
    coordinate = trits.map { |trit| trit == -1 ? 'n' : trit.to_s }.join

    "elicit_#{@field_name}_#{coordinate}_#{rng.rand(1000..9999)}"
  end

  # Type-specific fields (override in subclasses)
  #
  # @return [Hash] additional fields for request
  def type_specific_fields
    {}
  end

  private

  # 32-bit polynomial digest of the field name; identical in every process.
  #
  # @return [Integer]
  def stable_field_digest
    @field_name.to_s.each_byte.reduce(0) do |digest, byte|
      ((digest * 31) + byte) & 0xFFFF_FFFF
    end
  end
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
@@ -0,0 +1,162 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mathpix
|
4
|
+
module MCP
|
5
|
+
module Elicitations
|
6
|
+
# Confidence elicitation for Mathpix OCR results
|
7
|
+
#
|
8
|
+
# Prompts user when OCR confidence falls below threshold
|
9
|
+
# Offers options: continue, adjust threshold, review ambiguous parts
|
10
|
+
#
|
11
|
+
# @example Low confidence handling
|
12
|
+
# elicitation = ConfidenceElicitation.new(
|
13
|
+
# confidence: 0.65,
|
14
|
+
# threshold: 0.70,
|
15
|
+
# latex: "\\int_{0}^{\\infty} e^{-x} dx",
|
16
|
+
# image_path: "formula.png"
|
17
|
+
# )
|
18
|
+
# request = elicitation.to_mcp_request
|
19
|
+
#
|
20
|
+
class ConfidenceElicitation
  # Choices offered to the user, in display order.
  DECISION_OPTIONS = [
    {
      value: 'accept',
      label: 'Accept result (use as-is)',
      description: 'Continue with current OCR output despite low confidence'
    },
    {
      value: 'adjust_threshold',
      label: 'Adjust confidence threshold',
      description: 'Lower threshold to accept similar confidence in future'
    },
    {
      value: 'review_ambiguous',
      label: 'Review ambiguous parts',
      description: 'Show detailed breakdown of low-confidence regions'
    },
    {
      value: 'retry',
      label: 'Retry OCR',
      description: 'Re-process image (may improve confidence)'
    },
    {
      value: 'reject',
      label: 'Reject result',
      description: 'Discard this OCR result'
    }
  ].freeze

  # How far below the observed confidence the suggested threshold is placed.
  THRESHOLD_STEP = 0.05

  attr_reader :confidence, :threshold, :latex, :image_path, :decision

  # Initialize confidence elicitation
  #
  # @param confidence [Float] OCR confidence (0.0-1.0)
  # @param threshold [Float] minimum acceptable confidence
  # @param latex [String, nil] OCR result (LaTeX)
  # @param image_path [String, nil] path to source image
  def initialize(confidence:, threshold:, latex:, image_path:)
    @confidence = confidence
    @threshold = threshold
    @latex = latex
    @image_path = image_path
    @decision = nil
  end

  # Check if elicitation should be triggered
  #
  # @return [Boolean] true if confidence below threshold
  def should_elicit?
    @confidence < @threshold
  end

  # Generate elicitation prompt
  #
  # @return [String] user-facing prompt
  def prompt
    percentage = (@confidence * 100).round(1)
    threshold_pct = (@threshold * 100).round(0)
    # to_s keeps a missing image path from raising inside File.basename.
    image_name = File.basename(@image_path.to_s)

    <<~PROMPT
      ⚠️ Low OCR Confidence Detected

      Confidence: #{percentage}% (threshold: #{threshold_pct}%)
      Image: #{image_name}

      OCR Result:
      #{@latex}

      What would you like to do?
    PROMPT
  end

  # Generate options for user
  #
  # @return [Array<Hash>] option choices (fresh hashes, safe to mutate)
  def options
    DECISION_OPTIONS.map(&:dup)
  end

  # Set user decision
  #
  # @param decision [String] user's choice (accept, adjust_threshold, etc)
  # @raise [ArgumentError] if the decision is not one of the offered values
  def set_decision(decision)
    valid_options = DECISION_OPTIONS.map { |option| option[:value] }
    unless valid_options.include?(decision)
      raise ArgumentError, "Invalid decision: #{decision}. Must be one of: #{valid_options.join(', ')}"
    end

    @decision = decision
  end

  # Execute user's decision
  #
  # @return [Hash] action result
  # @raise [RuntimeError] if no decision has been set
  def execute_decision
    case @decision
    when 'accept'
      { action: :accept, result: @latex }
    when 'adjust_threshold'
      # Never propose a negative threshold for very low confidences.
      { action: :adjust_threshold, new_threshold: [@confidence - THRESHOLD_STEP, 0.0].max }
    when 'review_ambiguous'
      { action: :review, data: identify_ambiguous_regions }
    when 'retry'
      { action: :retry, image_path: @image_path }
    when 'reject'
      { action: :reject, reason: 'Low confidence' }
    else
      raise "No decision set"
    end
  end

  # Convert to MCP elicitation request
  #
  # @return [Hash] MCP-compatible request
  def to_mcp_request
    {
      type: 'select',
      prompt: prompt,
      field_name: 'confidence_decision',
      options: DECISION_OPTIONS.map { |option| { value: option[:value], label: option[:label] } },
      metadata: {
        confidence: @confidence,
        threshold: @threshold,
        image_path: @image_path,
        # to_s: the OCR text can be nil when the result carries no LaTeX/text.
        result_preview: @latex.to_s[0..100]
      }
    }
  end

  private

  # Identify regions with low confidence (placeholder)
  #
  # Currently reports the whole result as a single region.
  #
  # @return [Array<Hash>] ambiguous regions
  def identify_ambiguous_regions
    [
      { text: @latex, confidence: @confidence, note: 'Full result has low confidence' }
    ]
  end
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Load Mathpix-specific elicitations
|
4
|
+
require_relative 'elicitations/confidence_elicitation'
|
5
|
+
require_relative 'elicitations/ambiguity_elicitation'
|
6
|
+
|
7
|
+
module Mathpix
|
8
|
+
module MCP
|
9
|
+
# Mathpix MCP Elicitations
|
10
|
+
#
|
11
|
+
# Interactive prompts for OCR quality assurance:
|
12
|
+
# - ConfidenceElicitation: prompt when OCR confidence < threshold
|
13
|
+
# - AmbiguityElicitation: clarify ambiguous mathematical symbols
|
14
|
+
#
|
15
|
+
# @example Confidence checking
|
16
|
+
# if result.confidence < 0.70
|
17
|
+
# elicitation = Elicitations::ConfidenceElicitation.new(
|
18
|
+
# confidence: result.confidence,
|
19
|
+
# threshold: 0.70,
|
20
|
+
# latex: result.latex,
|
21
|
+
# image_path: image_path
|
22
|
+
# )
|
23
|
+
#
|
24
|
+
# if elicitation.should_elicit?
|
25
|
+
# mcp_request = elicitation.to_mcp_request
|
26
|
+
# # Send to MCP client for user decision
|
27
|
+
# end
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
module Elicitations
  # Ambiguity checks only run when OCR confidence is below this value.
  AMBIGUITY_CONFIDENCE_CEILING = 0.90

  # LaTeX control words such as \lim, \left or \Omega. Their letters are part
  # of a command name, not OCR'd glyphs, so they must not be reported as
  # O/0 or 1/l/I ambiguities.
  LATEX_CONTROL_WORD = /\\[A-Za-z]+/

  # Look-alike glyph families, checked in order.
  AMBIGUOUS_GLYPH_RULES = [
    { pattern: /[O0]/, alternatives: ['0 (zero)', 'O (letter O)'] },
    { pattern: /[1lI]/, alternatives: ['1 (one)', 'l (lowercase L)', 'I (uppercase i)'] }
  ].freeze

  # Check if result requires confidence elicitation
  #
  # @param result [Mathpix::Result] OCR result (needs #confidence, #latex, #text)
  # @param threshold [Float] minimum confidence (default 0.70)
  # @param image_path [String] source image path
  # @return [ConfidenceElicitation, nil] elicitation if needed
  def self.check_confidence(result, threshold: 0.70, image_path:)
    return nil if result.confidence >= threshold

    ConfidenceElicitation.new(
      confidence: result.confidence,
      threshold: threshold,
      latex: result.latex || result.text,
      image_path: image_path
    )
  end

  # Detect ambiguous notation in result
  #
  # Scans the recognized text, with LaTeX control words blanked out, for
  # look-alike glyphs and reports the first hit of each family when the
  # confidence is below AMBIGUITY_CONFIDENCE_CEILING.
  #
  # @param result [Mathpix::Result] OCR result
  # @return [Array<AmbiguityElicitation>] ambiguities found
  def self.detect_ambiguities(result)
    latex = result.latex || result.text
    return [] if latex.nil?

    glyph_text = latex.to_s.gsub(LATEX_CONTROL_WORD, ' ')

    AMBIGUOUS_GLYPH_RULES.each_with_object([]) do |rule, ambiguities|
      glyph = glyph_text[rule[:pattern]]
      # Confidence is only consulted once a candidate glyph exists.
      next unless glyph && result.confidence < AMBIGUITY_CONFIDENCE_CEILING

      ambiguities << AmbiguityElicitation.new(
        ambiguous_text: glyph,
        context: latex,
        alternatives: rule[:alternatives].dup
      )
    end
  end
end
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mathpix
|
4
|
+
module MCP
|
5
|
+
module Middleware
|
6
|
+
# Rack middleware for handling CORS (Cross-Origin Resource Sharing)
|
7
|
+
class CorsMiddleware
  # Entry in :allowed_origins meaning "any origin".
  ANY_ORIGIN = '*'

  # @param app [#call] downstream Rack application
  # @param options [Hash] CORS policy
  # @option options [Array<String>] :allowed_origins exact origins, glob
  #   patterns containing "*", or "*" for any origin (default ["*"])
  # @option options [Array<String>] :allowed_methods
  # @option options [Array<String>] :allowed_headers
  # @option options [Array<String>] :expose_headers
  # @option options [Integer] :max_age preflight cache lifetime in seconds
  # @option options [Boolean] :expose_credentials send Allow-Credentials for
  #   explicitly allowed (non-wildcard) origins
  def initialize(app, options = {})
    @app = app
    @allowed_origins = options[:allowed_origins] || [ANY_ORIGIN]
    @allowed_methods = options[:allowed_methods] || %w[GET POST PUT DELETE OPTIONS]
    @allowed_headers = options[:allowed_headers] || %w[Authorization Content-Type Accept]
    @expose_headers = options[:expose_headers] || []
    @max_age = options[:max_age] || 3600
    @expose_credentials = options[:expose_credentials] || false
  end

  # Answers OPTIONS requests directly with the preflight policy; for every
  # other method, calls the app and decorates its response headers.
  #
  # @param env [Hash] Rack environment
  # @return [Array] Rack response triplet
  def call(env)
    origin = env['HTTP_ORIGIN']
    return preflight_response(origin) if env['REQUEST_METHOD'] == 'OPTIONS'

    status, headers, body = @app.call(env)

    # With a "*" policy headers are added even without an Origin header;
    # otherwise only for a present, allowed origin.
    add_cors_headers(headers, origin) if any_origin? || origin_allowed?(origin)

    [status, headers, body]
  end

  private

  # @return [Boolean] whether the policy contains the "*" entry
  def any_origin?
    @allowed_origins.include?(ANY_ORIGIN)
  end

  # Build the response for a preflight request. Method/header/max-age
  # headers are always sent; Allow-Origin only when the origin is permitted.
  def preflight_response(origin)
    headers = {}

    if any_origin?
      headers['Access-Control-Allow-Origin'] = ANY_ORIGIN
    elsif origin_allowed?(origin)
      headers['Access-Control-Allow-Origin'] = origin
      headers['Access-Control-Allow-Credentials'] = 'true' if @expose_credentials
    end

    headers['Access-Control-Allow-Methods'] = @allowed_methods.join(', ')
    headers['Access-Control-Allow-Headers'] = @allowed_headers.join(', ')
    headers['Access-Control-Max-Age'] = @max_age.to_s

    [200, headers, []]
  end

  # Mutates +headers+ in place. Callers must already have checked that the
  # origin is allowed (or that the policy is "*").
  def add_cors_headers(headers, origin)
    if any_origin?
      # Credentials cannot be combined with a wildcard origin.
      headers['Access-Control-Allow-Origin'] = ANY_ORIGIN
    else
      headers['Access-Control-Allow-Origin'] = origin
      headers['Access-Control-Allow-Credentials'] = 'true' if @expose_credentials
    end

    headers['Access-Control-Expose-Headers'] = @expose_headers.join(', ') unless @expose_headers.empty?

    # Responses differ per Origin, so caches must key on it.
    existing_vary = headers['Vary']
    headers['Vary'] = existing_vary ? "#{existing_vary}, Origin" : 'Origin'
  end

  # @return [Boolean] true for "*", an exact match, or a glob match
  def origin_allowed?(origin)
    return false if origin.nil?

    any_origin? ||
      @allowed_origins.include?(origin) ||
      @allowed_origins.any? { |pattern| wildcard_match?(pattern, origin) }
  end

  # Match +origin+ against a glob such as "https://*.example.com".
  #
  # The regex is anchored with \A/\z (whole string). ^/$ anchor per line, so
  # an Origin value containing a line break could match on one line only.
  def wildcard_match?(pattern, origin)
    return false unless pattern.include?(ANY_ORIGIN)

    glob = Regexp.escape(pattern).gsub('\*', '.*')
    Regexp.new("\\A#{glob}\\z").match?(origin)
  end
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mathpix
|
4
|
+
module MCP
|
5
|
+
module Middleware
|
6
|
+
# Rack middleware for OAuth 2.0 authentication
|
7
|
+
class OAuthMiddleware
  # "Bearer <token>" credentials. The scheme name is matched
  # case-insensitively, as HTTP authentication schemes are.
  BEARER_CREDENTIALS = /\ABearer\s+(\S.*)\z/i

  # @param app [#call] downstream Rack application
  # @param options [Hash]
  # @option options [#validate_token] :oauth_provider validates a token and
  #   returns a payload with 'user_id', 'client_id' and 'scope' keys
  def initialize(app, options = {})
    @app = app
    @oauth_provider = options[:oauth_provider]
  end

  # Lets public paths through untouched; otherwise requires a valid token
  # and exposes the payload as env['oauth.user_id'], env['oauth.client_id']
  # and env['oauth.scope'].
  #
  # @param env [Hash] Rack environment
  # @return [Array] Rack response triplet (401 JSON on missing/invalid token)
  def call(env)
    return @app.call(env) if public_path?(env['PATH_INFO'])

    token = extract_token(env)
    return unauthorized_response('missing_token') if token.nil?

    begin
      payload = @oauth_provider.validate_token(token)
      env['oauth.user_id'] = payload['user_id']
      env['oauth.client_id'] = payload['client_id']
      env['oauth.scope'] = payload['scope']

      @app.call(env)
    rescue Mathpix::MCP::Auth::InvalidTokenError => e
      unauthorized_response('invalid_token', e.message)
    end
  end

  private

  # Read the token from "Authorization: Bearer ..." or, failing that, from
  # the X-API-Key header. Blank values count as absent.
  #
  # @return [String, nil]
  def extract_token(env)
    bearer = BEARER_CREDENTIALS.match(env['HTTP_AUTHORIZATION'].to_s.strip)
    return bearer[1] if bearer

    api_key = env['HTTP_X_API_KEY'].to_s.strip
    api_key.empty? ? nil : api_key
  end

  # OAuth endpoints, the health check and the MCP info document need no token.
  def public_path?(path)
    path = path.to_s # tolerate a missing PATH_INFO
    path.start_with?('/oauth/') ||
      path == '/health' ||
      path == '/mcp/info'
  end

  # Build a 401 JSON response; error_description is omitted when nil.
  def unauthorized_response(error, description = nil)
    headers = {
      'Content-Type' => 'application/json',
      'WWW-Authenticate' => 'Bearer realm="MCP Server"'
    }

    body = { error: error, error_description: description }.compact

    [401, headers, [JSON.generate(body)]]
  end
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'concurrent'
|
4
|
+
|
5
|
+
module Mathpix
|
6
|
+
module MCP
|
7
|
+
module Middleware
|
8
|
+
# Rate limiting middleware using token bucket algorithm
|
9
|
+
class RateLimitingMiddleware
  DEFAULT_LIMIT = 60 # requests per window
  DEFAULT_WINDOW = 60 # seconds

  # Class-level storage shared across all instances (Rack may create many
  # instances). Maps client id => { count:, reset_at: }. Every access goes
  # through @@buckets_mutex.
  @@buckets = {}
  @@buckets_mutex = Mutex.new

  # Reset class-level state for testing (prevents test interference)
  def self.reset_for_testing!
    @@buckets_mutex.synchronize do
      @@buckets.clear
    end
  end

  # @param app [#call] downstream Rack application
  # @param limit [Integer] requests allowed per client per window
  # @param window [Numeric] window length in seconds
  def initialize(app, limit: DEFAULT_LIMIT, window: DEFAULT_WINDOW)
    @app = app
    @limit = limit
    @window = window
    @cleanup_thread = start_cleanup_thread unless test_env?

    debug_log("Middleware initialized: object_id=#{object_id}")
  end

  # Counts the request against the client's current window and either
  # forwards it or answers 429. /health is exempt (monitoring endpoint).
  #
  # @param env [Hash] Rack environment
  # @return [Array] Rack response triplet
  def call(env)
    request_path = env['PATH_INFO'] || env['REQUEST_PATH']
    return @app.call(env) if request_path == '/health'

    client_id = extract_client_id(env)
    admitted, count, retry_after = admit(client_id)

    unless admitted
      debug_log("LIMITING client #{client_id}, Count: #{count}/#{@limit}")
      return rate_limit_response(retry_after)
    end

    debug_log("Client: #{client_id}, Count: #{count}/#{@limit}")
    @app.call(env)
  end

  private

  # Identify the client by the first X-Forwarded-For hop, then REMOTE_ADDR.
  # NOTE(review): X-Forwarded-For is client-controlled unless a trusted proxy
  # overwrites it, so it can be spoofed to dodge the limit — confirm deployment.
  def extract_client_id(env)
    env['HTTP_X_FORWARDED_FOR']&.split(',')&.first&.strip ||
      env['REMOTE_ADDR'] ||
      'unknown'
  end

  # Check and count in one critical section, so concurrent requests cannot
  # both pass the check before either is recorded. An expired window is
  # replaced here, so limits lift on time without the cleanup thread.
  #
  # @return [Array(Boolean, Integer, Integer)] admitted?, count, retry_after
  def admit(client_id)
    now = Time.now

    @@buckets_mutex.synchronize do
      bucket = @@buckets[client_id]
      if bucket.nil? || bucket[:reset_at] <= now
        bucket = @@buckets[client_id] = { count: 0, reset_at: now + @window }
      end

      if bucket[:count] >= @limit
        retry_after = (bucket[:reset_at] - now).ceil.clamp(0, @window)
        [false, bucket[:count], retry_after]
      else
        bucket[:count] += 1
        debug_log("record_request: #{bucket[:count] - 1} -> #{bucket[:count]}")
        [true, bucket[:count], 0]
      end
    end
  end

  # Build the 429 response with Retry-After and X-RateLimit-* headers.
  def rate_limit_response(retry_after)
    headers = {
      'Content-Type' => 'application/json',
      'Retry-After' => retry_after.to_s,
      'X-RateLimit-Limit' => @limit.to_s,
      'X-RateLimit-Reset' => (Time.now + retry_after).to_i.to_s
    }
    payload = {
      error: 'rate_limit_exceeded',
      message: 'Too many requests',
      retry_after: retry_after
    }

    [429, headers, [JSON.generate(payload)]]
  end

  # Periodically drop expired buckets. A failed sweep is logged and the loop
  # continues; the rescue sits inside the loop so an error can never turn
  # into a sleep-less retry spin.
  def start_cleanup_thread
    Thread.new do
      loop do
        sleep @window
        begin
          cleanup_expired_buckets
        rescue StandardError => e
          warn "Rate limit cleanup error: #{e.message}"
        end
      end
    end
  end

  # Remove buckets whose window has ended. Safe because #admit recreates a
  # bucket on demand; this only bounds memory for clients that went away.
  def cleanup_expired_buckets
    now = Time.now
    @@buckets_mutex.synchronize do
      @@buckets.delete_if { |_client_id, bucket| bucket[:reset_at] <= now }
    end
  end

  def test_env?
    ENV['RACK_ENV'] == 'test'
  end

  # Debug trace, emitted only when RACK_ENV is "test".
  def debug_log(message)
    $stderr.puts "[RATE LIMIT] #{message}" if test_env?
  end
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mathpix
|
4
|
+
module MCP
|
5
|
+
module Middleware
|
6
|
+
# Middleware classes are loaded by the require_relative calls below
|
7
|
+
end
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
require_relative 'middleware/cors_middleware'
|
12
|
+
require_relative 'middleware/oauth_middleware'
|
13
|
+
require_relative 'middleware/rate_limiting_middleware'
|