mathpix 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -0
- data/README.md +115 -2
- data/SECURITY.md +1 -1
- data/bin/mathpix-mcp +55 -0
- data/lib/mathpix/batch.rb +7 -8
- data/lib/mathpix/batched_document_conversion.rb +238 -0
- data/lib/mathpix/client.rb +33 -27
- data/lib/mathpix/configuration.rb +5 -9
- data/lib/mathpix/conversion.rb +2 -6
- data/lib/mathpix/document.rb +47 -12
- data/lib/mathpix/document_batcher.rb +191 -0
- data/lib/mathpix/mcp/auth/oauth_provider.rb +8 -9
- data/lib/mathpix/mcp/base_tool.rb +8 -5
- data/lib/mathpix/mcp/elicitations/ambiguity_elicitation.rb +8 -11
- data/lib/mathpix/mcp/elicitations/base_elicitation.rb +2 -0
- data/lib/mathpix/mcp/elicitations/confidence_elicitation.rb +2 -1
- data/lib/mathpix/mcp/elicitations.rb +1 -1
- data/lib/mathpix/mcp/middleware/cors_middleware.rb +2 -6
- data/lib/mathpix/mcp/middleware/oauth_middleware.rb +2 -6
- data/lib/mathpix/mcp/middleware/rate_limiting_middleware.rb +19 -18
- data/lib/mathpix/mcp/resources/formats_list_resource.rb +54 -54
- data/lib/mathpix/mcp/resources/hierarchical_router.rb +9 -18
- data/lib/mathpix/mcp/resources/latest_snip_resource.rb +22 -22
- data/lib/mathpix/mcp/resources/recent_snips_resource.rb +11 -10
- data/lib/mathpix/mcp/resources/snip_stats_resource.rb +14 -12
- data/lib/mathpix/mcp/server.rb +18 -18
- data/lib/mathpix/mcp/tools/batch_convert_tool.rb +31 -37
- data/lib/mathpix/mcp/tools/check_document_status_tool.rb +5 -5
- data/lib/mathpix/mcp/tools/convert_document_tool.rb +15 -14
- data/lib/mathpix/mcp/tools/convert_image_tool.rb +15 -14
- data/lib/mathpix/mcp/tools/convert_strokes_tool.rb +13 -13
- data/lib/mathpix/mcp/tools/get_account_info_tool.rb +1 -1
- data/lib/mathpix/mcp/tools/get_usage_tool.rb +5 -7
- data/lib/mathpix/mcp/tools/list_formats_tool.rb +30 -30
- data/lib/mathpix/mcp/tools/search_results_tool.rb +13 -14
- data/lib/mathpix/mcp/transports/http_streaming_transport.rb +129 -118
- data/lib/mathpix/mcp/transports/sse_stream_handler.rb +37 -35
- data/lib/mathpix/result.rb +3 -2
- data/lib/mathpix/version.rb +1 -1
- data/lib/mathpix.rb +3 -1
- metadata +75 -12
data/lib/mathpix/client.rb
CHANGED
@@ -29,11 +29,11 @@ module Mathpix
|
|
29
29
|
src, source_ref = prepare_image_source(image_path_or_url, options)
|
30
30
|
|
31
31
|
response = post('/text', {
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
src: src,
|
33
|
+
formats: (options[:formats] || config.default_formats).map(&:to_s),
|
34
|
+
include_line_data: options[:include_line_data] || false,
|
35
|
+
**build_request_options(options)
|
36
|
+
})
|
37
37
|
|
38
38
|
Result.new(response, source_ref)
|
39
39
|
end
|
@@ -79,10 +79,10 @@ module Mathpix
|
|
79
79
|
end
|
80
80
|
|
81
81
|
response = post('/converter', {
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
82
|
+
mmd: mmd,
|
83
|
+
formats: formats_hash,
|
84
|
+
conversion_options: options[:conversion_options] || {}
|
85
|
+
})
|
86
86
|
|
87
87
|
conversion_id = response['conversion_id']
|
88
88
|
Conversion.new(self, conversion_id: conversion_id, mmd: mmd, formats: formats)
|
@@ -137,10 +137,10 @@ module Mathpix
|
|
137
137
|
def convert_document(document_path:, document_type:, **options)
|
138
138
|
# Encode document as base64 data URI or use URL
|
139
139
|
src = if url?(document_path)
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
140
|
+
document_path
|
141
|
+
else
|
142
|
+
encode_image(document_path) # Reuse existing encoding
|
143
|
+
end
|
144
144
|
|
145
145
|
# Build conversion request
|
146
146
|
request_body = {
|
@@ -151,7 +151,7 @@ module Mathpix
|
|
151
151
|
}
|
152
152
|
|
153
153
|
response = post('/pdf', request_body)
|
154
|
-
response['pdf_id']
|
154
|
+
response['pdf_id'] # Returns conversion ID for polling
|
155
155
|
end
|
156
156
|
|
157
157
|
# Get document conversion status
|
@@ -202,13 +202,13 @@ module Mathpix
|
|
202
202
|
# @param options [Hash] additional options
|
203
203
|
# @return [Array<String, String>] src value and source reference
|
204
204
|
# @raise [InvalidRequestError] if input looks like malformed URL
|
205
|
-
def prepare_image_source(input,
|
205
|
+
def prepare_image_source(input, _options = {})
|
206
206
|
# Handle hash input: { url: '...' } or { path: '...' }
|
207
207
|
if input.is_a?(Hash)
|
208
208
|
if input[:url] || input['url']
|
209
209
|
url = input[:url] || input['url']
|
210
|
-
url = config.upgrade_to_https(url)
|
211
|
-
validate_url!(url)
|
210
|
+
url = config.upgrade_to_https(url) # Auto-upgrade HTTP→HTTPS
|
211
|
+
validate_url!(url) # Raise InvalidRequestError if malformed
|
212
212
|
return [url, url]
|
213
213
|
elsif input[:path] || input['path']
|
214
214
|
path = input[:path] || input['path']
|
@@ -222,22 +222,20 @@ module Mathpix
|
|
222
222
|
|
223
223
|
# Detect if input is URL or local path
|
224
224
|
if url?(upgraded_input)
|
225
|
-
[upgraded_input, upgraded_input]
|
225
|
+
[upgraded_input, upgraded_input] # Use URL directly as src
|
226
226
|
elsif looks_like_url?(input)
|
227
227
|
# String contains URL-like patterns but isn't valid
|
228
228
|
raise InvalidRequestError, "Invalid URL format: #{input}"
|
229
229
|
else
|
230
230
|
# Try to encode as local file
|
231
231
|
begin
|
232
|
-
[encode_image(input), input]
|
233
|
-
rescue SecurityError, Errno::ENOENT
|
232
|
+
[encode_image(input), input] # Encode local file (use original path)
|
233
|
+
rescue SecurityError, Errno::ENOENT
|
234
234
|
# If file encoding fails and input doesn't look like a file path,
|
235
235
|
# it's likely a malformed URL
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
raise # Re-raise original error for actual file path issues
|
240
|
-
end
|
236
|
+
raise InvalidRequestError, "Invalid URL format: #{input}" unless looks_like_file_path?(input)
|
237
|
+
|
238
|
+
raise # Re-raise original error for actual file path issues
|
241
239
|
end
|
242
240
|
end
|
243
241
|
end
|
@@ -248,6 +246,7 @@ module Mathpix
|
|
248
246
|
# @return [Boolean]
|
249
247
|
def url?(str)
|
250
248
|
return false unless str.is_a?(String)
|
249
|
+
|
251
250
|
config.valid_url?(str)
|
252
251
|
end
|
253
252
|
|
@@ -260,6 +259,7 @@ module Mathpix
|
|
260
259
|
# @return [Boolean]
|
261
260
|
def looks_like_url?(str)
|
262
261
|
return false unless str.is_a?(String)
|
262
|
+
|
263
263
|
# URL-like patterns: contains protocol or www prefix
|
264
264
|
str.match?(%r{^(https?://|www\.)|://})
|
265
265
|
end
|
@@ -270,6 +270,7 @@ module Mathpix
|
|
270
270
|
# @raise [InvalidRequestError] if URL is not valid
|
271
271
|
def validate_url!(url)
|
272
272
|
return if config.valid_url?(url)
|
273
|
+
|
273
274
|
raise InvalidRequestError, "Invalid URL format: #{url}"
|
274
275
|
end
|
275
276
|
|
@@ -282,8 +283,9 @@ module Mathpix
|
|
282
283
|
# @return [Boolean]
|
283
284
|
def looks_like_file_path?(str)
|
284
285
|
return false unless str.is_a?(String)
|
286
|
+
|
285
287
|
# File path patterns: contains slashes, starts with ~, has file extension, or starts with .
|
286
|
-
str.match?(%r{^[
|
288
|
+
str.match?(%r{^[~/.]|/|\\|\.(?:png|jpe?g|gif|webp|pdf|docx|pptx)$}i)
|
287
289
|
end
|
288
290
|
|
289
291
|
# Encode image to base64 data URI (with path sanitization)
|
@@ -417,7 +419,11 @@ module Mathpix
|
|
417
419
|
retry_after: response['Retry-After']&.to_i
|
418
420
|
)
|
419
421
|
when Net::HTTPClientError
|
420
|
-
error_data =
|
422
|
+
error_data = begin
|
423
|
+
JSON.parse(response.body)
|
424
|
+
rescue StandardError
|
425
|
+
{}
|
426
|
+
end
|
421
427
|
raise APIError.new(
|
422
428
|
error_data['error'] || 'Client error',
|
423
429
|
status: response.code.to_i,
|
@@ -62,7 +62,7 @@ module Mathpix
|
|
62
62
|
@rate_limit = RATE_LIMIT_DEFAULT
|
63
63
|
|
64
64
|
# Structured logging
|
65
|
-
@logger = nil
|
65
|
+
@logger = nil # Can be set to Logger instance
|
66
66
|
end
|
67
67
|
|
68
68
|
def validate!
|
@@ -70,14 +70,10 @@ module Mathpix
|
|
70
70
|
raise ConfigurationError, 'app_key is required' if app_key.nil? || app_key.empty?
|
71
71
|
|
72
72
|
# Validate API URL uses HTTPS
|
73
|
-
if enforce_https && !api_url.start_with?('https://')
|
74
|
-
raise ConfigurationError, 'API URL must use HTTPS'
|
75
|
-
end
|
73
|
+
raise ConfigurationError, 'API URL must use HTTPS' if enforce_https && !api_url.start_with?('https://')
|
76
74
|
|
77
75
|
# Validate timeout
|
78
|
-
if timeout <= 0 || timeout > 300
|
79
|
-
raise ConfigurationError, 'Timeout must be between 1 and 300 seconds'
|
80
|
-
end
|
76
|
+
raise ConfigurationError, 'Timeout must be between 1 and 300 seconds' if timeout <= 0 || timeout > 300
|
81
77
|
|
82
78
|
true
|
83
79
|
end
|
@@ -132,7 +128,7 @@ module Mathpix
|
|
132
128
|
return url unless url.is_a?(String)
|
133
129
|
return url unless url.start_with?('http://')
|
134
130
|
|
135
|
-
url.sub(
|
131
|
+
url.sub(%r{^http://}, 'https://')
|
136
132
|
end
|
137
133
|
|
138
134
|
# Sanitize file path to prevent directory traversal
|
@@ -151,7 +147,7 @@ module Mathpix
|
|
151
147
|
|
152
148
|
# Check for directory traversal attempts
|
153
149
|
return nil if normalized.include?('../')
|
154
|
-
return nil if normalized.match?(
|
150
|
+
return nil if normalized.match?(%r{\.\.[/\\]})
|
155
151
|
|
156
152
|
# Check file exists (for local paths)
|
157
153
|
return nil unless File.exist?(normalized)
|
data/lib/mathpix/conversion.rb
CHANGED
@@ -77,14 +77,10 @@ module Mathpix
|
|
77
77
|
|
78
78
|
return self if completed?
|
79
79
|
|
80
|
-
if error?
|
81
|
-
raise ConversionError, "Conversion failed: #{error_message}"
|
82
|
-
end
|
80
|
+
raise ConversionError, "Conversion failed: #{error_message}" if error?
|
83
81
|
|
84
82
|
elapsed = Time.now - start_time
|
85
|
-
if elapsed > max_wait
|
86
|
-
raise TimeoutError, "Conversion timed out after #{max_wait}s (status: #{status})"
|
87
|
-
end
|
83
|
+
raise TimeoutError, "Conversion timed out after #{max_wait}s (status: #{status})" if elapsed > max_wait
|
88
84
|
|
89
85
|
sleep poll_interval if processing?
|
90
86
|
end
|
data/lib/mathpix/document.rb
CHANGED
@@ -73,7 +73,10 @@ module Mathpix
|
|
73
73
|
|
74
74
|
# Execute document conversion (async operation)
|
75
75
|
#
|
76
|
-
#
|
76
|
+
# Automatically uses batching for large PDFs (>1.2MB) to prevent
|
77
|
+
# "request too large" errors. Batching is transparent to the user.
|
78
|
+
#
|
79
|
+
# @return [DocumentConversion, BatchedDocumentConversion] conversion object (async)
|
77
80
|
# @example
|
78
81
|
# conversion = Mathpix.document('paper.pdf')
|
79
82
|
# .with_formats(:markdown, :latex)
|
@@ -84,14 +87,20 @@ module Mathpix
|
|
84
87
|
# Detect document type from extension
|
85
88
|
doc_type = detect_document_type
|
86
89
|
|
87
|
-
#
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
90
|
+
# Check if batching is needed (large PDFs only)
|
91
|
+
if should_batch?
|
92
|
+
# Use automatic batching
|
93
|
+
batcher = DocumentBatcher.new(document_path)
|
94
|
+
BatchedDocumentConversion.new(client, document_path, doc_type, batcher, options)
|
95
|
+
else
|
96
|
+
# Standard single-request conversion
|
97
|
+
conversion_id = client.convert_document(
|
98
|
+
document_path: document_path,
|
99
|
+
document_type: doc_type,
|
100
|
+
**options
|
101
|
+
)
|
102
|
+
DocumentConversion.new(client, conversion_id, document_path, doc_type)
|
103
|
+
end
|
95
104
|
end
|
96
105
|
|
97
106
|
alias call convert
|
@@ -99,6 +108,33 @@ module Mathpix
|
|
99
108
|
|
100
109
|
private
|
101
110
|
|
111
|
+
# Check if document should use automatic batching
|
112
|
+
#
|
113
|
+
# Batching is used for:
|
114
|
+
# - Local PDF files (not URLs)
|
115
|
+
# - Files larger than 1.2MB (conservative threshold)
|
116
|
+
#
|
117
|
+
# @return [Boolean] true if batching should be used
|
118
|
+
def should_batch?
|
119
|
+
# Can't batch URLs - need local file access
|
120
|
+
return false if url?(document_path)
|
121
|
+
|
122
|
+
# Only batch PDFs (DOCX/PPTX handled differently by API)
|
123
|
+
return false unless File.extname(document_path).downcase == '.pdf'
|
124
|
+
|
125
|
+
# Check file size threshold
|
126
|
+
File.exist?(document_path) &&
|
127
|
+
File.size(document_path) > (DocumentBatcher::MAX_SINGLE_REQUEST_MB * 1024 * 1024)
|
128
|
+
end
|
129
|
+
|
130
|
+
# Check if document path is a URL
|
131
|
+
#
|
132
|
+
# @param path [String] document path
|
133
|
+
# @return [Boolean] true if path is a URL
|
134
|
+
def url?(path)
|
135
|
+
path.to_s.start_with?('http://', 'https://')
|
136
|
+
end
|
137
|
+
|
102
138
|
# Detect document type from file extension
|
103
139
|
# @return [Symbol] :pdf, :docx, :pptx
|
104
140
|
def detect_document_type
|
@@ -154,9 +190,8 @@ module Mathpix
|
|
154
190
|
)
|
155
191
|
when 'processing', 'pending'
|
156
192
|
elapsed = Time.now - start_time
|
157
|
-
if elapsed > max_wait
|
158
|
-
|
159
|
-
end
|
193
|
+
raise TimeoutError, "Document conversion timed out after #{max_wait}s" if elapsed > max_wait
|
194
|
+
|
160
195
|
sleep poll_interval
|
161
196
|
else
|
162
197
|
raise ConversionError.new(
|
@@ -0,0 +1,191 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pdf-reader'
|
4
|
+
require 'prawn'
|
5
|
+
require 'tempfile'
|
6
|
+
|
7
|
+
module Mathpix
  # Adaptive PDF batching for large documents.
  #
  # Splits a large local PDF into page-range batches so that each request
  # stays under the single-request size threshold and avoids
  # "request too large" errors. Uses pdf-reader to read page counts and page
  # text, and prawn to write the per-batch PDFs.
  #
  # Strategy: the number of pages per batch adapts to the average page size
  # (file size / page count), bounded by MIN_PAGES_PER_BATCH and
  # DEFAULT_PAGES_PER_BATCH.
  #
  # @see https://mathpix.com/docs/convert/limits Mathpix API limits (1 GB max)
  # @see https://mathpix.com/docs/convert/best-practices Mathpix best practices
  #
  # Batch size constants informed by industry research (2025-10-14):
  # - 7 comprehensive searches across OCR API providers, performance benchmarks, and distributed systems
  # - Findings: AWS Textract (500MB, 3000 pages), Google Vision (20MB), Azure (500MB, 2000 pages)
  # - Performance: LlamaParse 6s/batch (50 pages), memory optimization at 1000-page intervals
  # - Chunking research: 512-token baseline with 10-20% overlap, <100MB batches for efficiency
  # - Our 1.2MB threshold balances Mathpix latency (<100KB instant) vs batch efficiency (<100MB)
  # - 10 pages/batch aligns with 50-page optimal windows while maintaining memory efficiency
  class DocumentBatcher
    # Conservative threshold based on research findings:
    # - Well below Mathpix 1 GB limit for safety margin
    # - Above 100 KB instant-speed threshold
    # - Below 100 MB memory-efficient recommendation
    # - Allows ~12 batches for typical 15 MB documents
    # - 1.2 MB before base64 encoding ≈ 1.6 MB after (33% expansion)
    MAX_SINGLE_REQUEST_MB = 1.2

    # Default maximum pages per batch based on performance benchmarks:
    # - Aligned with 50-page optimal processing windows (LlamaParse: 6s consistent)
    # - Small enough for memory efficiency (<900MB when processing 1000 pages)
    # - Large enough to minimize API call overhead
    # - Adaptive algorithm adjusts based on actual page density
    DEFAULT_PAGES_PER_BATCH = 10

    # Minimum pages per batch (handles extremely dense pages):
    # - Ensures at least 1 page per batch for very large individual pages
    # - Example: 10MB PDF with 20 pages = 0.5MB/page requires 2 pages/batch
    MIN_PAGES_PER_BATCH = 1

    attr_reader :document_path, :file_size, :page_count

    # Initialize batcher with document
    #
    # Validates the path first, then records the file size in bytes and the
    # page count reported by pdf-reader.
    #
    # @param document_path [String] path to PDF file
    # @raise [InvalidImageError] if file doesn't exist, isn't a PDF, or can't be read
    def initialize(document_path)
      @document_path = document_path
      validate_document!

      @file_size = File.size(document_path)
      @page_count = extract_page_count
    end

    # Check if document needs batching
    #
    # @return [Boolean] true if file size exceeds MAX_SINGLE_REQUEST_MB
    def needs_batching?
      file_size_mb > MAX_SINGLE_REQUEST_MB
    end

    # Calculate optimal batch ranges
    #
    # A document under the size threshold yields a single range covering every
    # page. A document with no pages yields no ranges at all, regardless of
    # size, so callers never receive the invalid range [1, 0].
    #
    # @return [Array<Array(Integer, Integer)>] array of [start_page, end_page] tuples (1-indexed)
    # @example
    #   batcher.calculate_batches
    #   # => [[1, 10], [11, 20], [21, 30]]
    def calculate_batches
      return [] unless @page_count.positive?
      return [[1, @page_count]] unless needs_batching?

      pages_per_batch = adaptive_pages_per_batch

      # Generate batch ranges (1-indexed for Mathpix API); the last range is
      # clipped to the final page.
      (1..@page_count).step(pages_per_batch).map do |start_page|
        [start_page, [start_page + pages_per_batch - 1, @page_count].min]
      end
    end

    # Extract specific page range to a new PDF file
    #
    # Note: This is a simplified version - only a "Page N" heading and the
    # extracted text of each page are written. A full implementation would
    # need to preserve all page content, images, fonts, etc.
    #
    # The caller owns the returned Tempfile and is responsible for closing it.
    #
    # @param start_page [Integer] first page (1-indexed)
    # @param end_page [Integer] last page (1-indexed)
    # @return [Tempfile] temporary PDF file with extracted pages
    # @raise [ArgumentError] if page range is invalid
    def extract_batch(start_page, end_page)
      validate_page_range!(start_page, end_page)

      # Fetch the page list once instead of once per page inside the loop.
      source_pages = ::PDF::Reader.new(@document_path).pages
      temp_pdf = Tempfile.new(['batch', '.pdf'])

      begin
        ::Prawn::Document.generate(temp_pdf.path) do |pdf|
          (start_page..end_page).each do |page_num|
            page = source_pages[page_num - 1] # 0-indexed in reader

            pdf.text "Page #{page_num}"
            pdf.text page.text if page.respond_to?(:text)
            pdf.start_new_page unless page_num == end_page
          end
        end
      rescue StandardError
        # Generation failed: remove the temp file now rather than leaving it
        # behind until garbage collection, then surface the original error.
        temp_pdf.close!
        raise
      end

      temp_pdf.rewind
      temp_pdf
    end

    private

    # Get file size in megabytes
    #
    # @return [Float] file size in MB
    def file_size_mb
      @file_size / (1024.0 * 1024.0)
    end

    # Pages per batch, adapted to the average page size.
    #
    # Only called when the document needs batching and has at least one page,
    # so the average page size is always greater than zero here.
    #
    # @return [Integer] value between MIN_PAGES_PER_BATCH and DEFAULT_PAGES_PER_BATCH
    def adaptive_pages_per_batch
      avg_page_size_mb = file_size_mb / @page_count
      size_limited = (MAX_SINGLE_REQUEST_MB / avg_page_size_mb).floor

      [MIN_PAGES_PER_BATCH, [DEFAULT_PAGES_PER_BATCH, size_limited].min].max
    end

    # Extract total page count from PDF
    #
    # @return [Integer] number of pages
    # @raise [InvalidImageError] if PDF is malformed or cannot be read
    def extract_page_count
      ::PDF::Reader.new(@document_path).page_count
    rescue ::PDF::Reader::MalformedPDFError => e
      raise InvalidImageError.new(
        "PDF is malformed or corrupted: #{e.message}",
        recommended_format: 'valid PDF'
      )
    rescue StandardError => e
      raise InvalidImageError.new(
        "Failed to read PDF: #{e.message}",
        recommended_format: 'valid PDF'
      )
    end

    # Validate document exists and is a PDF
    #
    # The PDF check looks at the file extension only (case-insensitive).
    #
    # @raise [InvalidImageError] if file doesn't exist or isn't a PDF
    def validate_document!
      unless File.exist?(@document_path)
        raise InvalidImageError.new(
          "File not found: #{@document_path}",
          recommended_format: 'existing PDF file'
        )
      end

      return if File.extname(@document_path).downcase == '.pdf'

      raise InvalidImageError.new(
        "File must be a PDF: #{@document_path}",
        recommended_format: 'PDF'
      )
    end

    # Validate page range is within bounds
    #
    # @param start_page [Integer] first page (1-indexed)
    # @param end_page [Integer] last page (1-indexed)
    # @raise [ArgumentError] if range is invalid
    def validate_page_range!(start_page, end_page)
      if start_page < 1 || end_page > @page_count
        raise ArgumentError, "Page range (#{start_page}-#{end_page}) out of bounds (1-#{@page_count})"
      end

      return unless start_page > end_page

      raise ArgumentError, "Start page (#{start_page}) must be <= end page (#{end_page})"
    end
  end
end
|
@@ -44,7 +44,8 @@ module Mathpix
|
|
44
44
|
end
|
45
45
|
|
46
46
|
# Authorization code grant flow
|
47
|
-
def authorize(client_id:, redirect_uri:, scope:, user_id:, state: nil, code_challenge: nil,
|
47
|
+
def authorize(client_id:, redirect_uri:, scope:, user_id:, state: nil, code_challenge: nil,
|
48
|
+
code_challenge_method: nil)
|
48
49
|
raise InvalidClientError, 'Unknown client' unless @clients.key?(client_id)
|
49
50
|
|
50
51
|
client = @clients[client_id]
|
@@ -167,8 +168,8 @@ module Mathpix
|
|
167
168
|
|
168
169
|
def introspect_token(token, client_id: nil, client_secret: nil)
|
169
170
|
# Validate client if credentials provided
|
170
|
-
if client_id && client_secret
|
171
|
-
raise InvalidClientError, 'Invalid client'
|
171
|
+
if client_id && client_secret && !validate_client(client_id, client_secret)
|
172
|
+
raise InvalidClientError, 'Invalid client'
|
172
173
|
end
|
173
174
|
|
174
175
|
begin
|
@@ -187,8 +188,8 @@ module Mathpix
|
|
187
188
|
|
188
189
|
def revoke_token(token, client_id: nil, client_secret: nil)
|
189
190
|
# Validate client if credentials provided
|
190
|
-
if client_id && client_secret
|
191
|
-
raise InvalidClientError, 'Invalid client'
|
191
|
+
if client_id && client_secret && !validate_client(client_id, client_secret)
|
192
|
+
raise InvalidClientError, 'Invalid client'
|
192
193
|
end
|
193
194
|
|
194
195
|
@revoked_tokens[token] = Time.now
|
@@ -199,7 +200,7 @@ module Mathpix
|
|
199
200
|
@refresh_tokens.delete(token)
|
200
201
|
|
201
202
|
# Revoke all access tokens for this user/client combination
|
202
|
-
@access_tokens.select do |
|
203
|
+
@access_tokens.select do |_access_token, data|
|
203
204
|
data[:user_id] == token_data[:user_id] && data[:client_id] == token_data[:client_id]
|
204
205
|
end.each_key do |access_token|
|
205
206
|
@revoked_tokens[access_token] = Time.now
|
@@ -314,9 +315,7 @@ module Mathpix
|
|
314
315
|
expected_signature = OpenSSL::HMAC.digest('SHA256', @jwt_secret, signature_input)
|
315
316
|
expected_signature_encoded = base64url_encode(expected_signature)
|
316
317
|
|
317
|
-
unless signature_encoded == expected_signature_encoded
|
318
|
-
raise InvalidTokenError, 'Invalid signature'
|
319
|
-
end
|
318
|
+
raise InvalidTokenError, 'Invalid signature' unless signature_encoded == expected_signature_encoded
|
320
319
|
|
321
320
|
JSON.parse(base64url_decode(payload_encoded))
|
322
321
|
end
|
@@ -33,7 +33,7 @@ module Mathpix
|
|
33
33
|
# @param server_context [Hash] MCP server context
|
34
34
|
# @return [Mathpix::Client] Mathpix API client
|
35
35
|
def mathpix_client(server_context)
|
36
|
-
server_context[:mathpix_client] || raise(ArgumentError,
|
36
|
+
server_context[:mathpix_client] || raise(ArgumentError, 'mathpix_client not in server_context')
|
37
37
|
end
|
38
38
|
|
39
39
|
# Create text response (official MCP format)
|
@@ -42,9 +42,9 @@ module Mathpix
|
|
42
42
|
# @return [::MCP::Tool::Response]
|
43
43
|
def text_response(text)
|
44
44
|
::MCP::Tool::Response.new([{
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
type: 'text',
|
46
|
+
text: text
|
47
|
+
}])
|
48
48
|
end
|
49
49
|
|
50
50
|
# Create JSON response with text wrapper
|
@@ -80,6 +80,7 @@ module Mathpix
|
|
80
80
|
# @return [Array<Symbol>] format symbols
|
81
81
|
def extract_formats(formats, client)
|
82
82
|
return client.config.default_formats if formats.nil? || formats.empty?
|
83
|
+
|
83
84
|
Array(formats).map(&:to_sym)
|
84
85
|
end
|
85
86
|
|
@@ -88,7 +89,9 @@ module Mathpix
|
|
88
89
|
# @param path [String] file path
|
89
90
|
# @return [String] normalized path
|
90
91
|
def normalize_path(path)
|
91
|
-
File.expand_path(path)
|
92
|
+
File.expand_path(path)
|
93
|
+
rescue StandardError
|
94
|
+
path
|
92
95
|
end
|
93
96
|
|
94
97
|
# Check if path is a URL
|
@@ -35,7 +35,7 @@ module Mathpix
|
|
35
35
|
description: 'Comma vs decimal point in numbers'
|
36
36
|
},
|
37
37
|
'prime_or_apostrophe' => {
|
38
|
-
pattern: /[
|
38
|
+
pattern: /['′]/,
|
39
39
|
alternatives: ["' (prime)", "' (apostrophe)"],
|
40
40
|
description: 'Prime notation vs apostrophe'
|
41
41
|
}
|
@@ -89,9 +89,8 @@ module Mathpix
|
|
89
89
|
# @param decision [String] selected option (option_0, option_1, etc)
|
90
90
|
# Stores the alternative whose index is named by the option identifier.
#
# @return [Object] the selected entry of @alternatives
# @raise [ArgumentError] if decision has no option_N token or N is out of range
def set_decision(decision)
  # Pull N out of "option_N"; nil when the token is absent. Previously a
  # malformed identifier hit nil[1] and surfaced as NoMethodError.
  index = decision.to_s[/option_(\d+)/, 1]&.to_i
  raise ArgumentError, "Invalid decision: #{decision}" unless index && index < @alternatives.length

  @decision = @alternatives[index]
end
|
97
96
|
|
@@ -99,7 +98,7 @@ module Mathpix
|
|
99
98
|
#
|
100
99
|
# @return [String] corrected LaTeX
|
101
100
|
def apply_clarification
|
102
|
-
raise
|
101
|
+
raise 'No decision set' unless @decision
|
103
102
|
|
104
103
|
# Simple replacement (in real implementation, would be more sophisticated)
|
105
104
|
corrected = @context.dup
|
@@ -113,7 +112,7 @@ module Mathpix
|
|
113
112
|
corrected.gsub!(/[lI]/, '1')
|
114
113
|
when /l \(lowercase L\)/
|
115
114
|
corrected.gsub!(/[1I]/, 'l')
|
116
|
-
|
115
|
+
# Add more transformations as needed
|
117
116
|
end
|
118
117
|
|
119
118
|
corrected
|
@@ -143,16 +142,14 @@ module Mathpix
|
|
143
142
|
# @return [Array<String>] detected alternatives
|
144
143
|
def detect_alternatives
|
145
144
|
# Check common patterns
|
146
|
-
AMBIGUOUS_PATTERNS.
|
147
|
-
if pattern_data[:pattern].match?(@ambiguous_text)
|
148
|
-
return pattern_data[:alternatives]
|
149
|
-
end
|
145
|
+
AMBIGUOUS_PATTERNS.each_value do |pattern_data|
|
146
|
+
return pattern_data[:alternatives] if pattern_data[:pattern].match?(@ambiguous_text)
|
150
147
|
end
|
151
148
|
|
152
149
|
# Default: generic ambiguity
|
153
150
|
[
|
154
151
|
"Interpretation A: #{@ambiguous_text}",
|
155
|
-
|
152
|
+
'Interpretation B: similar symbol',
|
156
153
|
"Keep as-is: #{@ambiguous_text}"
|
157
154
|
]
|
158
155
|
end
|
@@ -77,6 +77,7 @@ module Mathpix
|
|
77
77
|
# @raise [RuntimeError] if no response set
|
78
78
|
def response
|
79
79
|
raise "No response set for elicitation #{@id}" unless @response
|
80
|
+
|
80
81
|
@response
|
81
82
|
end
|
82
83
|
|
@@ -94,6 +95,7 @@ module Mathpix
|
|
94
95
|
def validate_response(value)
|
95
96
|
# Base validation: optional fields can be nil
|
96
97
|
return true if @optional && value.nil?
|
98
|
+
|
97
99
|
# Non-optional fields must have value
|
98
100
|
!value.nil?
|
99
101
|
end
|
@@ -102,6 +102,7 @@ module Mathpix
|
|
102
102
|
unless valid_options.include?(decision)
|
103
103
|
raise ArgumentError, "Invalid decision: #{decision}. Must be one of: #{valid_options.join(', ')}"
|
104
104
|
end
|
105
|
+
|
105
106
|
@decision = decision
|
106
107
|
end
|
107
108
|
|
@@ -121,7 +122,7 @@ module Mathpix
|
|
121
122
|
when 'reject'
|
122
123
|
{ action: :reject, reason: 'Low confidence' }
|
123
124
|
else
|
124
|
-
raise
|
125
|
+
raise 'No decision set'
|
125
126
|
end
|
126
127
|
end
|
127
128
|
|
@@ -34,7 +34,7 @@ module Mathpix
|
|
34
34
|
# @param threshold [Float] minimum confidence (default 0.70)
|
35
35
|
# @param image_path [String] source image path
|
36
36
|
# @return [ConfidenceElicitation, nil] elicitation if needed
|
37
|
-
def self.check_confidence(result, threshold: 0.70
|
37
|
+
def self.check_confidence(result, image_path:, threshold: 0.70)
|
38
38
|
return nil if result.confidence >= threshold
|
39
39
|
|
40
40
|
ConfidenceElicitation.new(
|