mathpix-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,534 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mathpix
4
+ # Core HTTP client for Mathpix API
5
+ class Client
6
+ attr_reader :config
7
+
8
# Build a client bound to a configuration object.
#
# The configuration is stored and then asked to validate itself, so whatever
# `validate!` raises surfaces at construction time.
#
# @param config [Object] configuration; defaults to Mathpix.configuration
def initialize(config = Mathpix.configuration)
  @config = config
  @config.validate!
end
12
+
13
# Snap image to equation (core method).
#
# Accepts a local file path, a remote URL, or a hash carrying a :path/:url
# key. The source is resolved by prepare_image_source and the request is
# POSTed to the `/text` endpoint.
#
# @param image_path_or_url [String, Hash] path to image, URL, or hash with :path/:url key
# @param options [Hash] request options
# @option options [Array<#to_s>, #to_s] :formats output formats; a single
#   value is accepted as well as a list (defaults to config.default_formats)
# @option options [Boolean] :include_line_data sent as given, `false` when unset
# @return [Result] wraps the parsed response and the original source reference
# @example Local file
#   client.snap('equation.png')
# @example Remote URL
#   client.snap('https://example.com/equation.png')
#   client.snap(url: 'https://example.com/equation.png')
# @example With options
#   client.snap('equation.png', formats: [:latex, :mathml])
#   client.snap('equation.png', formats: :latex)
def snap(image_path_or_url, **options)
  src, source_ref = prepare_image_source(image_path_or_url, options)

  # Array() lets callers pass `formats: :latex` as well as a list; calling
  # `map` directly on a bare symbol or string raises NoMethodError.
  formats = Array(options[:formats] || config.default_formats).map(&:to_s)

  response = post('/text', {
    src: src,
    formats: formats,
    include_line_data: options[:include_line_data] || false,
    **build_request_options(options)
  })

  Result.new(response, source_ref)
end
39
+
40
# Convert handwritten strokes to text/LaTeX via the `/strokes` endpoint.
#
# Strokes arrive as one list of [x, y] points per stroke
# ([[[x, y], ...], ...]); the request body carries parallel x and y arrays
# instead, so each stroke is split into its x values and its y values.
#
# @param strokes [Array<Array<Array<Numeric>>>] strokes as lists of [x, y] points
# @param options [Hash] request options
# @option options [Array<#to_s>, #to_s] :formats output formats; a single
#   value is accepted as well as a list (defaults to config.default_formats)
# @return [Result]
def convert_strokes(strokes, **options)
  point_lists = Array(strokes).map { |stroke| Array(stroke) }
  xs = point_lists.map { |points| points.map { |point| point[0] } }
  ys = point_lists.map { |points| points.map { |point| point[1] } }

  # Array() lets callers pass `formats: :latex` as well as a list; calling
  # `map` directly on a bare symbol or string raises NoMethodError.
  formats = Array(options[:formats] || config.default_formats).map(&:to_s)

  response = post('/strokes', {
    strokes: { strokes: { x: xs, y: ys } },
    formats: formats,
    **build_request_options(options)
  })
  Result.new(response)
end
54
+
55
# List recent captures from the `/ocr-results` endpoint.
#
# Rows are read from the "ocr_results" key of the response. Each row's OCR
# payload is taken from its "result" key (falling back to the row itself),
# and the row-level "timestamp" is copied into the payload when present.
#
# @param limit [Integer] number of results, sent as `per_page`
# @return [Array<Result>]
def recent(limit: 10)
  listing = get('/ocr-results', params: { per_page: limit })

  (listing['ocr_results'] || []).map do |row|
    stamp = row['timestamp']
    payload = row['result'] || row
    payload = payload.merge('timestamp' => stamp) if payload.is_a?(Hash) && stamp
    Result.new(payload)
  end
end
68
+
69
# Download a file from a URL.
#
# The Mathpix credentials (`app_id` / `app_key` headers) are attached only
# when the URL targets the same scheme, host and port as `config.endpoint`.
# The URL is caller-supplied, so sending the API key unconditionally would
# hand it to whatever third-party host the URL points at.
#
# @param url [String] download URL
# @return [String] file content as bytes
# @raise [APIError] when the response is not a success status
def download(url)
  uri = URI(url)
  api = URI(config.endpoint)

  request = Net::HTTP::Get.new(uri)
  request['User-Agent'] = config.user_agent

  same_origin = uri.scheme == api.scheme &&
                uri.host.to_s.downcase == api.host.to_s.downcase &&
                uri.port == api.port
  if same_origin
    request['app_id'] = config.app_id
    request['app_key'] = config.app_key
  end

  response = make_request(uri, request)
  return response.body if response.is_a?(Net::HTTPSuccess)

  raise APIError.new(
    "Download failed: #{response.code}",
    status: response.code.to_i
  )
end
92
+
93
# Submit a document (PDF, DOCX, PPTX) for asynchronous conversion.
#
# A path that `url?` recognises is submitted as JSON via the `url` field;
# anything else is treated as a local file and sent as a multipart upload.
# Per the original implementation notes, the `/pdf` endpoint does NOT accept
# the base64 `src` field used by the image endpoint — sending it made
# Mathpix reply "Missing URL in request body".
#
# Note: `document_type` is required by the signature but is not used when
# building the request.
#
# @param document_path [String] path or URL of the document
# @param document_type [Symbol] :pdf, :docx, :pptx
# @param options [Hash] conversion options (see build_conversion_formats and
#   build_document_options)
# @return [String] conversion_id (the response's `pdf_id`) for polling
# @raise [APIError] when the response carries no `pdf_id`
# @example
#   conversion_id = client.convert_document(
#     document_path: 'paper.pdf',
#     document_type: :pdf,
#     formats: [:markdown, :latex]
#   )
def convert_document(document_path:, document_type:, **options)
  payload = { conversion_formats: build_conversion_formats(options) }
            .merge(build_document_options(options))

  response =
    if url?(document_path)
      post('/pdf', { url: document_path }.merge(payload))
    else
      post_multipart('/pdf', document_path, payload)
    end

  # A 200 OK can still carry an error body (missing/invalid fields, etc.),
  # in which case no pdf_id is present.
  response['pdf_id'] || raise(
    APIError.new(
      "Document submission failed: #{extract_error_message(response) || 'no pdf_id returned by Mathpix'}",
      status: 200,
      details: response.is_a?(Hash) ? response : {}
    )
  )
end
132
+
133
# Fetch the status record of a document conversion.
#
# Issues a GET for `/pdf/<conversion_id>` and returns the parsed response.
#
# @param conversion_id [String] document conversion ID
# @return [Hash] status data
def get_document_status(conversion_id)
  status_path = "/pdf/#{conversion_id}"
  get(status_path)
end
140
+
141
# Fetch a rendered document output (e.g. 'mmd', 'md', 'html', 'tex').
#
# Requests `<endpoint>/pdf/<conversion_id>.<format>` and returns the raw
# body. Per the original notes, the status endpoint (get_document_status)
# never contains the converted content.
#
# @param conversion_id [String] document conversion ID
# @param format [String] output extension (mmd, md, html, tex, ...)
# @return [String] raw output content, tagged as UTF-8
# @raise [APIError] on any non-success response
def get_document_output(conversion_id, format)
  uri = URI("#{config.endpoint}/pdf/#{conversion_id}.#{format}")
  request = Net::HTTP::Get.new(uri)
  {
    'app_id' => config.app_id,
    'app_key' => config.app_key,
    'User-Agent' => config.user_agent
  }.each { |header, value| request[header] = value }

  response = make_request(uri, request)

  if response.is_a?(Net::HTTPSuccess)
    # Net::HTTP returns ASCII-8BIT bodies; Mathpix text outputs are UTF-8.
    return response.body.to_s.dup.force_encoding(Encoding::UTF_8)
  end

  # Error bodies are expected to be JSON; anything unparseable is treated as
  # "no details".
  error_data =
    begin
      JSON.parse(response.body)
    rescue StandardError
      {}
    end
  reason = extract_error_message(error_data) || "HTTP #{response.code}"

  raise APIError.new(
    "Failed to fetch '#{format}' output: #{reason}",
    status: response.code.to_i,
    details: error_data.is_a?(Hash) ? error_data : {}
  )
end
171
+
172
+ private
173
+
174
# Requested output-format names mapped to the `conversion_formats` keys this
# client sends to the `/pdf` endpoint. Several aliases collapse onto one key
# (e.g. 'tex', 'tex.zip' and 'latex' all become 'tex.zip').
CONVERSION_FORMAT_MAP = {
  'docx' => 'docx', 'pptx' => 'pptx', 'pdf' => 'pdf',
  'tex' => 'tex.zip', 'tex.zip' => 'tex.zip', 'latex' => 'tex.zip',
  'html' => 'html', 'md' => 'md', 'mmd' => 'md', 'markdown' => 'md'
}.freeze

# Build the `conversion_formats` hash for a document request.
#
# Each entry of `options[:formats]` is looked up case-insensitively in
# CONVERSION_FORMAT_MAP. Names with no mapping (text, latex_styled, ...) are
# dropped so an unknown format is never sent. 'md' is always enabled.
#
# @param options [Hash] user options; only `:formats` is read
# @return [Hash{String => true}] formats configuration
def build_conversion_formats(options)
  mapped_keys = Array(options[:formats])
                .map { |requested| CONVERSION_FORMAT_MAP[requested.to_s.downcase] }
                .compact

  selected = mapped_keys.each_with_object({}) { |key, acc| acc[key] = true }
  selected['md'] = true # always enable Markdown retrieval
  selected
end
196
+
197
# Build the document-specific part of a conversion request.
#
# Boolean switches are emitted as `true` only when the caller passed a
# truthy value; `:quality` and `:page_ranges` are copied through unchanged
# when present. Anything else in `options` is ignored.
#
# @param options [Hash] user options
# @return [Hash] document options
def build_document_options(options)
  switches = %i[include_table_html include_diagram_svg include_line_data include_word_data]
  passthrough = %i[quality page_ranges]

  result = {}
  switches.each { |name| result[name] = true if options[name] }
  passthrough.each { |name| result[name] = options[name] if options[name] }
  result
end
211
+
212
# Prepare image source (URL or local file).
#
# HTTP URLs are upgraded to HTTPS via the configuration before validation,
# so an `http://` URL passes validation after the upgrade.
#
# @param input [String, Hash] path, URL, or hash with :path/:url key
# @param _options [Hash] additional options (currently unused)
# @return [Array<String, String>] src value and source reference
# @raise [InvalidRequestError] if input looks like a malformed URL, or is a
#   hash with neither a :url nor a :path entry
# @raise [SecurityError, Errno::ENOENT] re-raised from encode_image when the
#   input looks like a file path
def prepare_image_source(input, _options = {})
  return prepare_hash_image_source(input) if input.is_a?(Hash)

  upgraded_input = config.upgrade_to_https(input)

  # A valid URL is passed to the API directly as `src`.
  return [upgraded_input, upgraded_input] if url?(upgraded_input)

  # URL-like but not valid: report it as a URL problem, not a missing file.
  raise InvalidRequestError, "Invalid URL format: #{input}" if looks_like_url?(input)

  begin
    [encode_image(input), input] # local file; keep the original path as reference
  rescue SecurityError, Errno::ENOENT
    # If encoding fails and the input doesn't look like a file path, it is
    # most likely a malformed URL.
    raise InvalidRequestError, "Invalid URL format: #{input}" unless looks_like_file_path?(input)

    raise # genuine file-path problem: surface the original error
  end
end

# Resolve a hash image source: `{ url: '...' }` or `{ path: '...' }`, with
# symbol or string keys. `:url` wins when both are present.
#
# @param input [Hash] hash carrying the image location
# @return [Array<String, String>] src value and source reference
# @raise [InvalidRequestError] if the URL is invalid or neither key is present
def prepare_hash_image_source(input)
  url = input[:url] || input['url']
  if url
    url = config.upgrade_to_https(url)
    validate_url!(url)
    return [url, url]
  end

  path = input[:path] || input['path']
  return [encode_image(path), path] if path

  # Previously such a hash fell through to the string-handling path, which
  # expects a String and produced a confusing failure.
  raise InvalidRequestError, 'Image source hash must include a :url or :path key'
end
257
+
258
# Tell whether a value is a URL, delegating the decision to the
# configuration's `valid_url?` check. Non-strings are never URLs.
#
# @param str [String] string to check
# @return [Boolean]
def url?(str)
  str.is_a?(String) && config.valid_url?(str)
end
267
+
268
# Check if a string looks like a URL, whether or not it is a valid one.
#
# Detects patterns that suggest URL intent: an http(s):// or www. prefix at
# the very start of the string, or a `://` separator anywhere. Used to give
# better error messages for malformed URLs.
#
# @param str [String] string to check
# @return [Boolean]
def looks_like_url?(str)
  return false unless str.is_a?(String)

  # \A anchors to the start of the whole string. The previous `^` matches at
  # the start of every line, so a "www." on a later line of a multi-line
  # value was treated as a URL prefix.
  str.match?(%r{\A(?:https?://|www\.)|://})
end
281
+
282
# Ensure a URL passes the configuration's `valid_url?` check.
#
# @param url [String] URL to validate
# @return [nil] when the URL is valid
# @raise [InvalidRequestError] if URL is not valid
def validate_url!(url)
  raise InvalidRequestError, "Invalid URL format: #{url}" unless config.valid_url?(url)
end
291
+
292
# Check if a string looks like a file path.
#
# Detects patterns that suggest file-path intent: a leading `~`, `/` or `.`,
# any slash or backslash, or a known image/document extension at the very
# end of the string (case-insensitive).
#
# @param str [String] string to check
# @return [Boolean]
def looks_like_file_path?(str)
  return false unless str.is_a?(String)

  # \A and \z anchor to the whole string. The previous `^` / `$` match at
  # every line boundary, so any multi-line value with a line starting in "."
  # or ending in ".png" was classified as a file path.
  str.match?(%r{\A[~/.]|/|\\|\.(?:png|jpe?g|gif|webp|pdf|docx|pptx)\z}i)
end
305
+
306
# Read a local file and return it as a base64 data URI.
#
# The path is first passed through the configuration's `sanitize_path`
# (intended to prevent directory traversal); a nil result is treated as a
# rejected path. The MIME type is derived from the sanitized path's
# extension.
#
# @param path [String] path to image file
# @return [String] data URI
# @raise [SecurityError] if path is invalid or dangerous
def encode_image(path)
  safe_path = config.sanitize_path(path)
  raise SecurityError, "Invalid or dangerous file path: #{path}" if safe_path.nil?

  encoded = Base64.strict_encode64(File.binread(safe_path))
  "data:#{detect_mime_type(safe_path)};base64,#{encoded}"
end
320
+
321
# Map a file's extension (case-insensitive) to a MIME type.
#
# Unrecognised or missing extensions fall back to
# 'application/octet-stream'.
#
# @param path [String] file path
# @return [String] MIME type
def detect_mime_type(path)
  types_by_extension = {
    '.png' => 'image/png',
    '.jpg' => 'image/jpeg',
    '.jpeg' => 'image/jpeg',
    '.gif' => 'image/gif',
    '.webp' => 'image/webp',
    '.pdf' => 'application/pdf',
    '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
  }

  extension = File.extname(path).downcase
  types_by_extension.fetch(extension, 'application/octet-stream')
end
337
+
338
# Translate user-facing options into request-body fields.
#
# Only options the caller actually supplied are emitted. `:rm_spaces` and
# `:idiomatic_eqn_arrays` are forwarded whenever the key is present (so an
# explicit `false` is kept); the remaining options are forwarded only when
# truthy.
#
# @param options [Hash] user options
# @return [Hash] complete request options
def build_request_options(options)
  request = {}

  # Data options
  request[:data_options] = { include_latex: true } if options[:include_latex]

  # Metadata
  request[:metadata] = options[:metadata] if options[:metadata]
  request[:tags] = Array(options[:tags]) if options[:tags]

  # Recognition options: presence of the key matters, not truthiness
  %i[rm_spaces idiomatic_eqn_arrays].each do |flag|
    request[flag] = options[flag] if options.key?(flag)
  end

  # Confidence threshold
  threshold = options[:confidence_threshold]
  request[:confidence_threshold] = threshold if threshold

  # Chemistry: either spelling enables SMILES output
  request[:include_smiles] = true if options[:chemistry] || options[:include_smiles]

  # Alphabets
  request[:alphabets_allowed] = options[:alphabets] if options[:alphabets]

  request
end
368
+
369
# Send a JSON POST to an API path and return the parsed response.
#
# The body is serialised with JSON.generate; credentials and User-Agent
# come from the configuration. Response handling (including raising on
# error statuses) is delegated to handle_response.
#
# @param path [String] API endpoint path, appended to config.endpoint
# @param body [Hash] request body
# @return [Hash] parsed response
def post(path, body)
  uri = URI("#{config.endpoint}#{path}")

  request = Net::HTTP::Post.new(uri).tap do |req|
    req['Content-Type'] = 'application/json'
    req['app_id'] = config.app_id
    req['app_key'] = config.app_key
    req['User-Agent'] = config.user_agent
    req.body = JSON.generate(body)
  end

  handle_response(make_request(uri, request))
end
386
+
387
# Make a multipart POST request (local file upload).
#
# The file goes in the `file` part and the extra fields are JSON-encoded
# into a single `options_json` part.
#
# The path is passed through the configuration's `sanitize_path` before the
# file is opened — the same guard encode_image applies to local images — so
# a rejected path never reaches File.open.
#
# @param path [String] API endpoint path, appended to config.endpoint
# @param file_path [String] local file to upload
# @param fields [Hash] extra form fields (sent as options_json)
# @return [Hash] parsed response
# @raise [SecurityError] if the path is rejected by config.sanitize_path
def post_multipart(path, file_path, fields)
  safe_path = config.sanitize_path(file_path)
  raise SecurityError, "Invalid or dangerous file path: #{file_path}" if safe_path.nil?

  uri = URI("#{config.endpoint}#{path}")
  request = Net::HTTP::Post.new(uri)
  request['app_id'] = config.app_id
  request['app_key'] = config.app_key
  request['User-Agent'] = config.user_agent

  # The request must be sent while the file is still open; the block form
  # closes the handle afterwards, even if the request raises.
  response = File.open(safe_path, 'rb') do |file|
    request.set_form(
      [['file', file], ['options_json', JSON.generate(fields)]],
      'multipart/form-data'
    )
    make_request(uri, request)
  end

  handle_response(response)
end
413
+
414
# Send a GET to an API path and return the parsed response.
#
# Query parameters, when given, are form-encoded onto the URL before the
# request object is built. Response handling (including raising on error
# statuses) is delegated to handle_response.
#
# @param path [String] API endpoint path, appended to config.endpoint
# @param params [Hash] query parameters
# @return [Hash] parsed response
def get(path, params: {})
  uri = URI("#{config.endpoint}#{path}")
  uri.query = URI.encode_www_form(params) unless params.empty?

  request = Net::HTTP::Get.new(uri)
  {
    'app_id' => config.app_id,
    'app_key' => config.app_key,
    'User-Agent' => config.user_agent
  }.each { |header, value| request[header] = value }

  handle_response(make_request(uri, request))
end
431
+
432
# Execute an HTTP request and translate transport failures.
#
# Opens a connection to the URI's host (TLS when the scheme is https) with
# the configured read timeout and sends the request.
#
# @param uri [URI] request URI
# @param request [Net::HTTPRequest] HTTP request object
# @return [Net::HTTPResponse]
# @raise [TimeoutError] when any phase of the request times out
# @raise [NetworkError] on socket, I/O, system-call, TLS or protocol failures
def make_request(uri, request)
  Net::HTTP.start(uri.hostname, uri.port,
                  use_ssl: uri.scheme == 'https',
                  read_timeout: config.timeout) do |http|
    http.request(request)
  end
rescue Timeout::Error => e
  # Timeout::Error is the common ancestor of Net::OpenTimeout,
  # Net::ReadTimeout and Net::WriteTimeout. Rescuing only the first two let
  # write timeouts (plausible on large multipart uploads) escape unwrapped.
  raise TimeoutError, "Request timed out after #{config.timeout}s: #{e.message}"
rescue SocketError, IOError, SystemCallError, OpenSSL::SSL::SSLError,
       Net::HTTPBadResponse, Net::ProtocolError => e
  # Genuine network/transport failures get relabeled for callers. Other
  # StandardErrors (e.g. a programming bug in this method) propagate
  # untouched instead of being masked as a misleading "Network error".
  raise NetworkError, "Network error: #{e.message}"
end
452
+
453
# Turn an HTTP response into parsed data or a typed error.
#
# Checks are made in this order: success, 429, other 4xx, 5xx, anything
# else. The 429 check must precede the generic client-error check because
# Net::HTTPTooManyRequests is itself a client-error class.
#
# @param response [Net::HTTPResponse]
# @return [Hash] parsed response body
# @raise [RateLimitError] on HTTP 429
# @raise [ServerError] on 5xx responses
# @raise [APIError] on other error responses, unexpected statuses, and
#   success responses whose body carries an error
def handle_response(response)
  if response.is_a?(Net::HTTPSuccess)
    payload = parse_body(response)
    # Mathpix occasionally returns HTTP 200 with an error payload
    # (e.g. "Missing URL in request body"). Surface it rather than
    # silently treating the request as successful.
    reported_error = payload.is_a?(Hash) && (payload['error'] || payload['error_info'])
    return payload unless reported_error

    raise APIError.new(
      extract_error_message(payload) || 'Mathpix returned an error',
      status: response.code.to_i,
      details: payload
    )
  end

  if response.is_a?(Net::HTTPTooManyRequests)
    raise RateLimitError.new(
      'Rate limit exceeded',
      retry_after: response['Retry-After']&.to_i
    )
  end

  if response.is_a?(Net::HTTPClientError)
    failure = parse_body(response)
    raise APIError.new(
      extract_error_message(failure) || "Client error (HTTP #{response.code})",
      status: response.code.to_i,
      details: failure.is_a?(Hash) ? failure : {}
    )
  end

  if response.is_a?(Net::HTTPServerError)
    failure = parse_body(response)
    raise ServerError.new(
      extract_error_message(failure) || "Server error (HTTP #{response.code})",
      status: response.code.to_i,
      details: failure.is_a?(Hash) ? failure : {}
    )
  end

  raise APIError.new(
    "Unexpected response: HTTP #{response.code}",
    status: response.code.to_i
  )
end
499
+
500
# Parse a response body as JSON, tolerating empty and non-JSON bodies.
#
# An empty (or nil) body yields an empty hash. A body that is not valid
# JSON (often an HTML error page) yields a one-entry hash whose 'error' is a
# concise "HTTP <code> <message>" summary, rather than the raw page.
#
# @param response [Net::HTTPResponse]
# @return [Hash, Array] parsed body, {} for an empty body, or
#   { 'error' => status summary } for non-JSON
def parse_body(response)
  raw = response.body.to_s
  raw.empty? ? {} : JSON.parse(raw)
rescue JSON::ParserError
  { 'error' => "HTTP #{response.code} #{response.message}".strip }
end
514
+
515
# Pull the most descriptive message out of a Mathpix error payload.
#
# Candidates are tried in order: error_info.message, error_info.id, error,
# message. Mathpix nests the human-readable reason under error_info.message.
#
# @param data [Hash] parsed error body
# @return [String, nil] the first candidate present, or nil if there is none
#   or `data` is not a Hash
def extract_error_message(data)
  return nil unless data.is_a?(Hash)

  # `dig` raises TypeError when error_info is present but not a Hash (e.g. a
  # bare string), which would replace the real API error with an unrelated
  # crash. Treat any non-Hash error_info as carrying no nested details.
  info = data['error_info']
  info = {} unless info.is_a?(Hash)

  info['message'] || info['id'] || data['error'] || data['message']
end
528
+
529
# `get` and `post` are defined after `private`, but they are part of the
# public surface used by the MCP tools (e.g. GetAccountInfoTool calls
# client.get('/account')), so they are re-exported here. When they were
# left private, such calls raised "private method 'get' called".
public :get
public :post
533
+ end
534
+ end