mathpix-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +22 -0
- data/README.md +403 -0
- data/bin/mathpix-mcp +46 -0
- data/bin/mathpix-mcp-http +39 -0
- data/config.ru +24 -0
- data/lib/mathpix/client.rb +534 -0
- data/lib/mathpix/configuration.rb +182 -0
- data/lib/mathpix/document.rb +345 -0
- data/lib/mathpix/errors.rb +78 -0
- data/lib/mathpix/mcp/base_tool.rb +225 -0
- data/lib/mathpix/mcp/http_app.rb +60 -0
- data/lib/mathpix/mcp/server.rb +124 -0
- data/lib/mathpix/mcp/tools/batch_convert_tool.rb +147 -0
- data/lib/mathpix/mcp/tools/check_document_status_tool.rb +70 -0
- data/lib/mathpix/mcp/tools/convert_document_tool.rb +176 -0
- data/lib/mathpix/mcp/tools/convert_image_tool.rb +108 -0
- data/lib/mathpix/mcp/tools/convert_strokes_tool.rb +97 -0
- data/lib/mathpix/mcp/tools/get_account_info_tool.rb +47 -0
- data/lib/mathpix/mcp/tools/get_usage_tool.rb +61 -0
- data/lib/mathpix/mcp/tools/list_formats_tool.rb +79 -0
- data/lib/mathpix/mcp/tools/search_results_tool.rb +116 -0
- data/lib/mathpix/mcp.rb +31 -0
- data/lib/mathpix/result.rb +387 -0
- data/lib/mathpix/version.rb +5 -0
- data/lib/mathpix.rb +52 -0
- metadata +132 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'tmpdir'
|
|
4
|
+
|
|
5
|
+
module Mathpix
|
|
6
|
+
module MCP
|
|
7
|
+
module Tools
|
|
8
|
+
# Base class for all Mathpix MCP tools
|
|
9
|
+
#
|
|
10
|
+
# Uses official Ruby MCP SDK (MCP::Tool)
|
|
11
|
+
# Provides common utilities for Mathpix-specific tools
|
|
12
|
+
#
|
|
13
|
+
#
|
|
14
|
+
# @example Tool implementation
|
|
15
|
+
# class ExampleTool < BaseTool
|
|
16
|
+
# description "Example tool"
|
|
17
|
+
# input_schema(
|
|
18
|
+
# properties: { message: { type: "string" } },
|
|
19
|
+
# required: ["message"]
|
|
20
|
+
# )
|
|
21
|
+
#
|
|
22
|
+
# def self.call(message:, server_context:)
|
|
23
|
+
# client = server_context[:mathpix_client]
|
|
24
|
+
# # Use client to make API calls
|
|
25
|
+
# text_response("Result: #{message}")
|
|
26
|
+
# end
|
|
27
|
+
# end
|
|
28
|
+
class BaseTool < ::MCP::Tool
|
|
29
|
+
class << self
|
|
30
|
+
protected
|
|
31
|
+
|
|
32
|
+
# Get Mathpix client from server context
|
|
33
|
+
#
|
|
34
|
+
# @param server_context [Hash] MCP server context
|
|
35
|
+
# @return [Mathpix::Client] Mathpix API client
|
|
36
|
+
def mathpix_client(server_context)
  # Look the client up under the :mathpix_client key; a missing (nil/false)
  # entry is reported as an ArgumentError rather than returned to the caller.
  client = server_context[:mathpix_client]
  return client if client

  raise ArgumentError, 'mathpix_client not in server_context'
end
|
|
39
|
+
|
|
40
|
+
# Create text response (official MCP format)
|
|
41
|
+
#
|
|
42
|
+
# @param text [String] response text
|
|
43
|
+
# @return [::MCP::Tool::Response]
|
|
44
|
+
def text_response(text)
  # The response wraps a one-element content list holding a single item of
  # type 'text'.
  content = [{ type: 'text', text: text }]
  ::MCP::Tool::Response.new(content)
end
|
|
50
|
+
|
|
51
|
+
# Create JSON response with text wrapper
|
|
52
|
+
#
|
|
53
|
+
# @param data [Hash] JSON data
|
|
54
|
+
# @return [::MCP::Tool::Response]
|
|
55
|
+
def json_response(data)
  # Serialize to pretty-printed JSON, then deliver it through the plain-text
  # response helper.
  pretty_json = JSON.pretty_generate(data)
  text_response(pretty_json)
end
|
|
58
|
+
|
|
59
|
+
# Create error response
|
|
60
|
+
#
|
|
61
|
+
# @param error [StandardError, String] error object or message
|
|
62
|
+
# @return [::MCP::Tool::Response]
|
|
63
|
+
def error_response(error)
  # Accepts either an exception or anything convertible to a message string.
  from_exception = error.is_a?(StandardError)
  message = from_exception ? error.message : error.to_s
  details = error.is_a?(Mathpix::Error) ? error.details : {}

  payload = { error: true, message: message }
  payload[:type] = from_exception ? error.class.name : 'Error'

  # Optional fields are only emitted when they carry information.
  payload[:status] = error.status if error.is_a?(Mathpix::APIError) && error.status
  payload[:details] = details unless details.nil? || details.empty?

  json_response(payload)
end
|
|
77
|
+
|
|
78
|
+
# Extract formats from arguments
|
|
79
|
+
#
|
|
80
|
+
# @param formats [Array, nil] format array
|
|
81
|
+
# @param client [Mathpix::Client] client for defaults
|
|
82
|
+
# @return [Array<Symbol>] format symbols
|
|
83
|
+
def extract_formats(formats, client)
  if formats.nil? || formats.empty?
    # Nothing requested: use the defaults from the client's configuration.
    client.config.default_formats
  else
    # Array() also accepts a single scalar; every entry becomes a Symbol.
    Array(formats).map { |name| name.to_sym }
  end
end
|
|
88
|
+
|
|
89
|
+
# Normalize path (expand ~, resolve relative paths)
|
|
90
|
+
#
|
|
91
|
+
# @param path [String] file path
|
|
92
|
+
# @return [String] normalized path
|
|
93
|
+
def normalize_path(path)
  begin
    File.expand_path(path)
  rescue StandardError
    # Best effort: whatever expand_path rejects is handed back unchanged.
    path
  end
end
|
|
98
|
+
|
|
99
|
+
# Check if path is a URL
|
|
100
|
+
#
|
|
101
|
+
# @param path [String] path or URL
|
|
102
|
+
# @return [Boolean]
|
|
103
|
+
def url?(path)
  # True when +path+ (any object; converted with to_s) begins with an http or
  # https scheme. URI schemes are case-insensitive, so "HTTPS://host/x.png"
  # must count as a URL too instead of falling through to local-path handling.
  # \A anchors at the very start of the string, so an embedded URL later in a
  # path does not match.
  path.to_s.match?(%r{\Ahttps?://}i)
end
|
|
106
|
+
|
|
107
|
+
# Characters of inline preview returned alongside a saved artifact.
|
|
108
|
+
PREVIEW_CHARS = 2000

# File extension (without the dot) used for each OCR format name.
ARTIFACT_EXT = {
  'markdown' => 'md',
  'md' => 'md',
  'mmd' => 'mmd',
  'latex' => 'tex',
  'latex_styled' => 'tex',
  'latex_simplified' => 'tex',
  'text' => 'txt',
  'text_display' => 'txt',
  'asciimath' => 'txt',
  'mathml' => 'mml',
  'html' => 'html',
  'data' => 'json',
  'line_data' => 'json',
  'word_data' => 'json'
}.freeze
|
|
118
|
+
|
|
119
|
+
# Directory where OCR artifacts are written when no explicit
|
|
120
|
+
# output_path is given. Configurable via MATHPIX_OUTPUT_DIR; defaults
|
|
121
|
+
# to the system temp dir.
|
|
122
|
+
#
|
|
123
|
+
# @return [String]
|
|
124
|
+
def artifact_dir
  configured = ENV['MATHPIX_OUTPUT_DIR']
  # Unset and empty are treated alike: fall back to the system temp dir.
  return Dir.tmpdir if configured.nil? || configured.empty?

  File.expand_path(configured)
end
|
|
128
|
+
|
|
129
|
+
# Make a value safe to embed in a filename.
|
|
130
|
+
#
|
|
131
|
+
# @param value [#to_s]
|
|
132
|
+
# @return [String]
|
|
133
|
+
def sanitize(value)
  # Anything other than ASCII letters, digits, underscore and hyphen is
  # replaced one-for-one with an underscore.
  unsafe_char = /[^a-zA-Z0-9_-]/
  text = value.to_s
  text.gsub(unsafe_char, '_')
end
|
|
136
|
+
|
|
137
|
+
# Short inline preview of a piece of content.
|
|
138
|
+
#
|
|
139
|
+
# @param content [String, nil]
|
|
140
|
+
# @param limit [Integer]
|
|
141
|
+
# @return [String, nil]
|
|
142
|
+
def preview_of(content, limit = PREVIEW_CHARS)
  return if content.nil?

  text = content.to_s
  # Content within the limit is returned whole; longer content is cut to
  # +limit+ characters and marked with a trailing ellipsis.
  return text unless text.length > limit

  "#{text[0, limit]}…"
end
|
|
148
|
+
|
|
149
|
+
# Derive a sibling path with a different extension.
|
|
150
|
+
#
|
|
151
|
+
# @param base [String] base file path
|
|
152
|
+
# @param ext [String] extension without the dot
|
|
153
|
+
# @return [String]
|
|
154
|
+
def sibling_path(base, ext)
  # Drop only the final extension of the base name, then attach the new one
  # in the same directory.
  old_ext = File.extname(base)
  stem = File.basename(base, old_ext)
  File.join(File.dirname(base), [stem, ext].join('.'))
end
|
|
159
|
+
|
|
160
|
+
# Write OCR artifacts to disk so their (potentially large) content
|
|
161
|
+
# never enters the model context. The first format is written to
|
|
162
|
+
# base_path (honoring any extension the caller chose); the rest are
|
|
163
|
+
# written to siblings whose extension is derived from the format name.
|
|
164
|
+
#
|
|
165
|
+
# @param contents [Hash{String,Symbol=>String}] format => content
|
|
166
|
+
# @param base_path [String] primary output path
|
|
167
|
+
# @return [Hash{String=>Hash}] format => { path:, bytes: }
|
|
168
|
+
def write_artifacts(contents, base_path)
  # Writes every non-empty entry of +contents+ (format => content) to disk and
  # returns { format_name => { path:, bytes: } } for the files written.
  #
  # The first non-empty format goes to +base_path+ exactly as given (after
  # expansion). Each later format goes to a sibling of base_path whose
  # extension comes from ARTIFACT_EXT ('txt' when the format is unknown).
  base_path = File.expand_path(base_path)
  ensure_dir(File.dirname(base_path))
  saved = {}
  used_paths = []

  contents.each do |format, content|
    # Formats with no content are skipped and never claim base_path.
    next if content.nil? || content.to_s.empty?

    name = format.to_s
    path =
      if used_paths.empty?
        base_path
      else
        ext = ARTIFACT_EXT[name] || 'txt'
        candidate = sibling_path(base_path, ext)
        # Several formats share an extension (text/asciimath => txt,
        # latex/latex_styled => tex, ...) and a sibling can also land on
        # base_path itself. Reusing a path would silently overwrite an
        # artifact already written in this call, so a colliding format gets
        # its own name embedded instead: "<stem>.<format>.<ext>".
        candidate = sibling_path(base_path, "#{sanitize(name)}.#{ext}") if used_paths.include?(candidate)
        candidate
      end

    # Non-string content (e.g. structured data) is stored as pretty JSON.
    str = content.is_a?(String) ? content : JSON.pretty_generate(content)
    File.write(path, str)
    used_paths << path
    saved[name] = { path: path, bytes: str.bytesize }
  end

  saved
end
|
|
193
|
+
|
|
194
|
+
# Build a default artifact base path inside artifact_dir.
|
|
195
|
+
#
|
|
196
|
+
# @param stem [String] human-meaningful filename stem
|
|
197
|
+
# @param ext [String] extension without the dot
|
|
198
|
+
# @return [String]
|
|
199
|
+
def default_artifact_path(stem, ext)
  # "mathpix_<sanitized stem>.<ext>" inside the configured artifact directory.
  directory = artifact_dir
  filename = "mathpix_#{sanitize(stem)}.#{ext}"
  File.join(directory, filename)
end
|
|
202
|
+
|
|
203
|
+
# Ensure a directory exists (lazily requiring fileutils so tools that
|
|
204
|
+
# never write files don't pay for it).
|
|
205
|
+
def ensure_dir(dir)
  # fileutils is loaded on first use, so code paths that never write files
  # never load it.
  require 'fileutils'

  # mkdir_p creates missing parents and is a no-op for an existing directory.
  ::FileUtils.mkdir_p(dir)
end
|
|
209
|
+
|
|
210
|
+
# Safe execute with error handling
|
|
211
|
+
#
|
|
212
|
+
# @yield Block to execute
|
|
213
|
+
# @return [::MCP::Tool::Response]
|
|
214
|
+
def safe_execute
  begin
    yield
  rescue Mathpix::Error, StandardError => e
    # Gem-specific and generic failures are reported the same way: as an
    # error response instead of a raised exception.
    error_response(e)
  end
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
|
224
|
+
end
|
|
225
|
+
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'rack'
|
|
5
|
+
require 'mcp/server/transports/streamable_http_transport'
|
|
6
|
+
|
|
7
|
+
module Mathpix
|
|
8
|
+
module MCP
|
|
9
|
+
# Builds a Rack application that serves the Mathpix MCP server over the
|
|
10
|
+
# MCP Streamable HTTP transport, guarded by a bearer token.
|
|
11
|
+
#
|
|
12
|
+
# @example config.ru
|
|
13
|
+
# require 'mathpix/mcp/http_app'
|
|
14
|
+
# run Mathpix::MCP::HttpApp.build
|
|
15
|
+
module HttpApp
|
|
16
|
+
module_function
|
|
17
|
+
|
|
18
|
+
# Build the bearer-guarded Rack app.
|
|
19
|
+
#
|
|
20
|
+
# @param token [String] required bearer token (defaults to MATHPIX_MCP_TOKEN)
|
|
21
|
+
# @param server [Mathpix::MCP::Server] optional pre-built server
|
|
22
|
+
# @return [#call] a Rack application
|
|
23
|
+
# @raise [RuntimeError] if no token is configured
|
|
24
|
+
def build(token: ENV.fetch('MATHPIX_MCP_TOKEN', nil), server: nil)
  # Refuse to start without a token: the HTTP transport is never served
  # unauthenticated.
  if token.nil? || token.empty?
    raise 'MATHPIX_MCP_TOKEN must be set to run the HTTP transport (bearer-token auth is required)'
  end

  wrapper = server || Mathpix::MCP::Server.new

  # Stateless mode with plain JSON responses means no session state is kept
  # between requests; the bearer check wraps every request.
  transport = ::MCP::Server::Transports::StreamableHTTPTransport.new(
    wrapper.mcp_server,
    stateless: true,
    enable_json_response: true
  )

  BearerAuth.new(transport, token: token)
end
|
|
35
|
+
|
|
36
|
+
# Rack middleware enforcing a constant-time bearer-token check.
|
|
37
|
+
class BearerAuth
  # Challenge sent with every 401. HTTP requires a 401 response to carry a
  # WWW-Authenticate header so the client can tell which scheme to use.
  CHALLENGE = 'Bearer realm="mathpix-mcp"'

  # @param app [#call] downstream Rack application
  # @param token [String] the expected bearer token
  def initialize(app, token:)
    @app = app
    @token = token
  end

  # Rack entry point.
  #
  # @param env [Hash] Rack environment
  # @return [Array] the downstream response, or a 401 triplet when the token
  #   is missing or wrong
  def call(env)
    # Strip a leading "Bearer " (any case). A header without that prefix is
    # compared as-is, and a missing header becomes the empty string.
    provided = env['HTTP_AUTHORIZATION'].to_s.sub(/\ABearer\s+/i, '')
    return unauthorized unless Rack::Utils.secure_compare(@token, provided)

    @app.call(env)
  end

  private

  # 401 response: JSON error body plus the Bearer challenge.
  def unauthorized
    [401,
     { 'content-type' => 'application/json', 'www-authenticate' => CHALLENGE },
     [JSON.generate(error: true, message: 'Unauthorized: missing or invalid bearer token')]]
  end
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
begin
|
|
4
|
+
require 'mcp'
|
|
5
|
+
require 'mcp/server/transports/stdio_transport' # transport not auto-loaded
|
|
6
|
+
rescue LoadError
|
|
7
|
+
raise LoadError, <<~ERROR
|
|
8
|
+
The 'mcp' gem is required for MCP server functionality.
|
|
9
|
+
|
|
10
|
+
Add to your Gemfile:
|
|
11
|
+
gem 'mcp'
|
|
12
|
+
|
|
13
|
+
Or install directly:
|
|
14
|
+
gem install mcp
|
|
15
|
+
|
|
16
|
+
Official Ruby MCP SDK: https://github.com/modelcontextprotocol/ruby-sdk
|
|
17
|
+
ERROR
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
module Mathpix
|
|
21
|
+
module MCP
|
|
22
|
+
# MCP Server for Mathpix OCR.
|
|
23
|
+
#
|
|
24
|
+
# Uses the official Ruby MCP SDK. Provides 9 tools as thin delegates to
|
|
25
|
+
# Mathpix::Client over the stdio transport.
|
|
26
|
+
#
|
|
27
|
+
# @example Start STDIO server
|
|
28
|
+
# require 'mathpix/mcp'
|
|
29
|
+
#
|
|
30
|
+
# Mathpix.configure do |config|
|
|
31
|
+
# config.app_id = ENV['MATHPIX_APP_ID']
|
|
32
|
+
# config.app_key = ENV['MATHPIX_APP_KEY']
|
|
33
|
+
# end
|
|
34
|
+
#
|
|
35
|
+
# Mathpix::MCP::Server.run
|
|
36
|
+
#
|
|
37
|
+
# @example With custom configuration
|
|
38
|
+
# server = Mathpix::MCP::Server.new(
|
|
39
|
+
# name: "mathpix-custom",
|
|
40
|
+
# version: "1.0.0",
|
|
41
|
+
# mathpix_client: custom_client
|
|
42
|
+
# )
|
|
43
|
+
# transport = server.create_stdio_transport
|
|
44
|
+
# transport.open
|
|
45
|
+
class Server
  attr_reader :name, :version, :mathpix_client, :mcp_server

  # Build a server from the given options and run it over stdio.
  #
  # @example
  #   Mathpix::MCP::Server.run
  def self.run(**)
    new(**).run
  end

  # @param name [String] server name
  # @param version [String] server version
  # @param mathpix_client [Mathpix::Client, nil] client instance; Mathpix.client
  #   is used when none is given
  def initialize(name: 'mathpix', version: Mathpix::VERSION, mathpix_client: nil)
    @name = name
    @version = version
    @mathpix_client = mathpix_client || Mathpix.client
    @mcp_server = create_mcp_server
  end

  # Wrap the underlying SDK server in a stdio transport.
  #
  # @return [::MCP::Server::Transports::StdioTransport]
  def create_stdio_transport
    ::MCP::Server::Transports::StdioTransport.new(@mcp_server)
  end

  # Open a fresh stdio transport for this server.
  def run
    create_stdio_transport.open
  end

  # @return [Hash] the names of the registered tool classes under :tools
  def capabilities
    { tools: tool_classes.map(&:name) }
  end

  private

  # Instantiate the SDK server with this server's identity, its tool classes
  # and a context that exposes the Mathpix client to every tool.
  def create_mcp_server
    settings = {
      name: @name,
      version: @version,
      tools: tool_classes,
      server_context: { mathpix_client: @mathpix_client }
    }
    ::MCP::Server.new(**settings)
  end

  # @return [Array<Class>] every tool class this server registers
  def tool_classes
    [
      Tools::ConvertImageTool, Tools::ConvertDocumentTool, Tools::ConvertStrokesTool,
      Tools::BatchConvertTool, Tools::CheckDocumentStatusTool, Tools::SearchResultsTool,
      Tools::GetUsageTool, Tools::GetAccountInfoTool, Tools::ListFormatsTool
    ]
  end
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../base_tool'
|
|
4
|
+
|
|
5
|
+
module Mathpix
|
|
6
|
+
module MCP
|
|
7
|
+
module Tools
|
|
8
|
+
# Batch Convert Tool
|
|
9
|
+
#
|
|
10
|
+
# Converts multiple images in batch for efficiency
|
|
11
|
+
# Thin delegate to Mathpix::Client#snap with batch processing
|
|
12
|
+
class BatchConvertTool < BaseTool
|
|
13
|
+
description 'Convert multiple images in batch using Mathpix OCR'

# Only image_paths is mandatory; every other argument has a default in .call.
input_schema(
  properties: {
    image_paths: { type: 'array', items: { type: 'string' },
                   description: 'Array of image paths or URLs to process' },
    formats: { type: 'array', items: { type: 'string' },
               description: 'Output formats for all images: latex, text, mathml, asciimath ' \
                            '(default: latex_styled, text)' },
    parallel: { type: 'boolean', description: 'Process images concurrently (default: false)' },
    max_parallel: { type: 'number',
                    description: 'Maximum number of concurrent requests when parallel is true (default: 3)' },
    output_dir: { type: 'string',
                  description: "Directory to write each image's OCR result into. " \
                               'Recognized content is always saved to files (never returned inline); ' \
                               'defaults to MATHPIX_OUTPUT_DIR or the system temp dir.' }
  },
  required: %w[image_paths]
)
|
|
44
|
+
|
|
45
|
+
def self.call(image_paths:, server_context:, formats: nil, parallel: false, max_parallel: 3, output_dir: nil)
  safe_execute do
    client = mathpix_client(server_context)
    output_formats = extract_formats(formats, client)

    # An explicit, non-empty output_dir wins; otherwise use the shared
    # artifact directory.
    dir =
      if output_dir && !output_dir.empty?
        File.expand_path(output_dir)
      else
        artifact_dir
      end

    # URLs pass through untouched; everything else is expanded as a path.
    normalized_paths = image_paths.map { |path| url?(path) ? path : normalize_path(path) }

    results =
      if parallel
        process_batch_parallel(client, normalized_paths, output_formats, max_parallel.to_i, dir)
      else
        process_batch_sequential(client, normalized_paths, output_formats, dir)
      end

    summary = {
      total: results.length,
      successful: results.count { |entry| entry[:success] },
      failed: results.count { |entry| !entry[:success] }
    }

    json_response(
      {
        success: true,
        batch_size: image_paths.length,
        formats: output_formats,
        parallel: parallel,
        results: results,
        summary: summary
      }
    )
  end
end
|
|
83
|
+
|
|
84
|
+
# Convert a single image, writing its recognized content to a file and
|
|
85
|
+
# returning a result/error hash (never raises). The OCR text/latex is
|
|
86
|
+
# saved to disk rather than inlined so a large batch can't overflow the
|
|
87
|
+
# model context.
|
|
88
|
+
def self.convert_one(client, path, index, formats, dir)
  # Converts one image and writes its recognized content into +dir+.
  # Returns a result hash (success: true, with saved_files and a short
  # preview) or an error hash (success: false, with the error message).
  result = client.snap(path, formats: formats)
  contents = {
    'latex' => result.latex,
    'text' => result.text,
    'mathml' => result.mathml,
    'asciimath' => result.asciimath
  }.compact

  # URLs have no usable file name, so they are named after their position.
  # NOTE(review): local images that share a base name (a/img.png and
  # b/img.png, or img.png and img.jpg) map to the same output files.
  stem = url?(path) ? "image_#{index}" : File.basename(path, File.extname(path))
  saved = write_artifacts(contents, File.join(dir, "mathpix_#{sanitize(stem)}.tex"))
  {
    index: index,
    image_path: path,
    success: true,
    confidence: result.confidence,
    saved_files: saved,
    preview: preview_of(result.latex || result.text, 200)
  }
rescue Mathpix::Error, StandardError => e
  # This method must never raise: non-API failures (an unreadable image, an
  # unwritable output directory, ...) are turned into a per-image error entry
  # too, so one bad item cannot abort the rest of the batch or escape a
  # worker thread.
  {
    index: index,
    image_path: path,
    success: false,
    error: e.message
  }
end
|
|
114
|
+
|
|
115
|
+
def self.process_batch_sequential(client, paths, formats, dir)
  # One image at a time, in input order; each entry is convert_one's hash.
  paths.each_with_index.map do |path, index|
    convert_one(client, path, index, formats, dir)
  end
end
|
|
118
|
+
|
|
119
|
+
# Bounded thread pool: at most max_parallel concurrent HTTP requests.
|
|
120
|
+
# Results are written back by original index so ordering is preserved.
|
|
121
|
+
def self.process_batch_parallel(client, paths, formats, max_parallel, dir)
  # Never fewer than one worker, never more workers than images.
  pool_size = [[max_parallel, 1].max, paths.length].min

  # Every job carries its original index so results land in input order.
  jobs = Queue.new
  paths.each_with_index { |path, position| jobs << [path, position] }
  results = Array.new(paths.length)

  threads = Array.new(pool_size) do
    Thread.new do
      loop do
        # Non-blocking pop raises ThreadError once the queue is drained;
        # that is the signal for this worker to stop.
        job = begin
          jobs.pop(true)
        rescue ThreadError
          nil
        end
        break if job.nil?

        path, index = job
        results[index] = convert_one(client, path, index, formats, dir)
      end
    end
  end

  threads.each(&:join)
  results
end
|
|
144
|
+
end
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../base_tool'
|
|
4
|
+
|
|
5
|
+
module Mathpix
|
|
6
|
+
module MCP
|
|
7
|
+
module Tools
|
|
8
|
+
# Check Document Status Tool
|
|
9
|
+
#
|
|
10
|
+
# Polls document conversion status for async operations
|
|
11
|
+
# Thin delegate to Mathpix::Client#get_document_status
|
|
12
|
+
class CheckDocumentStatusTool < BaseTool
|
|
13
|
+
description 'Check the status of a document conversion (PDF, DOCX, PPTX)'

# The conversion id is the tool's only argument.
input_schema(
  properties: {
    conversion_id: { type: 'string',
                     description: 'Document conversion ID returned from convert_document' }
  },
  required: %w[conversion_id]
)
|
|
24
|
+
|
|
25
|
+
def self.call(conversion_id:, server_context:)
  safe_execute do
    client = mathpix_client(server_context)
    status_data = client.get_document_status(conversion_id)
    state = status_data['status']

    response_data = {
      success: true,
      conversion_id: conversion_id,
      status: state,
      progress: status_data['percent_done'],
      metadata: {}
    }

    if state == 'completed'
      response_data[:metadata][:pages] = status_data['num_pages']
      response_data[:metadata][:pages_completed] = status_data['num_pages_completed']

      # The status payload carries no result URLs, so report the
      # /pdf/{id}.{ext} endpoints where the converted content can be fetched.
      base_url = "#{client.config.endpoint}/pdf/#{conversion_id}"
      response_data[:results] = {
        markdown_endpoint: "#{base_url}.mmd",
        html_endpoint: "#{base_url}.html",
        tex_endpoint: "#{base_url}.tex",
        note: 'Fetch these with app_id/app_key headers, or call ' \
              'convert_document_tool to get the content directly.'
      }
    end

    # Failed conversions additionally expose the error fields of the payload.
    if %w[error failed].include?(state)
      response_data[:error] = status_data['error']
      response_data[:error_info] = status_data['error_info']
    end

    json_response(response_data)
  end
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|