mathpix 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +52 -0
- data/LICENSE +21 -0
- data/README.md +171 -0
- data/SECURITY.md +137 -0
- data/lib/mathpix/balanced_ternary.rb +86 -0
- data/lib/mathpix/batch.rb +155 -0
- data/lib/mathpix/capture_builder.rb +142 -0
- data/lib/mathpix/chemistry.rb +69 -0
- data/lib/mathpix/client.rb +439 -0
- data/lib/mathpix/configuration.rb +187 -0
- data/lib/mathpix/configuration.rb.backup +125 -0
- data/lib/mathpix/conversion.rb +257 -0
- data/lib/mathpix/document.rb +320 -0
- data/lib/mathpix/errors.rb +78 -0
- data/lib/mathpix/mcp/auth/oauth_provider.rb +346 -0
- data/lib/mathpix/mcp/auth/token_manager.rb +31 -0
- data/lib/mathpix/mcp/auth.rb +18 -0
- data/lib/mathpix/mcp/base_tool.rb +117 -0
- data/lib/mathpix/mcp/elicitations/ambiguity_elicitation.rb +162 -0
- data/lib/mathpix/mcp/elicitations/base_elicitation.rb +141 -0
- data/lib/mathpix/mcp/elicitations/confidence_elicitation.rb +162 -0
- data/lib/mathpix/mcp/elicitations.rb +78 -0
- data/lib/mathpix/mcp/middleware/cors_middleware.rb +94 -0
- data/lib/mathpix/mcp/middleware/oauth_middleware.rb +72 -0
- data/lib/mathpix/mcp/middleware/rate_limiting_middleware.rb +140 -0
- data/lib/mathpix/mcp/middleware.rb +13 -0
- data/lib/mathpix/mcp/resources/formats_list_resource.rb +113 -0
- data/lib/mathpix/mcp/resources/hierarchical_router.rb +237 -0
- data/lib/mathpix/mcp/resources/latest_snip_resource.rb +60 -0
- data/lib/mathpix/mcp/resources/recent_snips_resource.rb +75 -0
- data/lib/mathpix/mcp/resources/snip_stats_resource.rb +78 -0
- data/lib/mathpix/mcp/resources.rb +15 -0
- data/lib/mathpix/mcp/server.rb +174 -0
- data/lib/mathpix/mcp/tools/batch_convert_tool.rb +106 -0
- data/lib/mathpix/mcp/tools/check_document_status_tool.rb +66 -0
- data/lib/mathpix/mcp/tools/convert_document_tool.rb +90 -0
- data/lib/mathpix/mcp/tools/convert_image_tool.rb +91 -0
- data/lib/mathpix/mcp/tools/convert_strokes_tool.rb +82 -0
- data/lib/mathpix/mcp/tools/get_account_info_tool.rb +57 -0
- data/lib/mathpix/mcp/tools/get_usage_tool.rb +62 -0
- data/lib/mathpix/mcp/tools/list_formats_tool.rb +81 -0
- data/lib/mathpix/mcp/tools/search_results_tool.rb +111 -0
- data/lib/mathpix/mcp/transports/http_streaming_transport.rb +622 -0
- data/lib/mathpix/mcp/transports/sse_stream_handler.rb +236 -0
- data/lib/mathpix/mcp/transports.rb +12 -0
- data/lib/mathpix/mcp.rb +52 -0
- data/lib/mathpix/result.rb +364 -0
- data/lib/mathpix/version.rb +22 -0
- data/lib/mathpix.rb +229 -0
- metadata +283 -0
@@ -0,0 +1,320 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mathpix
|
4
|
+
# Document processing builder (PDF, DOCX, PPTX)
|
5
|
+
# The geodesic path: document-oriented, format-agnostic
|
6
|
+
#
|
7
|
+
# Feature parity with Python mpxpy: PDF + Office documents
|
8
|
+
class Document
  # Lower-cased file extension => document type symbol handed to the client.
  EXTENSION_TYPES = {
    '.pdf' => :pdf,
    '.docx' => :docx,
    '.pptx' => :pptx
  }.freeze
  private_constant :EXTENSION_TYPES

  attr_reader :client, :document_path, :options

  # @param client [Object] object that receives +convert_document+ when the
  #   builder is executed
  # @param document_path [String] path of the document; its extension decides
  #   the document type
  def initialize(client, document_path)
    @options = {}
    @document_path = document_path
    @client = client
  end

  # Choose the output formats (replaces any previously chosen formats).
  # Nested arrays are flattened, so both call styles below are equivalent.
  # @param formats [Array<Symbol>] format names
  # @return [self]
  # @example
  #   doc.with_formats(:markdown, :latex, :docx)
  #   doc.with_formats(%i[markdown latex docx])
  def with_formats(*formats)
    tap { @options[:formats] = formats.flatten }
  end

  # Turn on table extraction and record any extra table options.
  # Extra options are merged after the flag is set, so a caller-supplied
  # key wins over an existing one.
  # @param table_options [Hash] additional options stored alongside the flag
  # @return [self]
  def with_tables(**table_options)
    tap do
      @options[:include_table_html] = true
      @options.merge!(table_options)
    end
  end

  # Turn on diagram extraction.
  # @return [self]
  def with_diagrams
    tap { @options[:include_diagram_svg] = true }
  end

  # Choose the quality level. The value is stored as given (no validation).
  # @param level [Symbol] :low, :medium, :high
  # @return [self]
  def quality(level)
    tap { @options[:quality] = level }
  end

  # Request line-level data (bounding boxes).
  # @return [self]
  def with_line_data
    tap { @options[:include_line_data] = true }
  end

  # Request word-level data (bounding boxes).
  # @return [self]
  def with_word_data
    tap { @options[:include_word_data] = true }
  end

  # Restrict processing to a page range.
  # @param start_page [Integer] first page (1-indexed)
  # @param end_page [Integer, nil] last page (nil = all)
  # @return [self]
  def pages(start_page, end_page = nil)
    tap { @options[:page_ranges] = { start: start_page, end: end_page } }
  end

  # Submit the document for conversion (async operation).
  #
  # The document type is resolved first, so an unsupported extension raises
  # before the client is contacted.
  #
  # @return [DocumentConversion] handle for the submitted conversion
  # @raise [InvalidImageError] when the file extension is not supported
  # @example
  #   conversion = Mathpix.document('paper.pdf')
  #     .with_formats(:markdown, :latex)
  #     .convert
  #   conversion.wait_until_complete
  #   conversion.save_markdown('output.md')
  def convert
    type = detect_document_type
    request = { document_path: document_path, document_type: type }.merge(options)
    id = client.convert_document(**request)

    DocumentConversion.new(client, id, document_path, type)
  end

  alias_method :call, :convert
  alias_method :run, :convert

  private

  # Resolve the document type from the (case-insensitive) file extension.
  # @return [Symbol] :pdf, :docx, :pptx
  # @raise [InvalidImageError] when the extension is not in the lookup table
  def detect_document_type
    extension = File.extname(document_path).downcase

    EXTENSION_TYPES.fetch(extension) do
      raise InvalidImageError.new(
        "Unsupported document format: #{extension}",
        recommended_format: 'pdf, docx, pptx'
      )
    end
  end
end
|
118
|
+
|
119
|
+
# Document Conversion Result (async operation)
|
120
|
+
#
|
121
|
+
# Polls Mathpix API until conversion completes
|
122
|
+
# Feature parity with Python mpxpy document processing
|
123
|
+
class DocumentConversion
  # Statuses that abort polling with a ConversionError.
  FAILURE_STATUSES = %w[error failed].freeze

  # Statuses that mean "still running, poll again". 'received', 'loaded' and
  # 'split' are the intermediate states listed by the Mathpix PDF status
  # endpoint documentation; without them a healthy job would be reported as
  # having an unknown status.
  # NOTE(review): confirm against what client.get_document_status returns.
  IN_PROGRESS_STATUSES = %w[processing pending received loaded split].freeze

  private_constant :FAILURE_STATUSES, :IN_PROGRESS_STATUSES

  attr_reader :client, :conversion_id, :document_path, :document_type

  # @param client [Object] object that receives +get_document_status+
  # @param conversion_id [Object] identifier passed to the status lookup
  # @param document_path [String] path of the source document
  # @param document_type [Symbol] document type forwarded to the result
  def initialize(client, conversion_id, document_path, document_type)
    @client = client
    @conversion_id = conversion_id
    @document_path = document_path
    @document_type = document_type
  end

  # Poll the conversion status until it completes, fails or times out.
  #
  # Elapsed time is measured with the monotonic clock so that wall-clock
  # adjustments cannot cause a spurious or missed timeout. The timeout is
  # checked after each in-progress poll, before sleeping.
  #
  # @param max_wait [Integer] maximum wait time in seconds
  # @param poll_interval [Float] seconds between polls
  # @return [self]
  # @raise [ConversionError] when the status is a failure or is unrecognised
  # @raise [TimeoutError] when the job is still running after +max_wait+
  def wait_until_complete(max_wait: 600, poll_interval: 3.0)
    started_at = monotonic_now

    loop do
      status_data = client.get_document_status(conversion_id)
      status = status_data['status']

      case status
      when 'completed'
        @result = DocumentResult.new(status_data, document_path, document_type)
        return self
      when *FAILURE_STATUSES
        raise ConversionError.new(
          "Document conversion failed: #{status_data['error']}",
          conversion_id: conversion_id,
          conversion_status: status
        )
      when *IN_PROGRESS_STATUSES
        if monotonic_now - started_at > max_wait
          raise TimeoutError, "Document conversion timed out after #{max_wait}s"
        end

        sleep poll_interval
      else
        raise ConversionError.new(
          "Unknown conversion status: #{status}",
          conversion_id: conversion_id,
          conversion_status: status
        )
      end
    end
  end

  # Get result (must wait_until_complete first)
  # @return [DocumentResult]
  # @raise [ConversionError] when no result has been stored yet
  def result
    @result || raise(ConversionError, 'Conversion not yet complete. Call wait_until_complete first.')
  end

  # Convenience method: wait (with default timing) and get result
  # @return [DocumentResult]
  def complete!
    wait_until_complete
    result
  end

  # Save markdown output, waiting for completion first if needed
  # @param path [String] output file path
  def save_markdown(path)
    completed_result.save_markdown(path)
  end

  # Save LaTeX output, waiting for completion first if needed
  # @param path [String] output file path
  def save_latex(path)
    completed_result.save_latex(path)
  end

  # Save HTML output, waiting for completion first if needed
  # @param path [String] output file path
  def save_html(path)
    completed_result.save_html(path)
  end

  # Save DOCX output, waiting for completion first if needed
  # @param path [String] output file path
  def save_docx(path)
    completed_result.save_docx(path)
  end

  private

  # Stored result, or the result obtained by blocking until completion.
  # @return [DocumentResult]
  def completed_result
    @result || complete!
  end

  # Current monotonic clock reading in seconds.
  # @return [Float]
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
|
212
|
+
|
213
|
+
# Document Result object
|
214
|
+
#
|
215
|
+
# Represents processed document with extracted content
|
216
|
+
class DocumentResult < Result
  attr_reader :document_path, :document_type

  # @param data [Hash] response payload; forwarded unchanged to the superclass
  # @param document_path [String, nil] path of the source document
  # @param document_type [Symbol, nil] document type compared by the
  #   pdf?/docx?/pptx? predicates
  def initialize(data, document_path = nil, document_type = nil)
    super(data)
    @document_type = document_type
    @document_path = document_path
  end

  # Per-page entries of the payload.
  # @return [Array<Hash>] page data (empty when the payload has no 'pages')
  def pages
    page_list = data['pages']
    page_list || []
  end

  # Number of entries returned by {#pages}.
  # @return [Integer]
  def page_count
    pages.length
  end

  # Equations gathered from every page, in page order.
  # @return [Array<String>]
  def equations
    collect_from_pages('equations')
  end

  # Tables gathered from every page, in page order.
  # @return [Array<Hash>]
  def tables
    collect_from_pages('tables')
  end

  # Diagrams gathered from every page, in page order.
  # @return [Array<Hash>]
  def diagrams
    collect_from_pages('diagrams')
  end

  # Markdown output; falls back to the 'mmd' key when 'markdown' is absent.
  # @return [String, nil]
  def markdown
    data['markdown'] || data['mmd']
  end

  # LaTeX output.
  # @return [String, nil]
  def latex
    data['latex']
  end

  # HTML output.
  # @return [String, nil]
  def html
    data['html']
  end

  # Write the markdown output to a file; does nothing when there is none.
  # @param path [String] output file path
  def save_markdown(path)
    write_text(path, markdown)
  end

  # Write the LaTeX output to a file; does nothing when there is none.
  # @param path [String] output file path
  def save_latex(path)
    write_text(path, latex)
  end

  # Write the HTML output to a file; does nothing when there is none.
  # @param path [String] output file path
  def save_html(path)
    write_text(path, html)
  end

  # Write the DOCX output to a file in binary mode.
  #
  # A 'docx_url' entry takes precedence and is fetched through
  # +client.download+; otherwise an inline 'docx_data' entry is written.
  # Nothing is written when neither key is present.
  #
  # NOTE(review): +client+ is not defined in this class and the constructor
  # only forwards +data+ to the superclass, so the URL branch relies on Result
  # providing +client+ -- confirm.
  #
  # @param path [String] output file path
  def save_docx(path)
    docx_url = data['docx_url']
    return File.binwrite(path, client.download(docx_url)) if docx_url

    inline_docx = data['docx_data']
    File.binwrite(path, inline_docx) if inline_docx
  end

  # @return [Boolean] whether the document type is :pdf
  def pdf?
    type?(:pdf)
  end

  # @return [Boolean] whether the document type is :docx
  def docx?
    type?(:docx)
  end

  # @return [Boolean] whether the document type is :pptx
  def pptx?
    type?(:pptx)
  end

  private

  # Concatenate the values stored under +key+ on each page, skipping pages
  # that lack the key.
  def collect_from_pages(key)
    pages.flat_map { |page| page[key] || [] }
  end

  # Write +content+ to +path+ unless it is nil/false.
  def write_text(path, content)
    File.write(path, content) if content
  end

  # Compare the stored document type with +expected+.
  def type?(expected)
    document_type == expected
  end
end
|
316
|
+
|
317
|
+
# Alias PDF and PDFResult to Document and DocumentResult for backward compatibility
|
318
|
+
PDF = Document
|
319
|
+
PDFResult = DocumentResult
|
320
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mathpix
  # Base error class.
  #
  # Every error accepts an optional message (so a bare +raise SomeError+
  # works) and an optional +details+ hash carrying extra context.
  class Error < StandardError
    # @return [Hash] extra context supplied by the raiser (empty by default)
    attr_reader :details

    # @param message [String, nil] error message
    # @param details [Hash] extra context
    def initialize(message = nil, details: {})
      super(message)
      @details = details
    end
  end

  # Configuration error
  class ConfigurationError < Error; end

  # API error
  class APIError < Error
    # @return [Object, nil] status value supplied by the raiser
    attr_reader :status

    # @param message [String, nil] error message
    # @param status [Object, nil] status value to expose
    # @param details [Hash] extra context
    def initialize(message = nil, status: nil, details: {})
      super(message, details: details)
      @status = status
    end
  end

  # Rate limit error
  class RateLimitError < APIError
    # @return [Object, nil] retry hint supplied by the raiser
    attr_reader :retry_after

    # @param message [String, nil] error message
    # @param retry_after [Object, nil] retry hint to expose
    # @param options [Hash] keywords forwarded to APIError (status:, details:)
    def initialize(message = nil, retry_after: nil, **options)
      super(message, **options)
      @retry_after = retry_after
    end
  end

  # Server error (5xx)
  class ServerError < APIError; end

  # Network/timeout error
  class NetworkError < Error; end
  class TimeoutError < NetworkError; end

  # Low confidence error
  class LowConfidenceError < Error
    attr_reader :confidence, :suggestions

    # @param message [String, nil] error message
    # @param confidence [Object, nil] confidence value to expose
    # @param suggestions [Array] suggestions to expose
    # @param details [Hash] extra context
    def initialize(message = nil, confidence: nil, suggestions: [], details: {})
      super(message, details: details)
      @confidence = confidence
      @suggestions = suggestions
    end
  end

  # Invalid request error (malformed input)
  class InvalidRequestError < Error; end

  # Invalid image error
  class InvalidImageError < Error
    attr_reader :recommended_format

    # @param message [String, nil] error message
    # @param recommended_format [String, nil] format hint to expose
    # @param details [Hash] extra context
    def initialize(message = nil, recommended_format: nil, details: {})
      super(message, details: details)
      @recommended_format = recommended_format
    end
  end

  # Conversion error
  class ConversionError < Error
    attr_reader :conversion_id, :conversion_status

    # @param message [String, nil] error message
    # @param conversion_id [Object, nil] conversion identifier to expose
    # @param conversion_status [Object, nil] status value to expose
    # @param details [Hash] extra context
    def initialize(message = nil, conversion_id: nil, conversion_status: nil, details: {})
      super(message, details: details)
      @conversion_id = conversion_id
      @conversion_status = conversion_status
    end
  end
end
|