mathpix 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +52 -0
  3. data/LICENSE +21 -0
  4. data/README.md +171 -0
  5. data/SECURITY.md +137 -0
  6. data/lib/mathpix/balanced_ternary.rb +86 -0
  7. data/lib/mathpix/batch.rb +155 -0
  8. data/lib/mathpix/capture_builder.rb +142 -0
  9. data/lib/mathpix/chemistry.rb +69 -0
  10. data/lib/mathpix/client.rb +439 -0
  11. data/lib/mathpix/configuration.rb +187 -0
  12. data/lib/mathpix/configuration.rb.backup +125 -0
  13. data/lib/mathpix/conversion.rb +257 -0
  14. data/lib/mathpix/document.rb +320 -0
  15. data/lib/mathpix/errors.rb +78 -0
  16. data/lib/mathpix/mcp/auth/oauth_provider.rb +346 -0
  17. data/lib/mathpix/mcp/auth/token_manager.rb +31 -0
  18. data/lib/mathpix/mcp/auth.rb +18 -0
  19. data/lib/mathpix/mcp/base_tool.rb +117 -0
  20. data/lib/mathpix/mcp/elicitations/ambiguity_elicitation.rb +162 -0
  21. data/lib/mathpix/mcp/elicitations/base_elicitation.rb +141 -0
  22. data/lib/mathpix/mcp/elicitations/confidence_elicitation.rb +162 -0
  23. data/lib/mathpix/mcp/elicitations.rb +78 -0
  24. data/lib/mathpix/mcp/middleware/cors_middleware.rb +94 -0
  25. data/lib/mathpix/mcp/middleware/oauth_middleware.rb +72 -0
  26. data/lib/mathpix/mcp/middleware/rate_limiting_middleware.rb +140 -0
  27. data/lib/mathpix/mcp/middleware.rb +13 -0
  28. data/lib/mathpix/mcp/resources/formats_list_resource.rb +113 -0
  29. data/lib/mathpix/mcp/resources/hierarchical_router.rb +237 -0
  30. data/lib/mathpix/mcp/resources/latest_snip_resource.rb +60 -0
  31. data/lib/mathpix/mcp/resources/recent_snips_resource.rb +75 -0
  32. data/lib/mathpix/mcp/resources/snip_stats_resource.rb +78 -0
  33. data/lib/mathpix/mcp/resources.rb +15 -0
  34. data/lib/mathpix/mcp/server.rb +174 -0
  35. data/lib/mathpix/mcp/tools/batch_convert_tool.rb +106 -0
  36. data/lib/mathpix/mcp/tools/check_document_status_tool.rb +66 -0
  37. data/lib/mathpix/mcp/tools/convert_document_tool.rb +90 -0
  38. data/lib/mathpix/mcp/tools/convert_image_tool.rb +91 -0
  39. data/lib/mathpix/mcp/tools/convert_strokes_tool.rb +82 -0
  40. data/lib/mathpix/mcp/tools/get_account_info_tool.rb +57 -0
  41. data/lib/mathpix/mcp/tools/get_usage_tool.rb +62 -0
  42. data/lib/mathpix/mcp/tools/list_formats_tool.rb +81 -0
  43. data/lib/mathpix/mcp/tools/search_results_tool.rb +111 -0
  44. data/lib/mathpix/mcp/transports/http_streaming_transport.rb +622 -0
  45. data/lib/mathpix/mcp/transports/sse_stream_handler.rb +236 -0
  46. data/lib/mathpix/mcp/transports.rb +12 -0
  47. data/lib/mathpix/mcp.rb +52 -0
  48. data/lib/mathpix/result.rb +364 -0
  49. data/lib/mathpix/version.rb +22 -0
  50. data/lib/mathpix.rb +229 -0
  51. metadata +283 -0
@@ -0,0 +1,320 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mathpix
4
# Fluent builder for converting a document file (PDF, DOCX or PPTX).
#
# Every option method records a setting in +options+ and returns the builder
# itself, so calls can be chained before #convert hands the request to the
# client.
class Document
  # Lower-cased file extension => document type symbol sent to the client.
  TYPES_BY_EXTENSION = { '.pdf' => :pdf, '.docx' => :docx, '.pptx' => :pptx }.freeze
  private_constant :TYPES_BY_EXTENSION

  attr_reader :client, :document_path, :options

  # @param client [Object] object responding to +convert_document+
  # @param document_path [String] path of the document to convert
  def initialize(client, document_path)
    @options = {}
    @document_path = document_path
    @client = client
  end

  # Choose the output formats.
  # @param formats [Array<Symbol>] format names; nested arrays are flattened
  # @return [self]
  # @example
  #   doc.with_formats(:markdown, :latex, :docx)
  def with_formats(*formats)
    set_option(:formats, formats.flatten)
  end

  # Turn on table extraction and merge any extra table options.
  # @param options [Hash] additional options stored alongside the flag
  # @return [self]
  def with_tables(**options)
    set_option(:include_table_html, true)
    # Merged after the flag is set, so caller-supplied keys take precedence.
    @options.update(options)
    self
  end

  # Turn on diagram extraction.
  # @return [self]
  def with_diagrams
    set_option(:include_diagram_svg, true)
  end

  # Record the requested quality level.
  # @param level [Symbol] :low, :medium, :high
  # @return [self]
  def quality(level)
    set_option(:quality, level)
  end

  # Request line-level data (bounding boxes).
  # @return [self]
  def with_line_data
    set_option(:include_line_data, true)
  end

  # Request word-level data (bounding boxes).
  # @return [self]
  def with_word_data
    set_option(:include_word_data, true)
  end

  # Restrict processing to a page range.
  # @param start_page [Integer] first page (1-indexed)
  # @param end_page [Integer, nil] last page (nil = all)
  # @return [self]
  def pages(start_page, end_page = nil)
    set_option(:page_ranges, { start: start_page, end: end_page })
  end

  # Submit the document for conversion (asynchronous on the API side).
  #
  # @return [DocumentConversion] handle for the submitted conversion
  # @raise [InvalidImageError] when the file extension is not pdf/docx/pptx
  # @example
  #   conversion = Mathpix.document('paper.pdf')
  #     .with_formats(:markdown, :latex)
  #     .convert
  #   conversion.wait_until_complete
  #   conversion.save_markdown('output.md')
  def convert
    kind = detect_document_type
    job_id = client.convert_document(
      document_path: document_path,
      document_type: kind,
      **options
    )

    DocumentConversion.new(client, job_id, document_path, kind)
  end

  alias_method :call, :convert
  alias_method :run, :convert

  private

  # Store one option and return the builder so the caller can keep chaining.
  def set_option(key, value)
    @options[key] = value
    self
  end

  # Derive the document type from the (case-insensitive) file extension.
  # @return [Symbol] :pdf, :docx, :pptx
  # @raise [InvalidImageError] for any other extension
  def detect_document_type
    extension = File.extname(document_path).downcase

    TYPES_BY_EXTENSION.fetch(extension) do
      raise InvalidImageError.new(
        "Unsupported document format: #{extension}",
        recommended_format: 'pdf, docx, pptx'
      )
    end
  end
end
118
+
119
# Handle for a submitted (asynchronous) document conversion.
#
# Polls the client for the conversion status until the job completes,
# fails or the wait times out, then exposes the DocumentResult.
class DocumentConversion
  # Statuses meaning "still working, poll again". 'received', 'loaded' and
  # 'split' are the intermediate states documented for the Mathpix v3/pdf
  # endpoint; 'processing' and 'pending' were already accepted here.
  IN_PROGRESS_STATUSES = %w[received loaded split processing pending].freeze

  # Statuses meaning the conversion failed.
  FAILURE_STATUSES = %w[error failed].freeze

  attr_reader :client, :conversion_id, :document_path, :document_type

  # @param client [Object] object responding to +get_document_status+
  # @param conversion_id [String] identifier returned when the job was submitted
  # @param document_path [String] path of the source document
  # @param document_type [Symbol] :pdf, :docx or :pptx
  def initialize(client, conversion_id, document_path, document_type)
    @client = client
    @conversion_id = conversion_id
    @document_path = document_path
    @document_type = document_type
  end

  # Wait for conversion to complete
  #
  # @param max_wait [Numeric] maximum wait time in seconds
  # @param poll_interval [Numeric] seconds between polls
  # @return [self]
  # @raise [ConversionError] when the job reports a failure or an unknown status
  # @raise [TimeoutError] when the job is still running after +max_wait+ seconds
  def wait_until_complete(max_wait: 600, poll_interval: 3.0)
    started_at = monotonic_now

    loop do
      status_data = client.get_document_status(conversion_id)
      status = status_data['status']

      case status
      when 'completed'
        @result = DocumentResult.new(status_data, document_path, document_type)
        return self
      when *FAILURE_STATUSES
        raise ConversionError.new(
          "Document conversion failed: #{status_data['error']}",
          conversion_id: conversion_id,
          conversion_status: status
        )
      when *IN_PROGRESS_STATUSES
        elapsed = monotonic_now - started_at
        raise TimeoutError, "Document conversion timed out after #{max_wait}s" if elapsed > max_wait

        # Never sleep past the deadline: the last poll happens right at
        # max_wait instead of up to one poll_interval later.
        sleep [poll_interval, max_wait - elapsed].min
      else
        raise ConversionError.new(
          "Unknown conversion status: #{status}",
          conversion_id: conversion_id,
          conversion_status: status
        )
      end
    end
  end

  # Get result (must wait_until_complete first)
  # @return [DocumentResult]
  # @raise [ConversionError] when no result has been fetched yet
  def result
    @result || raise(ConversionError, 'Conversion not yet complete. Call wait_until_complete first.')
  end

  # Convenience method: wait and get result
  # @param wait_options [Hash] optional +max_wait:+ / +poll_interval:+,
  #   forwarded to #wait_until_complete (its defaults apply when omitted)
  # @return [DocumentResult]
  def complete!(**wait_options)
    wait_until_complete(**wait_options)
    result
  end

  # Save markdown output, waiting for completion first if necessary
  # @param path [String] output file path
  def save_markdown(path)
    complete! unless @result
    @result.save_markdown(path)
  end

  # Save LaTeX output, waiting for completion first if necessary
  # @param path [String] output file path
  def save_latex(path)
    complete! unless @result
    @result.save_latex(path)
  end

  # Save HTML output, waiting for completion first if necessary
  # @param path [String] output file path
  def save_html(path)
    complete! unless @result
    @result.save_html(path)
  end

  # Save DOCX output, waiting for completion first if necessary
  # @param path [String] output file path
  def save_docx(path)
    complete! unless @result
    @result.save_docx(path)
  end

  private

  # Monotonic timestamp in seconds; unaffected by wall-clock adjustments,
  # so elapsed-time arithmetic cannot jump or go negative.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
212
+
213
# Result of a finished document conversion.
#
# Wraps the status payload handed to it (read through +data+, which comes
# from the Result superclass defined outside this file) and adds
# document-level accessors and save helpers.
class DocumentResult < Result
  attr_reader :document_path, :document_type

  # @param data [Hash] payload with string keys ('pages', 'markdown', ...)
  # @param document_path [String, nil] path of the source document
  # @param document_type [Symbol, nil] :pdf, :docx or :pptx
  # @param client [Object, nil] optional object responding to +download(url)+,
  #   used by #save_docx when the payload only carries a 'docx_url'
  def initialize(data, document_path = nil, document_type = nil, client: nil)
    super(data)
    @document_path = document_path
    @document_type = document_type
    # Kept under its own name so it cannot clash with any state the
    # superclass may hold.
    @download_client = client
  end

  # Get all pages
  # @return [Array<Hash>] page data ([] when the payload has no 'pages')
  def pages
    data['pages'] || []
  end

  # Get page count
  # @return [Integer]
  def page_count
    pages.length
  end

  # Get all equations across all pages
  # @return [Array<String>]
  def equations
    pages.flat_map { |page| page['equations'] || [] }
  end

  # Get all tables across all pages
  # @return [Array<Hash>]
  def tables
    pages.flat_map { |page| page['tables'] || [] }
  end

  # Get all diagrams across all pages
  # @return [Array<Hash>]
  def diagrams
    pages.flat_map { |page| page['diagrams'] || [] }
  end

  # Get markdown output ('markdown' key, falling back to 'mmd')
  # @return [String, nil]
  def markdown
    data['markdown'] || data['mmd']
  end

  # Get LaTeX output
  # @return [String, nil]
  def latex
    data['latex']
  end

  # Get HTML output
  # @return [String, nil]
  def html
    data['html']
  end

  # Save markdown to file; does nothing when no markdown is present
  # @param path [String] output file path
  def save_markdown(path)
    content = markdown
    File.write(path, content) if content
  end

  # Save LaTeX to file; does nothing when no LaTeX is present
  # @param path [String] output file path
  def save_latex(path)
    content = latex
    File.write(path, content) if content
  end

  # Save HTML to file; does nothing when no HTML is present
  # @param path [String] output file path
  def save_html(path)
    content = html
    File.write(path, content) if content
  end

  # Save DOCX output to file.
  #
  # Downloads from 'docx_url' when present, otherwise writes inline
  # 'docx_data'; does nothing when the payload has neither.
  #
  # @param path [String] output file path
  # @raise [ConversionError] when a download is required but no client is available
  def save_docx(path)
    docx_url = data['docx_url']

    if docx_url
      File.binwrite(path, docx_downloader.download(docx_url))
    elsif data['docx_data']
      File.binwrite(path, data['docx_data'])
    end
  end

  # Check if document is a specific type
  # @return [Boolean]
  def pdf?
    document_type == :pdf
  end

  # @return [Boolean]
  def docx?
    document_type == :docx
  end

  # @return [Boolean]
  def pptx?
    document_type == :pptx
  end

  private

  # Resolve the object used to download DOCX output.
  #
  # Prefers the client given to the constructor. NOTE(review): the original
  # code called a bare +client+ that this class never defines; if the Result
  # superclass happens to provide one it is still honoured as a fallback -
  # confirm against result.rb.
  def docx_downloader
    downloader = @download_client
    downloader ||= client if respond_to?(:client, true)
    downloader || raise(ConversionError, 'Cannot download DOCX output: no API client is available to this result')
  end
end
316
+
317
+ # Backward-compatibility aliases: PDF refers to Document, PDFResult to DocumentResult
318
+ PDF = Document
319
+ PDFResult = DocumentResult
320
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
module Mathpix
  # Base error class for the library.
  #
  # The message is optional so that a bare `raise Mathpix::SomeError` works;
  # when omitted, the message falls back to the class name (standard Ruby
  # exception behaviour).
  class Error < StandardError
    # @return [Hash] additional context attached to the error
    attr_reader :details

    # @param message [String, nil] human-readable description
    # @param details [Hash] additional context
    def initialize(message = nil, details: {})
      super(message)
      @details = details
    end
  end

  # Configuration error
  class ConfigurationError < Error; end

  # API error
  class APIError < Error
    # @return [Object, nil] status supplied by the raiser
    attr_reader :status

    # @param message [String, nil] human-readable description
    # @param status [Object, nil] status to attach to the error
    # @param details [Hash] additional context
    def initialize(message = nil, status: nil, details: {})
      super(message, details: details)
      @status = status
    end
  end

  # Rate limit error
  class RateLimitError < APIError
    # @return [Object, nil] retry hint supplied by the raiser
    attr_reader :retry_after

    # @param message [String, nil] human-readable description
    # @param retry_after [Object, nil] retry hint to attach to the error
    # @param options [Hash] forwarded to APIError (+status:+, +details:+)
    def initialize(message = nil, retry_after: nil, **options)
      super(message, **options)
      @retry_after = retry_after
    end
  end

  # Server error (5xx)
  class ServerError < APIError; end

  # Network/timeout error
  class NetworkError < Error; end
  class TimeoutError < NetworkError; end

  # Low confidence error
  class LowConfidenceError < Error
    attr_reader :confidence, :suggestions

    # @param message [String, nil] human-readable description
    # @param confidence [Object, nil] confidence value to attach
    # @param suggestions [Array] suggestions to attach
    # @param details [Hash] additional context (same as on the base Error)
    def initialize(message = nil, confidence: nil, suggestions: [], details: {})
      super(message, details: details)
      @confidence = confidence
      @suggestions = suggestions
    end
  end

  # Invalid request error (malformed input)
  class InvalidRequestError < Error; end

  # Invalid image error
  class InvalidImageError < Error
    attr_reader :recommended_format

    # @param message [String, nil] human-readable description
    # @param recommended_format [String, nil] format(s) the caller should use instead
    # @param details [Hash] additional context (same as on the base Error)
    def initialize(message = nil, recommended_format: nil, details: {})
      super(message, details: details)
      @recommended_format = recommended_format
    end
  end

  # Conversion error
  class ConversionError < Error
    attr_reader :conversion_id, :conversion_status

    # @param message [String, nil] human-readable description
    # @param conversion_id [Object, nil] identifier of the affected conversion
    # @param conversion_status [Object, nil] status reported for the conversion
    # @param details [Hash] additional context (same as on the base Error)
    def initialize(message = nil, conversion_id: nil, conversion_status: nil, details: {})
      super(message, details: details)
      @conversion_id = conversion_id
      @conversion_status = conversion_status
    end
  end
end