mathpix 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +52 -0
  3. data/LICENSE +21 -0
  4. data/README.md +171 -0
  5. data/SECURITY.md +137 -0
  6. data/lib/mathpix/balanced_ternary.rb +86 -0
  7. data/lib/mathpix/batch.rb +155 -0
  8. data/lib/mathpix/capture_builder.rb +142 -0
  9. data/lib/mathpix/chemistry.rb +69 -0
  10. data/lib/mathpix/client.rb +439 -0
  11. data/lib/mathpix/configuration.rb +187 -0
  12. data/lib/mathpix/configuration.rb.backup +125 -0
  13. data/lib/mathpix/conversion.rb +257 -0
  14. data/lib/mathpix/document.rb +320 -0
  15. data/lib/mathpix/errors.rb +78 -0
  16. data/lib/mathpix/mcp/auth/oauth_provider.rb +346 -0
  17. data/lib/mathpix/mcp/auth/token_manager.rb +31 -0
  18. data/lib/mathpix/mcp/auth.rb +18 -0
  19. data/lib/mathpix/mcp/base_tool.rb +117 -0
  20. data/lib/mathpix/mcp/elicitations/ambiguity_elicitation.rb +162 -0
  21. data/lib/mathpix/mcp/elicitations/base_elicitation.rb +141 -0
  22. data/lib/mathpix/mcp/elicitations/confidence_elicitation.rb +162 -0
  23. data/lib/mathpix/mcp/elicitations.rb +78 -0
  24. data/lib/mathpix/mcp/middleware/cors_middleware.rb +94 -0
  25. data/lib/mathpix/mcp/middleware/oauth_middleware.rb +72 -0
  26. data/lib/mathpix/mcp/middleware/rate_limiting_middleware.rb +140 -0
  27. data/lib/mathpix/mcp/middleware.rb +13 -0
  28. data/lib/mathpix/mcp/resources/formats_list_resource.rb +113 -0
  29. data/lib/mathpix/mcp/resources/hierarchical_router.rb +237 -0
  30. data/lib/mathpix/mcp/resources/latest_snip_resource.rb +60 -0
  31. data/lib/mathpix/mcp/resources/recent_snips_resource.rb +75 -0
  32. data/lib/mathpix/mcp/resources/snip_stats_resource.rb +78 -0
  33. data/lib/mathpix/mcp/resources.rb +15 -0
  34. data/lib/mathpix/mcp/server.rb +174 -0
  35. data/lib/mathpix/mcp/tools/batch_convert_tool.rb +106 -0
  36. data/lib/mathpix/mcp/tools/check_document_status_tool.rb +66 -0
  37. data/lib/mathpix/mcp/tools/convert_document_tool.rb +90 -0
  38. data/lib/mathpix/mcp/tools/convert_image_tool.rb +91 -0
  39. data/lib/mathpix/mcp/tools/convert_strokes_tool.rb +82 -0
  40. data/lib/mathpix/mcp/tools/get_account_info_tool.rb +57 -0
  41. data/lib/mathpix/mcp/tools/get_usage_tool.rb +62 -0
  42. data/lib/mathpix/mcp/tools/list_formats_tool.rb +81 -0
  43. data/lib/mathpix/mcp/tools/search_results_tool.rb +111 -0
  44. data/lib/mathpix/mcp/transports/http_streaming_transport.rb +622 -0
  45. data/lib/mathpix/mcp/transports/sse_stream_handler.rb +236 -0
  46. data/lib/mathpix/mcp/transports.rb +12 -0
  47. data/lib/mathpix/mcp.rb +52 -0
  48. data/lib/mathpix/result.rb +364 -0
  49. data/lib/mathpix/version.rb +22 -0
  50. data/lib/mathpix.rb +229 -0
  51. metadata +283 -0
@@ -0,0 +1,187 @@
1
+ # frozen_string_literal: true
2
+
3
require 'ipaddr'
require 'json'
require 'time'
require 'uri'

module Mathpix
  # Client configuration with security-oriented defaults and validation helpers.
  #
  # Credentials, API URL and timeout are read from the environment when the
  # object is created (MATHPIX_APP_ID, MATHPIX_APP_KEY, MATHPIX_API_URL,
  # MATHPIX_TIMEOUT); every other setting starts from the constants below.
  class Configuration
    # Security constants
    HTTPS_ONLY = true
    MAX_FILE_SIZE_MB = 10
    MAX_PATH_LENGTH = 1024
    ALLOWED_SCHEMES = %w[https].freeze

    # Resource limits
    MIN_LIMIT = 1
    MAX_LIMIT = 100
    DEFAULT_LIMIT = 10

    # Confidence thresholds (balanced ternary seed 1069)
    CONFIDENCE_HIGH = 0.9
    CONFIDENCE_MEDIUM = 0.7
    CONFIDENCE_LOW = 0.5

    # Rate limiting (requests per minute)
    RATE_LIMIT_DEFAULT = 60
    RATE_LIMIT_BURST = 10

    # Host prefixes rejected by #valid_url?: localhost, loopback and the
    # RFC 1918 private IPv4 ranges. Matched against the lower-cased host.
    INTERNAL_HOST_PATTERN =
      /\A(?:localhost|127\.|0\.0\.0\.0|::1|10\.|172\.(?:1[6-9]|2[0-9]|3[01])\.|192\.168\.)/.freeze

    # Logger methods that #log is allowed to dispatch to.
    LOG_LEVELS = %i[debug info warn error fatal unknown].freeze

    attr_accessor :app_id, :app_key, :api_url, :timeout, :default_formats,
                  :user_agent, :enforce_https, :max_file_size_mb, :logger, :seed

    attr_reader :rate_limit, :confidence_thresholds

    # Alias endpoint for api_url (for consistency with Python mpxpy)
    alias endpoint api_url
    alias endpoint= api_url=

    # Build a configuration from environment variables and class defaults.
    def initialize
      @app_id = ENV.fetch('MATHPIX_APP_ID', nil)
      @app_key = ENV.fetch('MATHPIX_APP_KEY', nil)
      @api_url = ENV.fetch('MATHPIX_API_URL', 'https://api.mathpix.com/v3')
      @timeout = ENV.fetch('MATHPIX_TIMEOUT', '30').to_i
      @default_formats = [:latex_styled]
      @user_agent = "mathpix-ruby/#{Mathpix::VERSION}"

      # Security settings
      @enforce_https = HTTPS_ONLY
      @max_file_size_mb = MAX_FILE_SIZE_MB
      @max_path_length = MAX_PATH_LENGTH

      # Resource limits
      @min_limit = MIN_LIMIT
      @max_limit = MAX_LIMIT
      @default_limit = DEFAULT_LIMIT

      # Confidence thresholds
      @confidence_thresholds = {
        high: CONFIDENCE_HIGH,
        medium: CONFIDENCE_MEDIUM,
        low: CONFIDENCE_LOW
      }

      # Rate limiting
      @rate_limit = RATE_LIMIT_DEFAULT

      # Structured logging; may be set to a Logger instance
      @logger = nil
    end

    # Check that credentials, API URL and timeout are usable.
    #
    # @return [true] when every check passes
    # @raise [ConfigurationError] if a credential is missing or empty, the API
    #   URL is not HTTPS while enforce_https is on, or the timeout is not a
    #   positive number of at most 300 seconds
    def validate!
      raise ConfigurationError, 'app_id is required' if app_id.nil? || app_id.empty?
      raise ConfigurationError, 'app_key is required' if app_key.nil? || app_key.empty?

      # to_s so that a nil api_url is reported as a ConfigurationError
      # instead of escaping as a NoMethodError.
      if enforce_https && !api_url.to_s.start_with?('https://')
        raise ConfigurationError, 'API URL must use HTTPS'
      end

      unless timeout.is_a?(Numeric) && timeout.positive? && timeout <= 300
        raise ConfigurationError, 'Timeout must be between 1 and 300 seconds'
      end

      true
    end

    # Clamp a requested limit into the configured [min, max] range.
    #
    # @param limit [#to_i] requested limit
    # @return [Integer] clamped limit
    def sanitize_limit(limit)
      limit.to_i.clamp(@min_limit, @max_limit)
    end

    # Check whether a remote URL may be fetched.
    #
    # Rejects non-strings, over-long strings, non-HTTP(S) schemes, plain HTTP
    # while enforce_https is on, and hosts that are localhost or a loopback,
    # private, link-local or unspecified IP address.
    #
    # @param url [String] URL to validate
    # @return [Boolean]
    def valid_url?(url)
      return false unless url.is_a?(String)
      return false if url.length > @max_path_length

      uri = URI.parse(url)
      return false unless %w[http https].include?(uri.scheme)
      return false if enforce_https && uri.scheme != 'https'

      # #hostname strips the brackets URI keeps around IPv6 literals in #host,
      # so "[::1]" is seen as "::1"; downcase because DNS names are
      # case-insensitive.
      host = uri.hostname.to_s.downcase
      return false if host.empty?
      return false if INTERNAL_HOST_PATTERN.match?(host)
      return false if internal_address?(host)

      true
    rescue URI::InvalidURIError
      false
    end

    # Auto-upgrade HTTP to HTTPS for remote URLs (feature parity with mpxpy).
    #
    # @param url [String] URL that may be HTTP or HTTPS
    # @return [String] the URL with a leading http:// replaced by https://;
    #   any other value is returned unchanged
    # @example
    #   upgrade_to_https('http://example.com/img.png')
    #   # => 'https://example.com/img.png'
    def upgrade_to_https(url)
      return url unless url.is_a?(String) && url.start_with?('http://')

      url.sub(%r{\Ahttp://}, 'https://')
    end

    # Resolve a local file path, rejecting directory traversal and bad files.
    #
    # @param path [String] file path
    # @return [String, nil] absolute path, or nil when the input is not a
    #   String, is too long, contains a ".." segment, does not name a regular
    #   file, or the file exceeds max_file_size_mb
    def sanitize_path(path)
      return nil unless path.is_a?(String)
      return nil if path.length > @max_path_length

      cleaned = path.delete("\0")

      # The traversal check has to inspect the raw input: File.expand_path
      # resolves ".." segments away, so checking afterwards can never match.
      return nil if cleaned.split(%r{[/\\]}).include?('..')

      normalized = File.expand_path(cleaned)
      return nil unless File.file?(normalized)

      size_mb = File.size(normalized).to_f / (1024 * 1024)
      return nil if size_mb > @max_file_size_mb

      normalized
    rescue StandardError
      # Best effort: any filesystem error means the path is unusable.
      nil
    end

    # Emit a JSON-encoded structured log line through the configured logger.
    # Does nothing when no logger is set.
    #
    # @param level [Symbol, String] log level (:debug, :info, :warn, :error, ...)
    # @param message [String] log message
    # @param data [Hash] extra fields merged into the payload
    def log(level, message, data = {})
      return unless @logger

      # Dispatch only to known logger severities so a stray level can never
      # invoke an unrelated logger method; unknown levels are logged as info.
      severity = LOG_LEVELS.find { |known| known.to_s == level.to_s } || :info

      payload = {
        timestamp: Time.now.utc.iso8601,
        level: level,
        message: message,
        seed: 1069,
        **data
      }.to_json

      @logger.public_send(severity, payload)
    end

    private

    # True when host is an IP literal in a loopback, private, link-local or
    # unspecified range. DNS names are not IP literals and yield false.
    def internal_address?(host)
      ip = IPAddr.new(host).native # unwrap IPv4-mapped IPv6 such as ::ffff:10.0.0.1
      ip.loopback? || ip.private? || ip.link_local? || ip.to_i.zero?
    rescue IPAddr::Error
      false
    end
  end
end
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
require 'yaml'
require 'fileutils'

module Mathpix
  # Configuration for Mathpix client
  # The geodesic path: simple, explicit, block-based
  #
  # Priority order (feature parity with mpxpy):
  # 1. Direct assignment (config.app_id = ...)
  # 2. Config file (~/.mathpix/config)
  # 3. Environment variables (MATHPIX_APP_ID, MATHPIX_APP_KEY)
  #
  # The priority holds regardless of the order in which #load_from_file and
  # #load_from_env are called: each credential, endpoint, timeout and seed
  # value remembers which source set it, and a lower-priority source never
  # overwrites a higher-priority one.
  class Configuration
    DEFAULT_CONFIG_PATH = File.expand_path('~/.mathpix/config').freeze

    # Settings whose origin is tracked, and the precedence of each origin.
    TRACKED_SETTINGS = %i[app_id app_key endpoint timeout seed].freeze
    SOURCE_RANK = { default: 0, env: 1, file: 2, direct: 3 }.freeze

    attr_reader(*TRACKED_SETTINGS)
    attr_accessor :default_formats, :default_confidence
    attr_accessor :auto_retry, :max_retries, :user_agent
    attr_accessor :config_file_path

    # Writers for tracked settings record the value as a direct assignment.
    TRACKED_SETTINGS.each do |setting|
      define_method("#{setting}=") { |value| assign(setting, value, :direct) }
    end

    # Set defaults, then load the config file (if present) and the environment.
    def initialize
      @sources = Hash.new(:default)
      @endpoint = 'https://api.mathpix.com/v3'
      @timeout = 30
      @seed = 1069 # Balanced ternary determinism
      @default_formats = %i[text latex_styled]
      @default_confidence = 0.85
      @auto_retry = true
      @max_retries = 3
      @user_agent = "mathpix-ruby/#{Mathpix::VERSION}"
      @config_file_path = DEFAULT_CONFIG_PATH

      load_from_file if File.exist?(@config_file_path)
      load_from_env
    end

    # Validate configuration
    # @raise [ConfigurationError] if app_id or app_key is missing or empty
    def validate!
      raise ConfigurationError, 'app_id required' if app_id.to_s.empty?
      raise ConfigurationError, 'app_key required' if app_key.to_s.empty?
    end

    # Enable seed-based determinism.
    # Note: this also reseeds Ruby's global random number generator.
    #
    # @param seed [Integer] random seed
    # @return [self]
    def with_seed(seed)
      assign(:seed, seed, :direct)
      srand(seed)
      self
    end

    # Fixed balanced ternary pattern
    # @return [Array<Integer>] the pattern [+1, -1, -1, +1, +1, +1, +1]
    def balanced_ternary_pattern
      [+1, -1, -1, +1, +1, +1, +1]
    end

    # Load configuration from file (feature parity with mpxpy)
    #
    # Config file format (YAML):
    #   app_id: your_app_id
    #   app_key: your_app_key
    #   endpoint: https://api.mathpix.com/v3
    #   timeout: 30
    #
    # A missing, empty or unreadable file leaves the configuration untouched;
    # read/parse failures are reported with a warning rather than raised.
    #
    # @param path [String] config file path
    # @return [self]
    def load_from_file(path = @config_file_path)
      return self unless File.exist?(path)

      # safe_load: the file holds plain scalars only, so never instantiate
      # arbitrary objects from it.
      config = YAML.safe_load(File.read(path))
      return self unless config.is_a?(Hash)

      TRACKED_SETTINGS.each do |setting|
        value = config[setting.to_s]
        assign(setting, value, :file) if value
      end

      self
    rescue StandardError => e
      warn "Failed to load config from #{path}: #{e.message}"
      self
    end

    # Load configuration from environment variables (lowest priority)
    #
    # Environment variables:
    #   MATHPIX_APP_ID
    #   MATHPIX_APP_KEY
    #   MATHPIX_ENDPOINT
    #   MATHPIX_TIMEOUT
    #
    # @return [self]
    def load_from_env
      assign(:app_id, ENV['MATHPIX_APP_ID'], :env)
      assign(:app_key, ENV['MATHPIX_APP_KEY'], :env)
      assign(:endpoint, ENV['MATHPIX_ENDPOINT'], :env)

      # An unset or non-numeric MATHPIX_TIMEOUT converts to 0 and is ignored
      # rather than installed as a zero-second timeout.
      seconds = ENV['MATHPIX_TIMEOUT'].to_i
      assign(:timeout, seconds, :env) if seconds.positive?

      self
    end

    # Save current configuration to file
    #
    # The file contains the API key, so it is made readable and writable by
    # its owner only.
    #
    # @param path [String] config file path
    # @return [self]
    def save_to_file(path = @config_file_path)
      FileUtils.mkdir_p(File.dirname(path))

      config = {
        'app_id' => @app_id,
        'app_key' => @app_key,
        'endpoint' => @endpoint,
        'timeout' => @timeout,
        'seed' => @seed
      }

      File.write(path, YAML.dump(config), perm: 0o600)
      File.chmod(0o600, path) # perm: only applies when the file is created
      self
    end

    private

    # Store value for a tracked setting unless a higher-priority source has
    # already provided one. A direct nil assignment clears the setting so that
    # the file or environment may fill it again; nil from any other source is
    # ignored.
    def assign(setting, value, source)
      if value.nil?
        return unless source == :direct

        @sources.delete(setting)
      else
        return if SOURCE_RANK.fetch(source) < SOURCE_RANK.fetch(@sources[setting])

        @sources[setting] = source
      end

      instance_variable_set(:"@#{setting}", value)
    end
  end
end
@@ -0,0 +1,257 @@
1
+ # frozen_string_literal: true
2
+
3
module Mathpix
  # Mathpix Markdown (MMD) conversion to multiple formats
  # Handles async conversion via POST /v3/converter
  #
  # Status data is fetched from the client with
  # +client.get_conversion_status(conversion_id)+ and cached on the instance;
  # output bytes are fetched with +client.download(url)+.
  class Conversion
    attr_reader :conversion_id, :mmd, :formats, :client

    # Conversion status states
    STATUS_QUEUED = 'queued'
    STATUS_PROCESSING = 'processing'
    STATUS_COMPLETED = 'completed'
    STATUS_ERROR = 'error'

    # Supported output formats (verified from Mathpix API docs)
    SUPPORTED_FORMATS = %w[
      md docx tex.zip html pdf latex_pdf pptx
      mmd.zip md.zip html.zip
    ].freeze

    # @param client [Object] object responding to get_conversion_status and download
    # @param conversion_id [String, nil] conversion identifier
    # @param mmd [String, nil] source Mathpix Markdown
    # @param formats [Object, nil] requested output formats
    def initialize(client, conversion_id: nil, mmd: nil, formats: nil)
      @client = client
      @conversion_id = conversion_id
      @mmd = mmd
      @formats = formats
      @status_data = nil
    end

    # Conversion status, fetched from the API on first use and cached after.
    #
    # @return [String, nil] status (queued, processing, completed, error)
    def status
      refresh_status unless @status_data
      @status_data&.dig('status')
    end

    # Check if conversion is complete
    #
    # @return [Boolean]
    def completed?
      status == STATUS_COMPLETED
    end

    # Check if conversion is still queued or processing
    #
    # @return [Boolean]
    def processing?
      [STATUS_QUEUED, STATUS_PROCESSING].include?(status)
    end

    # Check if conversion failed
    #
    # @return [Boolean]
    def error?
      status == STATUS_ERROR
    end

    # Get error message from the cached status data
    #
    # @return [String, nil]
    def error_message
      @status_data&.dig('error')
    end

    # Poll the API until the conversion is complete.
    #
    # @param max_wait [Numeric] maximum seconds to wait (default: 300 = 5 minutes)
    # @param poll_interval [Numeric] seconds between status checks (default: 2.0)
    # @return [self]
    # @raise [TimeoutError] if max_wait exceeded
    # @raise [ConversionError] if conversion fails
    def wait_until_complete(max_wait: 300, poll_interval: 2.0)
      # Monotonic clock: immune to wall-clock adjustments while waiting.
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      loop do
        refresh_status

        return self if completed?
        raise ConversionError, "Conversion failed: #{error_message}" if error?

        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
        if elapsed > max_wait
          raise TimeoutError, "Conversion timed out after #{max_wait}s (status: #{status})"
        end

        # Always pause between polls: a status outside the known states must
        # not turn this loop into back-to-back API requests.
        sleep poll_interval
      end
    end

    # Poll until complete (alias for wait_until_complete)
    #
    # @param max_wait [Numeric] maximum seconds to wait
    # @param poll_interval [Numeric] seconds between checks
    # @return [self]
    def poll_until_ready(max_wait: 300, poll_interval: 2.0)
      wait_until_complete(max_wait: max_wait, poll_interval: poll_interval)
    end

    # Get converted output for a specific format
    #
    # @param format [String, Symbol] output format (e.g., 'pdf', :docx)
    # @return [String] whatever client.download returns for the output URL
    # @raise [Error] if conversion not complete or the format is unavailable
    def output(format)
      raise Error, "Conversion not complete (status: #{status})" unless completed?

      client.download(output_url(format.to_s))
    end

    # Save output to file
    #
    # @param format [String, Symbol] output format
    # @param path [String] destination file path
    # @return [String] path to saved file
    def save_output(format, path)
      File.binwrite(path, output(format))
      path
    end

    # --- Format-specific convenience methods (mpxpy parity) ---

    # Get PDF output as bytes
    # @return [String] binary PDF content
    def to_pdf_bytes
      output(:pdf)
    end

    # Save PDF to file
    # @param path [String] destination path
    # @return [String] path
    def to_pdf_file(path)
      save_output(:pdf, path)
    end

    # Get DOCX output as bytes
    # @return [String] binary DOCX content
    def to_docx_bytes
      output(:docx)
    end

    # Save DOCX to file
    # @param path [String] destination path
    # @return [String] path
    def to_docx_file(path)
      save_output(:docx, path)
    end

    # Get HTML output as text
    # @return [String] HTML content
    def to_html_text
      output(:html)
    end

    # Save HTML to file
    # @param path [String] destination path
    # @return [String] path
    def to_html_file(path)
      save_output(:html, path)
    end

    # Get Markdown output as text
    # @return [String] Markdown content
    def to_md_text
      output(:md)
    end

    alias to_markdown_text to_md_text

    # Save Markdown to file
    # @param path [String] destination path
    # @return [String] path
    def to_md_file(path)
      save_output(:md, path)
    end

    alias to_markdown_file to_md_file

    # Get LaTeX ZIP as bytes
    # @return [String] binary ZIP content
    def to_tex_zip_bytes
      output(:'tex.zip')
    end

    # Save LaTeX ZIP to file
    # @param path [String] destination path
    # @return [String] path
    def to_tex_zip_file(path)
      save_output(:'tex.zip', path)
    end

    # Get LaTeX PDF as bytes
    # @return [String] binary PDF content
    def to_latex_pdf_bytes
      output(:latex_pdf)
    end

    # Save LaTeX PDF to file
    # @param path [String] destination path
    # @return [String] path
    def to_latex_pdf_file(path)
      save_output(:latex_pdf, path)
    end

    # Get PowerPoint as bytes
    # @return [String] binary PPTX content
    def to_pptx_bytes
      output(:pptx)
    end

    # Save PowerPoint to file
    # @param path [String] destination path
    # @return [String] path
    def to_pptx_file(path)
      save_output(:pptx, path)
    end

    # Get all available outputs
    #
    # Fetches the status once unless the cached status is already completed.
    #
    # @return [Hash<Symbol, String>] format => url mapping
    def available_outputs
      refresh_status unless @status_data && completed?

      outputs = @status_data&.dig('outputs')
      return {} unless outputs

      outputs.transform_keys(&:to_sym)
    end

    # Inspect
    #
    # Reports only the cached status; inspecting an object never performs a
    # network request.
    #
    # @return [String]
    def inspect
      cached = @status_data&.dig('status') || 'unknown'
      "#<Mathpix::Conversion id=#{conversion_id} status=#{cached}>"
    end

    private

    # Refresh status from API
    def refresh_status
      @status_data = client.get_conversion_status(conversion_id)
    end

    # Get output URL for format
    #
    # @param format [String] format name
    # @return [String] download URL
    # @raise [Error] if the cached status data has no URL for the format
    def output_url(format)
      urls = @status_data&.dig('outputs') || {}
      urls[format] || urls[format.to_s] ||
        raise(Error, "Output format '#{format}' not available. Available: #{urls.keys.join(', ')}")
    end
  end
end