mathpix 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +52 -0
- data/LICENSE +21 -0
- data/README.md +171 -0
- data/SECURITY.md +137 -0
- data/lib/mathpix/balanced_ternary.rb +86 -0
- data/lib/mathpix/batch.rb +155 -0
- data/lib/mathpix/capture_builder.rb +142 -0
- data/lib/mathpix/chemistry.rb +69 -0
- data/lib/mathpix/client.rb +439 -0
- data/lib/mathpix/configuration.rb +187 -0
- data/lib/mathpix/configuration.rb.backup +125 -0
- data/lib/mathpix/conversion.rb +257 -0
- data/lib/mathpix/document.rb +320 -0
- data/lib/mathpix/errors.rb +78 -0
- data/lib/mathpix/mcp/auth/oauth_provider.rb +346 -0
- data/lib/mathpix/mcp/auth/token_manager.rb +31 -0
- data/lib/mathpix/mcp/auth.rb +18 -0
- data/lib/mathpix/mcp/base_tool.rb +117 -0
- data/lib/mathpix/mcp/elicitations/ambiguity_elicitation.rb +162 -0
- data/lib/mathpix/mcp/elicitations/base_elicitation.rb +141 -0
- data/lib/mathpix/mcp/elicitations/confidence_elicitation.rb +162 -0
- data/lib/mathpix/mcp/elicitations.rb +78 -0
- data/lib/mathpix/mcp/middleware/cors_middleware.rb +94 -0
- data/lib/mathpix/mcp/middleware/oauth_middleware.rb +72 -0
- data/lib/mathpix/mcp/middleware/rate_limiting_middleware.rb +140 -0
- data/lib/mathpix/mcp/middleware.rb +13 -0
- data/lib/mathpix/mcp/resources/formats_list_resource.rb +113 -0
- data/lib/mathpix/mcp/resources/hierarchical_router.rb +237 -0
- data/lib/mathpix/mcp/resources/latest_snip_resource.rb +60 -0
- data/lib/mathpix/mcp/resources/recent_snips_resource.rb +75 -0
- data/lib/mathpix/mcp/resources/snip_stats_resource.rb +78 -0
- data/lib/mathpix/mcp/resources.rb +15 -0
- data/lib/mathpix/mcp/server.rb +174 -0
- data/lib/mathpix/mcp/tools/batch_convert_tool.rb +106 -0
- data/lib/mathpix/mcp/tools/check_document_status_tool.rb +66 -0
- data/lib/mathpix/mcp/tools/convert_document_tool.rb +90 -0
- data/lib/mathpix/mcp/tools/convert_image_tool.rb +91 -0
- data/lib/mathpix/mcp/tools/convert_strokes_tool.rb +82 -0
- data/lib/mathpix/mcp/tools/get_account_info_tool.rb +57 -0
- data/lib/mathpix/mcp/tools/get_usage_tool.rb +62 -0
- data/lib/mathpix/mcp/tools/list_formats_tool.rb +81 -0
- data/lib/mathpix/mcp/tools/search_results_tool.rb +111 -0
- data/lib/mathpix/mcp/transports/http_streaming_transport.rb +622 -0
- data/lib/mathpix/mcp/transports/sse_stream_handler.rb +236 -0
- data/lib/mathpix/mcp/transports.rb +12 -0
- data/lib/mathpix/mcp.rb +52 -0
- data/lib/mathpix/result.rb +364 -0
- data/lib/mathpix/version.rb +22 -0
- data/lib/mathpix.rb +229 -0
- metadata +283 -0
@@ -0,0 +1,142 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mathpix
|
4
|
+
# Chainable builder that collects OCR capture settings.
#
# Every configuration method records its setting in an internal options
# hash and returns the builder itself, so calls can be strung together.
# Nothing is sent until #capture (or one of its aliases) is invoked, at
# which point the image path and the collected options are handed to the
# client's #snap method.
class CaptureBuilder
  # @param client [#snap] object that will perform the capture
  # @param image_path [String] image location, passed to the client untouched
  def initialize(client, image_path)
    @options = {}
    @image_path = image_path
    @client = client
  end

  # Choose the output formats.
  #
  # Nested arrays are flattened, so both `with_formats(:a, :b)` and
  # `with_formats([:a, :b])` store `[:a, :b]`.
  #
  # @param formats [Array<Symbol>] format names
  # @return [self]
  # @example
  #   builder.with_formats(:latex, :mathml, :asciimath)
  def with_formats(*formats)
    tap { @options[:formats] = formats.flatten }
  end

  # Record the minimum confidence.
  #
  # @param threshold [Float] minimum confidence (0.0-1.0); stored as given
  # @return [self]
  def with_confidence(threshold)
    tap { @options[:confidence_threshold] = threshold }
  end

  # Attach metadata. A later call replaces the metadata of an earlier one.
  #
  # @param metadata [Hash] metadata key-value pairs
  # @return [self]
  def with_metadata(**metadata)
    tap { @options[:metadata] = metadata }
  end

  # Attach tags (nested arrays are flattened). A later call replaces
  # the tags of an earlier one.
  #
  # @param tags [Array<String>] tag strings
  # @return [self]
  def with_tags(*tags)
    tap { @options[:tags] = tags.flatten }
  end

  # Turn on space removal.
  #
  # @return [self]
  def remove_spaces
    tap { @options.store(:rm_spaces, true) }
  end

  # Merge recognition settings directly into the top-level options.
  # Keys given here overwrite same-named options set earlier.
  #
  # @param options [Hash] recognition settings
  # @return [self]
  def with_recognition(**options)
    tap { @options.merge!(options) }
  end

  # Store preprocessing settings under the :preprocessing option.
  #
  # @param options [Hash] preprocessing settings
  # @return [self]
  # @example
  #   builder.with_preprocessing(denoise: true, enhance_contrast: true)
  def with_preprocessing(**options)
    tap { @options[:preprocessing] = options }
  end

  # Store data output settings under the :data_options option.
  #
  # @param options [Hash] data output options
  # @return [self]
  def with_data_options(**options)
    tap { @options[:data_options] = options }
  end

  # Store alphabet flags under the :alphabets option.
  #
  # @param alphabets [Hash] alphabet flags
  # @return [self]
  # @example
  #   builder.with_alphabets(en: true, es: true, de: true)
  def with_alphabets(**alphabets)
    tap { @options[:alphabets] = alphabets }
  end

  # Turn on chemistry mode; this also requests SMILES output.
  #
  # @return [self]
  def chemistry_mode
    tap { @options.update(chemistry: true, include_smiles: true) }
  end

  # Request line-by-line data output (feature parity with mpxpy).
  #
  # Sets the :include_line_data option, which the result object is
  # expected to expose through result.lines / result.lines_json.
  #
  # @return [self]
  # @example
  #   result = Mathpix.from('multiline.png')
  #     .with_line_data
  #     .capture
  #   result.lines.each { |line| puts "#{line.text} @ #{line.bbox}" }
  def with_line_data
    tap { @options.store(:include_line_data, true) }
  end

  # Alternative name for #with_line_data.
  alias_method :include_line_data, :with_line_data

  # Run the capture with everything configured so far.
  #
  # @return [Result] whatever the client's #snap returns
  def capture
    @client.snap(@image_path, **@options)
  end

  # Terminator aliases for #capture (`convert` matches the BDD examples).
  %i[convert call run execute].each { |terminator| alias_method terminator, :capture }
end
|
142
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mathpix
|
4
|
+
# Chainable builder for chemistry captures.
#
# Starts with chemistry mode and SMILES output already switched on; the
# `with_*` methods add further options and return the builder so calls can
# be chained. The request is only made when #capture (or an alias) runs,
# which passes the image path and collected options to the client's #snap.
class Chemistry
  # @param client [#snap] object that will perform the capture
  # @param image_path [String] image location, passed to the client untouched
  def initialize(client, image_path)
    @client = client
    @image_path = image_path
    @options = {
      chemistry: true,
      include_smiles: true
    }
  end

  # Enable SMILES output (already on by default; kept for explicit chains).
  # @return [self]
  def with_smiles
    @options[:include_smiles] = true
    self
  end

  # Enable InChI output
  # @return [self]
  def with_inchi
    @options[:include_inchi] = true
    self
  end

  # Enable molecular formula
  # @return [self]
  def with_molecular_formula
    @options[:include_molecular_formula] = true
    self
  end

  # Enable stereochemistry detection
  # @return [self]
  def with_stereochemistry
    @options[:detect_stereochemistry] = true
    self
  end

  # Set confidence threshold
  # @param threshold [Float] minimum confidence; stored as given
  # @return [self]
  def with_confidence(threshold)
    @options[:confidence_threshold] = threshold
    self
  end

  # Add metadata. A later call replaces the metadata of an earlier one.
  # @param metadata [Hash] metadata
  # @return [self]
  def with_metadata(**metadata)
    @options[:metadata] = metadata
    self
  end

  # Execute capture
  # @return [Result] whatever the client's #snap returns
  def capture
    @client.snap(@image_path, **@options)
  end

  # Terminator aliases for #capture. `convert` and `execute` are provided
  # so this builder accepts the same terminators as CaptureBuilder.
  alias convert capture
  alias call capture
  alias run capture
  alias execute capture
end
|
69
|
+
end
|
@@ -0,0 +1,439 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Mathpix
|
4
|
+
# Core HTTP client for the Mathpix API.
#
# Public methods build a request body, send it with the configured
# credentials, and turn the JSON reply into a Result/Conversion or plain
# Hash. HTTP failures are mapped to the library's error classes in
# #handle_response and #make_request.
class Client
  # File extension (lower-case, with dot) => MIME type used for data URIs.
  MIME_TYPES = {
    '.png' => 'image/png',
    '.jpg' => 'image/jpeg',
    '.jpeg' => 'image/jpeg',
    '.gif' => 'image/gif',
    '.webp' => 'image/webp',
    '.pdf' => 'application/pdf',
    '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
  }.freeze
  private_constant :MIME_TYPES

  attr_reader :config

  # @param config [Configuration] settings object; `validate!` is called on
  #   it immediately, so an invalid configuration fails at construction time
  def initialize(config = Mathpix.configuration)
    @config = config
    config.validate!
  end

  # Snap image to equation (core method)
  #
  # Supports both local file paths and remote URLs (feature parity with mpxpy)
  #
  # @param image_path_or_url [String, Hash] path to image, URL, or hash with :path/:url key
  # @param options [Hash] request options; `:formats` may be a single
  #   format or an array and defaults to `config.default_formats`
  # @return [Result]
  # @raise [InvalidRequestError] if the source looks like a malformed URL
  # @example Local file
  #   client.snap('equation.png')
  # @example Remote URL
  #   client.snap('https://example.com/equation.png')
  #   client.snap(url: 'https://example.com/equation.png')
  # @example With options
  #   client.snap('equation.png', formats: [:latex, :mathml])
  def snap(image_path_or_url, **options)
    src, source_ref = prepare_image_source(image_path_or_url, options)

    payload = {
      src: src,
      # Array() lets callers pass `formats: :latex` as well as an array.
      formats: Array(options[:formats] || config.default_formats).map(&:to_s),
      include_line_data: options[:include_line_data] || false,
      **build_request_options(options)
    }

    Result.new(post('/text', payload), source_ref)
  end

  # Get recent captures
  #
  # @param limit [Integer] number of results
  # @return [Array<Result>] empty when the response carries no 'data' entry
  def recent(limit: 10)
    response = get('/ocr-results', params: { limit: limit })
    (response['data'] || []).map { |data| Result.new(data) }
  end

  # Search captures
  #
  # @yield [SearchQuery] query builder (future)
  # @return [Array<Result>] currently always an empty array
  def search
    # TODO: Implement search query DSL
    []
  end

  # Convert Mathpix Markdown to multiple formats
  #
  # Feature parity with Python mpxpy conversion_new() method
  # Async operation - returns Conversion object to poll for completion
  #
  # @param mmd [String] Mathpix Markdown content
  # @param formats [Array<Symbol, String>] output formats (a single format is accepted too)
  # @param options [Hash] only `:conversion_options` is read
  # @return [Conversion] conversion object (async)
  # @example
  #   conversion = client.convert_mmd(
  #     mmd: "\\frac{1}{2} + \\sqrt{3}",
  #     formats: [:pdf, :docx, :html]
  #   )
  #   conversion.wait_until_complete
  #   conversion.to_pdf_file('output.pdf')
  def convert_mmd(mmd:, formats:, **options)
    # The API body takes formats as a { "name" => true } hash.
    formats_hash = Array(formats).each_with_object({}) do |format, hash|
      hash[format.to_s] = true
    end

    response = post('/converter', {
      mmd: mmd,
      formats: formats_hash,
      conversion_options: options[:conversion_options] || {}
    })

    Conversion.new(self, conversion_id: response['conversion_id'], mmd: mmd, formats: formats)
  end

  # Get conversion status
  #
  # @param conversion_id [String] conversion ID
  # @return [Hash] status data
  def get_conversion_status(conversion_id)
    get("/converter/#{conversion_id}")
  end

  # Download file from URL
  #
  # NOTE(review): the API credentials are attached whatever host the URL
  # names. Confirm callers only pass Mathpix-hosted URLs, or restrict the
  # headers to the configured endpoint's host.
  #
  # @param url [String] download URL
  # @return [String] file content as bytes
  # @raise [APIError] when the response is not a 2xx
  def download(url)
    uri = URI(url)
    request = Net::HTTP::Get.new(uri)
    apply_auth_headers(request)

    response = make_request(uri, request)
    return response.body if response.is_a?(Net::HTTPSuccess)

    raise APIError.new(
      "Download failed: #{response.code}",
      status: response.code.to_i
    )
  end

  # Convert document (PDF, DOCX, PPTX) asynchronously
  #
  # Feature parity with Python mpxpy document processing
  #
  # @param document_path [String] path to document file, or a URL
  # @param document_type [Symbol] :pdf, :docx, :pptx (accepted for the
  #   caller's benefit; it is not currently used when building the request)
  # @param options [Hash] conversion options; `:formats` may be a single
  #   format or an array and defaults to `[:markdown]`
  # @return [String] conversion_id for polling (the response's 'pdf_id')
  # @example
  #   conversion_id = client.convert_document(
  #     document_path: 'paper.pdf',
  #     document_type: :pdf,
  #     formats: [:markdown, :latex]
  #   )
  def convert_document(document_path:, document_type:, **options)
    # URLs are passed through; local files are embedded as a data URI.
    src = url?(document_path) ? document_path : encode_image(document_path)

    request_body = {
      src: src,
      # Array() keeps this consistent with build_conversion_formats.
      formats: Array(options[:formats] || [:markdown]).map(&:to_s),
      conversion_formats: build_conversion_formats(options),
      **build_document_options(options)
    }

    post('/pdf', request_body)['pdf_id']
  end

  # Get document conversion status
  #
  # @param conversion_id [String] document conversion ID
  # @return [Hash] status data
  def get_document_status(conversion_id)
    get("/pdf/#{conversion_id}")
  end

  private

  # Build conversion formats hash
  #
  # @param options [Hash] user options
  # @return [Hash] `{ "format" => true }` per requested format; empty when
  #   no `:formats` option was given
  def build_conversion_formats(options)
    Array(options[:formats]).each_with_object({}) do |fmt, formats|
      formats[fmt.to_s] = true
    end
  end

  # Build document-specific options
  #
  # Boolean flags are forwarded as `true` only when truthy; `:quality` and
  # `:page_ranges` are forwarded with the caller's value.
  #
  # @param options [Hash] user options
  # @return [Hash] document options
  def build_document_options(options)
    {}.tap do |opts|
      %i[include_table_html include_diagram_svg include_line_data include_word_data].each do |flag|
        opts[flag] = true if options[flag]
      end
      %i[quality page_ranges].each do |key|
        opts[key] = options[key] if options[key]
      end
    end
  end

  # Prepare image source (URL or local file)
  #
  # Feature parity with Python mpxpy which supports both url= and file_path= parameters
  # HTTP URLs are passed through `config.upgrade_to_https` before validation.
  #
  # @param input [String, Hash] path, URL, or hash with :path/:url key
  # @param _options [Hash] accepted for call compatibility; not used
  # @return [Array<String, String>] src value and source reference
  # @raise [InvalidRequestError] if input looks like malformed URL, or is a
  #   Hash without a :url or :path entry
  def prepare_image_source(input, _options = {})
    return prepare_hash_source(input) if input.is_a?(Hash)

    # Upgrade BEFORE validation so plain-HTTP URLs can still validate.
    upgraded_input = config.upgrade_to_https(input)
    return [upgraded_input, upgraded_input] if url?(upgraded_input)

    # URL-like but not valid: report it as a URL problem, not a missing file.
    raise InvalidRequestError, "Invalid URL format: #{input}" if looks_like_url?(input)

    begin
      [encode_image(input), input] # source reference keeps the original path
    rescue SecurityError, Errno::ENOENT
      # A genuine file-path failure keeps its original error; anything that
      # does not even resemble a path is most likely a malformed URL.
      raise if looks_like_file_path?(input)

      raise InvalidRequestError, "Invalid URL format: #{input}"
    end
  end

  # Resolve a `{ url: ... }` / `{ path: ... }` source (symbol or string keys).
  #
  # @param input [Hash] source description
  # @return [Array<String, String>] src value and source reference
  # @raise [InvalidRequestError] if the URL is invalid or neither key is present
  def prepare_hash_source(input)
    url = input[:url] || input['url']
    if url
      url = config.upgrade_to_https(url)
      validate_url!(url)
      return [url, url]
    end

    path = input[:path] || input['path']
    return [encode_image(path), path] if path

    raise InvalidRequestError, 'Image source hash must include a :url or :path key'
  end

  # Check if string is a URL (delegates to `config.valid_url?`)
  #
  # @param str [String] string to check
  # @return [Boolean] false for any non-String
  def url?(str)
    return false unless str.is_a?(String)

    config.valid_url?(str)
  end

  # Check if string looks like a URL but may not be valid
  #
  # Matches an http(s):// or www. prefix at the very start of the string,
  # or "://" anywhere. Anchored with \A so a later line of a multi-line
  # string cannot satisfy the prefix check.
  # Used to provide better error messages for malformed URLs
  #
  # @param str [String] string to check
  # @return [Boolean]
  def looks_like_url?(str)
    return false unless str.is_a?(String)

    str.match?(%r{\A(?:https?://|www\.)|://})
  end

  # Validate URL and raise InvalidRequestError if malformed
  #
  # @param url [String] URL to validate
  # @raise [InvalidRequestError] if `config.valid_url?` rejects it
  def validate_url!(url)
    return if config.valid_url?(url)

    raise InvalidRequestError, "Invalid URL format: #{url}"
  end

  # Check if string looks like a file path
  #
  # Matches a leading ~, / or ., any slash or backslash, or a known
  # image/document extension at the very end of the string (\A/\z anchor
  # the whole string, not individual lines).
  #
  # @param str [String] string to check
  # @return [Boolean]
  def looks_like_file_path?(str)
    return false unless str.is_a?(String)

    str.match?(%r{\A[~/.]|/|\\|\.(?:png|jpe?g|gif|webp|pdf|docx|pptx)\z}i)
  end

  # Encode a local file as a base64 data URI
  #
  # The path is first passed through `config.sanitize_path`; a nil result
  # is treated as a rejected path.
  #
  # @param path [String] path to image file
  # @return [String] data URI
  # @raise [SecurityError] if `config.sanitize_path` returns nil
  def encode_image(path)
    sanitized_path = config.sanitize_path(path)
    raise SecurityError, "Invalid or dangerous file path: #{path}" if sanitized_path.nil?

    content = File.binread(sanitized_path)
    "data:#{detect_mime_type(sanitized_path)};base64,#{Base64.strict_encode64(content)}"
  end

  # Detect MIME type from file extension (case-insensitive)
  #
  # @param path [String] file path
  # @return [String] MIME type, 'application/octet-stream' when unknown
  def detect_mime_type(path)
    MIME_TYPES.fetch(File.extname(path).downcase, 'application/octet-stream')
  end

  # Build request options from user input
  #
  # Only recognised keys are forwarded; everything else in `options` is
  # ignored.
  #
  # @param options [Hash] user options
  # @return [Hash] request options to merge into the /text body
  def build_request_options(options)
    {}.tap do |opts|
      # Data options: forward the caller's :data_options hash (as set by
      # CaptureBuilder#with_data_options) and fold in the :include_latex
      # shortcut. Work on a copy so the caller's hash is never mutated.
      data_options = Hash(options[:data_options]).dup
      data_options[:include_latex] = true if options[:include_latex]
      opts[:data_options] = data_options unless data_options.empty?

      # Metadata
      opts[:metadata] = options[:metadata] if options[:metadata]
      opts[:tags] = Array(options[:tags]) if options[:tags]

      # Recognition options (key? so an explicit false is forwarded too)
      opts[:rm_spaces] = options[:rm_spaces] if options.key?(:rm_spaces)
      opts[:idiomatic_eqn_arrays] = options[:idiomatic_eqn_arrays] if options.key?(:idiomatic_eqn_arrays)

      # Confidence threshold
      opts[:confidence_threshold] = options[:confidence_threshold] if options[:confidence_threshold]

      # Chemistry: either flag requests SMILES output
      opts[:include_smiles] = true if options[:chemistry] || options[:include_smiles]

      # Alphabets
      opts[:alphabets_allowed] = options[:alphabets] if options[:alphabets]
    end
  end

  # Set the credential and User-Agent headers shared by every request.
  #
  # @param request [Net::HTTPRequest] request to decorate
  # @return [Net::HTTPRequest] the same request
  def apply_auth_headers(request)
    request['app_id'] = config.app_id
    request['app_key'] = config.app_key
    request['User-Agent'] = config.user_agent
    request
  end

  # Make POST request
  #
  # @param path [String] API endpoint path, appended to `config.endpoint`
  # @param body [Hash] request body, sent as JSON
  # @return [Hash] parsed response
  def post(path, body)
    uri = URI("#{config.endpoint}#{path}")
    request = Net::HTTP::Post.new(uri)
    request['Content-Type'] = 'application/json'
    apply_auth_headers(request)
    request.body = JSON.generate(body)

    handle_response(make_request(uri, request))
  end

  # Make GET request
  #
  # @param path [String] API endpoint path, appended to `config.endpoint`
  # @param params [Hash] query parameters
  # @return [Hash] parsed response
  def get(path, params: {})
    uri = URI("#{config.endpoint}#{path}")
    uri.query = URI.encode_www_form(params) unless params.empty?

    request = apply_auth_headers(Net::HTTP::Get.new(uri))

    handle_response(make_request(uri, request))
  end

  # Execute HTTP request with error handling
  #
  # Both the connect and the read phase are bounded by `config.timeout`,
  # matching the duration reported in the TimeoutError message.
  #
  # @param uri [URI] request URI
  # @param request [Net::HTTPRequest] HTTP request object
  # @return [Net::HTTPResponse]
  # @raise [TimeoutError] on connect or read timeout
  # @raise [NetworkError] on any other StandardError raised while sending
  def make_request(uri, request)
    Net::HTTP.start(uri.hostname, uri.port,
                    use_ssl: uri.scheme == 'https',
                    open_timeout: config.timeout,
                    read_timeout: config.timeout) do |http|
      http.request(request)
    end
  rescue Net::OpenTimeout, Net::ReadTimeout => e
    raise TimeoutError, "Request timed out after #{config.timeout}s: #{e.message}"
  rescue StandardError => e
    raise NetworkError, "Network error: #{e.message}"
  end

  # Handle API response
  #
  # @param response [Net::HTTPResponse]
  # @return [Hash] parsed response body
  # @raise [RateLimitError] on 429 (checked before the generic 4xx branch)
  # @raise [APIError] on other 4xx or unexpected responses
  # @raise [ServerError] on 5xx
  def handle_response(response)
    case response
    when Net::HTTPSuccess
      JSON.parse(response.body)
    when Net::HTTPTooManyRequests
      raise RateLimitError.new(
        'Rate limit exceeded',
        retry_after: response['Retry-After']&.to_i
      )
    when Net::HTTPClientError
      error_data = parse_error_body(response.body)
      raise APIError.new(
        error_data['error'] || 'Client error',
        status: response.code.to_i,
        details: error_data
      )
    when Net::HTTPServerError
      raise ServerError.new(
        'Server error',
        status: response.code.to_i
      )
    else
      raise APIError.new(
        "Unexpected response: #{response.code}",
        status: response.code.to_i
      )
    end
  end

  # Parse an error response body, tolerating anything that is not a JSON object.
  #
  # @param body [String, nil] raw response body
  # @return [Hash] the parsed object, or {} when the body is missing,
  #   is not valid JSON, or is valid JSON of another type (array, string, ...)
  def parse_error_body(body)
    parsed = JSON.parse(body)
    parsed.is_a?(Hash) ? parsed : {}
  rescue JSON::ParserError, TypeError
    {}
  end
end
|
439
|
+
end
|