tabscanner 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,227 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday'
4
+ require 'faraday/multipart'
5
+ require 'json'
6
+
7
+ module Tabscanner
8
+ # Handles HTTP requests to the Tabscanner API
9
+ #
10
+ # This class manages multipart form data uploads for image processing
11
+ # and handles all HTTP communication with proper error handling.
12
+ #
13
+ # @example Submit a file path
14
+ # Request.submit_receipt('/path/to/receipt.jpg')
15
+ #
16
+ # @example Submit an IO stream
17
+ # File.open('/path/to/receipt.jpg', 'rb') do |file|
18
+ # Request.submit_receipt(file)
19
+ # end
20
+ class Request
21
# Upload a receipt image to the processing endpoint and return its token.
#
# The configuration is validated first, the input is normalised to an
# IO/filename pair, and the image is POSTed as multipart form data to
# `/api/2/process`. When debug mode is on, the exchange is logged.
#
# @param file_path_or_io [String, IO] Local file path or IO stream containing image data
# @return [String] Token for result retrieval
# @raise [UnauthorizedError] when API key is invalid (401)
# @raise [ValidationError] when request validation fails (422)
# @raise [ServerError] when server errors occur (500+)
# @raise [Error] for other API errors
def self.submit_receipt(file_path_or_io)
  settings = Tabscanner.config
  settings.validate!

  upload_io, upload_name = normalize_file_input(file_path_or_io)

  reply = build_connection(settings).post('/api/2/process') do |request|
    request.body = build_multipart_body(upload_io, upload_name)
  end

  log_request_response('POST', '/api/2/process', reply, settings) if settings.debug?

  handle_response(reply)
ensure
  # Only a handle opened here (path input) is closed; a caller-supplied IO
  # stays open and remains the caller's responsibility.
  upload_io.close if upload_io && file_path_or_io.is_a?(String)
end
52
+
53
+ private
54
+
55
# Normalize file input to an IO object and an upload filename.
#
# A String is treated as a path and opened in binary mode; the handle is
# returned to the caller (submit_receipt closes it in its `ensure`).
# Anything else is passed through untouched as the IO to upload.
#
# @param file_path_or_io [String, IO] File path or IO stream
# @return [Array<IO, String>] IO object and filename
# @raise [Error] when a path is given that does not exist or is a directory
def self.normalize_file_input(file_path_or_io)
  if file_path_or_io.is_a?(String)
    # A directory "exists" but cannot be uploaded; reject it up front
    # instead of failing later while the body is being read.
    usable = File.exist?(file_path_or_io) && !File.directory?(file_path_or_io)
    raise Error, "File not found: #{file_path_or_io}" unless usable

    return [File.open(file_path_or_io, 'rb'), File.basename(file_path_or_io)]
  end

  # Some IO-like objects respond to #path but report nil (for example a
  # Tempfile after #unlink); fall back to the generic name in that case
  # rather than letting File.basename(nil) raise TypeError.
  path = file_path_or_io.respond_to?(:path) ? file_path_or_io.path : nil
  filename = path.nil? || path.to_s.empty? ? 'image' : File.basename(path)

  [file_path_or_io, filename]
end
72
+
73
# Create the Faraday connection used for receipt uploads.
#
# Falls back to the public API host when the configuration has no base
# URL. The multipart middleware is registered ahead of url_encoded, and
# the API key and User-Agent are attached as default headers.
#
# @param config [Config] Configuration instance
# @return [Faraday::Connection] Configured connection
def self.build_connection(config)
  endpoint = config.base_url || "https://api.tabscanner.com"
  agent = "Tabscanner Ruby Gem #{Tabscanner::VERSION}"

  Faraday.new(url: endpoint) do |builder|
    builder.request :multipart
    builder.request :url_encoded
    builder.adapter Faraday.default_adapter
    builder.headers['apikey'] = config.api_key
    builder.headers['User-Agent'] = agent
  end
end
87
+
88
# Build multipart form data for file upload.
#
# The image is wrapped in a file part carrying the MIME type derived from
# the filename. Faraday::Multipart::FilePart is the class behind the
# deprecated Faraday::UploadIO alias and takes the same arguments.
#
# @param file_io [IO] File IO stream
# @param filename [String] Name of the file
# @return [Hash] Multipart form data with a single :image part
def self.build_multipart_body(file_io, filename)
  {
    image: Faraday::Multipart::FilePart.new(file_io, mime_type_for_file(filename), filename)
  }
end
97
+
98
# Extension (lower-case, with leading dot) to MIME type lookup.
MIME_TYPES_BY_EXTENSION = {
  '.jpg' => 'image/jpeg',
  '.jpeg' => 'image/jpeg',
  '.png' => 'image/png',
  '.gif' => 'image/gif',
  '.bmp' => 'image/bmp',
  '.tiff' => 'image/tiff',
  '.tif' => 'image/tiff',
  '.webp' => 'image/webp',
  '.heic' => 'image/heic',
  '.heif' => 'image/heif',
  '.pdf' => 'application/pdf'
}.freeze

# MIME type reported when the extension is missing or not recognised.
DEFAULT_MIME_TYPE = 'image/jpeg'

# Determine MIME type for a file from its extension (case-insensitive).
#
# @param filename [String] Name of the file
# @return [String] MIME type; DEFAULT_MIME_TYPE when the extension is unknown
def self.mime_type_for_file(filename)
  extension = File.extname(filename.to_s).downcase
  MIME_TYPES_BY_EXTENSION.fetch(extension, DEFAULT_MIME_TYPE)
end
118
+
119
# Turn the upload response into a token or raise the matching error.
#
# 200/201 are parsed for a token. 401, 422 and 5xx map to
# UnauthorizedError, ValidationError and ServerError respectively; every
# other status raises the base Error. All raised errors carry the raw
# response snapshot.
#
# @param response [Faraday::Response] HTTP response
# @return [String] Token from response
# @raise [UnauthorizedError, ValidationError, ServerError, Error] Based on status code
def self.handle_response(response)
  debug_payload = build_raw_response_data(response)
  status = response.status

  return parse_success_response(response) if [200, 201].include?(status)

  if status == 401
    raise UnauthorizedError.new("Invalid API key or authentication failed", raw_response: debug_payload)
  end

  # The remaining branches prefer a message taken from the response body.
  detail = parse_error_message(response)

  if status == 422
    raise ValidationError.new(detail || "Request validation failed", raw_response: debug_payload)
  elsif (500..599).cover?(status)
    raise ServerError.new(detail || "Server error occurred", raw_response: debug_payload)
  end

  raise Error.new(detail || "Request failed with status #{status}", raw_response: debug_payload)
end
143
+
144
# Parse a successful (2xx) response and extract the processing token.
#
# The body must be a JSON object. A body with `"success": false` is treated
# as an API-level failure even though the HTTP status was 2xx, and is mapped
# to an error class by its `code` field. The token is read from `token`,
# `id` or `request_id` (first one present); numeric tokens are returned as
# strings.
#
# @param response [Faraday::Response] HTTP response
# @return [String] Token value
# @raise [UnauthorizedError, ValidationError, ServerError] when the body reports failure with code 401 / 422 / 5xx
# @raise [Error] when the body reports any other failure, is not a JSON object, or has no token
def self.parse_success_response(response)
  # nil.to_s is "", which JSON.parse rejects with ParserError (handled below)
  # instead of raising TypeError for a nil body.
  data = JSON.parse(response.body.to_s)

  # Valid JSON that is not an object (null, array, scalar) cannot hold a token.
  raise Error, "Invalid JSON response from API" unless data.is_a?(Hash)

  # Check if the API returned an error even with 200 status
  if data['success'] == false
    error_message = data['message'] || "API request failed"
    error_class =
      case data['code']
      when 401 then UnauthorizedError
      when 422 then ValidationError
      when 500..599 then ServerError
      else Error
      end
    raise error_class.new(error_message, raw_response: build_raw_response_data(response))
  end

  token = data['token'] || data['id'] || data['request_id']
  # Numeric ids are valid tokens; normalise them to the documented String.
  token = token.to_s if token.is_a?(Numeric)

  raise Error, "No token found in response" unless token.is_a?(String) && !token.empty?

  token
rescue JSON::ParserError
  raise Error, "Invalid JSON response from API"
end
175
+
176
# Parse a human-readable error message from a response body.
#
# For a JSON object the first present of `error`, `message`, or the first
# entry of `errors` is used (non-String values are stringified). A JSON
# string body is returned as-is. Other JSON values yield nil. A body that is
# not JSON at all is returned raw when shorter than 200 characters.
#
# @param response [Faraday::Response] HTTP response
# @return [String, nil] Error message if available
def self.parse_error_message(response)
  body = response.body
  return nil if body.nil? || body.empty?

  data = JSON.parse(body)

  unless data.is_a?(Hash)
    # Arrays, numbers, null etc. carry no recognisable message field.
    return data.is_a?(String) && !data.empty? ? data : nil
  end

  # Array() tolerates `errors` being a single value instead of a list.
  message = data['error'] || data['message'] || Array(data['errors']).first
  return nil if message.nil?

  message.is_a?(String) ? message : message.to_s
rescue JSON::ParserError
  # If JSON parsing fails, return raw body if it's short enough
  body.length < 200 ? body : nil
end
190
+
191
# Capture status, headers and body of a response for attaching to errors.
#
# @param response [Faraday::Response] HTTP response
# @return [Hash] Snapshot with :status, :headers (plain Hash) and :body
def self.build_raw_response_data(response)
  snapshot = {}
  snapshot[:status] = response.status
  snapshot[:headers] = response.headers.to_hash
  snapshot[:body] = response.body
  snapshot
end
201
+
202
# Log request and response details for debugging.
#
# The API key is never written; only the User-Agent request header is
# echoed. Response bodies longer than 500 characters are cut to exactly
# 500 characters and marked as truncated.
#
# @param method [String] HTTP method
# @param endpoint [String] API endpoint
# @param response [Faraday::Response] HTTP response
# @param config [Config] Configuration instance (supplies the logger)
# @return [void]
def self.log_request_response(method, endpoint, response, config)
  logger = config.logger

  # Log request details
  logger.debug("HTTP Request: #{method.upcase} #{endpoint}")
  logger.debug("Request Headers: apikey=[REDACTED], User-Agent=#{response.env.request_headers['User-Agent']}")

  # Log response details
  logger.debug("HTTP Response: #{response.status}")
  logger.debug("Response Headers: #{response.headers.to_hash}")

  # Log response body (truncated if too long)
  body = response.body
  if body && body.length > 500
    # body[0, 500] takes exactly 500 characters; the inclusive range 0..500
    # would take 501, one more than the threshold.
    logger.debug("Response Body: #{body[0, 500]}... (truncated)")
  else
    logger.debug("Response Body: #{body}")
  end
end
226
+ end
227
+ end
@@ -0,0 +1,192 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday'
4
+ require 'json'
5
+
6
+ module Tabscanner
7
+ # Handles polling for OCR processing results
8
+ #
9
+ # This class manages the polling logic to retrieve processing results
10
+ # from the Tabscanner API using a token, with retry logic and timeout handling.
11
+ #
12
+ # @example Poll for results with default timeout
13
+ # Result.get_result('token123')
14
+ #
15
+ # @example Poll for results with custom timeout
16
+ # Result.get_result('token123', timeout: 30)
17
+ class Result
18
# Poll for OCR processing results using a token.
#
# Requests `/api/result/<token>` repeatedly, pausing one second between
# attempts while the reported status is processing/pending/in_progress, and
# returns as soon as a completed status is seen. The timeout is checked
# before each request, using the monotonic clock so that wall-clock
# adjustments cannot shorten or extend the wait.
#
# @param token [String] Token from submit_receipt call
# @param timeout [Integer] Maximum time to wait in seconds (default: 15)
# @return [Hash] Parsed receipt data when processing is complete
# @raise [UnauthorizedError] when API key is invalid (401)
# @raise [ValidationError] when token is invalid (422)
# @raise [ServerError] when server errors occur (500+)
# @raise [Error] for timeout, failed/unknown status, malformed payloads or other API errors
def self.get_result(token, timeout: 15)
  config = Tabscanner.config
  config.validate!

  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  conn = build_connection(config)
  endpoint = "/api/result/#{token}"

  config.logger.debug("Starting result polling for token: #{token} (timeout: #{timeout}s)") if config.debug?

  loop do
    # Check timeout
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    raise Error, "Timeout waiting for result after #{timeout} seconds" if elapsed >= timeout

    # Make GET request to result endpoint
    response = conn.get(endpoint)

    # Debug logging for request/response
    log_request_response('GET', endpoint, response, config) if config.debug?

    result = handle_response(response)

    # A JSON array/scalar/null cannot carry a status; fail with a library
    # error instead of crashing on result['status'].
    raise Error, "Invalid JSON response from API" unless result.is_a?(Hash)

    status = result['status']

    case status
    when 'complete', 'completed', 'success', 'done'
      config.logger.debug("Result ready for token: #{token}") if config.debug?
      return extract_result_data(result)
    when 'processing', 'pending', 'in_progress'
      # Wait 1 second before next poll
      config.logger.debug("Result still processing for token: #{token}, waiting 1s...") if config.debug?
      sleep 1
    when 'failed', 'error'
      error_message = result['error'] || result['message'] || 'Processing failed'
      config.logger.debug("Result failed for token: #{token} - #{error_message}") if config.debug?
      raise Error, error_message
    else
      # Unknown status - treat as error
      config.logger.debug("Unknown status for token: #{token} - #{status}") if config.debug?
      raise Error, "Unknown processing status: #{status}"
    end
  end
end
72
+
73
+ private
74
+
75
# Create the Faraday connection used for result polling.
#
# Falls back to the public API host when the configuration has no base
# URL. The API key, User-Agent and a JSON Accept header are attached as
# default headers.
#
# @param config [Config] Configuration instance
# @return [Faraday::Connection] Configured connection
def self.build_connection(config)
  endpoint = config.base_url || "https://api.tabscanner.com"
  agent = "Tabscanner Ruby Gem #{Tabscanner::VERSION}"

  Faraday.new(url: endpoint) do |builder|
    builder.request :url_encoded
    builder.adapter Faraday.default_adapter
    builder.headers['apikey'] = config.api_key
    builder.headers['User-Agent'] = agent
    builder.headers['Accept'] = 'application/json'
  end
end
89
+
90
# Turn a polling response into parsed JSON or raise the matching error.
#
# 200/201 bodies are parsed and returned. 401, 422 and 5xx map to
# UnauthorizedError, ValidationError and ServerError respectively; every
# other status raises the base Error. All raised errors carry the raw
# response snapshot.
#
# @param response [Faraday::Response] HTTP response
# @return [Hash] Parsed JSON response
# @raise [UnauthorizedError, ValidationError, ServerError, Error] Based on status code
def self.handle_response(response)
  raw = build_raw_response_data(response)
  code = response.status

  return parse_json_response(response) if code == 200 || code == 201

  if code == 401
    raise UnauthorizedError.new("Invalid API key or authentication failed", raw_response: raw)
  end

  # Pick the error class and the message used when the body offers none.
  error_class, fallback =
    if code == 422
      [ValidationError, "Invalid token or request"]
    elsif (500..599).cover?(code)
      [ServerError, "Server error occurred"]
    else
      [Error, "Request failed with status #{code}"]
    end

  raise error_class.new(parse_error_message(response) || fallback, raw_response: raw)
end
114
+
115
# Parse JSON response body.
#
# @param response [Faraday::Response] HTTP response
# @return [Hash] Parsed JSON data
# @raise [Error] if the body is missing, empty or not valid JSON
def self.parse_json_response(response)
  # nil.to_s is "", which JSON.parse rejects with ParserError; without the
  # coercion a nil body would escape as an unrescued TypeError.
  JSON.parse(response.body.to_s)
rescue JSON::ParserError
  raise Error, "Invalid JSON response from API"
end
124
+
125
# Pull the receipt payload out of a completed polling response.
#
# The value under 'data' wins when that key exists, then the value under
# 'receipt'. Otherwise a copy of the response without the
# status/message/timestamp/id entries is returned.
#
# @param result [Hash] Parsed response data
# @return [Hash] Receipt data
def self.extract_result_data(result)
  %w[data receipt].each do |wrapper_key|
    return result[wrapper_key] if result.key?(wrapper_key)
  end

  metadata_keys = %w[status message timestamp id]
  result.reject { |key, _value| metadata_keys.include?(key) }
end
140
+
141
# Parse a human-readable error message from a response body.
#
# For a JSON object the first present of `error`, `message`, or the first
# entry of `errors` is used (non-String values are stringified). A JSON
# string body is returned as-is. Other JSON values yield nil. A body that is
# not JSON at all is returned raw when shorter than 200 characters.
#
# @param response [Faraday::Response] HTTP response
# @return [String, nil] Error message if available
def self.parse_error_message(response)
  body = response.body
  return nil if body.nil? || body.empty?

  data = JSON.parse(body)

  unless data.is_a?(Hash)
    # Arrays, numbers, null etc. carry no recognisable message field.
    return data.is_a?(String) && !data.empty? ? data : nil
  end

  # Array() tolerates `errors` being a single value instead of a list.
  message = data['error'] || data['message'] || Array(data['errors']).first
  return nil if message.nil?

  message.is_a?(String) ? message : message.to_s
rescue JSON::ParserError
  # If JSON parsing fails, return raw body if it's short enough
  body.length < 200 ? body : nil
end
155
+
156
# Capture status, headers and body of a response for attaching to errors.
#
# @param response [Faraday::Response] HTTP response
# @return [Hash] Snapshot with :status, :headers (plain Hash) and :body
def self.build_raw_response_data(response)
  snapshot = {}
  snapshot[:status] = response.status
  snapshot[:headers] = response.headers.to_hash
  snapshot[:body] = response.body
  snapshot
end
166
+
167
# Log request and response details for debugging.
#
# The API key is never written; only the User-Agent request header is
# echoed. Response bodies longer than 500 characters are cut to exactly
# 500 characters and marked as truncated.
#
# @param method [String] HTTP method
# @param endpoint [String] API endpoint
# @param response [Faraday::Response] HTTP response
# @param config [Config] Configuration instance (supplies the logger)
# @return [void]
def self.log_request_response(method, endpoint, response, config)
  logger = config.logger

  # Log request details
  logger.debug("HTTP Request: #{method.upcase} #{endpoint}")
  logger.debug("Request Headers: apikey=[REDACTED], User-Agent=#{response.env.request_headers['User-Agent']}")

  # Log response details
  logger.debug("HTTP Response: #{response.status}")
  logger.debug("Response Headers: #{response.headers.to_hash}")

  # Log response body (truncated if too long)
  body = response.body
  if body && body.length > 500
    # body[0, 500] takes exactly 500 characters; the inclusive range 0..500
    # would take 501, one more than the threshold.
    logger.debug("Response Body: #{body[0, 500]}... (truncated)")
  else
    logger.debug("Response Body: #{body}")
  end
end
191
+ end
192
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Tabscanner
  # Version of the gem; interpolated into the User-Agent header sent with
  # every API request.
  VERSION = '0.1.0'
end
data/lib/tabscanner.rb ADDED
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "tabscanner/version"
4
+ require_relative "tabscanner/errors/base_error"
5
+ require_relative "tabscanner/errors/configuration_error"
6
+ require_relative "tabscanner/errors/unauthorized_error"
7
+ require_relative "tabscanner/errors/validation_error"
8
+ require_relative "tabscanner/errors/server_error"
9
+ require_relative "tabscanner/config"
10
+ require_relative "tabscanner/http_client"
11
+ require_relative "tabscanner/request"
12
+ require_relative "tabscanner/result"
13
+ require_relative "tabscanner/client"
14
+ require_relative "tabscanner/credits"
15
+
16
module Tabscanner
  # Module-level convenience API; each method forwards to the class that
  # implements it.
  class << self
    # Submit a receipt image for OCR processing.
    #
    # @param file_path_or_io [String, IO] Local file path or IO stream containing image data
    # @return [String] Token for later result retrieval
    # @see Client.submit_receipt
    def submit_receipt(file_path_or_io)
      Client.submit_receipt(file_path_or_io)
    end

    # Poll for OCR processing results using a token.
    #
    # @param token [String] Token from submit_receipt call
    # @param timeout [Integer] Maximum time to wait in seconds (default: 15)
    # @return [Hash] Parsed receipt data when processing is complete
    # @see Client.get_result
    def get_result(token, timeout: 15)
      Client.get_result(token, timeout: timeout)
    end

    # Check remaining API credits for the authenticated account.
    #
    # @return [Integer] Number of remaining credits
    # @see Credits.get_credits
    def get_credits
      Credits.get_credits
    end
  end
end
@@ -0,0 +1,4 @@
1
module Tabscanner
  # Gem version string.
  VERSION: String
  # See the writing guide of rbs: https://github.com/ruby/rbs#guides

  # Submit a receipt image (path or IO) and get back a processing token.
  def self.submit_receipt: (String | IO file_path_or_io) -> String

  # Poll for the parsed receipt belonging to a token; timeout is in seconds.
  def self.get_result: (String token, ?timeout: Numeric) -> Hash[String, untyped]

  # Remaining API credits for the authenticated account.
  def self.get_credits: () -> Integer
end
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tabscanner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Forrest Chang
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2025-07-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday-multipart
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: vcr
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '6.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '6.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: webmock
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.22'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.22'
83
+ description: A Ruby gem that provides a simple interface for submitting receipt images
84
+ to the Tabscanner API and retrieving parsed receipt data. Features include automatic
85
+ polling, comprehensive error handling, debug mode, and environment-based configuration.
86
+ email:
87
+ - fchang@hedgeye.com
88
+ executables: []
89
+ extensions: []
90
+ extra_rdoc_files: []
91
+ files:
92
+ - ".rspec"
93
+ - README.md
94
+ - Rakefile
95
+ - docs/architecture.md
96
+ - docs/prd.md
97
+ - docs/stories/1.1.story.md
98
+ - docs/stories/1.2.story.md
99
+ - docs/stories/1.3.story.md
100
+ - docs/stories/1.4.story.md
101
+ - docs/stories/1.5.story.md
102
+ - docs/stories/1.6.story.md
103
+ - docs/stories/2.1.story.md
104
+ - examples/README.md
105
+ - examples/batch_process.rb
106
+ - examples/check_credits.rb
107
+ - examples/process_receipt.rb
108
+ - examples/quick_test.rb
109
+ - lib/tabscanner.rb
110
+ - lib/tabscanner/client.rb
111
+ - lib/tabscanner/config.rb
112
+ - lib/tabscanner/credits.rb
113
+ - lib/tabscanner/errors/base_error.rb
114
+ - lib/tabscanner/errors/configuration_error.rb
115
+ - lib/tabscanner/errors/server_error.rb
116
+ - lib/tabscanner/errors/unauthorized_error.rb
117
+ - lib/tabscanner/errors/validation_error.rb
118
+ - lib/tabscanner/http_client.rb
119
+ - lib/tabscanner/request.rb
120
+ - lib/tabscanner/result.rb
121
+ - lib/tabscanner/version.rb
122
+ - sig/tabscanner.rbs
123
+ homepage: https://github.com/fkchang/tabscanner_ruby
124
+ licenses:
125
+ - MIT
126
+ metadata:
127
+ homepage_uri: https://github.com/fkchang/tabscanner_ruby
128
+ source_code_uri: https://github.com/fkchang/tabscanner_ruby
129
+ changelog_uri: https://github.com/fkchang/tabscanner_ruby/blob/main/CHANGELOG.md
130
+ post_install_message:
131
+ rdoc_options: []
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: 3.0.0
139
+ required_rubygems_version: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ requirements: []
145
+ rubygems_version: 3.5.22
146
+ signing_key:
147
+ specification_version: 4
148
+ summary: Ruby gem for processing receipt images using the Tabscanner API
149
+ test_files: []