reducto_ai 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +6 -0
- data/README.md +2 -0
- data/Rakefile +7 -0
- data/lib/reducto_ai/client.rb +98 -2
- data/lib/reducto_ai/config.rb +45 -1
- data/lib/reducto_ai/engine.rb +6 -0
- data/lib/reducto_ai/errors.rb +63 -1
- data/lib/reducto_ai/resources/edit.rb +67 -0
- data/lib/reducto_ai/resources/extract.rb +78 -0
- data/lib/reducto_ai/resources/jobs.rb +117 -0
- data/lib/reducto_ai/resources/parse.rb +84 -0
- data/lib/reducto_ai/resources/pipeline.rb +74 -0
- data/lib/reducto_ai/resources/split.rb +61 -0
- data/lib/reducto_ai/version.rb +2 -1
- data/lib/reducto_ai.rb +37 -0
- metadata +9 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fca3ba2a817b1f51125400a08cdee95435107e7a1f16df0d8c2cd7cbb3304a8a
|
|
4
|
+
data.tar.gz: 80fe552733b22de584c8999e55b36930d9d6e59578486728b70d3d169e26c9d4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0b3d764d2e1220b1e3fe15eba6ac9d56c9046abafdd68437ed2aaa87e935ed626c58573659af664b9077b178454fe5d961ef1d2621e97ef8c6d080aa22b240b8
|
|
7
|
+
data.tar.gz: d45dced01076b031189f8f299c3438f9698df14738b41abc0012932a7a52703271ca0ee6fa96557c5915fa4c98365e881bf1bb7a638367cb9e5e4c9674623f01
|
data/.yardopts
ADDED
data/README.md
CHANGED
data/Rakefile
CHANGED
data/lib/reducto_ai/client.rb
CHANGED
|
@@ -11,9 +11,60 @@ require_relative "resources/pipeline"
|
|
|
11
11
|
require_relative "resources/jobs"
|
|
12
12
|
|
|
13
13
|
module ReductoAI
|
|
14
|
+
# HTTP client for the Reducto document intelligence API.
|
|
15
|
+
#
|
|
16
|
+
# Provides access to all Reducto API endpoints through resource objects.
|
|
17
|
+
# Configure globally via {ReductoAI.configure} or pass parameters directly
|
|
18
|
+
# to the constructor.
|
|
19
|
+
#
|
|
20
|
+
# @example Using global configuration
|
|
21
|
+
# ReductoAI.configure do |config|
|
|
22
|
+
# config.api_key = ENV["REDUCTO_API_KEY"]
|
|
23
|
+
# end
|
|
24
|
+
#
|
|
25
|
+
# client = ReductoAI::Client.new
|
|
26
|
+
# client.parse.sync(input: "https://example.com/doc.pdf")
|
|
27
|
+
#
|
|
28
|
+
# @example Using per-instance configuration
|
|
29
|
+
# client = ReductoAI::Client.new(
|
|
30
|
+
# api_key: "your-key",
|
|
31
|
+
# read_timeout: 60
|
|
32
|
+
# )
|
|
33
|
+
#
|
|
34
|
+
# @see Resources::Parse
|
|
35
|
+
# @see Resources::Extract
|
|
36
|
+
# @see Resources::Split
|
|
37
|
+
# @see Resources::Edit
|
|
38
|
+
# @see Resources::Pipeline
|
|
39
|
+
# @see Resources::Jobs
|
|
14
40
|
class Client
|
|
15
|
-
|
|
16
|
-
|
|
41
|
+
# @return [String] Reducto API key
|
|
42
|
+
attr_reader :api_key
|
|
43
|
+
|
|
44
|
+
# @return [String] Base URL for API requests
|
|
45
|
+
attr_reader :base_url
|
|
46
|
+
|
|
47
|
+
# @return [Logger] Logger instance for debugging
|
|
48
|
+
attr_reader :logger
|
|
49
|
+
|
|
50
|
+
# @return [Integer] Connection open timeout in seconds
|
|
51
|
+
attr_reader :open_timeout
|
|
52
|
+
|
|
53
|
+
# @return [Integer] Request read timeout in seconds
|
|
54
|
+
attr_reader :read_timeout
|
|
55
|
+
|
|
56
|
+
# Creates a new Reducto API client.
|
|
57
|
+
#
|
|
58
|
+
# @param api_key [String, nil] Reducto API key (defaults to global config)
|
|
59
|
+
# @param base_url [String, nil] API base URL (defaults to global config)
|
|
60
|
+
# @param logger [Logger, nil] Logger instance (defaults to global config)
|
|
61
|
+
# @param open_timeout [Integer, nil] Connection timeout in seconds (defaults to global config)
|
|
62
|
+
# @param read_timeout [Integer, nil] Read timeout in seconds (defaults to global config)
|
|
63
|
+
#
|
|
64
|
+
# @raise [ArgumentError] if api_key is missing or empty
|
|
65
|
+
#
|
|
66
|
+
# @example
|
|
67
|
+
# client = ReductoAI::Client.new(api_key: "sk-...")
|
|
17
68
|
def initialize(api_key: nil, base_url: nil, logger: nil, open_timeout: nil, read_timeout: nil)
|
|
18
69
|
configuration = ReductoAI.config
|
|
19
70
|
|
|
@@ -26,30 +77,68 @@ module ReductoAI
|
|
|
26
77
|
raise ArgumentError, "Missing API key for ReductoAI" if @api_key.to_s.empty?
|
|
27
78
|
end
|
|
28
79
|
|
|
80
|
+
# Returns the Parse resource for document parsing operations.
|
|
81
|
+
#
|
|
82
|
+
# @return [Resources::Parse] parse operations interface
|
|
83
|
+
# @see Resources::Parse
|
|
29
84
|
def parse
|
|
30
85
|
@parse ||= Resources::Parse.new(self)
|
|
31
86
|
end
|
|
32
87
|
|
|
88
|
+
# Returns the Extract resource for structured data extraction.
|
|
89
|
+
#
|
|
90
|
+
# @return [Resources::Extract] extract operations interface
|
|
91
|
+
# @see Resources::Extract
|
|
33
92
|
def extract
|
|
34
93
|
@extract ||= Resources::Extract.new(self)
|
|
35
94
|
end
|
|
36
95
|
|
|
96
|
+
# Returns the Split resource for document splitting operations.
|
|
97
|
+
#
|
|
98
|
+
# @return [Resources::Split] split operations interface
|
|
99
|
+
# @see Resources::Split
|
|
37
100
|
def split
|
|
38
101
|
@split ||= Resources::Split.new(self)
|
|
39
102
|
end
|
|
40
103
|
|
|
104
|
+
# Returns the Edit resource for PDF markup operations.
|
|
105
|
+
#
|
|
106
|
+
# @return [Resources::Edit] edit operations interface
|
|
107
|
+
# @see Resources::Edit
|
|
41
108
|
def edit
|
|
42
109
|
@edit ||= Resources::Edit.new(self)
|
|
43
110
|
end
|
|
44
111
|
|
|
112
|
+
# Returns the Pipeline resource for multi-step workflows.
|
|
113
|
+
#
|
|
114
|
+
# @return [Resources::Pipeline] pipeline operations interface
|
|
115
|
+
# @see Resources::Pipeline
|
|
45
116
|
def pipeline
|
|
46
117
|
@pipeline ||= Resources::Pipeline.new(self)
|
|
47
118
|
end
|
|
48
119
|
|
|
120
|
+
# Returns the Jobs resource for job management operations.
|
|
121
|
+
#
|
|
122
|
+
# @return [Resources::Jobs] jobs operations interface
|
|
123
|
+
# @see Resources::Jobs
|
|
49
124
|
def jobs
|
|
50
125
|
@jobs ||= Resources::Jobs.new(self)
|
|
51
126
|
end
|
|
52
127
|
|
|
128
|
+
# Makes an HTTP request to the Reducto API.
|
|
129
|
+
#
|
|
130
|
+
# @param method [Symbol] HTTP method (:get, :post, :put, :delete)
|
|
131
|
+
# @param path [String] API endpoint path
|
|
132
|
+
# @param body [Hash, nil] request body
|
|
133
|
+
# @param params [Hash, nil] query parameters
|
|
134
|
+
#
|
|
135
|
+
# @return [Hash] parsed JSON response
|
|
136
|
+
# @raise [AuthenticationError] on 401 responses
|
|
137
|
+
# @raise [ClientError] on 4xx responses
|
|
138
|
+
# @raise [ServerError] on 5xx responses
|
|
139
|
+
# @raise [NetworkError] on connection/timeout failures
|
|
140
|
+
#
|
|
141
|
+
# @api private
|
|
53
142
|
def request(method, path, body: nil, params: nil)
|
|
54
143
|
response = execute_request(method, path, body: body, params: params)
|
|
55
144
|
log_response(method, path, response)
|
|
@@ -58,6 +147,13 @@ module ReductoAI
|
|
|
58
147
|
raise NetworkError, "Network error: #{e.message}"
|
|
59
148
|
end
|
|
60
149
|
|
|
150
|
+
# Convenience method for POST requests.
|
|
151
|
+
#
|
|
152
|
+
# @param path [String] API endpoint path
|
|
153
|
+
# @param body [Hash] request body
|
|
154
|
+
# @return [Hash] parsed JSON response
|
|
155
|
+
#
|
|
156
|
+
# @api private
|
|
61
157
|
def post(path, body)
|
|
62
158
|
request(:post, path, body: body)
|
|
63
159
|
end
|
data/lib/reducto_ai/config.rb
CHANGED
|
@@ -3,10 +3,48 @@
|
|
|
3
3
|
require "logger"
|
|
4
4
|
|
|
5
5
|
module ReductoAI
|
|
6
|
+
# Configuration class for the ReductoAI client.
|
|
7
|
+
#
|
|
8
|
+
# Manages API credentials, timeouts, logging, and exception handling behavior.
|
|
9
|
+
# Configuration can be set via environment variables or through the global
|
|
10
|
+
# {ReductoAI.configure} method.
|
|
11
|
+
#
|
|
12
|
+
# @example Environment-based configuration
|
|
13
|
+
# # Set these environment variables:
|
|
14
|
+
# # REDUCTO_API_KEY=your-api-key
|
|
15
|
+
# # REDUCTO_BASE_URL=https://platform.reducto.ai
|
|
16
|
+
# # REDUCTO_OPEN_TIMEOUT=10
|
|
17
|
+
# # REDUCTO_READ_TIMEOUT=60
|
|
18
|
+
#
|
|
19
|
+
# config = ReductoAI::Config.new
|
|
20
|
+
# config.api_key # => "your-api-key"
|
|
21
|
+
#
|
|
22
|
+
# @example Explicit configuration
|
|
23
|
+
# ReductoAI.configure do |config|
|
|
24
|
+
# config.api_key = "your-api-key"
|
|
25
|
+
# config.logger = Rails.logger
|
|
26
|
+
# config.open_timeout = 10
|
|
27
|
+
# end
|
|
6
28
|
class Config
|
|
7
|
-
|
|
29
|
+
# @return [String, nil] Reducto API key (from REDUCTO_API_KEY env var)
|
|
30
|
+
attr_accessor :api_key
|
|
31
|
+
|
|
32
|
+
# @return [String] Base URL for Reducto API (default: https://platform.reducto.ai)
|
|
33
|
+
attr_accessor :base_url
|
|
34
|
+
|
|
35
|
+
# @return [Integer] Connection open timeout in seconds (default: 5)
|
|
36
|
+
attr_accessor :open_timeout
|
|
37
|
+
|
|
38
|
+
# @return [Integer] Request read timeout in seconds (default: 30)
|
|
39
|
+
attr_accessor :read_timeout
|
|
40
|
+
|
|
41
|
+
# @return [Boolean] Whether to raise exceptions on API errors (default: true)
|
|
42
|
+
attr_accessor :raise_exceptions
|
|
43
|
+
|
|
44
|
+
# @return [Logger] Logger instance for debugging
|
|
8
45
|
attr_writer :logger
|
|
9
46
|
|
|
47
|
+
# Creates a new configuration instance with defaults from environment variables.
|
|
10
48
|
def initialize
|
|
11
49
|
@api_key = ENV.fetch("REDUCTO_API_KEY", nil)
|
|
12
50
|
@base_url = ENV.fetch("REDUCTO_BASE_URL", "https://platform.reducto.ai")
|
|
@@ -15,12 +53,18 @@ module ReductoAI
|
|
|
15
53
|
@raise_exceptions = true
|
|
16
54
|
end
|
|
17
55
|
|
|
56
|
+
# Returns the logger instance.
|
|
57
|
+
#
|
|
58
|
+
# Defaults to `Rails.logger` when Rails is loaded and has a logger set, otherwise a Logger writing to $stderr.
|
|
59
|
+
#
|
|
60
|
+
# @return [Logger] the logger instance
|
|
18
61
|
def logger
|
|
19
62
|
@logger ||= (defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger) || Logger.new($stderr)
|
|
20
63
|
end
|
|
21
64
|
|
|
22
65
|
private
|
|
23
66
|
|
|
67
|
+
# @private
|
|
24
68
|
def integer_or_default(key, default)
|
|
25
69
|
Integer(ENV.fetch(key, default))
|
|
26
70
|
rescue StandardError
|
data/lib/reducto_ai/engine.rb
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module ReductoAI
|
|
4
|
+
# Rails engine for automatic initialization in Rails applications.
|
|
5
|
+
#
|
|
6
|
+
# Provides Rails integration for the ReductoAI gem, enabling automatic
|
|
7
|
+
# loading and configuration within Rails applications.
|
|
8
|
+
#
|
|
9
|
+
# @api private
|
|
4
10
|
if defined?(Rails)
|
|
5
11
|
class Engine < ::Rails::Engine
|
|
6
12
|
isolate_namespace ReductoAI
|
data/lib/reducto_ai/errors.rb
CHANGED
|
@@ -1,9 +1,33 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module ReductoAI
|
|
4
|
+
# Base error class for all Reducto API errors.
|
|
5
|
+
#
|
|
6
|
+
# All API-related exceptions inherit from this class and include
|
|
7
|
+
# the HTTP status code and response body, when available, for debugging.
|
|
8
|
+
#
|
|
9
|
+
# @example Handling errors
|
|
10
|
+
# begin
|
|
11
|
+
# client.parse.sync(input: "invalid-url")
|
|
12
|
+
# rescue ReductoAI::AuthenticationError => e
|
|
13
|
+
# puts "Auth failed: #{e.message}"
|
|
14
|
+
# rescue ReductoAI::ClientError => e
|
|
15
|
+
# puts "Client error (#{e.status}): #{e.body}"
|
|
16
|
+
# rescue ReductoAI::Error => e
|
|
17
|
+
# puts "API error: #{e.message}"
|
|
18
|
+
# end
|
|
4
19
|
class Error < StandardError
|
|
5
|
-
|
|
20
|
+
# @return [Integer, nil] HTTP status code
|
|
21
|
+
attr_reader :status
|
|
6
22
|
|
|
23
|
+
# @return [Hash, String, nil] Response body
|
|
24
|
+
attr_reader :body
|
|
25
|
+
|
|
26
|
+
# Creates a new error instance.
|
|
27
|
+
#
|
|
28
|
+
# @param message [String, nil] Error message
|
|
29
|
+
# @param status [Integer, nil] HTTP status code
|
|
30
|
+
# @param body [Hash, String, nil] Response body
|
|
7
31
|
def initialize(message = nil, status: nil, body: nil)
|
|
8
32
|
super(message)
|
|
9
33
|
@status = status
|
|
@@ -11,8 +35,46 @@ module ReductoAI
|
|
|
11
35
|
end
|
|
12
36
|
end
|
|
13
37
|
|
|
38
|
+
# Raised on 401 Unauthorized responses.
|
|
39
|
+
#
|
|
40
|
+
# Indicates invalid or missing API key.
|
|
41
|
+
#
|
|
42
|
+
# @example
|
|
43
|
+
# # Raised when API key is invalid
|
|
44
|
+
# client = ReductoAI::Client.new(api_key: "invalid-key")
|
|
45
|
+
# client.parse.sync(input: "https://example.com/doc.pdf")
|
|
46
|
+
# # => ReductoAI::AuthenticationError: Unauthorized (401): check API key
|
|
14
47
|
class AuthenticationError < Error; end
|
|
48
|
+
|
|
49
|
+
# Raised on 4xx client errors (400, 404, 422).
|
|
50
|
+
#
|
|
51
|
+
# Indicates invalid request parameters, missing resources, or
|
|
52
|
+
# validation failures.
|
|
53
|
+
#
|
|
54
|
+
# @example
|
|
55
|
+
# # Raised when input is invalid
|
|
56
|
+
# client.parse.sync(input: "not-a-valid-url")
|
|
57
|
+
# # => ReductoAI::ClientError: HTTP 400: Invalid input URL
|
|
15
58
|
class ClientError < Error; end
|
|
59
|
+
|
|
60
|
+
# Raised on 5xx server errors.
|
|
61
|
+
#
|
|
62
|
+
# Indicates Reducto API internal errors or temporary failures.
|
|
63
|
+
#
|
|
64
|
+
# @example
|
|
65
|
+
# # Raised on API server issues
|
|
66
|
+
# client.parse.sync(input: "https://example.com/doc.pdf")
|
|
67
|
+
# # => ReductoAI::ServerError: HTTP 500: Internal server error
|
|
16
68
|
class ServerError < Error; end
|
|
69
|
+
|
|
70
|
+
# Raised on network connection or timeout failures.
|
|
71
|
+
#
|
|
72
|
+
# Indicates network issues, DNS failures, or timeout exceeded.
|
|
73
|
+
#
|
|
74
|
+
# @example
|
|
75
|
+
# # Raised when request times out
|
|
76
|
+
# client = ReductoAI::Client.new(read_timeout: 1)
|
|
77
|
+
# client.parse.sync(input: "https://example.com/large-doc.pdf")
|
|
78
|
+
# # => ReductoAI::NetworkError: Network error: execution expired
|
|
17
79
|
class NetworkError < Error; end
|
|
18
80
|
end
|
|
@@ -2,11 +2,52 @@
|
|
|
2
2
|
|
|
3
3
|
module ReductoAI
|
|
4
4
|
module Resources
|
|
5
|
+
# Edit resource for PDF markup and annotation operations.
|
|
6
|
+
#
|
|
7
|
+
# Generates marked-up PDFs with highlights, annotations, or redactions
|
|
8
|
+
# based on natural language instructions.
|
|
9
|
+
#
|
|
10
|
+
# @example Highlight key terms
|
|
11
|
+
# client = ReductoAI::Client.new
|
|
12
|
+
# result = client.edit.sync(
|
|
13
|
+
# input: "https://example.com/contract.pdf",
|
|
14
|
+
# instructions: "Highlight all mentions of payment terms and deadlines"
|
|
15
|
+
# )
|
|
16
|
+
# marked_pdf_url = result["result"]["document_url"]
|
|
17
|
+
#
|
|
18
|
+
# @note Edit operations consume credits based on document size and
|
|
19
|
+
# instruction complexity.
|
|
5
20
|
class Edit
|
|
21
|
+
# @param client [Client] the Reducto API client
|
|
22
|
+
# @api private
|
|
6
23
|
def initialize(client)
|
|
7
24
|
@client = client
|
|
8
25
|
end
|
|
9
26
|
|
|
27
|
+
# Generates a marked-up PDF synchronously.
|
|
28
|
+
#
|
|
29
|
+
# @param input [String, Hash] Document URL or hash with :url key
|
|
30
|
+
# @param instructions [String] Natural language editing instructions
|
|
31
|
+
# (e.g., "Highlight all dates", "Redact personal information")
|
|
32
|
+
# @param options [Hash] Additional editing options
|
|
33
|
+
#
|
|
34
|
+
# @return [Hash] Edit results with keys:
|
|
35
|
+
# * "job_id" [String] - Job identifier
|
|
36
|
+
# * "status" [String] - Job status ("succeeded")
|
|
37
|
+
# * "result" [Hash] - Contains "document_url" with marked PDF
|
|
38
|
+
# * "usage" [Hash] - Credit usage details
|
|
39
|
+
#
|
|
40
|
+
# @raise [ArgumentError] if input is nil, or if instructions are nil or empty
|
|
41
|
+
# @raise [ClientError] if instructions are invalid
|
|
42
|
+
# @raise [ServerError] if editing fails
|
|
43
|
+
#
|
|
44
|
+
# @example Redact sensitive info
|
|
45
|
+
# result = client.edit.sync(
|
|
46
|
+
# input: "https://example.com/report.pdf",
|
|
47
|
+
# instructions: "Redact all social security numbers"
|
|
48
|
+
# )
|
|
49
|
+
#
|
|
50
|
+
# @see https://docs.reducto.ai/api-reference/edit Reducto Edit API
|
|
10
51
|
def sync(input:, instructions:, **options)
|
|
11
52
|
raise ArgumentError, "input is required" if input.nil?
|
|
12
53
|
if instructions.nil? || (instructions.respond_to?(:empty?) && instructions.empty?)
|
|
@@ -17,6 +58,30 @@ module ReductoAI
|
|
|
17
58
|
@client.post("/edit", payload)
|
|
18
59
|
end
|
|
19
60
|
|
|
61
|
+
# Generates a marked-up PDF asynchronously.
|
|
62
|
+
#
|
|
63
|
+
# Returns immediately with a job_id. Poll with {Jobs#retrieve} to get results.
|
|
64
|
+
#
|
|
65
|
+
# @param input [String, Hash] Document URL or hash with :url key
|
|
66
|
+
# @param instructions [String] Natural language editing instructions
|
|
67
|
+
# @param async [Boolean, nil] Async mode flag
|
|
68
|
+
# @param options [Hash] Additional editing options
|
|
69
|
+
#
|
|
70
|
+
# @return [Hash] Job status with keys:
|
|
71
|
+
# * "job_id" [String] - Job identifier for polling
|
|
72
|
+
# * "status" [String] - Initial status ("processing")
|
|
73
|
+
#
|
|
74
|
+
# @raise [ArgumentError] if input is nil, or if instructions are nil or empty
|
|
75
|
+
#
|
|
76
|
+
# @example
|
|
77
|
+
# job = client.edit.async(
|
|
78
|
+
# input: "https://example.com/legal-doc.pdf",
|
|
79
|
+
# instructions: "Highlight all liability clauses"
|
|
80
|
+
# )
|
|
81
|
+
# job_id = job["job_id"]
|
|
82
|
+
#
|
|
83
|
+
# @see Jobs#retrieve
|
|
84
|
+
# @see https://docs.reducto.ai/api-reference/edit-async
|
|
20
85
|
def async(input:, instructions:, async: nil, **options)
|
|
21
86
|
raise ArgumentError, "input is required" if input.nil?
|
|
22
87
|
if instructions.nil? || (instructions.respond_to?(:empty?) && instructions.empty?)
|
|
@@ -31,11 +96,13 @@ module ReductoAI
|
|
|
31
96
|
|
|
32
97
|
private
|
|
33
98
|
|
|
99
|
+
# @private
|
|
34
100
|
def build_payload(input, instructions, options)
|
|
35
101
|
document_url = normalize_input(input)
|
|
36
102
|
{ document_url: document_url, edit_instructions: instructions, **options }.compact
|
|
37
103
|
end
|
|
38
104
|
|
|
105
|
+
# @private
|
|
39
106
|
def normalize_input(input)
|
|
40
107
|
return input unless input.is_a?(Hash)
|
|
41
108
|
|
|
@@ -2,11 +2,62 @@
|
|
|
2
2
|
|
|
3
3
|
module ReductoAI
|
|
4
4
|
module Resources
|
|
5
|
+
# Extract resource for structured data extraction.
|
|
6
|
+
#
|
|
7
|
+
# Extracts specific information from documents based on a schema or instructions.
|
|
8
|
+
# Returns structured JSON data matching the provided schema.
|
|
9
|
+
#
|
|
10
|
+
# @example Extract with schema
|
|
11
|
+
# client = ReductoAI::Client.new
|
|
12
|
+
# schema = {
|
|
13
|
+
# invoice_number: "string",
|
|
14
|
+
# total_amount: "number",
|
|
15
|
+
# line_items: ["object"]
|
|
16
|
+
# }
|
|
17
|
+
#
|
|
18
|
+
# result = client.extract.sync(
|
|
19
|
+
# input: "https://example.com/invoice.pdf",
|
|
20
|
+
# instructions: schema
|
|
21
|
+
# )
|
|
22
|
+
# puts result["result"]
|
|
23
|
+
#
|
|
24
|
+
# @note Extraction operations consume credits based on document complexity
|
|
25
|
+
# and schema size.
|
|
5
26
|
class Extract
|
|
27
|
+
# @param client [Client] the Reducto API client
|
|
28
|
+
# @api private
|
|
6
29
|
def initialize(client)
|
|
7
30
|
@client = client
|
|
8
31
|
end
|
|
9
32
|
|
|
33
|
+
# Extracts structured data from a document synchronously.
|
|
34
|
+
#
|
|
35
|
+
# @param input [String, Hash] Document URL or hash with :url key
|
|
36
|
+
# @param instructions [Hash, String] Extraction schema or instructions.
|
|
37
|
+
# Can be a simple hash (auto-wrapped as `{ schema: ... }`) or
|
|
38
|
+
# a full instructions hash with a :schema key.
|
|
39
|
+
# @param options [Hash] Additional extraction options
|
|
40
|
+
#
|
|
41
|
+
# @return [Hash] Extraction results with keys:
|
|
42
|
+
# * "job_id" [String] - Job identifier
|
|
43
|
+
# * "status" [String] - Job status ("succeeded")
|
|
44
|
+
# * "result" [Hash] - Extracted data matching schema
|
|
45
|
+
# * "usage" [Hash] - Credit usage details
|
|
46
|
+
#
|
|
47
|
+
# @raise [ArgumentError] if input is nil, or if instructions are nil or empty
|
|
48
|
+
# @raise [ClientError] if schema is invalid
|
|
49
|
+
# @raise [ServerError] if extraction fails
|
|
50
|
+
#
|
|
51
|
+
# @example Extract invoice data
|
|
52
|
+
# result = client.extract.sync(
|
|
53
|
+
# input: "https://example.com/invoice.pdf",
|
|
54
|
+
# instructions: {
|
|
55
|
+
# invoice_number: "string",
|
|
56
|
+
# total: "number"
|
|
57
|
+
# }
|
|
58
|
+
# )
|
|
59
|
+
#
|
|
60
|
+
# @see https://docs.reducto.ai/api-reference/extract Reducto Extract API
|
|
10
61
|
def sync(input:, instructions:, **options)
|
|
11
62
|
raise ArgumentError, "input is required" if input.nil?
|
|
12
63
|
if instructions.nil? || (instructions.respond_to?(:empty?) && instructions.empty?)
|
|
@@ -17,6 +68,30 @@ module ReductoAI
|
|
|
17
68
|
@client.post("/extract", payload)
|
|
18
69
|
end
|
|
19
70
|
|
|
71
|
+
# Extracts structured data from a document asynchronously.
|
|
72
|
+
#
|
|
73
|
+
# Returns immediately with a job_id. Poll with {Jobs#retrieve} to get results.
|
|
74
|
+
#
|
|
75
|
+
# @param input [String, Hash] Document URL or hash with :url key
|
|
76
|
+
# @param instructions [Hash, String] Extraction schema (same as {#sync})
|
|
77
|
+
# @param async [Boolean, nil] Async mode flag
|
|
78
|
+
# @param options [Hash] Additional extraction options
|
|
79
|
+
#
|
|
80
|
+
# @return [Hash] Job status with keys:
|
|
81
|
+
# * "job_id" [String] - Job identifier for polling
|
|
82
|
+
# * "status" [String] - Initial status ("processing")
|
|
83
|
+
#
|
|
84
|
+
# @raise [ArgumentError] if input is nil, or if instructions are nil or empty
|
|
85
|
+
#
|
|
86
|
+
# @example Start async extraction
|
|
87
|
+
# job = client.extract.async(
|
|
88
|
+
# input: "https://example.com/contract.pdf",
|
|
89
|
+
# instructions: { parties: ["string"], terms: "string" }
|
|
90
|
+
# )
|
|
91
|
+
# job_id = job["job_id"]
|
|
92
|
+
#
|
|
93
|
+
# @see Jobs#retrieve
|
|
94
|
+
# @see https://docs.reducto.ai/api-reference/extract-async
|
|
20
95
|
def async(input:, instructions:, async: nil, **options)
|
|
21
96
|
raise ArgumentError, "input is required" if input.nil?
|
|
22
97
|
if instructions.nil? || (instructions.respond_to?(:empty?) && instructions.empty?)
|
|
@@ -31,6 +106,7 @@ module ReductoAI
|
|
|
31
106
|
|
|
32
107
|
private
|
|
33
108
|
|
|
109
|
+
# @private
|
|
34
110
|
def build_payload(input, instructions, options)
|
|
35
111
|
normalized_input = normalize_input(input)
|
|
36
112
|
normalized_instructions = normalize_instructions(instructions)
|
|
@@ -38,12 +114,14 @@ module ReductoAI
|
|
|
38
114
|
{ input: normalized_input, instructions: normalized_instructions, **options }.compact
|
|
39
115
|
end
|
|
40
116
|
|
|
117
|
+
# @private
|
|
41
118
|
def normalize_input(input)
|
|
42
119
|
return input unless input.is_a?(Hash)
|
|
43
120
|
|
|
44
121
|
input[:url] || input["url"] || input
|
|
45
122
|
end
|
|
46
123
|
|
|
124
|
+
# @private
|
|
47
125
|
def normalize_instructions(instructions)
|
|
48
126
|
return { schema: instructions } unless instructions.is_a?(Hash)
|
|
49
127
|
return instructions if instructions.key?(:schema) || instructions.key?("schema")
|
|
@@ -2,32 +2,140 @@
|
|
|
2
2
|
|
|
3
3
|
module ReductoAI
|
|
4
4
|
module Resources
|
|
5
|
+
# Jobs resource for job management and file upload operations.
|
|
6
|
+
#
|
|
7
|
+
# Provides methods to list, retrieve, cancel jobs, upload files,
|
|
8
|
+
# and configure webhooks for async job notifications.
|
|
9
|
+
#
|
|
10
|
+
# @example Poll for job completion
|
|
11
|
+
# client = ReductoAI::Client.new
|
|
12
|
+
# job = client.parse.async(input: "https://example.com/doc.pdf")
|
|
13
|
+
#
|
|
14
|
+
# loop do
|
|
15
|
+
# status = client.jobs.retrieve(job_id: job["job_id"])
|
|
16
|
+
#     break if %w[succeeded failed].include?(status["status"])
|
|
17
|
+
# sleep 2
|
|
18
|
+
# end
|
|
19
|
+
#   result = client.jobs.retrieve(job_id: job["job_id"])["result"]
|
|
20
|
+
#
|
|
21
|
+
# @example Upload a local file
|
|
22
|
+
# upload_result = client.jobs.upload(file: "/path/to/document.pdf")
|
|
23
|
+
# document_url = upload_result["url"]
|
|
24
|
+
# client.parse.sync(input: document_url)
|
|
5
25
|
class Jobs
|
|
26
|
+
# @param client [Client] the Reducto API client
|
|
27
|
+
# @api private
|
|
6
28
|
def initialize(client)
|
|
7
29
|
@client = client
|
|
8
30
|
end
|
|
9
31
|
|
|
32
|
+
# Returns API version information.
|
|
33
|
+
#
|
|
34
|
+
# @return [Hash] Version details
|
|
35
|
+
#
|
|
36
|
+
# @example
|
|
37
|
+
# version_info = client.jobs.version
|
|
38
|
+
# puts version_info["version"]
|
|
10
39
|
def version
|
|
11
40
|
@client.request(:get, "/version")
|
|
12
41
|
end
|
|
13
42
|
|
|
43
|
+
# Lists jobs with optional filtering.
|
|
44
|
+
#
|
|
45
|
+
# @param options [Hash] Query parameters for filtering
|
|
46
|
+
# @option options [String] :status Filter by job status ("processing", "succeeded", "failed")
|
|
47
|
+
# @option options [Integer] :limit Maximum number of jobs to return
|
|
48
|
+
# @option options [Integer] :offset Pagination offset
|
|
49
|
+
#
|
|
50
|
+
# @return [Hash] Job list with pagination metadata
|
|
51
|
+
#
|
|
52
|
+
# @example List recent jobs
|
|
53
|
+
# jobs = client.jobs.list(limit: 10)
|
|
54
|
+
# jobs["jobs"].each { |job| puts job["job_id"] }
|
|
55
|
+
#
|
|
56
|
+
# @example Filter by status
|
|
57
|
+
# failed_jobs = client.jobs.list(status: "failed")
|
|
58
|
+
#
|
|
59
|
+
# @see https://docs.reducto.ai/api-reference/jobs
|
|
14
60
|
def list(**options)
|
|
15
61
|
params = options.compact
|
|
16
62
|
@client.request(:get, "/jobs", params: params)
|
|
17
63
|
end
|
|
18
64
|
|
|
65
|
+
# Cancels a running async job.
|
|
66
|
+
#
|
|
67
|
+
# @param job_id [String] Job identifier to cancel
|
|
68
|
+
#
|
|
69
|
+
# @return [Hash] Cancellation result
|
|
70
|
+
#
|
|
71
|
+
# @raise [ArgumentError] if job_id is nil or empty
|
|
72
|
+
# @raise [ClientError] if job doesn't exist or is not cancellable
|
|
73
|
+
#
|
|
74
|
+
# @example
|
|
75
|
+
# client.jobs.cancel(job_id: "job_abc123")
|
|
76
|
+
#
|
|
77
|
+
# @see https://docs.reducto.ai/api-reference/cancel
|
|
19
78
|
def cancel(job_id:)
|
|
20
79
|
raise ArgumentError, "job_id is required" if job_id.nil? || job_id.to_s.strip.empty?
|
|
21
80
|
|
|
22
81
|
@client.request(:post, "/cancel/#{job_id}")
|
|
23
82
|
end
|
|
24
83
|
|
|
84
|
+
# Retrieves job status and results.
|
|
85
|
+
#
|
|
86
|
+
# Used to poll async jobs until completion. Completed jobs include
|
|
87
|
+
# full results in the response.
|
|
88
|
+
#
|
|
89
|
+
# @param job_id [String] Job identifier to retrieve
|
|
90
|
+
#
|
|
91
|
+
# @return [Hash] Job status with keys:
|
|
92
|
+
# * "job_id" [String] - Job identifier
|
|
93
|
+
# * "status" [String] - Current status ("processing", "succeeded", "failed")
|
|
94
|
+
# * "result" [Hash] - Results (only present when status is "succeeded")
|
|
95
|
+
# * "error" [String] - Error message (only present when status is "failed")
|
|
96
|
+
#
|
|
97
|
+
# @raise [ArgumentError] if job_id is nil or empty
|
|
98
|
+
# @raise [ClientError] if job doesn't exist
|
|
99
|
+
#
|
|
100
|
+
# @example Poll until complete
|
|
101
|
+
# loop do
|
|
102
|
+
# status = client.jobs.retrieve(job_id: job_id)
|
|
103
|
+
# break if %w[succeeded failed].include?(status["status"])
|
|
104
|
+
# sleep 2
|
|
105
|
+
# end
|
|
106
|
+
#
|
|
107
|
+
# @see https://docs.reducto.ai/api-reference/job
|
|
25
108
|
def retrieve(job_id:)
|
|
26
109
|
raise ArgumentError, "job_id is required" if job_id.nil? || job_id.to_s.strip.empty?
|
|
27
110
|
|
|
28
111
|
@client.request(:get, "/job/#{job_id}")
|
|
29
112
|
end
|
|
30
113
|
|
|
114
|
+
# Uploads a local file to Reducto's storage.
|
|
115
|
+
#
|
|
116
|
+
# Returns a URL that can be used as input for other API operations.
|
|
117
|
+
# Useful when processing local files instead of publicly accessible URLs.
|
|
118
|
+
#
|
|
119
|
+
# @param file [String, File, IO] File path or file-like object to upload
|
|
120
|
+
# @param extension [String, nil] File extension override (e.g., "pdf", "png")
|
|
121
|
+
#
|
|
122
|
+
# @return [Hash] Upload result with keys:
|
|
123
|
+
# * "url" [String] - Uploaded file URL for use in API calls
|
|
124
|
+
# * "job_id" [String] - Upload job identifier
|
|
125
|
+
#
|
|
126
|
+
# @raise [ArgumentError] if file is nil or path doesn't exist
|
|
127
|
+
# @raise [ServerError] if upload fails
|
|
128
|
+
#
|
|
129
|
+
# @example Upload local PDF
|
|
130
|
+
# upload = client.jobs.upload(file: "/path/to/invoice.pdf")
|
|
131
|
+
# result = client.parse.sync(input: upload["url"])
|
|
132
|
+
#
|
|
133
|
+
# @example Upload with File object
|
|
134
|
+
# File.open("/path/to/doc.pdf", "rb") do |f|
|
|
135
|
+
# upload = client.jobs.upload(file: f, extension: "pdf")
|
|
136
|
+
# end
|
|
137
|
+
#
|
|
138
|
+
# @see https://docs.reducto.ai/api-reference/upload
|
|
31
139
|
def upload(file:, extension: nil)
|
|
32
140
|
raise ArgumentError, "file is required" if file.nil?
|
|
33
141
|
|
|
@@ -39,12 +147,21 @@ module ReductoAI
|
|
|
39
147
|
@client.request(:post, "/upload", body: body, params: params)
|
|
40
148
|
end
|
|
41
149
|
|
|
150
|
+
# Configures webhook notifications for async jobs.
|
|
151
|
+
#
|
|
152
|
+
# @return [Hash] Webhook configuration result
|
|
153
|
+
#
|
|
154
|
+
# @example
|
|
155
|
+
# client.jobs.configure_webhook
|
|
156
|
+
#
|
|
157
|
+
# @see https://docs.reducto.ai/api-reference/configure-webhook
|
|
42
158
|
def configure_webhook
|
|
43
159
|
@client.request(:post, "/configure_webhook")
|
|
44
160
|
end
|
|
45
161
|
|
|
46
162
|
private
|
|
47
163
|
|
|
164
|
+
# @private
|
|
48
165
|
def build_upload_io(file)
|
|
49
166
|
if file.is_a?(String)
|
|
50
167
|
raise ArgumentError, "file path does not exist" unless File.exist?(file)
|
|
@@ -1,12 +1,68 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module ReductoAI
|
|
4
|
+
# Resource classes for Reducto API endpoints.
|
|
5
|
+
#
|
|
6
|
+
# Each resource class corresponds to a set of related API operations
|
|
7
|
+
# (Parse, Extract, Split, Edit, Pipeline, Jobs).
|
|
4
8
|
module Resources
|
|
9
|
+
# Parse resource for document parsing operations.
|
|
10
|
+
#
|
|
11
|
+
# Converts documents (PDFs, images, etc.) into structured formats like
|
|
12
|
+
# Markdown, JSON, or HTML. Supports both synchronous and asynchronous modes.
|
|
13
|
+
#
|
|
14
|
+
# @example Synchronous parsing
|
|
15
|
+
# client = ReductoAI::Client.new
|
|
16
|
+
# result = client.parse.sync(
|
|
17
|
+
# input: "https://example.com/document.pdf",
|
|
18
|
+
# output_formats: { markdown: true }
|
|
19
|
+
# )
|
|
20
|
+
# puts result["result"]["markdown"]
|
|
21
|
+
#
|
|
22
|
+
# @example Asynchronous parsing
|
|
23
|
+
# job = client.parse.async(
|
|
24
|
+
# input: { url: "https://example.com/large-doc.pdf" },
|
|
25
|
+
# async: true
|
|
26
|
+
# )
|
|
27
|
+
# job_id = job["job_id"]
|
|
28
|
+
#
|
|
29
|
+
# @note Each parse operation consumes credits based on document complexity.
|
|
30
|
+
# See Reducto documentation for pricing details.
|
|
5
31
|
class Parse
|
|
32
|
+
# @param client [Client] the Reducto API client
|
|
33
|
+
# @api private
|
|
6
34
|
def initialize(client)
|
|
7
35
|
@client = client
|
|
8
36
|
end
|
|
9
37
|
|
|
38
|
+
# Parses a document synchronously.
|
|
39
|
+
#
|
|
40
|
+
# Blocks until parsing completes and returns the full result.
|
|
41
|
+
#
|
|
42
|
+
# @param input [String, Hash] Document URL or hash with :url key
|
|
43
|
+
# @param options [Hash] Additional parsing options
|
|
44
|
+
# @option options [Hash] :output_formats Output format configuration
|
|
45
|
+
# (e.g., `{ markdown: true, html: true }`)
|
|
46
|
+
# @option options [String] :mode Processing mode ("ocr", "auto")
|
|
47
|
+
# @option options [Boolean] :use_cache Whether to use cached results
|
|
48
|
+
#
|
|
49
|
+
# @return [Hash] Parsed document with keys:
|
|
50
|
+
# * "job_id" [String] - Job identifier
|
|
51
|
+
# * "status" [String] - Job status ("succeeded")
|
|
52
|
+
# * "result" [Hash] - Parsed content by format (e.g., "markdown", "html")
|
|
53
|
+
# * "usage" [Hash] - Credit usage details
|
|
54
|
+
#
|
|
55
|
+
# @raise [ArgumentError] if input is nil
|
|
56
|
+
# @raise [ClientError] if document URL is invalid or inaccessible
|
|
57
|
+
# @raise [ServerError] if parsing fails
|
|
58
|
+
#
|
|
59
|
+
# @example Parse to markdown
|
|
60
|
+
# result = client.parse.sync(
|
|
61
|
+
# input: "https://example.com/doc.pdf",
|
|
62
|
+
# output_formats: { markdown: true }
|
|
63
|
+
# )
|
|
64
|
+
#
|
|
65
|
+
# @see https://docs.reducto.ai/api-reference/parse Reducto Parse API
|
|
10
66
|
def sync(input:, **options)
|
|
11
67
|
raise ArgumentError, "input is required" if input.nil?
|
|
12
68
|
|
|
@@ -15,6 +71,33 @@ module ReductoAI
|
|
|
15
71
|
@client.post("/parse", payload)
|
|
16
72
|
end
|
|
17
73
|
|
|
74
|
+
# Parses a document asynchronously.
|
|
75
|
+
#
|
|
76
|
+
# Returns immediately with a job_id. Poll with {Jobs#retrieve} to get results.
|
|
77
|
+
#
|
|
78
|
+
# @param input [String, Hash] Document URL or hash with :url key
|
|
79
|
+
# @param async [Boolean, nil] Async mode flag (defaults to true if not provided)
|
|
80
|
+
# @param options [Hash] Additional parsing options (same as {#sync})
|
|
81
|
+
#
|
|
82
|
+
# @return [Hash] Job status with keys:
|
|
83
|
+
# * "job_id" [String] - Job identifier for polling
|
|
84
|
+
# * "status" [String] - Initial status ("processing")
|
|
85
|
+
#
|
|
86
|
+
# @raise [ArgumentError] if input is nil
|
|
87
|
+
#
|
|
88
|
+
# @example Start async parse and poll
|
|
89
|
+
# job = client.parse.async(input: "https://example.com/doc.pdf")
|
|
90
|
+
# job_id = job["job_id"]
|
|
91
|
+
#
|
|
92
|
+
# # Poll for completion
|
|
93
|
+
# loop do
|
|
94
|
+
# status = client.jobs.retrieve(job_id: job_id)
|
|
95
|
+
# break if status["status"] == "succeeded"
|
|
96
|
+
# sleep 2
|
|
97
|
+
# end
|
|
98
|
+
#
|
|
99
|
+
# @see Jobs#retrieve
|
|
100
|
+
# @see https://docs.reducto.ai/api-reference/parse-async Reducto Async Parse
|
|
18
101
|
def async(input:, async: nil, **options)
|
|
19
102
|
raise ArgumentError, "input is required" if input.nil?
|
|
20
103
|
|
|
@@ -28,6 +111,7 @@ module ReductoAI
|
|
|
28
111
|
|
|
29
112
|
private
|
|
30
113
|
|
|
114
|
+
# @private
|
|
31
115
|
def normalize_input(input)
|
|
32
116
|
return input unless input.is_a?(Hash)
|
|
33
117
|
|
|
@@ -2,11 +2,58 @@
|
|
|
2
2
|
|
|
3
3
|
module ReductoAI
|
|
4
4
|
module Resources
|
|
5
|
+
# Pipeline resource for multi-step document processing workflows.
|
|
6
|
+
#
|
|
7
|
+
# Orchestrates multiple Reducto operations (parse, extract, split, edit)
|
|
8
|
+
# in a single request, with outputs from earlier steps feeding into later ones.
|
|
9
|
+
#
|
|
10
|
+
# @example Parse then extract
|
|
11
|
+
# client = ReductoAI::Client.new
|
|
12
|
+
# result = client.pipeline.sync(
|
|
13
|
+
# input: "https://example.com/invoice.pdf",
|
|
14
|
+
# steps: [
|
|
15
|
+
# { type: "parse", output_formats: { markdown: true } },
|
|
16
|
+
# { type: "extract", instructions: { total: "number", date: "string" } }
|
|
17
|
+
# ]
|
|
18
|
+
# )
|
|
19
|
+
# extracted_data = result["result"]["steps"][1]["result"]
|
|
20
|
+
#
|
|
21
|
+
# @note Pipeline operations consume credits based on all steps executed.
|
|
5
22
|
class Pipeline
|
|
23
|
+
# @param client [Client] the Reducto API client
|
|
24
|
+
# @api private
|
|
6
25
|
def initialize(client)
|
|
7
26
|
@client = client
|
|
8
27
|
end
|
|
9
28
|
|
|
29
|
+
# Executes a multi-step pipeline synchronously.
|
|
30
|
+
#
|
|
31
|
+
# @param input [String, Hash] Document URL or hash with :url key
|
|
32
|
+
# @param steps [Array<Hash>] Array of step configurations. Each step
|
|
33
|
+
# must have a :type key ("parse", "extract", "split", "edit") and
|
|
34
|
+
# type-specific options.
|
|
35
|
+
# @param options [Hash] Additional pipeline options
|
|
36
|
+
#
|
|
37
|
+
# @return [Hash] Pipeline results with keys:
|
|
38
|
+
# * "job_id" [String] - Job identifier
|
|
39
|
+
# * "status" [String] - Job status ("succeeded")
|
|
40
|
+
# * "result" [Hash] - Contains "steps" array with each step's result
|
|
41
|
+
# * "usage" [Hash] - Credit usage details
|
|
42
|
+
#
|
|
43
|
+
# @raise [ArgumentError] if input or steps are nil/empty
|
|
44
|
+
# @raise [ClientError] if step configuration is invalid
|
|
45
|
+
# @raise [ServerError] if pipeline execution fails
|
|
46
|
+
#
|
|
47
|
+
# @example Parse and extract in one request
|
|
48
|
+
# result = client.pipeline.sync(
|
|
49
|
+
# input: "https://example.com/form.pdf",
|
|
50
|
+
# steps: [
|
|
51
|
+
# { type: "parse" },
|
|
52
|
+
# { type: "extract", instructions: { name: "string", amount: "number" } }
|
|
53
|
+
# ]
|
|
54
|
+
# )
|
|
55
|
+
#
|
|
56
|
+
# @see https://docs.reducto.ai/api-reference/pipeline Reducto Pipeline API
|
|
10
57
|
def sync(input:, steps:, **options)
|
|
11
58
|
raise ArgumentError, "input is required" if input.nil?
|
|
12
59
|
raise ArgumentError, "steps are required" if steps.nil? || (steps.respond_to?(:empty?) && steps.empty?)
|
|
@@ -15,6 +62,33 @@ module ReductoAI
|
|
|
15
62
|
@client.post("/pipeline", payload)
|
|
16
63
|
end
|
|
17
64
|
|
|
65
|
+
# Executes a multi-step pipeline asynchronously.
|
|
66
|
+
#
|
|
67
|
+
# Returns immediately with a job_id. Poll with {Jobs#retrieve} to get results.
|
|
68
|
+
#
|
|
69
|
+
# @param input [String, Hash] Document URL or hash with :url key
|
|
70
|
+
# @param steps [Array<Hash>] Array of step configurations (same as {#sync})
|
|
71
|
+
# @param async [Boolean, nil] Async mode flag
|
|
72
|
+
# @param options [Hash] Additional pipeline options
|
|
73
|
+
#
|
|
74
|
+
# @return [Hash] Job status with keys:
|
|
75
|
+
# * "job_id" [String] - Job identifier for polling
|
|
76
|
+
# * "status" [String] - Initial status ("processing")
|
|
77
|
+
#
|
|
78
|
+
# @raise [ArgumentError] if input or steps are nil/empty
|
|
79
|
+
#
|
|
80
|
+
# @example
|
|
81
|
+
# job = client.pipeline.async(
|
|
82
|
+
# input: "https://example.com/complex-doc.pdf",
|
|
83
|
+
# steps: [
|
|
84
|
+
# { type: "split" },
|
|
85
|
+
# { type: "parse", output_formats: { markdown: true } }
|
|
86
|
+
# ]
|
|
87
|
+
# )
|
|
88
|
+
# job_id = job["job_id"]
|
|
89
|
+
#
|
|
90
|
+
# @see Jobs#retrieve
|
|
91
|
+
# @see https://docs.reducto.ai/api-reference/pipeline-async
|
|
18
92
|
def async(input:, steps:, async: nil, **options)
|
|
19
93
|
raise ArgumentError, "input is required" if input.nil?
|
|
20
94
|
raise ArgumentError, "steps are required" if steps.nil? || (steps.respond_to?(:empty?) && steps.empty?)
|
|
@@ -2,11 +2,49 @@
|
|
|
2
2
|
|
|
3
3
|
module ReductoAI
|
|
4
4
|
module Resources
|
|
5
|
+
# Split resource for document splitting operations.
|
|
6
|
+
#
|
|
7
|
+
# Divides documents into logical sections based on content structure,
|
|
8
|
+
# returning page ranges and metadata for each section.
|
|
9
|
+
#
|
|
10
|
+
# @example Split document into sections
|
|
11
|
+
# client = ReductoAI::Client.new
|
|
12
|
+
# result = client.split.sync(
|
|
13
|
+
# input: "https://example.com/report.pdf"
|
|
14
|
+
# )
|
|
15
|
+
# result["result"]["sections"].each do |section|
|
|
16
|
+
# puts "#{section['title']}: pages #{section['start_page']}-#{section['end_page']}"
|
|
17
|
+
# end
|
|
18
|
+
#
|
|
19
|
+
# @note Split operations consume credits based on document size.
|
|
5
20
|
class Split
|
|
21
|
+
# @param client [Client] the Reducto API client
|
|
22
|
+
# @api private
|
|
6
23
|
def initialize(client)
|
|
7
24
|
@client = client
|
|
8
25
|
end
|
|
9
26
|
|
|
27
|
+
# Splits a document into sections synchronously.
|
|
28
|
+
#
|
|
29
|
+
# @param input [String, Hash] Document URL or hash with :url key
|
|
30
|
+
# @param options [Hash] Additional splitting options
|
|
31
|
+
#
|
|
32
|
+
# @return [Hash] Split results with keys:
|
|
33
|
+
# * "job_id" [String] - Job identifier
|
|
34
|
+
# * "status" [String] - Job status ("succeeded")
|
|
35
|
+
# * "result" [Hash] - Sections with page ranges
|
|
36
|
+
# * "usage" [Hash] - Credit usage details
|
|
37
|
+
#
|
|
38
|
+
# @raise [ArgumentError] if input is nil
|
|
39
|
+
# @raise [ClientError] if document URL is invalid
|
|
40
|
+
# @raise [ServerError] if splitting fails
|
|
41
|
+
#
|
|
42
|
+
# @example
|
|
43
|
+
# result = client.split.sync(
|
|
44
|
+
# input: "https://example.com/document.pdf"
|
|
45
|
+
# )
|
|
46
|
+
#
|
|
47
|
+
# @see https://docs.reducto.ai/api-reference/split Reducto Split API
|
|
10
48
|
def sync(input:, **options)
|
|
11
49
|
raise ArgumentError, "input is required" if input.nil?
|
|
12
50
|
|
|
@@ -15,6 +53,28 @@ module ReductoAI
|
|
|
15
53
|
@client.post("/split", payload)
|
|
16
54
|
end
|
|
17
55
|
|
|
56
|
+
# Splits a document into sections asynchronously.
|
|
57
|
+
#
|
|
58
|
+
# Returns immediately with a job_id. Poll with {Jobs#retrieve} to get results.
|
|
59
|
+
#
|
|
60
|
+
# @param input [String, Hash] Document URL or hash with :url key
|
|
61
|
+
# @param async [Boolean, nil] Async mode flag
|
|
62
|
+
# @param options [Hash] Additional splitting options
|
|
63
|
+
#
|
|
64
|
+
# @return [Hash] Job status with keys:
|
|
65
|
+
# * "job_id" [String] - Job identifier for polling
|
|
66
|
+
# * "status" [String] - Initial status ("processing")
|
|
67
|
+
#
|
|
68
|
+
# @raise [ArgumentError] if input is nil
|
|
69
|
+
#
|
|
70
|
+
# @example
|
|
71
|
+
# job = client.split.async(
|
|
72
|
+
# input: "https://example.com/book.pdf"
|
|
73
|
+
# )
|
|
74
|
+
# job_id = job["job_id"]
|
|
75
|
+
#
|
|
76
|
+
# @see Jobs#retrieve
|
|
77
|
+
# @see https://docs.reducto.ai/api-reference/split-async
|
|
18
78
|
def async(input:, async: nil, **options)
|
|
19
79
|
raise ArgumentError, "input is required" if input.nil?
|
|
20
80
|
|
|
@@ -28,6 +88,7 @@ module ReductoAI
|
|
|
28
88
|
|
|
29
89
|
private
|
|
30
90
|
|
|
91
|
+
# @private
|
|
31
92
|
def normalize_input(input)
|
|
32
93
|
return input unless input.is_a?(Hash)
|
|
33
94
|
|
data/lib/reducto_ai/version.rb
CHANGED
data/lib/reducto_ai.rb
CHANGED
|
@@ -6,16 +6,53 @@ require_relative "reducto_ai/errors"
|
|
|
6
6
|
require_relative "reducto_ai/client"
|
|
7
7
|
require_relative "reducto_ai/engine"
|
|
8
8
|
|
|
9
|
+
# Main namespace for the ReductoAI gem.
|
|
10
|
+
#
|
|
11
|
+
# Provides global configuration management for the Reducto API client.
|
|
12
|
+
# Use {.configure} to set API credentials and options, then create a {Client}
|
|
13
|
+
# instance to interact with the Reducto document intelligence API.
|
|
14
|
+
#
|
|
15
|
+
# @example Basic configuration
|
|
16
|
+
# ReductoAI.configure do |config|
|
|
17
|
+
# config.api_key = ENV.fetch("REDUCTO_API_KEY")
|
|
18
|
+
# config.base_url = "https://platform.reducto.ai"
|
|
19
|
+
# end
|
|
20
|
+
#
|
|
21
|
+
# client = ReductoAI::Client.new
|
|
22
|
+
# result = client.parse.sync(input: "https://example.com/document.pdf")
|
|
23
|
+
#
|
|
24
|
+
# @see Client
|
|
25
|
+
# @see Config
|
|
9
26
|
module ReductoAI
|
|
10
27
|
class << self
|
|
28
|
+
# Returns the global configuration instance.
|
|
29
|
+
#
|
|
30
|
+
# @return [Config] the current configuration object
|
|
11
31
|
def config
|
|
12
32
|
@config ||= Config.new
|
|
13
33
|
end
|
|
14
34
|
|
|
35
|
+
# Configures the ReductoAI client globally.
|
|
36
|
+
#
|
|
37
|
+
# @example Set API key and timeouts
|
|
38
|
+
# ReductoAI.configure do |config|
|
|
39
|
+
# config.api_key = "your-api-key"
|
|
40
|
+
# config.open_timeout = 10
|
|
41
|
+
# config.read_timeout = 60
|
|
42
|
+
# end
|
|
43
|
+
#
|
|
44
|
+
# @yield [config] Gives the configuration object to the block
|
|
45
|
+
# @yieldparam config [Config] the configuration instance to modify
|
|
46
|
+
# @return [void]
|
|
15
47
|
def configure
|
|
16
48
|
yield(config)
|
|
17
49
|
end
|
|
18
50
|
|
|
51
|
+
# Resets the global configuration to nil.
|
|
52
|
+
#
|
|
53
|
+
# Primarily used for testing to ensure a clean configuration state.
|
|
54
|
+
#
|
|
55
|
+
# @return [void]
|
|
19
56
|
def reset_configuration!
|
|
20
57
|
@config = nil
|
|
21
58
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: reducto_ai
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- dpaluy
|
|
@@ -44,8 +44,12 @@ email:
|
|
|
44
44
|
- dpaluy@users.noreply.github.com
|
|
45
45
|
executables: []
|
|
46
46
|
extensions: []
|
|
47
|
-
extra_rdoc_files: []
|
|
47
|
+
extra_rdoc_files:
|
|
48
|
+
- CHANGELOG.md
|
|
49
|
+
- LICENSE.txt
|
|
50
|
+
- README.md
|
|
48
51
|
files:
|
|
52
|
+
- ".yardopts"
|
|
49
53
|
- CHANGELOG.md
|
|
50
54
|
- LICENSE.txt
|
|
51
55
|
- README.md
|
|
@@ -69,8 +73,10 @@ licenses:
|
|
|
69
73
|
metadata:
|
|
70
74
|
rubygems_mfa_required: 'true'
|
|
71
75
|
homepage_uri: https://github.com/dpaluy/reducto_ai
|
|
76
|
+
documentation_uri: https://rubydoc.info/gems/reducto_ai
|
|
72
77
|
source_code_uri: https://github.com/dpaluy/reducto_ai
|
|
73
78
|
changelog_uri: https://github.com/dpaluy/reducto_ai/blob/main/CHANGELOG.md
|
|
79
|
+
bug_tracker_uri: https://github.com/dpaluy/reducto_ai/issues
|
|
74
80
|
rdoc_options: []
|
|
75
81
|
require_paths:
|
|
76
82
|
- lib
|
|
@@ -85,7 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
85
91
|
- !ruby/object:Gem::Version
|
|
86
92
|
version: '0'
|
|
87
93
|
requirements: []
|
|
88
|
-
rubygems_version: 3.
|
|
94
|
+
rubygems_version: 3.6.9
|
|
89
95
|
specification_version: 4
|
|
90
96
|
summary: Ruby client for the Reducto document intelligence API.
|
|
91
97
|
test_files: []
|