llamaparserb 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/README.md +2 -0
- data/lib/llamaparserb/version.rb +1 -1
- data/lib/llamaparserb.rb +21 -21
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f836664d2420cb4d1c4cf4ff02eaa4105507f30a0d25f435565e236d5e03b4e7
|
|
4
|
+
data.tar.gz: b2d0d6d90d7c343528b78aa5df4ee41801b5b5a36394fe19f08c2295195b50a2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fc8c8a573f40858dc727c2744fad07dfa736a4e7f5848952e3bf401f704a435aec32fd65eefdc8444cd7194d8c5e8f21679b93a1ef9990e330d125050f88cfc9
|
|
7
|
+
data.tar.gz: bf19df08fab5f21a021f8f031fe22564641e78b94fd1b77121c2178ee608c08d6b2c56cc7fed070e2bd45c7c6ee1e11ab29c10994e829840e5eb34c9cda54862
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
|
5
5
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
|
+
## [0.3.2] - 2024-12-02
|
|
9
|
+
### Added
|
|
10
|
+
- Add support for `disable_image_extraction`
|
|
11
|
+
|
|
12
|
+
## [0.3.1] - 2024-11-28
|
|
13
|
+
- Add support for all supported optional llamaparse parameters when parsing files from URLs
|
|
8
14
|
|
|
9
15
|
## [0.3.0] - 2024-11-28
|
|
10
16
|
### Added
|
|
@@ -12,7 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
12
18
|
|
|
13
19
|
## [0.2.3] - 2024-11-28
|
|
14
20
|
### Added
|
|
15
|
-
- Add support for all supported optional
|
|
21
|
+
- Add support for all supported optional llamaparse parameters to `parse_file`
|
|
16
22
|
|
|
17
23
|
[0.2.3]: https://github.com/horizing/llamaparserb/releases/tag/v0.2.3...v0.2.2
|
|
18
24
|
|
data/README.md
CHANGED
|
@@ -118,6 +118,7 @@ client = Llamaparserb::Client.new(
|
|
|
118
118
|
|
|
119
119
|
# OCR and Image Processing
|
|
120
120
|
disable_ocr: false, # Disable Optical Character Recognition
|
|
121
|
+
disable_image_extraction: false, # Disable image extraction from documents
|
|
121
122
|
take_screenshot: false, # Capture screenshot of document
|
|
122
123
|
|
|
123
124
|
# Advanced Processing Features
|
|
@@ -156,6 +157,7 @@ client = Llamaparserb::Client.new(
|
|
|
156
157
|
|
|
157
158
|
#### OCR and Image Processing
|
|
158
159
|
- `disable_ocr`: Turn off Optical Character Recognition
|
|
160
|
+
- `disable_image_extraction`: Disable image extraction from documents
|
|
159
161
|
- `take_screenshot`: Generate document screenshots
|
|
160
162
|
- `skip_diagonal_text`: Ignore text at diagonal angles
|
|
161
163
|
|
data/lib/llamaparserb/version.rb
CHANGED
data/lib/llamaparserb.rb
CHANGED
|
@@ -109,6 +109,7 @@ module Llamaparserb
|
|
|
109
109
|
vendor_multimodal_model_name: nil,
|
|
110
110
|
take_screenshot: false,
|
|
111
111
|
disable_ocr: false,
|
|
112
|
+
disable_image_extraction: false,
|
|
112
113
|
is_formatting_instruction: false,
|
|
113
114
|
annotate_links: false,
|
|
114
115
|
webhook_url: nil,
|
|
@@ -240,7 +241,7 @@ module Llamaparserb
|
|
|
240
241
|
|
|
241
242
|
response = @connection.post("upload") do |req|
|
|
242
243
|
req.headers["Authorization"] = "Bearer #{api_key}"
|
|
243
|
-
req.body =
|
|
244
|
+
req.body = upload_params(file)
|
|
244
245
|
end
|
|
245
246
|
|
|
246
247
|
response.body["id"]
|
|
@@ -269,31 +270,31 @@ module Llamaparserb
|
|
|
269
270
|
premium_mode: @options[:premium_mode],
|
|
270
271
|
continuous_mode: @options[:continuous_mode],
|
|
271
272
|
do_not_unroll_columns: @options[:do_not_unroll_columns],
|
|
273
|
+
page_separator: @options[:page_separator],
|
|
274
|
+
page_prefix: @options[:page_prefix],
|
|
275
|
+
page_suffix: @options[:page_suffix],
|
|
276
|
+
target_pages: @options[:target_pages],
|
|
277
|
+
bounding_box: @options[:bounding_box],
|
|
278
|
+
disable_ocr: @options[:disable_ocr],
|
|
279
|
+
disable_image_extraction: @options[:disable_image_extraction],
|
|
280
|
+
take_screenshot: @options[:take_screenshot],
|
|
272
281
|
gpt4o_mode: @options[:gpt4o_mode],
|
|
273
282
|
gpt4o_api_key: @options[:gpt4o_api_key],
|
|
274
|
-
vendor_multimodal_api_key: @options[:vendor_multimodal_api_key],
|
|
275
|
-
use_vendor_multimodal_model: @options[:use_vendor_multimodal_model],
|
|
276
|
-
vendor_multimodal_model_name: @options[:vendor_multimodal_model_name],
|
|
277
|
-
take_screenshot: @options[:take_screenshot],
|
|
278
|
-
disable_ocr: @options[:disable_ocr],
|
|
279
283
|
guess_xlsx_sheet_names: @options[:guess_xlsx_sheet_names],
|
|
280
284
|
is_formatting_instruction: @options[:is_formatting_instruction],
|
|
281
285
|
annotate_links: @options[:annotate_links],
|
|
286
|
+
vendor_multimodal_api_key: @options[:vendor_multimodal_api_key],
|
|
287
|
+
use_vendor_multimodal_model: @options[:use_vendor_multimodal_model],
|
|
288
|
+
vendor_multimodal_model_name: @options[:vendor_multimodal_model_name],
|
|
289
|
+
webhook_url: @options[:webhook_url],
|
|
290
|
+
http_proxy: @options[:http_proxy],
|
|
291
|
+
azure_openai_deployment_name: @options[:azure_openai_deployment_name],
|
|
292
|
+
azure_openai_endpoint: @options[:azure_openai_endpoint],
|
|
293
|
+
azure_openai_api_version: @options[:azure_openai_api_version],
|
|
294
|
+
azure_openai_key: @options[:azure_openai_key],
|
|
282
295
|
from_ruby_package: true
|
|
283
296
|
}
|
|
284
297
|
|
|
285
|
-
params[:page_separator] = @options[:page_separator] if @options[:page_separator]
|
|
286
|
-
params[:page_prefix] = @options[:page_prefix] if @options[:page_prefix]
|
|
287
|
-
params[:page_suffix] = @options[:page_suffix] if @options[:page_suffix]
|
|
288
|
-
params[:bounding_box] = @options[:bounding_box] if @options[:bounding_box]
|
|
289
|
-
params[:target_pages] = @options[:target_pages] if @options[:target_pages]
|
|
290
|
-
params[:webhook_url] = @options[:webhook_url] if @options[:webhook_url]
|
|
291
|
-
params[:azure_openai_deployment_name] = @options[:azure_openai_deployment_name] if @options[:azure_openai_deployment_name]
|
|
292
|
-
params[:azure_openai_endpoint] = @options[:azure_openai_endpoint] if @options[:azure_openai_endpoint]
|
|
293
|
-
params[:azure_openai_api_version] = @options[:azure_openai_api_version] if @options[:azure_openai_api_version]
|
|
294
|
-
params[:azure_openai_key] = @options[:azure_openai_key] if @options[:azure_openai_key]
|
|
295
|
-
params[:http_proxy] = @options[:http_proxy] if @options[:http_proxy]
|
|
296
|
-
|
|
297
298
|
if url
|
|
298
299
|
params[:input_url] = url.to_s
|
|
299
300
|
elsif file
|
|
@@ -356,9 +357,8 @@ module Llamaparserb
|
|
|
356
357
|
response = @connection.post("upload") do |req|
|
|
357
358
|
req.headers["Authorization"] = "Bearer #{api_key}"
|
|
358
359
|
req.headers["Accept"] = "application/json"
|
|
359
|
-
|
|
360
|
-
req.
|
|
361
|
-
req.body = {"input_url" => url.to_s}
|
|
360
|
+
req.options.timeout = 30
|
|
361
|
+
req.body = upload_params(nil, url)
|
|
362
362
|
end
|
|
363
363
|
|
|
364
364
|
log "Response: #{response.body.inspect}", :debug
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: llamaparserb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 0.3.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Heidar Bernhardsson
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-11-28 00:00:00.000000000 Z
|
|
11
|
+
date: 2024-12-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: faraday
|