llamaparserb 0.3.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/README.md +2 -0
- data/lib/llamaparserb/version.rb +1 -1
- data/lib/llamaparserb.rb +21 -21
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f836664d2420cb4d1c4cf4ff02eaa4105507f30a0d25f435565e236d5e03b4e7
|
4
|
+
data.tar.gz: b2d0d6d90d7c343528b78aa5df4ee41801b5b5a36394fe19f08c2295195b50a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc8c8a573f40858dc727c2744fad07dfa736a4e7f5848952e3bf401f704a435aec32fd65eefdc8444cd7194d8c5e8f21679b93a1ef9990e330d125050f88cfc9
|
7
|
+
data.tar.gz: bf19df08fab5f21a021f8f031fe22564641e78b94fd1b77121c2178ee608c08d6b2c56cc7fed070e2bd45c7c6ee1e11ab29c10994e829840e5eb34c9cda54862
|
data/CHANGELOG.md
CHANGED
@@ -5,6 +5,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
5
5
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
|
+
## [0.3.2] - 2024-12-02
|
9
|
+
### Added
|
10
|
+
- Add support for `disable_image_extraction`
|
11
|
+
|
12
|
+
## [0.3.1] - 2024-11-28
|
13
|
+
- Add support for all supported optional llamaparse parameters when parsing files from URLs
|
8
14
|
|
9
15
|
## [0.3.0] - 2024-11-28
|
10
16
|
### Added
|
@@ -12,7 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
12
18
|
|
13
19
|
## [0.2.3] - 2024-11-28
|
14
20
|
### Added
|
15
|
-
- Add support for all supported optional
|
21
|
+
- Add support for all supported optional llamaparse parameters to `parse_file`
|
16
22
|
|
17
23
|
[0.2.3]: https://github.com/horizing/llamaparserb/releases/tag/v0.2.3...v0.2.2
|
18
24
|
|
data/README.md
CHANGED
@@ -118,6 +118,7 @@ client = Llamaparserb::Client.new(
|
|
118
118
|
|
119
119
|
# OCR and Image Processing
|
120
120
|
disable_ocr: false, # Disable Optical Character Recognition
|
121
|
+
disable_image_extraction: false, # Disable image extraction from documents
|
121
122
|
take_screenshot: false, # Capture screenshot of document
|
122
123
|
|
123
124
|
# Advanced Processing Features
|
@@ -156,6 +157,7 @@ client = Llamaparserb::Client.new(
|
|
156
157
|
|
157
158
|
#### OCR and Image Processing
|
158
159
|
- `disable_ocr`: Turn off Optical Character Recognition
|
160
|
+
- `disable_image_extraction`: Disable image extraction from documents
|
159
161
|
- `take_screenshot`: Generate document screenshots
|
160
162
|
- `skip_diagonal_text`: Ignore text at diagonal angles
|
161
163
|
|
data/lib/llamaparserb/version.rb
CHANGED
data/lib/llamaparserb.rb
CHANGED
@@ -109,6 +109,7 @@ module Llamaparserb
|
|
109
109
|
vendor_multimodal_model_name: nil,
|
110
110
|
take_screenshot: false,
|
111
111
|
disable_ocr: false,
|
112
|
+
disable_image_extraction: false,
|
112
113
|
is_formatting_instruction: false,
|
113
114
|
annotate_links: false,
|
114
115
|
webhook_url: nil,
|
@@ -240,7 +241,7 @@ module Llamaparserb
|
|
240
241
|
|
241
242
|
response = @connection.post("upload") do |req|
|
242
243
|
req.headers["Authorization"] = "Bearer #{api_key}"
|
243
|
-
req.body =
|
244
|
+
req.body = upload_params(file)
|
244
245
|
end
|
245
246
|
|
246
247
|
response.body["id"]
|
@@ -269,31 +270,31 @@ module Llamaparserb
|
|
269
270
|
premium_mode: @options[:premium_mode],
|
270
271
|
continuous_mode: @options[:continuous_mode],
|
271
272
|
do_not_unroll_columns: @options[:do_not_unroll_columns],
|
273
|
+
page_separator: @options[:page_separator],
|
274
|
+
page_prefix: @options[:page_prefix],
|
275
|
+
page_suffix: @options[:page_suffix],
|
276
|
+
target_pages: @options[:target_pages],
|
277
|
+
bounding_box: @options[:bounding_box],
|
278
|
+
disable_ocr: @options[:disable_ocr],
|
279
|
+
disable_image_extraction: @options[:disable_image_extraction],
|
280
|
+
take_screenshot: @options[:take_screenshot],
|
272
281
|
gpt4o_mode: @options[:gpt4o_mode],
|
273
282
|
gpt4o_api_key: @options[:gpt4o_api_key],
|
274
|
-
vendor_multimodal_api_key: @options[:vendor_multimodal_api_key],
|
275
|
-
use_vendor_multimodal_model: @options[:use_vendor_multimodal_model],
|
276
|
-
vendor_multimodal_model_name: @options[:vendor_multimodal_model_name],
|
277
|
-
take_screenshot: @options[:take_screenshot],
|
278
|
-
disable_ocr: @options[:disable_ocr],
|
279
283
|
guess_xlsx_sheet_names: @options[:guess_xlsx_sheet_names],
|
280
284
|
is_formatting_instruction: @options[:is_formatting_instruction],
|
281
285
|
annotate_links: @options[:annotate_links],
|
286
|
+
vendor_multimodal_api_key: @options[:vendor_multimodal_api_key],
|
287
|
+
use_vendor_multimodal_model: @options[:use_vendor_multimodal_model],
|
288
|
+
vendor_multimodal_model_name: @options[:vendor_multimodal_model_name],
|
289
|
+
webhook_url: @options[:webhook_url],
|
290
|
+
http_proxy: @options[:http_proxy],
|
291
|
+
azure_openai_deployment_name: @options[:azure_openai_deployment_name],
|
292
|
+
azure_openai_endpoint: @options[:azure_openai_endpoint],
|
293
|
+
azure_openai_api_version: @options[:azure_openai_api_version],
|
294
|
+
azure_openai_key: @options[:azure_openai_key],
|
282
295
|
from_ruby_package: true
|
283
296
|
}
|
284
297
|
|
285
|
-
params[:page_separator] = @options[:page_separator] if @options[:page_separator]
|
286
|
-
params[:page_prefix] = @options[:page_prefix] if @options[:page_prefix]
|
287
|
-
params[:page_suffix] = @options[:page_suffix] if @options[:page_suffix]
|
288
|
-
params[:bounding_box] = @options[:bounding_box] if @options[:bounding_box]
|
289
|
-
params[:target_pages] = @options[:target_pages] if @options[:target_pages]
|
290
|
-
params[:webhook_url] = @options[:webhook_url] if @options[:webhook_url]
|
291
|
-
params[:azure_openai_deployment_name] = @options[:azure_openai_deployment_name] if @options[:azure_openai_deployment_name]
|
292
|
-
params[:azure_openai_endpoint] = @options[:azure_openai_endpoint] if @options[:azure_openai_endpoint]
|
293
|
-
params[:azure_openai_api_version] = @options[:azure_openai_api_version] if @options[:azure_openai_api_version]
|
294
|
-
params[:azure_openai_key] = @options[:azure_openai_key] if @options[:azure_openai_key]
|
295
|
-
params[:http_proxy] = @options[:http_proxy] if @options[:http_proxy]
|
296
|
-
|
297
298
|
if url
|
298
299
|
params[:input_url] = url.to_s
|
299
300
|
elsif file
|
@@ -356,9 +357,8 @@ module Llamaparserb
|
|
356
357
|
response = @connection.post("upload") do |req|
|
357
358
|
req.headers["Authorization"] = "Bearer #{api_key}"
|
358
359
|
req.headers["Accept"] = "application/json"
|
359
|
-
|
360
|
-
req.
|
361
|
-
req.body = {"input_url" => url.to_s}
|
360
|
+
req.options.timeout = 30
|
361
|
+
req.body = upload_params(nil, url)
|
362
362
|
end
|
363
363
|
|
364
364
|
log "Response: #{response.body.inspect}", :debug
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llamaparserb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Heidar Bernhardsson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-11-28 00:00:00.000000000 Z
|
11
|
+
date: 2024-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|