llamaparserb 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c49dc1624b1b955cad8032696308fc81357533113cf429f53fe063d5db0cab2
4
- data.tar.gz: 2eee40a9054fe05d02a094828bb47a6cc3a5b65ca6fcb5864b9648cff5f0b418
3
+ metadata.gz: f836664d2420cb4d1c4cf4ff02eaa4105507f30a0d25f435565e236d5e03b4e7
4
+ data.tar.gz: b2d0d6d90d7c343528b78aa5df4ee41801b5b5a36394fe19f08c2295195b50a2
5
5
  SHA512:
6
- metadata.gz: 50ed45e0d813d776b79fce2e87593cdc88063158045c62ba76e1bc0e353fb1b27b2b03b87dc4253443b4b2a355fec179708d04e1920dc0f47730c271e3ff81cc
7
- data.tar.gz: 9f86c50b3f5c4bd987c9bab987c5b139ce1dd998577d04ea3fba53d64b430d3cdcc0af3d880d73ebea1914341bc185fb78469f78d9521098f06510c7024b8746
6
+ metadata.gz: fc8c8a573f40858dc727c2744fad07dfa736a4e7f5848952e3bf401f704a435aec32fd65eefdc8444cd7194d8c5e8f21679b93a1ef9990e330d125050f88cfc9
7
+ data.tar.gz: bf19df08fab5f21a021f8f031fe22564641e78b94fd1b77121c2178ee608c08d6b2c56cc7fed070e2bd45c7c6ee1e11ab29c10994e829840e5eb34c9cda54862
data/CHANGELOG.md CHANGED
@@ -5,6 +5,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5
5
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
7
  ## [Unreleased]
8
+ ## [0.3.2] - 2024-12-02
9
+ ### Added
10
+ - Add support for `disable_image_extraction`
11
+
12
+ ## [0.3.1] - 2024-11-28
13
+ - Add support for all supported optional llamaparse parameters when parsing files from URLs
8
14
 
9
15
  ## [0.3.0] - 2024-11-28
10
16
  ### Added
@@ -12,7 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
12
18
 
13
19
  ## [0.2.3] - 2024-11-28
14
20
  ### Added
15
- - Add support for all supported optional llamaparsse parameters to `parse_file`
21
+ - Add support for all supported optional llamaparse parameters to `parse_file`
16
22
 
17
23
  [0.2.3]: https://github.com/horizing/llamaparserb/releases/tag/v0.2.3...v0.2.2
18
24
 
data/README.md CHANGED
@@ -118,6 +118,7 @@ client = Llamaparserb::Client.new(
118
118
 
119
119
  # OCR and Image Processing
120
120
  disable_ocr: false, # Disable Optical Character Recognition
121
+ disable_image_extraction: false, # Disable image extraction from documents
121
122
  take_screenshot: false, # Capture screenshot of document
122
123
 
123
124
  # Advanced Processing Features
@@ -156,6 +157,7 @@ client = Llamaparserb::Client.new(
156
157
 
157
158
  #### OCR and Image Processing
158
159
  - `disable_ocr`: Turn off Optical Character Recognition
160
+ - `disable_image_extraction`: Disable image extraction from documents
159
161
  - `take_screenshot`: Generate document screenshots
160
162
  - `skip_diagonal_text`: Ignore text at diagonal angles
161
163
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Llamaparserb
4
- VERSION = "0.3.0"
4
+ VERSION = "0.3.2"
5
5
  end
data/lib/llamaparserb.rb CHANGED
@@ -109,6 +109,7 @@ module Llamaparserb
109
109
  vendor_multimodal_model_name: nil,
110
110
  take_screenshot: false,
111
111
  disable_ocr: false,
112
+ disable_image_extraction: false,
112
113
  is_formatting_instruction: false,
113
114
  annotate_links: false,
114
115
  webhook_url: nil,
@@ -240,7 +241,7 @@ module Llamaparserb
240
241
 
241
242
  response = @connection.post("upload") do |req|
242
243
  req.headers["Authorization"] = "Bearer #{api_key}"
243
- req.body = {file: file}
244
+ req.body = upload_params(file)
244
245
  end
245
246
 
246
247
  response.body["id"]
@@ -269,31 +270,31 @@ module Llamaparserb
269
270
  premium_mode: @options[:premium_mode],
270
271
  continuous_mode: @options[:continuous_mode],
271
272
  do_not_unroll_columns: @options[:do_not_unroll_columns],
273
+ page_separator: @options[:page_separator],
274
+ page_prefix: @options[:page_prefix],
275
+ page_suffix: @options[:page_suffix],
276
+ target_pages: @options[:target_pages],
277
+ bounding_box: @options[:bounding_box],
278
+ disable_ocr: @options[:disable_ocr],
279
+ disable_image_extraction: @options[:disable_image_extraction],
280
+ take_screenshot: @options[:take_screenshot],
272
281
  gpt4o_mode: @options[:gpt4o_mode],
273
282
  gpt4o_api_key: @options[:gpt4o_api_key],
274
- vendor_multimodal_api_key: @options[:vendor_multimodal_api_key],
275
- use_vendor_multimodal_model: @options[:use_vendor_multimodal_model],
276
- vendor_multimodal_model_name: @options[:vendor_multimodal_model_name],
277
- take_screenshot: @options[:take_screenshot],
278
- disable_ocr: @options[:disable_ocr],
279
283
  guess_xlsx_sheet_names: @options[:guess_xlsx_sheet_names],
280
284
  is_formatting_instruction: @options[:is_formatting_instruction],
281
285
  annotate_links: @options[:annotate_links],
286
+ vendor_multimodal_api_key: @options[:vendor_multimodal_api_key],
287
+ use_vendor_multimodal_model: @options[:use_vendor_multimodal_model],
288
+ vendor_multimodal_model_name: @options[:vendor_multimodal_model_name],
289
+ webhook_url: @options[:webhook_url],
290
+ http_proxy: @options[:http_proxy],
291
+ azure_openai_deployment_name: @options[:azure_openai_deployment_name],
292
+ azure_openai_endpoint: @options[:azure_openai_endpoint],
293
+ azure_openai_api_version: @options[:azure_openai_api_version],
294
+ azure_openai_key: @options[:azure_openai_key],
282
295
  from_ruby_package: true
283
296
  }
284
297
 
285
- params[:page_separator] = @options[:page_separator] if @options[:page_separator]
286
- params[:page_prefix] = @options[:page_prefix] if @options[:page_prefix]
287
- params[:page_suffix] = @options[:page_suffix] if @options[:page_suffix]
288
- params[:bounding_box] = @options[:bounding_box] if @options[:bounding_box]
289
- params[:target_pages] = @options[:target_pages] if @options[:target_pages]
290
- params[:webhook_url] = @options[:webhook_url] if @options[:webhook_url]
291
- params[:azure_openai_deployment_name] = @options[:azure_openai_deployment_name] if @options[:azure_openai_deployment_name]
292
- params[:azure_openai_endpoint] = @options[:azure_openai_endpoint] if @options[:azure_openai_endpoint]
293
- params[:azure_openai_api_version] = @options[:azure_openai_api_version] if @options[:azure_openai_api_version]
294
- params[:azure_openai_key] = @options[:azure_openai_key] if @options[:azure_openai_key]
295
- params[:http_proxy] = @options[:http_proxy] if @options[:http_proxy]
296
-
297
298
  if url
298
299
  params[:input_url] = url.to_s
299
300
  elsif file
@@ -356,9 +357,8 @@ module Llamaparserb
356
357
  response = @connection.post("upload") do |req|
357
358
  req.headers["Authorization"] = "Bearer #{api_key}"
358
359
  req.headers["Accept"] = "application/json"
359
- # Create a simple form data request
360
- req.options.timeout = 30 # Optional: add timeout
361
- req.body = {"input_url" => url.to_s}
360
+ req.options.timeout = 30
361
+ req.body = upload_params(nil, url)
362
362
  end
363
363
 
364
364
  log "Response: #{response.body.inspect}", :debug
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llamaparserb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Heidar Bernhardsson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-11-28 00:00:00.000000000 Z
11
+ date: 2024-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday