llamaparserb 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ba81bbf8d24dc79b57a29c8c40764c42d700012e6608d1a494075dd63900d06f
4
- data.tar.gz: 1ce8846e182bf7025d90e8722148554fe2e65d47d82e1671e19f7e6386d30ce9
3
+ metadata.gz: 9c49dc1624b1b955cad8032696308fc81357533113cf429f53fe063d5db0cab2
4
+ data.tar.gz: 2eee40a9054fe05d02a094828bb47a6cc3a5b65ca6fcb5864b9648cff5f0b418
5
5
  SHA512:
6
- metadata.gz: b5c86e77644210049df9a1095049e2a276f70e40208637e80fd14283fded8eee45ec034cc9bd7c205b802ef24be252989bbd5be671ac50981c0acb998876131b
7
- data.tar.gz: 91ea52459cc1fc38f15dd5b050a2c25449147f8905d24af34c76986f48c84f54d0187bad3c0d3b4da81061e8d593982252229611592948deffdc7a6d6d6c066f
6
+ metadata.gz: 50ed45e0d813d776b79fce2e87593cdc88063158045c62ba76e1bc0e353fb1b27b2b03b87dc4253443b4b2a355fec179708d04e1920dc0f47730c271e3ff81cc
7
+ data.tar.gz: 9f86c50b3f5c4bd987c9bab987c5b139ce1dd998577d04ea3fba53d64b430d3cdcc0af3d880d73ebea1914341bc185fb78469f78d9521098f06510c7024b8746
data/CHANGELOG.md CHANGED
@@ -6,14 +6,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.3.0] - 2024-11-28
10
+ ### Added
11
+ - Add support for parsing files from URLs
12
+
9
13
  ## [0.2.3] - 2024-11-28
10
14
  ### Added
11
15
  - Add support for all supported optional llamaparse parameters to `parse_file`
12
16
 
17
+ [0.2.3]: https://github.com/horizing/llamaparserb/releases/tag/v0.2.3
18
+
13
19
  ## [0.2.2] - 2024-11-28
14
20
  ### Fixed
15
21
  - Fix issue with handling file path
16
22
 
23
+ [0.2.2]: https://github.com/horizing/llamaparserb/releases/tag/v0.2.2
24
+
17
25
  ## [0.2.1] - 2024-11-28
18
26
  ### Fixed
19
27
  - Fix parse_file to handle files that are not on the local filesystem
@@ -29,8 +37,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
29
37
 
30
38
  ## [0.1.0] - 2024-11-27
31
39
  ### Added
32
- - Initial release
33
-
34
- [Unreleased]: https://github.com/horizing/llamaparserb/compare/v0.1.0...HEAD
35
- [0.1.1]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.1...v0.1.0
36
- [0.1.0]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.0
40
+ - Initial release
data/README.md CHANGED
@@ -43,11 +43,14 @@ text = client.parse_file(file_content, 'pdf')
43
43
  # Parse a file to markdown
44
44
  client = Llamaparserb::Client.new(ENV['LLAMA_CLOUD_API_KEY'], result_type: "markdown")
45
45
  markdown = client.parse_file('path/to/document.pdf')
46
+
47
+ # Parse a file from a URL
48
+ markdown = client.parse_file('https://example.com/document.pdf')
46
49
  ```
47
50
 
48
51
  ### File Input Options
49
52
 
50
- The `parse_file` method accepts two types of inputs:
53
+ The `parse_file` method accepts three types of inputs:
51
54
 
52
55
  1. File path (String):
53
56
  ```ruby
@@ -69,6 +72,11 @@ temp_file = Tempfile.new(['document', '.pdf'])
69
72
  client.parse_file(temp_file, 'pdf')
70
73
  ```
71
74
 
75
+ 3. URL (String):
76
+ ```ruby
77
+ client.parse_file('https://example.com/document.pdf')
78
+ ```
79
+
72
80
  ### Advanced Options
73
81
 
74
82
  ```ruby
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Llamaparserb
4
- VERSION = "0.2.3"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/llamaparserb.rb CHANGED
@@ -51,6 +51,9 @@ module Llamaparserb
51
51
  elsif File.exist?(file_input)
52
52
  job_id = create_job_from_path(file_input)
53
53
  log "Started parsing file under job_id #{job_id}", :info
54
+ elsif URI::DEFAULT_PARSER.make_regexp.match?(file_input)
55
+ job_id = create_job_from_url(file_input)
56
+ log "Started parsing URL under job_id #{job_id}", :info
54
57
  else
55
58
  raise Error, "file_type parameter is required for binary string input"
56
59
  end
@@ -197,7 +200,7 @@ module Llamaparserb
197
200
  def build_connection
198
201
  Faraday.new(url: base_url) do |f|
199
202
  f.request :multipart
200
- f.request :json
203
+ f.request :url_encoded
201
204
  f.response :json
202
205
  f.response :raise_error
203
206
  f.adapter Faraday.default_adapter
@@ -234,7 +237,13 @@ module Llamaparserb
234
237
  temp_file,
235
238
  detect_content_type(temp_file.path)
236
239
  )
237
- create_job(file)
240
+
241
+ response = @connection.post("upload") do |req|
242
+ req.headers["Authorization"] = "Bearer #{api_key}"
243
+ req.body = {file: file}
244
+ end
245
+
246
+ response.body["id"]
238
247
  ensure
239
248
  temp_file&.close
240
249
  temp_file&.unlink
@@ -249,9 +258,8 @@ module Llamaparserb
249
258
  response.body["id"]
250
259
  end
251
260
 
252
- def upload_params(file)
261
+ def upload_params(file = nil, url = nil)
253
262
  params = {
254
- file: file,
255
263
  language: @options[:language].to_s,
256
264
  parsing_instruction: @options[:parsing_instruction],
257
265
  invalidate_cache: @options[:invalidate_cache],
@@ -286,6 +294,12 @@ module Llamaparserb
286
294
  params[:azure_openai_key] = @options[:azure_openai_key] if @options[:azure_openai_key]
287
295
  params[:http_proxy] = @options[:http_proxy] if @options[:http_proxy]
288
296
 
297
+ if url
298
+ params[:input_url] = url.to_s
299
+ elsif file
300
+ params[:file] = file
301
+ end
302
+
289
303
  params.compact
290
304
  end
291
305
 
@@ -335,5 +349,20 @@ module Llamaparserb
335
349
  raise Error, "Unsupported file type: #{extension}. Supported types: #{SUPPORTED_FILE_TYPES.join(", ")}"
336
350
  end
337
351
  end
352
+
353
+ def create_job_from_url(url)
354
+ log "Creating job from URL: #{url}", :debug
355
+
356
+ response = @connection.post("upload") do |req|
357
+ req.headers["Authorization"] = "Bearer #{api_key}"
358
+ req.headers["Accept"] = "application/json"
359
+ # Create a simple form data request
360
+ req.options.timeout = 30 # Optional: add timeout
361
+ req.body = {"input_url" => url.to_s}
362
+ end
363
+
364
+ log "Response: #{response.body.inspect}", :debug
365
+ response.body["id"]
366
+ end
338
367
  end
339
368
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llamaparserb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Heidar Bernhardsson