llamaparserb 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ba81bbf8d24dc79b57a29c8c40764c42d700012e6608d1a494075dd63900d06f
4
- data.tar.gz: 1ce8846e182bf7025d90e8722148554fe2e65d47d82e1671e19f7e6386d30ce9
3
+ metadata.gz: 9c49dc1624b1b955cad8032696308fc81357533113cf429f53fe063d5db0cab2
4
+ data.tar.gz: 2eee40a9054fe05d02a094828bb47a6cc3a5b65ca6fcb5864b9648cff5f0b418
5
5
  SHA512:
6
- metadata.gz: b5c86e77644210049df9a1095049e2a276f70e40208637e80fd14283fded8eee45ec034cc9bd7c205b802ef24be252989bbd5be671ac50981c0acb998876131b
7
- data.tar.gz: 91ea52459cc1fc38f15dd5b050a2c25449147f8905d24af34c76986f48c84f54d0187bad3c0d3b4da81061e8d593982252229611592948deffdc7a6d6d6c066f
6
+ metadata.gz: 50ed45e0d813d776b79fce2e87593cdc88063158045c62ba76e1bc0e353fb1b27b2b03b87dc4253443b4b2a355fec179708d04e1920dc0f47730c271e3ff81cc
7
+ data.tar.gz: 9f86c50b3f5c4bd987c9bab987c5b139ce1dd998577d04ea3fba53d64b430d3cdcc0af3d880d73ebea1914341bc185fb78469f78d9521098f06510c7024b8746
data/CHANGELOG.md CHANGED
@@ -6,14 +6,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.3.0] - 2024-11-28
10
+ ### Added
11
+ - Add support for parsing files from URLs
12
+
9
13
  ## [0.2.3] - 2024-11-28
10
14
  ### Added
11
15
  - Add support for all supported optional llamaparse parameters to `parse_file`
12
16
 
17
+ [0.2.3]: https://github.com/horizing/llamaparserb/compare/v0.2.2...v0.2.3
18
+
13
19
  ## [0.2.2] - 2024-11-28
14
20
  ### Fixed
15
21
  - Fix issue with handling file path
16
22
 
23
+ [0.2.2]: https://github.com/horizing/llamaparserb/releases/tag/v0.2.2
24
+
17
25
  ## [0.2.1] - 2024-11-28
18
26
  ### Fixed
19
27
  - Fix parse_file to handle files that are not on the local filesystem
@@ -29,8 +37,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
29
37
 
30
38
  ## [0.1.0] - 2024-11-27
31
39
  ### Added
32
- - Initial release
33
-
34
- [Unreleased]: https://github.com/horizing/llamaparserb/compare/v0.1.0...HEAD
35
- [0.1.1]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.1...v0.1.0
36
- [0.1.0]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.0
40
+ - Initial release
data/README.md CHANGED
@@ -43,11 +43,14 @@ text = client.parse_file(file_content, 'pdf')
43
43
  # Parse a file to markdown
44
44
  client = Llamaparserb::Client.new(ENV['LLAMA_CLOUD_API_KEY'], result_type: "markdown")
45
45
  markdown = client.parse_file('path/to/document.pdf')
46
+
47
+ # Parse a file from a URL
48
+ markdown = client.parse_file('https://example.com/document.pdf')
46
49
  ```
47
50
 
48
51
  ### File Input Options
49
52
 
50
- The `parse_file` method accepts two types of inputs:
53
+ The `parse_file` method accepts three types of inputs:
51
54
 
52
55
  1. File path (String):
53
56
  ```ruby
@@ -69,6 +72,11 @@ temp_file = Tempfile.new(['document', '.pdf'])
69
72
  client.parse_file(temp_file, 'pdf')
70
73
  ```
71
74
 
75
+ 3. URL (String):
76
+ ```ruby
77
+ client.parse_file('https://example.com/document.pdf')
78
+ ```
79
+
72
80
  ### Advanced Options
73
81
 
74
82
  ```ruby
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Llamaparserb
4
- VERSION = "0.2.3"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/llamaparserb.rb CHANGED
@@ -51,6 +51,9 @@ module Llamaparserb
51
51
  elsif File.exist?(file_input)
52
52
  job_id = create_job_from_path(file_input)
53
53
  log "Started parsing file under job_id #{job_id}", :info
54
+ elsif URI::DEFAULT_PARSER.make_regexp.match?(file_input)
55
+ job_id = create_job_from_url(file_input)
56
+ log "Started parsing URL under job_id #{job_id}", :info
54
57
  else
55
58
  raise Error, "file_type parameter is required for binary string input"
56
59
  end
@@ -197,7 +200,7 @@ module Llamaparserb
197
200
  def build_connection
198
201
  Faraday.new(url: base_url) do |f|
199
202
  f.request :multipart
200
- f.request :json
203
+ f.request :url_encoded
201
204
  f.response :json
202
205
  f.response :raise_error
203
206
  f.adapter Faraday.default_adapter
@@ -234,7 +237,13 @@ module Llamaparserb
234
237
  temp_file,
235
238
  detect_content_type(temp_file.path)
236
239
  )
237
- create_job(file)
240
+
241
+ response = @connection.post("upload") do |req|
242
+ req.headers["Authorization"] = "Bearer #{api_key}"
243
+ req.body = {file: file}
244
+ end
245
+
246
+ response.body["id"]
238
247
  ensure
239
248
  temp_file&.close
240
249
  temp_file&.unlink
@@ -249,9 +258,8 @@ module Llamaparserb
249
258
  response.body["id"]
250
259
  end
251
260
 
252
- def upload_params(file)
261
+ def upload_params(file = nil, url = nil)
253
262
  params = {
254
- file: file,
255
263
  language: @options[:language].to_s,
256
264
  parsing_instruction: @options[:parsing_instruction],
257
265
  invalidate_cache: @options[:invalidate_cache],
@@ -286,6 +294,12 @@ module Llamaparserb
286
294
  params[:azure_openai_key] = @options[:azure_openai_key] if @options[:azure_openai_key]
287
295
  params[:http_proxy] = @options[:http_proxy] if @options[:http_proxy]
288
296
 
297
+ if url
298
+ params[:input_url] = url.to_s
299
+ elsif file
300
+ params[:file] = file
301
+ end
302
+
289
303
  params.compact
290
304
  end
291
305
 
@@ -335,5 +349,20 @@ module Llamaparserb
335
349
  raise Error, "Unsupported file type: #{extension}. Supported types: #{SUPPORTED_FILE_TYPES.join(", ")}"
336
350
  end
337
351
  end
352
+
353
+ def create_job_from_url(url)
354
+ log "Creating job from URL: #{url}", :debug
355
+
356
+ response = @connection.post("upload") do |req|
357
+ req.headers["Authorization"] = "Bearer #{api_key}"
358
+ req.headers["Accept"] = "application/json"
359
+ # Create a simple form data request
360
+ req.options.timeout = 30 # Optional: add timeout
361
+ req.body = {"input_url" => url.to_s}
362
+ end
363
+
364
+ log "Response: #{response.body.inspect}", :debug
365
+ response.body["id"]
366
+ end
338
367
  end
339
368
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llamaparserb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Heidar Bernhardsson