llamaparserb 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -5
- data/README.md +9 -1
- data/lib/llamaparserb/version.rb +1 -1
- data/lib/llamaparserb.rb +33 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c49dc1624b1b955cad8032696308fc81357533113cf429f53fe063d5db0cab2
|
4
|
+
data.tar.gz: 2eee40a9054fe05d02a094828bb47a6cc3a5b65ca6fcb5864b9648cff5f0b418
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 50ed45e0d813d776b79fce2e87593cdc88063158045c62ba76e1bc0e353fb1b27b2b03b87dc4253443b4b2a355fec179708d04e1920dc0f47730c271e3ff81cc
|
7
|
+
data.tar.gz: 9f86c50b3f5c4bd987c9bab987c5b139ce1dd998577d04ea3fba53d64b430d3cdcc0af3d880d73ebea1914341bc185fb78469f78d9521098f06510c7024b8746
|
data/CHANGELOG.md
CHANGED
@@ -6,14 +6,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [0.3.0] - 2024-11-28
|
10
|
+
### Added
|
11
|
+
- Add support for parsing files from URLs
|
12
|
+
|
9
13
|
## [0.2.3] - 2024-11-28
|
10
14
|
### Added
|
11
15
|
- Add support for all supported optional llamaparse parameters to `parse_file`
|
12
16
|
|
17
|
+
[0.2.3]: https://github.com/horizing/llamaparserb/releases/tag/v0.2.3
|
18
|
+
|
13
19
|
## [0.2.2] - 2024-11-28
|
14
20
|
### Fixed
|
15
21
|
- Fix issue with handling file path
|
16
22
|
|
23
|
+
[0.2.2]: https://github.com/horizing/llamaparserb/releases/tag/v0.2.2
|
24
|
+
|
17
25
|
## [0.2.1] - 2024-11-28
|
18
26
|
### Fixed
|
19
27
|
- Fix parse_file to handle files that are not on the local filesystem
|
@@ -29,8 +37,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
29
37
|
|
30
38
|
## [0.1.0] - 2024-11-27
|
31
39
|
### Added
|
32
|
-
- Initial release
|
33
|
-
|
34
|
-
[Unreleased]: https://github.com/horizing/llamaparserb/compare/v0.1.0...HEAD
|
35
|
-
[0.1.1]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.1...v0.1.0
|
36
|
-
[0.1.0]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.0
|
40
|
+
- Initial release
|
data/README.md
CHANGED
@@ -43,11 +43,14 @@ text = client.parse_file(file_content, 'pdf')
|
|
43
43
|
# Parse a file to markdown
|
44
44
|
client = Llamaparserb::Client.new(ENV['LLAMA_CLOUD_API_KEY'], result_type: "markdown")
|
45
45
|
markdown = client.parse_file('path/to/document.pdf')
|
46
|
+
|
47
|
+
# Parse a file from a URL
|
48
|
+
markdown = client.parse_file('https://example.com/document.pdf')
|
46
49
|
```
|
47
50
|
|
48
51
|
### File Input Options
|
49
52
|
|
50
|
-
The `parse_file` method accepts
|
53
|
+
The `parse_file` method accepts three types of inputs:
|
51
54
|
|
52
55
|
1. File path (String):
|
53
56
|
```ruby
|
@@ -69,6 +72,11 @@ temp_file = Tempfile.new(['document', '.pdf'])
|
|
69
72
|
client.parse_file(temp_file, 'pdf')
|
70
73
|
```
|
71
74
|
|
75
|
+
3. URL (String):
|
76
|
+
```ruby
|
77
|
+
client.parse_file('https://example.com/document.pdf')
|
78
|
+
```
|
79
|
+
|
72
80
|
### Advanced Options
|
73
81
|
|
74
82
|
```ruby
|
data/lib/llamaparserb/version.rb
CHANGED
data/lib/llamaparserb.rb
CHANGED
@@ -51,6 +51,9 @@ module Llamaparserb
|
|
51
51
|
elsif File.exist?(file_input)
|
52
52
|
job_id = create_job_from_path(file_input)
|
53
53
|
log "Started parsing file under job_id #{job_id}", :info
|
54
|
+
elsif URI::DEFAULT_PARSER.make_regexp.match?(file_input)
|
55
|
+
job_id = create_job_from_url(file_input)
|
56
|
+
log "Started parsing URL under job_id #{job_id}", :info
|
54
57
|
else
|
55
58
|
raise Error, "file_type parameter is required for binary string input"
|
56
59
|
end
|
@@ -197,7 +200,7 @@ module Llamaparserb
|
|
197
200
|
def build_connection
|
198
201
|
Faraday.new(url: base_url) do |f|
|
199
202
|
f.request :multipart
|
200
|
-
f.request :
|
203
|
+
f.request :url_encoded
|
201
204
|
f.response :json
|
202
205
|
f.response :raise_error
|
203
206
|
f.adapter Faraday.default_adapter
|
@@ -234,7 +237,13 @@ module Llamaparserb
|
|
234
237
|
temp_file,
|
235
238
|
detect_content_type(temp_file.path)
|
236
239
|
)
|
237
|
-
|
240
|
+
|
241
|
+
response = @connection.post("upload") do |req|
|
242
|
+
req.headers["Authorization"] = "Bearer #{api_key}"
|
243
|
+
req.body = {file: file}
|
244
|
+
end
|
245
|
+
|
246
|
+
response.body["id"]
|
238
247
|
ensure
|
239
248
|
temp_file&.close
|
240
249
|
temp_file&.unlink
|
@@ -249,9 +258,8 @@ module Llamaparserb
|
|
249
258
|
response.body["id"]
|
250
259
|
end
|
251
260
|
|
252
|
-
def upload_params(file)
|
261
|
+
def upload_params(file = nil, url = nil)
|
253
262
|
params = {
|
254
|
-
file: file,
|
255
263
|
language: @options[:language].to_s,
|
256
264
|
parsing_instruction: @options[:parsing_instruction],
|
257
265
|
invalidate_cache: @options[:invalidate_cache],
|
@@ -286,6 +294,12 @@ module Llamaparserb
|
|
286
294
|
params[:azure_openai_key] = @options[:azure_openai_key] if @options[:azure_openai_key]
|
287
295
|
params[:http_proxy] = @options[:http_proxy] if @options[:http_proxy]
|
288
296
|
|
297
|
+
if url
|
298
|
+
params[:input_url] = url.to_s
|
299
|
+
elsif file
|
300
|
+
params[:file] = file
|
301
|
+
end
|
302
|
+
|
289
303
|
params.compact
|
290
304
|
end
|
291
305
|
|
@@ -335,5 +349,20 @@ module Llamaparserb
|
|
335
349
|
raise Error, "Unsupported file type: #{extension}. Supported types: #{SUPPORTED_FILE_TYPES.join(", ")}"
|
336
350
|
end
|
337
351
|
end
|
352
|
+
|
353
|
+
def create_job_from_url(url)
|
354
|
+
log "Creating job from URL: #{url}", :debug
|
355
|
+
|
356
|
+
response = @connection.post("upload") do |req|
|
357
|
+
req.headers["Authorization"] = "Bearer #{api_key}"
|
358
|
+
req.headers["Accept"] = "application/json"
|
359
|
+
# Create a simple form data request
|
360
|
+
req.options.timeout = 30 # Optional: add timeout
|
361
|
+
req.body = {"input_url" => url.to_s}
|
362
|
+
end
|
363
|
+
|
364
|
+
log "Response: #{response.body.inspect}", :debug
|
365
|
+
response.body["id"]
|
366
|
+
end
|
338
367
|
end
|
339
368
|
end
|