llamaparserb 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c22d4883933d23de15c34dd65a2e851c7294ab934fcb43be1fd79cafc8c95515
4
- data.tar.gz: 6cde8b62919e0a73ccc7e73f00231c5711552f7a7dad1a8e29cccbc8c55551d4
3
+ metadata.gz: 14e4e5deea50bc3f5cb0e32c3c3029f6b26fda4cacf91be0f96e9f677f92d1e5
4
+ data.tar.gz: e3e6cb569456d1c649be22ad3e5af65e30484fce50582d7493f5e22b2ac03f61
5
5
  SHA512:
6
- metadata.gz: 54fee6b5080d020caf2f28f77645336fea1fdbcace4442ffe2f842bb3101e63f0df62102b45e3af6caaae51a1d0c83cff5278fc870f3e1b0fb81ee36e4c7e0fd
7
- data.tar.gz: ac05eeeaaceb34c47490797d8966ac12a3fd6e03050b1a4e51fe77ed8ff813d6deaaf3a85ba32219a47b1e53dd9ea25e51e91e1d582aa4f3e1d6c64d2f99c97d
6
+ metadata.gz: 72121ad4b70f95ddd2bfe23f129d5c9ee634543543ba43e4faa437da67bf3cbdf8bc1ea828aeb58d43073bed46c28d4010a6865b5bc006c32ccfc30a099d1d65
7
+ data.tar.gz: 5bea9e367b71d38fe8f6a1a80be0b4fd190e3e56b780e369d33c3319dfd91b483283f4b43ab8fc484468ab611c5e3efc639d8302ba611ccfb71258c459fdb783
data/CHANGELOG.md CHANGED
@@ -6,9 +6,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.2.0] - 2024-11-28
10
+ ### Changed
11
+ - Allow passing in a string or an IO object to `parse_file`
12
+ - Add support for file type parameter to `parse_file`
13
+
14
+ ## [0.1.1] - 2024-11-28
15
+ ### Changed
16
+ - Move gem ownership to Horizing
17
+
9
18
  ## [0.1.0] - 2024-11-27
10
19
  ### Added
11
20
  - Initial release
12
21
 
13
- [Unreleased]: https://github.com/heidar/llamaparserb/compare/v0.1.0...HEAD
14
- [0.1.0]: https://github.com/heidar/llamaparserb/releases/tag/v0.1.0
22
+ [Unreleased]: https://github.com/horizing/llamaparserb/compare/v0.2.0...HEAD
23
+ [0.1.1]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.1
24
+ [0.1.0]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.0
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2024 Heidar Bernhardsson
3
+ Copyright (c) 2024 Horizing
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -32,14 +32,43 @@ require 'llamaparserb'
32
32
  # Initialize client with API key
33
33
  client = Llamaparserb::Client.new(ENV['LLAMA_CLOUD_API_KEY'])
34
34
 
35
- # Parse a file to text (default)
35
+ # Parse a file from disk (to text by default)
36
36
  text = client.parse_file('path/to/document.pdf')
37
37
 
38
+ # Parse an in-memory file (requires file type)
39
+ require 'open-uri'
40
+ file_content = URI.open('https://example.com/document.pdf')
41
+ text = client.parse_file(file_content, 'pdf')
42
+
38
43
  # Parse a file to markdown
39
44
  client = Llamaparserb::Client.new(ENV['LLAMA_CLOUD_API_KEY'], result_type: "markdown")
40
45
  markdown = client.parse_file('path/to/document.pdf')
41
46
  ```
42
47
 
48
+ ### File Input Options
49
+
50
+ The `parse_file` method accepts two types of inputs:
51
+
52
+ 1. File path (String):
53
+ ```ruby
54
+ client.parse_file('path/to/document.pdf')
55
+ ```
56
+
57
+ 2. IO object (requires file type parameter):
58
+ ```ruby
59
+ # From a URL
60
+ file_content = URI.open('https://example.com/document.pdf')
61
+ client.parse_file(file_content, 'pdf')
62
+
63
+ # From memory
64
+ io = StringIO.new(File.binread('path/to/document.pdf'))
65
+ client.parse_file(io, 'pdf')
66
+
67
+ # From a Tempfile
68
+ temp_file = Tempfile.new(['document', '.pdf'])
69
+ client.parse_file(temp_file, 'pdf')
70
+ ```
71
+
43
72
  ### Advanced Options
44
73
 
45
74
  ```ruby
@@ -107,7 +136,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
107
136
 
108
137
  ## Contributing
109
138
 
110
- Bug reports and pull requests are welcome on GitHub at https://github.com/heidar/llamaparserb.
139
+ Bug reports and pull requests are welcome on GitHub at https://github.com/horizing/llamaparserb.
111
140
 
112
141
  ## License
113
142
 
data/lib/llamaparserb/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Llamaparserb
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/llamaparserb.rb CHANGED
@@ -8,6 +8,7 @@ require "mime/types"
8
8
  require "uri"
9
9
  require "async"
10
10
  require "logger"
11
+ require "tempfile"
11
12
 
12
13
  module Llamaparserb
13
14
  class Error < StandardError; end
@@ -41,17 +42,27 @@ module Llamaparserb
41
42
  @connection = build_connection
42
43
  end
43
44
 
44
- def parse_file(file_path)
45
- job_id = create_job(file_path)
46
- log "Started parsing file under job_id #{job_id}", :info
45
+ def parse_file(file_input, file_type = nil)
46
+ case file_input
47
+ when String
48
+ # Treat as file path
49
+ job_id = create_job_from_path(file_input)
50
+ log "Started parsing file under job_id #{job_id}", :info
51
+ when IO, StringIO, Tempfile
52
+ # Treat as file object
53
+ raise Error, "file_type parameter is required for IO objects" unless file_type
54
+ job_id = create_job_from_io(file_input, file_type)
55
+ log "Started parsing in-memory file under job_id #{job_id}", :info
56
+ else
57
+ raise Error, "Invalid input type. Expected String (file path) or IO object, got #{file_input.class}"
58
+ end
47
59
 
48
60
  wait_for_completion(job_id)
49
-
50
61
  result = get_result(job_id)
51
62
  log "Successfully retrieved result", :info
52
63
  result
53
64
  rescue => e
54
- handle_error(e, file_path)
65
+ handle_error(e, file_input)
55
66
  end
56
67
 
57
68
  private
@@ -144,9 +155,9 @@ module Llamaparserb
144
155
  end
145
156
  end
146
157
 
147
- def handle_error(error, file_path)
158
+ def handle_error(error, file_input)
148
159
  if @options[:ignore_errors]
149
- log "Error while parsing file '#{file_path}': #{error.message}", :error
160
+ log "Error while parsing file '#{file_input}'", :error
150
161
  nil
151
162
  else
152
163
  raise error
@@ -163,14 +174,37 @@ module Llamaparserb
163
174
  end
164
175
  end
165
176
 
166
- def create_job(file_path)
177
+ def create_job_from_path(file_path)
167
178
  validate_file_type!(file_path)
168
-
169
179
  file = Faraday::Multipart::FilePart.new(
170
180
  file_path,
171
181
  detect_content_type(file_path)
172
182
  )
183
+ create_job(file)
184
+ end
185
+
186
+ def create_job_from_io(io, file_type)
187
+ # Ensure file_type starts with a dot
188
+ file_type = ".#{file_type}" unless file_type.start_with?(".")
189
+ validate_file_type!(file_type)
190
+
191
+ temp_file = Tempfile.new(["upload", file_type])
192
+ temp_file.binmode
193
+ io.rewind
194
+ temp_file.write(io.read)
195
+ temp_file.rewind
196
+
197
+ file = Faraday::Multipart::FilePart.new(
198
+ temp_file,
199
+ detect_content_type(temp_file.path)
200
+ )
201
+ create_job(file)
202
+ ensure
203
+ temp_file&.close
204
+ temp_file&.unlink
205
+ end
173
206
 
207
+ def create_job(file)
174
208
  response = @connection.post("upload") do |req|
175
209
  req.headers["Authorization"] = "Bearer #{api_key}"
176
210
  req.body = upload_params(file)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llamaparserb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Heidar Bernhardsson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-11-27 00:00:00.000000000 Z
11
+ date: 2024-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -66,9 +66,10 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '2.0'
69
- description: A llamaparse client for Ruby.
69
+ description: A llamaparse client for Ruby. Loosely based on the Python version from
70
+ llamaparse.
70
71
  email:
71
- - heidar@heidarb.com
72
+ - heidar@horizing.io
72
73
  executables: []
73
74
  extensions: []
74
75
  extra_rdoc_files: []
@@ -78,14 +79,14 @@ files:
78
79
  - README.md
79
80
  - lib/llamaparserb.rb
80
81
  - lib/llamaparserb/version.rb
81
- homepage: https://github.com/heidar/llamaparserb
82
+ homepage: https://github.com/horizing/llamaparserb
82
83
  licenses:
83
84
  - MIT
84
85
  metadata:
85
86
  allowed_push_host: https://rubygems.org
86
- homepage_uri: https://github.com/heidar/llamaparserb
87
- source_code_uri: https://github.com/heidar/llamaparserb
88
- changelog_uri: https://github.com/heidar/llamaparserb/blob/master/CHANGELOG.md
87
+ homepage_uri: https://github.com/horizing/llamaparserb
88
+ source_code_uri: https://github.com/horizing/llamaparserb
89
+ changelog_uri: https://github.com/horizing/llamaparserb/blob/master/CHANGELOG.md
89
90
  post_install_message:
90
91
  rdoc_options: []
91
92
  require_paths: