llamaparserb 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c22d4883933d23de15c34dd65a2e851c7294ab934fcb43be1fd79cafc8c95515
4
- data.tar.gz: 6cde8b62919e0a73ccc7e73f00231c5711552f7a7dad1a8e29cccbc8c55551d4
3
+ metadata.gz: 14e4e5deea50bc3f5cb0e32c3c3029f6b26fda4cacf91be0f96e9f677f92d1e5
4
+ data.tar.gz: e3e6cb569456d1c649be22ad3e5af65e30484fce50582d7493f5e22b2ac03f61
5
5
  SHA512:
6
- metadata.gz: 54fee6b5080d020caf2f28f77645336fea1fdbcace4442ffe2f842bb3101e63f0df62102b45e3af6caaae51a1d0c83cff5278fc870f3e1b0fb81ee36e4c7e0fd
7
- data.tar.gz: ac05eeeaaceb34c47490797d8966ac12a3fd6e03050b1a4e51fe77ed8ff813d6deaaf3a85ba32219a47b1e53dd9ea25e51e91e1d582aa4f3e1d6c64d2f99c97d
6
+ metadata.gz: 72121ad4b70f95ddd2bfe23f129d5c9ee634543543ba43e4faa437da67bf3cbdf8bc1ea828aeb58d43073bed46c28d4010a6865b5bc006c32ccfc30a099d1d65
7
+ data.tar.gz: 5bea9e367b71d38fe8f6a1a80be0b4fd190e3e56b780e369d33c3319dfd91b483283f4b43ab8fc484468ab611c5e3efc639d8302ba611ccfb71258c459fdb783
data/CHANGELOG.md CHANGED
@@ -6,9 +6,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.2.0] - 2024-11-28
10
+ ### Changed
11
+ - Allow passing in a string or an IO object to `parse_file`
12
+ - Add support for file type parameter to `parse_file`
13
+
14
+ ## [0.1.1] - 2024-11-28
15
+ ### Changed
16
+ - Move gem ownership to Horizing
17
+
9
18
  ## [0.1.0] - 2024-11-27
10
19
  ### Added
11
20
  - Initial release
12
21
 
13
- [Unreleased]: https://github.com/heidar/llamaparserb/compare/v0.1.0...HEAD
14
- [0.1.0]: https://github.com/heidar/llamaparserb/releases/tag/v0.1.0
22
+ [Unreleased]: https://github.com/horizing/llamaparserb/compare/v0.2.0...HEAD
23
+ [0.1.1]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.1
24
+ [0.1.0]: https://github.com/horizing/llamaparserb/releases/tag/v0.1.0
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2024 Heidar Bernhardsson
3
+ Copyright (c) 2024 Horizing
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -32,14 +32,43 @@ require 'llamaparserb'
32
32
  # Initialize client with API key
33
33
  client = Llamaparserb::Client.new(ENV['LLAMA_CLOUD_API_KEY'])
34
34
 
35
- # Parse a file to text (default)
35
+ # Parse a file from disk (to text by default)
36
36
  text = client.parse_file('path/to/document.pdf')
37
37
 
38
+ # Parse an in-memory file (requires file type)
39
+ require 'open-uri'
40
+ file_content = URI.open('https://example.com/document.pdf')
41
+ text = client.parse_file(file_content, 'pdf')
42
+
38
43
  # Parse a file to markdown
39
44
  client = Llamaparserb::Client.new(ENV['LLAMA_CLOUD_API_KEY'], result_type: "markdown")
40
45
  markdown = client.parse_file('path/to/document.pdf')
41
46
  ```
42
47
 
48
+ ### File Input Options
49
+
50
+ The `parse_file` method accepts two types of inputs:
51
+
52
+ 1. File path (String):
53
+ ```ruby
54
+ client.parse_file('path/to/document.pdf')
55
+ ```
56
+
57
+ 2. IO object (requires file type parameter):
58
+ ```ruby
59
+ # From a URL
60
+ file_content = URI.open('https://example.com/document.pdf')
61
+ client.parse_file(file_content, 'pdf')
62
+
63
+ # From memory
64
+ io = StringIO.new(File.binread('path/to/document.pdf'))
65
+ client.parse_file(io, 'pdf')
66
+
67
+ # From a Tempfile
68
+ temp_file = Tempfile.new(['document', '.pdf'])
69
+ client.parse_file(temp_file, 'pdf')
70
+ ```
71
+
43
72
  ### Advanced Options
44
73
 
45
74
  ```ruby
@@ -107,7 +136,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
107
136
 
108
137
  ## Contributing
109
138
 
110
- Bug reports and pull requests are welcome on GitHub at https://github.com/heidar/llamaparserb.
139
+ Bug reports and pull requests are welcome on GitHub at https://github.com/horizing/llamaparserb.
111
140
 
112
141
  ## License
113
142
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Llamaparserb
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/llamaparserb.rb CHANGED
@@ -8,6 +8,7 @@ require "mime/types"
8
8
  require "uri"
9
9
  require "async"
10
10
  require "logger"
11
+ require "tempfile"
11
12
 
12
13
  module Llamaparserb
13
14
  class Error < StandardError; end
@@ -41,17 +42,27 @@ module Llamaparserb
41
42
  @connection = build_connection
42
43
  end
43
44
 
44
- def parse_file(file_path)
45
- job_id = create_job(file_path)
46
- log "Started parsing file under job_id #{job_id}", :info
45
+ def parse_file(file_input, file_type = nil)
46
+ case file_input
47
+ when String
48
+ # Treat as file path
49
+ job_id = create_job_from_path(file_input)
50
+ log "Started parsing file under job_id #{job_id}", :info
51
+ when IO, StringIO, Tempfile
52
+ # Treat as file object
53
+ raise Error, "file_type parameter is required for IO objects" unless file_type
54
+ job_id = create_job_from_io(file_input, file_type)
55
+ log "Started parsing in-memory file under job_id #{job_id}", :info
56
+ else
57
+ raise Error, "Invalid input type. Expected String (file path) or IO object, got #{file_input.class}"
58
+ end
47
59
 
48
60
  wait_for_completion(job_id)
49
-
50
61
  result = get_result(job_id)
51
62
  log "Successfully retrieved result", :info
52
63
  result
53
64
  rescue => e
54
- handle_error(e, file_path)
65
+ handle_error(e, file_input)
55
66
  end
56
67
 
57
68
  private
@@ -144,9 +155,9 @@ module Llamaparserb
144
155
  end
145
156
  end
146
157
 
147
- def handle_error(error, file_path)
158
+ def handle_error(error, file_input)
148
159
  if @options[:ignore_errors]
149
- log "Error while parsing file '#{file_path}': #{error.message}", :error
160
+ log "Error while parsing file '#{file_input}'", :error
150
161
  nil
151
162
  else
152
163
  raise error
@@ -163,14 +174,37 @@ module Llamaparserb
163
174
  end
164
175
  end
165
176
 
166
- def create_job(file_path)
177
+ def create_job_from_path(file_path)
167
178
  validate_file_type!(file_path)
168
-
169
179
  file = Faraday::Multipart::FilePart.new(
170
180
  file_path,
171
181
  detect_content_type(file_path)
172
182
  )
183
+ create_job(file)
184
+ end
185
+
186
+ def create_job_from_io(io, file_type)
187
+ # Ensure file_type starts with a dot
188
+ file_type = ".#{file_type}" unless file_type.start_with?(".")
189
+ validate_file_type!(file_type)
190
+
191
+ temp_file = Tempfile.new(["upload", file_type])
192
+ temp_file.binmode
193
+ io.rewind
194
+ temp_file.write(io.read)
195
+ temp_file.rewind
196
+
197
+ file = Faraday::Multipart::FilePart.new(
198
+ temp_file,
199
+ detect_content_type(temp_file.path)
200
+ )
201
+ create_job(file)
202
+ ensure
203
+ temp_file&.close
204
+ temp_file&.unlink
205
+ end
173
206
 
207
+ def create_job(file)
174
208
  response = @connection.post("upload") do |req|
175
209
  req.headers["Authorization"] = "Bearer #{api_key}"
176
210
  req.body = upload_params(file)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llamaparserb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Heidar Bernhardsson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-11-27 00:00:00.000000000 Z
11
+ date: 2024-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -66,9 +66,10 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '2.0'
69
- description: A llamaparse client for Ruby.
69
+ description: A llamaparse client for Ruby. Loosely based on the Python version from
70
+ llamaparse.
70
71
  email:
71
- - heidar@heidarb.com
72
+ - heidar@horizing.io
72
73
  executables: []
73
74
  extensions: []
74
75
  extra_rdoc_files: []
@@ -78,14 +79,14 @@ files:
78
79
  - README.md
79
80
  - lib/llamaparserb.rb
80
81
  - lib/llamaparserb/version.rb
81
- homepage: https://github.com/heidar/llamaparserb
82
+ homepage: https://github.com/horizing/llamaparserb
82
83
  licenses:
83
84
  - MIT
84
85
  metadata:
85
86
  allowed_push_host: https://rubygems.org
86
- homepage_uri: https://github.com/heidar/llamaparserb
87
- source_code_uri: https://github.com/heidar/llamaparserb
88
- changelog_uri: https://github.com/heidar/llamaparserb/blob/master/CHANGELOG.md
87
+ homepage_uri: https://github.com/horizing/llamaparserb
88
+ source_code_uri: https://github.com/horizing/llamaparserb
89
+ changelog_uri: https://github.com/horizing/llamaparserb/blob/master/CHANGELOG.md
89
90
  post_install_message:
90
91
  rdoc_options: []
91
92
  require_paths: