llamaparserb 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14e4e5deea50bc3f5cb0e32c3c3029f6b26fda4cacf91be0f96e9f677f92d1e5
4
- data.tar.gz: e3e6cb569456d1c649be22ad3e5af65e30484fce50582d7493f5e22b2ac03f61
3
+ metadata.gz: 7bc729e8371ed2f748eaef9a92b82380c2e7d674caf6c4fe1b17e7e53d8ea62a
4
+ data.tar.gz: 8f510a5be8efc2877b617bd7e4682a4cb0e92c4fd794e6e6ef036481f3d57284
5
5
  SHA512:
6
- metadata.gz: 72121ad4b70f95ddd2bfe23f129d5c9ee634543543ba43e4faa437da67bf3cbdf8bc1ea828aeb58d43073bed46c28d4010a6865b5bc006c32ccfc30a099d1d65
7
- data.tar.gz: 5bea9e367b71d38fe8f6a1a80be0b4fd190e3e56b780e369d33c3319dfd91b483283f4b43ab8fc484468ab611c5e3efc639d8302ba611ccfb71258c459fdb783
6
+ metadata.gz: 5e6c3d6df9c69da63cf631a296c9618c074fc127c48cc23b398237dc015f4cb4d2bf4d2f4f50b7ed380ac053ff98c19ea9fc3530d890db34cf090e2084fb0821
7
+ data.tar.gz: 04b10a441f82670dc58d873d586cdd043b94f1a477b2d72b6494128d9ab76cd2f8656b1c5db0ad61487d4efe51dbc42e63b047c8101f6f74593d29e098965b52
data/CHANGELOG.md CHANGED
@@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.2.1] - 2024-11-28
10
+ ### Fixed
11
+ - Fix parse_file to handle files that are not on the local filesystem
12
+
9
13
  ## [0.2.0] - 2024-11-28
10
14
  ### Changed
11
15
  - Allow passing in a string or an IO object to `parse_file`
data/lib/llamaparserb/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Llamaparserb
4
- VERSION = "0.2.0"
4
+ VERSION = "0.2.1"
5
5
  end
data/lib/llamaparserb.rb CHANGED
@@ -45,11 +45,16 @@ module Llamaparserb
45
45
  def parse_file(file_input, file_type = nil)
46
46
  case file_input
47
47
  when String
48
- # Treat as file path
49
- job_id = create_job_from_path(file_input)
50
- log "Started parsing file under job_id #{job_id}", :info
48
+ if file_type
49
+ job_id = create_job_from_io(file_input, file_type)
50
+ log "Started parsing binary data under job_id #{job_id}", :info
51
+ elsif File.exist?(file_input)
52
+ job_id = create_job_from_path(file_input)
53
+ log "Started parsing file under job_id #{job_id}", :info
54
+ else
55
+ raise Error, "file_type parameter is required for binary string input"
56
+ end
51
57
  when IO, StringIO, Tempfile
52
- # Treat as file object
53
58
  raise Error, "file_type parameter is required for IO objects" unless file_type
54
59
  job_id = create_job_from_io(file_input, file_type)
55
60
  log "Started parsing in-memory file under job_id #{job_id}", :info
@@ -63,6 +68,8 @@ module Llamaparserb
63
68
  result
64
69
  rescue => e
65
70
  handle_error(e, file_input)
71
+ raise unless @options[:ignore_errors]
72
+ nil
66
73
  end
67
74
 
68
75
  private
@@ -108,15 +115,19 @@ module Llamaparserb
108
115
 
109
116
  def log(message, level = :debug)
110
117
  return unless @options[:verbose]
118
+
119
+ # Convert message to string and force UTF-8 encoding, replacing invalid characters
120
+ safe_message = message.to_s.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
121
+
111
122
  case level
112
123
  when :info
113
- logger.info(message)
124
+ logger.info(safe_message)
114
125
  when :warn
115
- logger.warn(message)
126
+ logger.warn(safe_message)
116
127
  when :error
117
- logger.error(message)
128
+ logger.error(safe_message)
118
129
  else
119
- logger.debug(message)
130
+ logger.debug(safe_message)
120
131
  end
121
132
  end
122
133
 
@@ -157,7 +168,13 @@ module Llamaparserb
157
168
 
158
169
  def handle_error(error, file_input)
159
170
  if @options[:ignore_errors]
160
- log "Error while parsing file '#{file_input}'", :error
171
+ safe_message = if file_input.is_a?(String) && !File.exist?(file_input)
172
+ "binary data"
173
+ else
174
+ file_input.class.to_s
175
+ end
176
+
177
+ log "Error while parsing file (#{safe_message}): #{error.message}", :error
161
178
  nil
162
179
  else
163
180
  raise error
@@ -183,15 +200,21 @@ module Llamaparserb
183
200
  create_job(file)
184
201
  end
185
202
 
186
- def create_job_from_io(io, file_type)
187
- # Ensure file_type starts with a dot
203
+ def create_job_from_io(io_or_string, file_type)
188
204
  file_type = ".#{file_type}" unless file_type.start_with?(".")
189
205
  validate_file_type!(file_type)
190
206
 
191
207
  temp_file = Tempfile.new(["upload", file_type])
192
208
  temp_file.binmode
193
- io.rewind
194
- temp_file.write(io.read)
209
+
210
+ case io_or_string
211
+ when String
212
+ temp_file.write(io_or_string.force_encoding("ASCII-8BIT"))
213
+ else
214
+ io_or_string.rewind if io_or_string.respond_to?(:rewind)
215
+ temp_file.write(io_or_string.read.force_encoding("ASCII-8BIT"))
216
+ end
217
+
195
218
  temp_file.rewind
196
219
 
197
220
  file = Faraday::Multipart::FilePart.new(
@@ -267,7 +290,12 @@ module Llamaparserb
267
290
  end
268
291
 
269
292
  def validate_file_type!(file_path)
270
- extension = File.extname(file_path).downcase
293
+ extension = if file_path.start_with?(".")
294
+ file_path
295
+ else
296
+ File.extname(file_path).downcase
297
+ end
298
+
271
299
  unless SUPPORTED_FILE_TYPES.include?(extension)
272
300
  raise Error, "Unsupported file type: #{extension}. Supported types: #{SUPPORTED_FILE_TYPES.join(", ")}"
273
301
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llamaparserb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Heidar Bernhardsson