llamaparserb 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14e4e5deea50bc3f5cb0e32c3c3029f6b26fda4cacf91be0f96e9f677f92d1e5
4
- data.tar.gz: e3e6cb569456d1c649be22ad3e5af65e30484fce50582d7493f5e22b2ac03f61
3
+ metadata.gz: 7bc729e8371ed2f748eaef9a92b82380c2e7d674caf6c4fe1b17e7e53d8ea62a
4
+ data.tar.gz: 8f510a5be8efc2877b617bd7e4682a4cb0e92c4fd794e6e6ef036481f3d57284
5
5
  SHA512:
6
- metadata.gz: 72121ad4b70f95ddd2bfe23f129d5c9ee634543543ba43e4faa437da67bf3cbdf8bc1ea828aeb58d43073bed46c28d4010a6865b5bc006c32ccfc30a099d1d65
7
- data.tar.gz: 5bea9e367b71d38fe8f6a1a80be0b4fd190e3e56b780e369d33c3319dfd91b483283f4b43ab8fc484468ab611c5e3efc639d8302ba611ccfb71258c459fdb783
6
+ metadata.gz: 5e6c3d6df9c69da63cf631a296c9618c074fc127c48cc23b398237dc015f4cb4d2bf4d2f4f50b7ed380ac053ff98c19ea9fc3530d890db34cf090e2084fb0821
7
+ data.tar.gz: 04b10a441f82670dc58d873d586cdd043b94f1a477b2d72b6494128d9ab76cd2f8656b1c5db0ad61487d4efe51dbc42e63b047c8101f6f74593d29e098965b52
data/CHANGELOG.md CHANGED
@@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.2.1] - 2024-11-28
10
+ ### Fixed
11
+ - Fix parse_file to handle files that are not on the local filesystem
12
+
9
13
  ## [0.2.0] - 2024-11-28
10
14
  ### Changed
11
15
  - Allow passing in a string or an IO object to `parse_file`
data/lib/llamaparserb/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Llamaparserb
4
- VERSION = "0.2.0"
4
+ VERSION = "0.2.1"
5
5
  end
data/lib/llamaparserb.rb CHANGED
@@ -45,11 +45,16 @@ module Llamaparserb
45
45
  def parse_file(file_input, file_type = nil)
46
46
  case file_input
47
47
  when String
48
- # Treat as file path
49
- job_id = create_job_from_path(file_input)
50
- log "Started parsing file under job_id #{job_id}", :info
48
+ if file_type
49
+ job_id = create_job_from_io(file_input, file_type)
50
+ log "Started parsing binary data under job_id #{job_id}", :info
51
+ elsif File.exist?(file_input)
52
+ job_id = create_job_from_path(file_input)
53
+ log "Started parsing file under job_id #{job_id}", :info
54
+ else
55
+ raise Error, "file_type parameter is required for binary string input"
56
+ end
51
57
  when IO, StringIO, Tempfile
52
- # Treat as file object
53
58
  raise Error, "file_type parameter is required for IO objects" unless file_type
54
59
  job_id = create_job_from_io(file_input, file_type)
55
60
  log "Started parsing in-memory file under job_id #{job_id}", :info
@@ -63,6 +68,8 @@ module Llamaparserb
63
68
  result
64
69
  rescue => e
65
70
  handle_error(e, file_input)
71
+ raise unless @options[:ignore_errors]
72
+ nil
66
73
  end
67
74
 
68
75
  private
@@ -108,15 +115,19 @@ module Llamaparserb
108
115
 
109
116
  def log(message, level = :debug)
110
117
  return unless @options[:verbose]
118
+
119
+ # Convert message to string and force UTF-8 encoding, replacing invalid characters
120
+ safe_message = message.to_s.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
121
+
111
122
  case level
112
123
  when :info
113
- logger.info(message)
124
+ logger.info(safe_message)
114
125
  when :warn
115
- logger.warn(message)
126
+ logger.warn(safe_message)
116
127
  when :error
117
- logger.error(message)
128
+ logger.error(safe_message)
118
129
  else
119
- logger.debug(message)
130
+ logger.debug(safe_message)
120
131
  end
121
132
  end
122
133
 
@@ -157,7 +168,13 @@ module Llamaparserb
157
168
 
158
169
  def handle_error(error, file_input)
159
170
  if @options[:ignore_errors]
160
- log "Error while parsing file '#{file_input}'", :error
171
+ safe_message = if file_input.is_a?(String) && !File.exist?(file_input)
172
+ "binary data"
173
+ else
174
+ file_input.class.to_s
175
+ end
176
+
177
+ log "Error while parsing file (#{safe_message}): #{error.message}", :error
161
178
  nil
162
179
  else
163
180
  raise error
@@ -183,15 +200,21 @@ module Llamaparserb
183
200
  create_job(file)
184
201
  end
185
202
 
186
- def create_job_from_io(io, file_type)
187
- # Ensure file_type starts with a dot
203
+ def create_job_from_io(io_or_string, file_type)
188
204
  file_type = ".#{file_type}" unless file_type.start_with?(".")
189
205
  validate_file_type!(file_type)
190
206
 
191
207
  temp_file = Tempfile.new(["upload", file_type])
192
208
  temp_file.binmode
193
- io.rewind
194
- temp_file.write(io.read)
209
+
210
+ case io_or_string
211
+ when String
212
+ temp_file.write(io_or_string.force_encoding("ASCII-8BIT"))
213
+ else
214
+ io_or_string.rewind if io_or_string.respond_to?(:rewind)
215
+ temp_file.write(io_or_string.read.force_encoding("ASCII-8BIT"))
216
+ end
217
+
195
218
  temp_file.rewind
196
219
 
197
220
  file = Faraday::Multipart::FilePart.new(
@@ -267,7 +290,12 @@ module Llamaparserb
267
290
  end
268
291
 
269
292
  def validate_file_type!(file_path)
270
- extension = File.extname(file_path).downcase
293
+ extension = if file_path.start_with?(".")
294
+ file_path
295
+ else
296
+ File.extname(file_path).downcase
297
+ end
298
+
271
299
  unless SUPPORTED_FILE_TYPES.include?(extension)
272
300
  raise Error, "Unsupported file type: #{extension}. Supported types: #{SUPPORTED_FILE_TYPES.join(", ")}"
273
301
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llamaparserb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Heidar Bernhardsson