llamaparserb 0.2.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14e4e5deea50bc3f5cb0e32c3c3029f6b26fda4cacf91be0f96e9f677f92d1e5
4
- data.tar.gz: e3e6cb569456d1c649be22ad3e5af65e30484fce50582d7493f5e22b2ac03f61
3
+ metadata.gz: a1761244b0d8ac8c6ee13eb10b50d3eadcb182d8e7bad24828ba70a290bc7507
4
+ data.tar.gz: cf7908683f630f17ef39b4c7e6393cb4637e350ea89c674c78a03a68b1bdf1c5
5
5
  SHA512:
6
- metadata.gz: 72121ad4b70f95ddd2bfe23f129d5c9ee634543543ba43e4faa437da67bf3cbdf8bc1ea828aeb58d43073bed46c28d4010a6865b5bc006c32ccfc30a099d1d65
7
- data.tar.gz: 5bea9e367b71d38fe8f6a1a80be0b4fd190e3e56b780e369d33c3319dfd91b483283f4b43ab8fc484468ab611c5e3efc639d8302ba611ccfb71258c459fdb783
6
+ metadata.gz: 7bc3f4c44814c1cf63ad480882ee7f2b647af3ee8b34b46d95da7ae90c372b656c0f69fd224ba1c03d26cfa5185ee89a356f05e080169375945fc0f8d3548d8d
7
+ data.tar.gz: 5e8d6c3d234e298836f2f373d515321337d1f1ab1904a38e9dfb8a1e6076b6307e69e84d93b5fd27fd02f3f0642e19f8adc38b9de20e5d0ac87b47600b13de90
data/CHANGELOG.md CHANGED
@@ -6,6 +6,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.2.2] - 2024-11-28
10
+ ### Fixed
11
+ - Fix issue with handling file path
12
+
13
+ ## [0.2.1] - 2024-11-28
14
+ ### Fixed
15
+ - Fix parse_file to handle files that are not on the local filesystem
16
+
9
17
  ## [0.2.0] - 2024-11-28
10
18
  ### Changed
11
19
  - Allow passing in a string or an IO object to `parse_file`
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Llamaparserb
4
- VERSION = "0.2.0"
4
+ VERSION = "0.2.2"
5
5
  end
data/lib/llamaparserb.rb CHANGED
@@ -45,11 +45,16 @@ module Llamaparserb
45
45
  def parse_file(file_input, file_type = nil)
46
46
  case file_input
47
47
  when String
48
- # Treat as file path
49
- job_id = create_job_from_path(file_input)
50
- log "Started parsing file under job_id #{job_id}", :info
48
+ if file_type
49
+ job_id = create_job_from_io(file_input, file_type)
50
+ log "Started parsing binary data under job_id #{job_id}", :info
51
+ elsif File.exist?(file_input)
52
+ job_id = create_job_from_path(file_input)
53
+ log "Started parsing file under job_id #{job_id}", :info
54
+ else
55
+ raise Error, "file_type parameter is required for binary string input"
56
+ end
51
57
  when IO, StringIO, Tempfile
52
- # Treat as file object
53
58
  raise Error, "file_type parameter is required for IO objects" unless file_type
54
59
  job_id = create_job_from_io(file_input, file_type)
55
60
  log "Started parsing in-memory file under job_id #{job_id}", :info
@@ -63,6 +68,8 @@ module Llamaparserb
63
68
  result
64
69
  rescue => e
65
70
  handle_error(e, file_input)
71
+ raise unless @options[:ignore_errors]
72
+ nil
66
73
  end
67
74
 
68
75
  private
@@ -108,15 +115,19 @@ module Llamaparserb
108
115
 
109
116
  def log(message, level = :debug)
110
117
  return unless @options[:verbose]
118
+
119
+ # Convert message to string and force UTF-8 encoding, replacing invalid characters
120
+ safe_message = message.to_s.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
121
+
111
122
  case level
112
123
  when :info
113
- logger.info(message)
124
+ logger.info(safe_message)
114
125
  when :warn
115
- logger.warn(message)
126
+ logger.warn(safe_message)
116
127
  when :error
117
- logger.error(message)
128
+ logger.error(safe_message)
118
129
  else
119
- logger.debug(message)
130
+ logger.debug(safe_message)
120
131
  end
121
132
  end
122
133
 
@@ -157,7 +168,13 @@ module Llamaparserb
157
168
 
158
169
  def handle_error(error, file_input)
159
170
  if @options[:ignore_errors]
160
- log "Error while parsing file '#{file_input}'", :error
171
+ safe_message = if file_input.is_a?(String) && file_input.start_with?("/")
172
+ "file path: #{file_input}"
173
+ else
174
+ "binary data"
175
+ end
176
+
177
+ log "Error while parsing file (#{safe_message}): #{error.message}", :error
161
178
  nil
162
179
  else
163
180
  raise error
@@ -183,15 +200,21 @@ module Llamaparserb
183
200
  create_job(file)
184
201
  end
185
202
 
186
- def create_job_from_io(io, file_type)
187
- # Ensure file_type starts with a dot
203
+ def create_job_from_io(io_or_string, file_type)
188
204
  file_type = ".#{file_type}" unless file_type.start_with?(".")
189
205
  validate_file_type!(file_type)
190
206
 
191
207
  temp_file = Tempfile.new(["upload", file_type])
192
208
  temp_file.binmode
193
- io.rewind
194
- temp_file.write(io.read)
209
+
210
+ case io_or_string
211
+ when String
212
+ temp_file.write(io_or_string.force_encoding("ASCII-8BIT"))
213
+ else
214
+ io_or_string.rewind if io_or_string.respond_to?(:rewind)
215
+ temp_file.write(io_or_string.read.force_encoding("ASCII-8BIT"))
216
+ end
217
+
195
218
  temp_file.rewind
196
219
 
197
220
  file = Faraday::Multipart::FilePart.new(
@@ -267,7 +290,12 @@ module Llamaparserb
267
290
  end
268
291
 
269
292
  def validate_file_type!(file_path)
270
- extension = File.extname(file_path).downcase
293
+ extension = if file_path.start_with?(".")
294
+ file_path
295
+ else
296
+ File.extname(file_path).downcase
297
+ end
298
+
271
299
  unless SUPPORTED_FILE_TYPES.include?(extension)
272
300
  raise Error, "Unsupported file type: #{extension}. Supported types: #{SUPPORTED_FILE_TYPES.join(", ")}"
273
301
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llamaparserb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Heidar Bernhardsson