llamaparserb 0.2.0 → 0.2.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14e4e5deea50bc3f5cb0e32c3c3029f6b26fda4cacf91be0f96e9f677f92d1e5
4
- data.tar.gz: e3e6cb569456d1c649be22ad3e5af65e30484fce50582d7493f5e22b2ac03f61
3
+ metadata.gz: a1761244b0d8ac8c6ee13eb10b50d3eadcb182d8e7bad24828ba70a290bc7507
4
+ data.tar.gz: cf7908683f630f17ef39b4c7e6393cb4637e350ea89c674c78a03a68b1bdf1c5
5
5
  SHA512:
6
- metadata.gz: 72121ad4b70f95ddd2bfe23f129d5c9ee634543543ba43e4faa437da67bf3cbdf8bc1ea828aeb58d43073bed46c28d4010a6865b5bc006c32ccfc30a099d1d65
7
- data.tar.gz: 5bea9e367b71d38fe8f6a1a80be0b4fd190e3e56b780e369d33c3319dfd91b483283f4b43ab8fc484468ab611c5e3efc639d8302ba611ccfb71258c459fdb783
6
+ metadata.gz: 7bc3f4c44814c1cf63ad480882ee7f2b647af3ee8b34b46d95da7ae90c372b656c0f69fd224ba1c03d26cfa5185ee89a356f05e080169375945fc0f8d3548d8d
7
+ data.tar.gz: 5e8d6c3d234e298836f2f373d515321337d1f1ab1904a38e9dfb8a1e6076b6307e69e84d93b5fd27fd02f3f0642e19f8adc38b9de20e5d0ac87b47600b13de90
data/CHANGELOG.md CHANGED
@@ -6,6 +6,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.2.2] - 2024-11-28
10
+ ### Fixed
11
+ - Fix issue with handling file path
12
+
13
+ ## [0.2.1] - 2024-11-28
14
+ ### Fixed
15
+ - Fix parse_file to handle files that are not on the local filesystem
16
+
9
17
  ## [0.2.0] - 2024-11-28
10
18
  ### Changed
11
19
  - Allow passing in a string or an IO object to `parse_file`
data/lib/llamaparserb/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Llamaparserb
4
- VERSION = "0.2.0"
4
+ VERSION = "0.2.2"
5
5
  end
data/lib/llamaparserb.rb CHANGED
@@ -45,11 +45,16 @@ module Llamaparserb
45
45
  def parse_file(file_input, file_type = nil)
46
46
  case file_input
47
47
  when String
48
- # Treat as file path
49
- job_id = create_job_from_path(file_input)
50
- log "Started parsing file under job_id #{job_id}", :info
48
+ if file_type
49
+ job_id = create_job_from_io(file_input, file_type)
50
+ log "Started parsing binary data under job_id #{job_id}", :info
51
+ elsif File.exist?(file_input)
52
+ job_id = create_job_from_path(file_input)
53
+ log "Started parsing file under job_id #{job_id}", :info
54
+ else
55
+ raise Error, "file_type parameter is required for binary string input"
56
+ end
51
57
  when IO, StringIO, Tempfile
52
- # Treat as file object
53
58
  raise Error, "file_type parameter is required for IO objects" unless file_type
54
59
  job_id = create_job_from_io(file_input, file_type)
55
60
  log "Started parsing in-memory file under job_id #{job_id}", :info
@@ -63,6 +68,8 @@ module Llamaparserb
63
68
  result
64
69
  rescue => e
65
70
  handle_error(e, file_input)
71
+ raise unless @options[:ignore_errors]
72
+ nil
66
73
  end
67
74
 
68
75
  private
@@ -108,15 +115,19 @@ module Llamaparserb
108
115
 
109
116
  def log(message, level = :debug)
110
117
  return unless @options[:verbose]
118
+
119
+ # Convert message to string and force UTF-8 encoding, replacing invalid characters
120
+ safe_message = message.to_s.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
121
+
111
122
  case level
112
123
  when :info
113
- logger.info(message)
124
+ logger.info(safe_message)
114
125
  when :warn
115
- logger.warn(message)
126
+ logger.warn(safe_message)
116
127
  when :error
117
- logger.error(message)
128
+ logger.error(safe_message)
118
129
  else
119
- logger.debug(message)
130
+ logger.debug(safe_message)
120
131
  end
121
132
  end
122
133
 
@@ -157,7 +168,13 @@ module Llamaparserb
157
168
 
158
169
  def handle_error(error, file_input)
159
170
  if @options[:ignore_errors]
160
- log "Error while parsing file '#{file_input}'", :error
171
+ safe_message = if file_input.is_a?(String) && file_input.start_with?("/")
172
+ "file path: #{file_input}"
173
+ else
174
+ "binary data"
175
+ end
176
+
177
+ log "Error while parsing file (#{safe_message}): #{error.message}", :error
161
178
  nil
162
179
  else
163
180
  raise error
@@ -183,15 +200,21 @@ module Llamaparserb
183
200
  create_job(file)
184
201
  end
185
202
 
186
- def create_job_from_io(io, file_type)
187
- # Ensure file_type starts with a dot
203
+ def create_job_from_io(io_or_string, file_type)
188
204
  file_type = ".#{file_type}" unless file_type.start_with?(".")
189
205
  validate_file_type!(file_type)
190
206
 
191
207
  temp_file = Tempfile.new(["upload", file_type])
192
208
  temp_file.binmode
193
- io.rewind
194
- temp_file.write(io.read)
209
+
210
+ case io_or_string
211
+ when String
212
+ temp_file.write(io_or_string.force_encoding("ASCII-8BIT"))
213
+ else
214
+ io_or_string.rewind if io_or_string.respond_to?(:rewind)
215
+ temp_file.write(io_or_string.read.force_encoding("ASCII-8BIT"))
216
+ end
217
+
195
218
  temp_file.rewind
196
219
 
197
220
  file = Faraday::Multipart::FilePart.new(
@@ -267,7 +290,12 @@ module Llamaparserb
267
290
  end
268
291
 
269
292
  def validate_file_type!(file_path)
270
- extension = File.extname(file_path).downcase
293
+ extension = if file_path.start_with?(".")
294
+ file_path
295
+ else
296
+ File.extname(file_path).downcase
297
+ end
298
+
271
299
  unless SUPPORTED_FILE_TYPES.include?(extension)
272
300
  raise Error, "Unsupported file type: #{extension}. Supported types: #{SUPPORTED_FILE_TYPES.join(", ")}"
273
301
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llamaparserb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Heidar Bernhardsson