llamaparserb 0.2.0 → 0.2.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/llamaparserb/version.rb +1 -1
- data/lib/llamaparserb.rb +42 -14
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7bc729e8371ed2f748eaef9a92b82380c2e7d674caf6c4fe1b17e7e53d8ea62a
|
4
|
+
data.tar.gz: 8f510a5be8efc2877b617bd7e4682a4cb0e92c4fd794e6e6ef036481f3d57284
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e6c3d6df9c69da63cf631a296c9618c074fc127c48cc23b398237dc015f4cb4d2bf4d2f4f50b7ed380ac053ff98c19ea9fc3530d890db34cf090e2084fb0821
|
7
|
+
data.tar.gz: 04b10a441f82670dc58d873d586cdd043b94f1a477b2d72b6494128d9ab76cd2f8656b1c5db0ad61487d4efe51dbc42e63b047c8101f6f74593d29e098965b52
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [0.2.1] - 2024-11-28
|
10
|
+
### Fixed
|
11
|
+
- Fix parse_file to handle files that are not on the local filesystem
|
12
|
+
|
9
13
|
## [0.2.0] - 2024-11-28
|
10
14
|
### Changed
|
11
15
|
- Allow passing in a string or an IO object to `parse_file`
|
data/lib/llamaparserb/version.rb
CHANGED
data/lib/llamaparserb.rb
CHANGED
@@ -45,11 +45,16 @@ module Llamaparserb
|
|
45
45
|
def parse_file(file_input, file_type = nil)
|
46
46
|
case file_input
|
47
47
|
when String
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
if file_type
|
49
|
+
job_id = create_job_from_io(file_input, file_type)
|
50
|
+
log "Started parsing binary data under job_id #{job_id}", :info
|
51
|
+
elsif File.exist?(file_input)
|
52
|
+
job_id = create_job_from_path(file_input)
|
53
|
+
log "Started parsing file under job_id #{job_id}", :info
|
54
|
+
else
|
55
|
+
raise Error, "file_type parameter is required for binary string input"
|
56
|
+
end
|
51
57
|
when IO, StringIO, Tempfile
|
52
|
-
# Treat as file object
|
53
58
|
raise Error, "file_type parameter is required for IO objects" unless file_type
|
54
59
|
job_id = create_job_from_io(file_input, file_type)
|
55
60
|
log "Started parsing in-memory file under job_id #{job_id}", :info
|
@@ -63,6 +68,8 @@ module Llamaparserb
|
|
63
68
|
result
|
64
69
|
rescue => e
|
65
70
|
handle_error(e, file_input)
|
71
|
+
raise unless @options[:ignore_errors]
|
72
|
+
nil
|
66
73
|
end
|
67
74
|
|
68
75
|
private
|
@@ -108,15 +115,19 @@ module Llamaparserb
|
|
108
115
|
|
109
116
|
def log(message, level = :debug)
|
110
117
|
return unless @options[:verbose]
|
118
|
+
|
119
|
+
# Convert message to string and force UTF-8 encoding, replacing invalid characters
|
120
|
+
safe_message = message.to_s.encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
|
121
|
+
|
111
122
|
case level
|
112
123
|
when :info
|
113
|
-
logger.info(
|
124
|
+
logger.info(safe_message)
|
114
125
|
when :warn
|
115
|
-
logger.warn(
|
126
|
+
logger.warn(safe_message)
|
116
127
|
when :error
|
117
|
-
logger.error(
|
128
|
+
logger.error(safe_message)
|
118
129
|
else
|
119
|
-
logger.debug(
|
130
|
+
logger.debug(safe_message)
|
120
131
|
end
|
121
132
|
end
|
122
133
|
|
@@ -157,7 +168,13 @@ module Llamaparserb
|
|
157
168
|
|
158
169
|
def handle_error(error, file_input)
|
159
170
|
if @options[:ignore_errors]
|
160
|
-
|
171
|
+
safe_message = if file_input.is_a?(String) && !File.exist?(file_input)
|
172
|
+
"binary data"
|
173
|
+
else
|
174
|
+
file_input.class.to_s
|
175
|
+
end
|
176
|
+
|
177
|
+
log "Error while parsing file (#{safe_message}): #{error.message}", :error
|
161
178
|
nil
|
162
179
|
else
|
163
180
|
raise error
|
@@ -183,15 +200,21 @@ module Llamaparserb
|
|
183
200
|
create_job(file)
|
184
201
|
end
|
185
202
|
|
186
|
-
def create_job_from_io(
|
187
|
-
# Ensure file_type starts with a dot
|
203
|
+
def create_job_from_io(io_or_string, file_type)
|
188
204
|
file_type = ".#{file_type}" unless file_type.start_with?(".")
|
189
205
|
validate_file_type!(file_type)
|
190
206
|
|
191
207
|
temp_file = Tempfile.new(["upload", file_type])
|
192
208
|
temp_file.binmode
|
193
|
-
|
194
|
-
|
209
|
+
|
210
|
+
case io_or_string
|
211
|
+
when String
|
212
|
+
temp_file.write(io_or_string.force_encoding("ASCII-8BIT"))
|
213
|
+
else
|
214
|
+
io_or_string.rewind if io_or_string.respond_to?(:rewind)
|
215
|
+
temp_file.write(io_or_string.read.force_encoding("ASCII-8BIT"))
|
216
|
+
end
|
217
|
+
|
195
218
|
temp_file.rewind
|
196
219
|
|
197
220
|
file = Faraday::Multipart::FilePart.new(
|
@@ -267,7 +290,12 @@ module Llamaparserb
|
|
267
290
|
end
|
268
291
|
|
269
292
|
def validate_file_type!(file_path)
|
270
|
-
extension =
|
293
|
+
extension = if file_path.start_with?(".")
|
294
|
+
file_path
|
295
|
+
else
|
296
|
+
File.extname(file_path).downcase
|
297
|
+
end
|
298
|
+
|
271
299
|
unless SUPPORTED_FILE_TYPES.include?(extension)
|
272
300
|
raise Error, "Unsupported file type: #{extension}. Supported types: #{SUPPORTED_FILE_TYPES.join(", ")}"
|
273
301
|
end
|