universal_document_processor 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ISSUES_ANALYSIS.md +295 -0
- data/PERFORMANCE.md +492 -0
- data/USER_GUIDE.md +597 -0
- data/debug_test.rb +35 -0
- data/lib/universal_document_processor/document.rb +5 -1
- data/lib/universal_document_processor/processors/base_processor.rb +5 -1
- data/lib/universal_document_processor/processors/pdf_processor.rb +17 -0
- data/lib/universal_document_processor/version.rb +1 -1
- data/test_ai_dependency.rb +80 -0
- data/test_core_functionality.rb +280 -0
- data/test_performance_memory.rb +271 -0
- data/test_published_gem.rb +349 -0
- metadata +20 -6
@@ -0,0 +1,349 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Test script to check for potential issues with the published gem
|
4
|
+
# This simulates real-world usage scenarios
|
5
|
+
|
6
|
+
puts "๐ Testing Universal Document Processor v1.0.3 for Potential Issues"
|
7
|
+
puts "=" * 70
|
8
|
+
|
9
|
+
# Add lib directory to load path for local testing
|
10
|
+
$LOAD_PATH.unshift File.expand_path('lib', __dir__)
|
11
|
+
|
12
|
+
require 'universal_document_processor'
|
13
|
+
require 'tempfile'
|
14
|
+
|
15
|
+
test_count = 0
|
16
|
+
issue_count = 0
|
17
|
+
warnings = []
|
18
|
+
|
19
|
+
def test_issue(description)
|
20
|
+
global_test_count = caller_locations.first.lineno
|
21
|
+
print "#{global_test_count}. #{description}... "
|
22
|
+
|
23
|
+
begin
|
24
|
+
yield
|
25
|
+
puts "✅ OK"
|
26
|
+
return false # No issue
|
27
|
+
rescue => e
|
28
|
+
puts "❌ ISSUE: #{e.message}"
|
29
|
+
puts " #{e.backtrace.first}" if ENV['DEBUG']
|
30
|
+
return true # Issue found
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def check_warning(description)
|
35
|
+
print "⚠️ #{description}... "
|
36
|
+
begin
|
37
|
+
result = yield
|
38
|
+
if result
|
39
|
+
puts "FOUND"
|
40
|
+
return result
|
41
|
+
else
|
42
|
+
puts "OK"
|
43
|
+
return nil
|
44
|
+
end
|
45
|
+
rescue => e
|
46
|
+
puts "ERROR: #{e.message}"
|
47
|
+
return e.message
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
puts "\n🧪 Testing Core Functionality Issues"
|
52
|
+
puts "-" * 40
|
53
|
+
|
54
|
+
# Test 1: Basic gem loading
|
55
|
+
test_count += 1
|
56
|
+
issue_found = test_issue("Gem loads without errors") do
|
57
|
+
# Just loading the gem should work
|
58
|
+
raise "VERSION not defined" unless defined?(UniversalDocumentProcessor::VERSION)
|
59
|
+
raise "Main module not available" unless defined?(UniversalDocumentProcessor)
|
60
|
+
end
|
61
|
+
issue_count += 1 if issue_found
|
62
|
+
|
63
|
+
# Test 2: AI agent without API key (should not crash)
|
64
|
+
test_count += 1
|
65
|
+
issue_found = test_issue("AI agent creation without API key") do
|
66
|
+
agent = UniversalDocumentProcessor.create_ai_agent
|
67
|
+
raise "Agent not created" unless agent.is_a?(UniversalDocumentProcessor::AIAgent)
|
68
|
+
raise "AI should not be available" if agent.ai_available?
|
69
|
+
end
|
70
|
+
issue_count += 1 if issue_found
|
71
|
+
|
72
|
+
# Test 3: Text file processing
|
73
|
+
test_count += 1
|
74
|
+
issue_found = test_issue("Basic text file processing") do
|
75
|
+
txt_file = Tempfile.new(['test', '.txt'])
|
76
|
+
txt_file.write("Sample text content")
|
77
|
+
txt_file.close
|
78
|
+
|
79
|
+
result = UniversalDocumentProcessor.process(txt_file.path)
|
80
|
+
raise "No text_content key" unless result.has_key?(:text_content)
|
81
|
+
raise "No metadata key" unless result.has_key?(:metadata)
|
82
|
+
|
83
|
+
txt_file.unlink
|
84
|
+
end
|
85
|
+
issue_count += 1 if issue_found
|
86
|
+
|
87
|
+
# Test 4: CSV processing
|
88
|
+
test_count += 1
|
89
|
+
issue_found = test_issue("CSV file processing") do
|
90
|
+
csv_file = Tempfile.new(['test', '.csv'])
|
91
|
+
csv_file.write("Name,Age\nJohn,25\nJane,30")
|
92
|
+
csv_file.close
|
93
|
+
|
94
|
+
result = UniversalDocumentProcessor.process(csv_file.path)
|
95
|
+
raise "Wrong format detected" unless result[:metadata][:format] == "csv"
|
96
|
+
raise "No tables extracted" unless result[:tables].length > 0
|
97
|
+
|
98
|
+
csv_file.unlink
|
99
|
+
end
|
100
|
+
issue_count += 1 if issue_found
|
101
|
+
|
102
|
+
# Test 5: TSV processing (our new feature)
|
103
|
+
test_count += 1
|
104
|
+
issue_found = test_issue("TSV file processing") do
|
105
|
+
tsv_file = Tempfile.new(['test', '.tsv'])
|
106
|
+
tsv_file.write("Name\tAge\nJohn\t25\nJane\t30")
|
107
|
+
tsv_file.close
|
108
|
+
|
109
|
+
result = UniversalDocumentProcessor.process(tsv_file.path)
|
110
|
+
raise "Wrong format detected" unless result[:metadata][:format] == "tsv"
|
111
|
+
raise "Wrong delimiter" unless result[:metadata][:delimiter] == "tab"
|
112
|
+
raise "No tables extracted" unless result[:tables].length > 0
|
113
|
+
|
114
|
+
tsv_file.unlink
|
115
|
+
end
|
116
|
+
issue_count += 1 if issue_found
|
117
|
+
|
118
|
+
puts "\n๐ Testing Dependency Issues"
|
119
|
+
puts "-" * 40
|
120
|
+
|
121
|
+
# Test 6: Optional dependency checking
|
122
|
+
test_count += 1
|
123
|
+
issue_found = test_issue("Optional dependency information") do
|
124
|
+
deps = UniversalDocumentProcessor.optional_dependencies
|
125
|
+
raise "No optional deps info" if deps.empty?
|
126
|
+
|
127
|
+
missing = UniversalDocumentProcessor.missing_dependencies
|
128
|
+
raise "Missing deps not array" unless missing.is_a?(Array)
|
129
|
+
|
130
|
+
instructions = UniversalDocumentProcessor.installation_instructions
|
131
|
+
raise "No installation instructions" if instructions.empty?
|
132
|
+
end
|
133
|
+
issue_count += 1 if issue_found
|
134
|
+
|
135
|
+
# Test 7: PDF processing without pdf-reader gem
|
136
|
+
test_count += 1
|
137
|
+
issue_found = test_issue("PDF processing dependency handling") do
|
138
|
+
# Create a fake PDF file (just for testing error handling)
|
139
|
+
pdf_file = Tempfile.new(['test', '.pdf'])
|
140
|
+
pdf_file.write("%PDF-1.4\nFake PDF content")
|
141
|
+
pdf_file.close
|
142
|
+
|
143
|
+
begin
|
144
|
+
result = UniversalDocumentProcessor.process(pdf_file.path)
|
145
|
+
# Should either work (if pdf-reader available) or give graceful error
|
146
|
+
rescue UniversalDocumentProcessor::DependencyMissingError => e
|
147
|
+
# This is expected and good
|
148
|
+
raise "Wrong error message" unless e.message.include?("pdf-reader")
|
149
|
+
end
|
150
|
+
|
151
|
+
pdf_file.unlink
|
152
|
+
end
|
153
|
+
issue_count += 1 if issue_found
|
154
|
+
|
155
|
+
puts "\n⚠️ Testing Edge Cases & Potential Warnings"
|
156
|
+
puts "-" * 40
|
157
|
+
|
158
|
+
# Warning 1: Large file handling
|
159
|
+
warning = check_warning("Large file memory usage") do
|
160
|
+
# Create a moderately large text file
|
161
|
+
large_file = Tempfile.new(['large_test', '.txt'])
|
162
|
+
content = "This is a test line.\n" * 10000 # ~200KB
|
163
|
+
large_file.write(content)
|
164
|
+
large_file.close
|
165
|
+
|
166
|
+
start_time = Time.now
|
167
|
+
result = UniversalDocumentProcessor.process(large_file.path)
|
168
|
+
end_time = Time.now
|
169
|
+
|
170
|
+
large_file.unlink
|
171
|
+
|
172
|
+
processing_time = end_time - start_time
|
173
|
+
if processing_time > 5.0
|
174
|
+
"Large file processing took #{processing_time.round(2)} seconds"
|
175
|
+
else
|
176
|
+
false
|
177
|
+
end
|
178
|
+
end
|
179
|
+
warnings << warning if warning
|
180
|
+
|
181
|
+
# Warning 2: Unicode/Japanese filename handling
|
182
|
+
warning = check_warning("Unicode filename handling") do
|
183
|
+
begin
|
184
|
+
japanese_content = "これは日本語のテストです。"
|
185
|
+
unicode_file = Tempfile.new(['テスト', '.txt'])
|
186
|
+
unicode_file.write(japanese_content)
|
187
|
+
unicode_file.close
|
188
|
+
|
189
|
+
result = UniversalDocumentProcessor.process(unicode_file.path)
|
190
|
+
unicode_file.unlink
|
191
|
+
false
|
192
|
+
rescue => e
|
193
|
+
"Unicode filename issue: #{e.message}"
|
194
|
+
end
|
195
|
+
end
|
196
|
+
warnings << warning if warning
|
197
|
+
|
198
|
+
# Warning 3: Empty file handling
|
199
|
+
warning = check_warning("Empty file handling") do
|
200
|
+
empty_file = Tempfile.new(['empty', '.txt'])
|
201
|
+
empty_file.close
|
202
|
+
|
203
|
+
begin
|
204
|
+
result = UniversalDocumentProcessor.process(empty_file.path)
|
205
|
+
empty_file.unlink
|
206
|
+
|
207
|
+
if result[:text_content].nil? || result[:text_content].empty?
|
208
|
+
false # This is expected
|
209
|
+
else
|
210
|
+
false # Also fine
|
211
|
+
end
|
212
|
+
rescue => e
|
213
|
+
empty_file.unlink
|
214
|
+
"Empty file processing issue: #{e.message}"
|
215
|
+
end
|
216
|
+
end
|
217
|
+
warnings << warning if warning
|
218
|
+
|
219
|
+
# Warning 4: Invalid file extension handling
|
220
|
+
warning = check_warning("Invalid file extension handling") do
|
221
|
+
invalid_file = Tempfile.new(['test', '.xyz'])
|
222
|
+
invalid_file.write("Test content")
|
223
|
+
invalid_file.close
|
224
|
+
|
225
|
+
begin
|
226
|
+
result = UniversalDocumentProcessor.process(invalid_file.path)
|
227
|
+
invalid_file.unlink
|
228
|
+
false # Processed successfully
|
229
|
+
rescue UniversalDocumentProcessor::UnsupportedFormatError
|
230
|
+
invalid_file.unlink
|
231
|
+
false # Expected error, good
|
232
|
+
rescue => e
|
233
|
+
invalid_file.unlink
|
234
|
+
"Unexpected error for unsupported format: #{e.message}"
|
235
|
+
end
|
236
|
+
end
|
237
|
+
warnings << warning if warning
|
238
|
+
|
239
|
+
# Warning 5: Memory usage with multiple files
|
240
|
+
warning = check_warning("Memory usage with batch processing") do
|
241
|
+
files = []
|
242
|
+
5.times do |i|
|
243
|
+
file = Tempfile.new(["batch_#{i}", '.txt'])
|
244
|
+
file.write("Batch test content #{i}\n" * 1000)
|
245
|
+
file.close
|
246
|
+
files << file.path
|
247
|
+
end
|
248
|
+
|
249
|
+
begin
|
250
|
+
start_memory = `tasklist /FI "PID eq #{Process.pid}" /FO CSV`.split("\n")[1].split(",")[4].gsub('"', '').gsub(',', '').to_i rescue 0
|
251
|
+
|
252
|
+
results = UniversalDocumentProcessor.batch_process(files)
|
253
|
+
|
254
|
+
end_memory = `tasklist /FI "PID eq #{Process.pid}" /FO CSV`.split("\n")[1].split(",")[4].gsub('"', '').gsub(',', '').to_i rescue 0
|
255
|
+
|
256
|
+
files.each { |f| File.delete(f) if File.exist?(f) }
|
257
|
+
|
258
|
+
memory_increase = end_memory - start_memory
|
259
|
+
if memory_increase > 50000 # 50MB increase
|
260
|
+
"High memory usage: #{memory_increase}KB increase"
|
261
|
+
else
|
262
|
+
false
|
263
|
+
end
|
264
|
+
rescue => e
|
265
|
+
files.each { |f| File.delete(f) if File.exist?(f) }
|
266
|
+
"Batch processing memory test failed: #{e.message}"
|
267
|
+
end
|
268
|
+
end
|
269
|
+
warnings << warning if warning
|
270
|
+
|
271
|
+
puts "\n๐ Testing AI Features (Without API Key)"
|
272
|
+
puts "-" * 40
|
273
|
+
|
274
|
+
# Test 8: AI methods should fail gracefully
|
275
|
+
test_count += 1
|
276
|
+
issue_found = test_issue("AI methods fail gracefully without API key") do
|
277
|
+
txt_file = Tempfile.new(['ai_test', '.txt'])
|
278
|
+
txt_file.write("Test content for AI")
|
279
|
+
txt_file.close
|
280
|
+
|
281
|
+
begin
|
282
|
+
UniversalDocumentProcessor.ai_analyze(txt_file.path)
|
283
|
+
raise "Should have raised DependencyMissingError"
|
284
|
+
rescue UniversalDocumentProcessor::DependencyMissingError => e
|
285
|
+
# Expected - this is good
|
286
|
+
raise "Wrong error message" unless e.message.include?("OpenAI API key")
|
287
|
+
end
|
288
|
+
|
289
|
+
txt_file.unlink
|
290
|
+
end
|
291
|
+
issue_count += 1 if issue_found
|
292
|
+
|
293
|
+
puts "\n๐ Testing Available Features"
|
294
|
+
puts "-" * 40
|
295
|
+
|
296
|
+
# Test 9: Feature detection
|
297
|
+
test_count += 1
|
298
|
+
issue_found = test_issue("Feature detection works correctly") do
|
299
|
+
features = UniversalDocumentProcessor.available_features
|
300
|
+
raise "No features detected" if features.empty?
|
301
|
+
raise "Missing basic features" unless features.include?(:text_processing)
|
302
|
+
raise "Missing TSV support" unless features.include?(:tsv_processing)
|
303
|
+
|
304
|
+
# AI should not be available without API key
|
305
|
+
raise "AI should not be available" if features.include?(:ai_processing)
|
306
|
+
end
|
307
|
+
issue_count += 1 if issue_found
|
308
|
+
|
309
|
+
puts "\n" + "=" * 70
|
310
|
+
puts "🎯 ISSUE ANALYSIS COMPLETE"
|
311
|
+
puts "=" * 70
|
312
|
+
|
313
|
+
puts "\n๐ SUMMARY:"
|
314
|
+
puts " Total tests run: #{test_count}"
|
315
|
+
puts " Issues found: #{issue_count}"
|
316
|
+
puts " Warnings: #{warnings.compact.length}"
|
317
|
+
|
318
|
+
if issue_count == 0
|
319
|
+
puts "\n✅ NO CRITICAL ISSUES FOUND!"
|
320
|
+
puts "The gem appears to be working correctly for basic usage."
|
321
|
+
else
|
322
|
+
puts "\n❌ CRITICAL ISSUES DETECTED!"
|
323
|
+
puts "The gem has #{issue_count} critical issues that need attention."
|
324
|
+
end
|
325
|
+
|
326
|
+
if warnings.compact.length > 0
|
327
|
+
puts "\n⚠️ WARNINGS TO CONSIDER:"
|
328
|
+
warnings.compact.each_with_index do |warning, i|
|
329
|
+
puts " #{i + 1}. #{warning}"
|
330
|
+
end
|
331
|
+
else
|
332
|
+
puts "\n✅ No significant warnings detected."
|
333
|
+
end
|
334
|
+
|
335
|
+
puts "\n🔮 POTENTIAL USER ISSUES TO WATCH FOR:"
|
336
|
+
puts "1. Users trying to use AI features without setting OPENAI_API_KEY"
|
337
|
+
puts "2. Users expecting PDF/Word processing without installing optional gems"
|
338
|
+
puts "3. Large file processing performance"
|
339
|
+
puts "4. Unicode filename handling on different systems"
|
340
|
+
puts "5. Memory usage with batch processing of many files"
|
341
|
+
|
342
|
+
puts "\n💡 RECOMMENDATIONS:"
|
343
|
+
puts "1. ✅ AI dependency handling is working correctly"
|
344
|
+
puts "2. ✅ TSV processing is functional"
|
345
|
+
puts "3. ✅ Error messages are helpful"
|
346
|
+
puts "4. ๐ Consider adding performance guidelines to documentation"
|
347
|
+
puts "5. ๐ Consider adding memory usage notes for large files"
|
348
|
+
|
349
|
+
exit issue_count
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: universal_document_processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vikas Patil
|
@@ -13,16 +13,22 @@ dependencies:
|
|
13
13
|
name: activesupport
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
15
15
|
requirements:
|
16
|
-
- - "
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '5.0'
|
19
|
+
- - "<"
|
17
20
|
- !ruby/object:Gem::Version
|
18
|
-
version: '
|
21
|
+
version: '9.0'
|
19
22
|
type: :runtime
|
20
23
|
prerelease: false
|
21
24
|
version_requirements: !ruby/object:Gem::Requirement
|
22
25
|
requirements:
|
23
|
-
- - "
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
version: '5.0'
|
29
|
+
- - "<"
|
24
30
|
- !ruby/object:Gem::Version
|
25
|
-
version: '
|
31
|
+
version: '9.0'
|
26
32
|
- !ruby/object:Gem::Dependency
|
27
33
|
name: marcel
|
28
34
|
requirement: !ruby/object:Gem::Requirement
|
@@ -189,9 +195,13 @@ extra_rdoc_files: []
|
|
189
195
|
files:
|
190
196
|
- CHANGELOG.md
|
191
197
|
- Gemfile
|
198
|
+
- ISSUES_ANALYSIS.md
|
192
199
|
- LICENSE
|
200
|
+
- PERFORMANCE.md
|
193
201
|
- README.md
|
194
202
|
- Rakefile
|
203
|
+
- USER_GUIDE.md
|
204
|
+
- debug_test.rb
|
195
205
|
- lib/universal_document_processor.rb
|
196
206
|
- lib/universal_document_processor/ai_agent.rb
|
197
207
|
- lib/universal_document_processor/document.rb
|
@@ -207,6 +217,10 @@ files:
|
|
207
217
|
- lib/universal_document_processor/utils/file_detector.rb
|
208
218
|
- lib/universal_document_processor/utils/japanese_filename_handler.rb
|
209
219
|
- lib/universal_document_processor/version.rb
|
220
|
+
- test_ai_dependency.rb
|
221
|
+
- test_core_functionality.rb
|
222
|
+
- test_performance_memory.rb
|
223
|
+
- test_published_gem.rb
|
210
224
|
homepage: https://github.com/vpatil160/universal_document_processor
|
211
225
|
licenses:
|
212
226
|
- MIT
|
@@ -227,7 +241,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
227
241
|
requirements:
|
228
242
|
- - ">="
|
229
243
|
- !ruby/object:Gem::Version
|
230
|
-
version: 2.
|
244
|
+
version: 2.2.2
|
231
245
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
232
246
|
requirements:
|
233
247
|
- - ">="
|