universal_document_processor 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,349 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Test script to check for potential issues with the published gem
4
+ # This simulates real-world usage scenarios
5
+
6
+ puts "🔍 Testing Universal Document Processor v1.0.3 for Potential Issues"
7
+ puts "=" * 70
8
+
9
+ # Add lib directory to load path for local testing
10
+ $LOAD_PATH.unshift File.expand_path('lib', __dir__)
11
+
12
+ require 'universal_document_processor'
13
+ require 'tempfile'
14
+
15
+ test_count = 0
16
+ issue_count = 0
17
+ warnings = []
18
+
19
+ def test_issue(description)
20
+ global_test_count = caller_locations.first.lineno
21
+ print "#{global_test_count}. #{description}... "
22
+
23
+ begin
24
+ yield
25
+ puts "✅ OK"
26
+ return false # No issue
27
+ rescue => e
28
+ puts "❌ ISSUE: #{e.message}"
29
+ puts " #{e.backtrace.first}" if ENV['DEBUG']
30
+ return true # Issue found
31
+ end
32
+ end
33
+
34
+ def check_warning(description)
35
+ print "⚠️ #{description}... "
36
+ begin
37
+ result = yield
38
+ if result
39
+ puts "FOUND"
40
+ return result
41
+ else
42
+ puts "OK"
43
+ return nil
44
+ end
45
+ rescue => e
46
+ puts "ERROR: #{e.message}"
47
+ return e.message
48
+ end
49
+ end
50
+
51
+ puts "\n🧪 Testing Core Functionality Issues"
52
+ puts "-" * 40
53
+
54
+ # Test 1: Basic gem loading
55
+ test_count += 1
56
+ issue_found = test_issue("Gem loads without errors") do
57
+ # Just loading the gem should work
58
+ raise "VERSION not defined" unless defined?(UniversalDocumentProcessor::VERSION)
59
+ raise "Main module not available" unless defined?(UniversalDocumentProcessor)
60
+ end
61
+ issue_count += 1 if issue_found
62
+
63
+ # Test 2: AI agent without API key (should not crash)
64
+ test_count += 1
65
+ issue_found = test_issue("AI agent creation without API key") do
66
+ agent = UniversalDocumentProcessor.create_ai_agent
67
+ raise "Agent not created" unless agent.is_a?(UniversalDocumentProcessor::AIAgent)
68
+ raise "AI should not be available" if agent.ai_available?
69
+ end
70
+ issue_count += 1 if issue_found
71
+
72
+ # Test 3: Text file processing
73
+ test_count += 1
74
+ issue_found = test_issue("Basic text file processing") do
75
+ txt_file = Tempfile.new(['test', '.txt'])
76
+ txt_file.write("Sample text content")
77
+ txt_file.close
78
+
79
+ result = UniversalDocumentProcessor.process(txt_file.path)
80
+ raise "No text_content key" unless result.has_key?(:text_content)
81
+ raise "No metadata key" unless result.has_key?(:metadata)
82
+
83
+ txt_file.unlink
84
+ end
85
+ issue_count += 1 if issue_found
86
+
87
+ # Test 4: CSV processing
88
+ test_count += 1
89
+ issue_found = test_issue("CSV file processing") do
90
+ csv_file = Tempfile.new(['test', '.csv'])
91
+ csv_file.write("Name,Age\nJohn,25\nJane,30")
92
+ csv_file.close
93
+
94
+ result = UniversalDocumentProcessor.process(csv_file.path)
95
+ raise "Wrong format detected" unless result[:metadata][:format] == "csv"
96
+ raise "No tables extracted" unless result[:tables].length > 0
97
+
98
+ csv_file.unlink
99
+ end
100
+ issue_count += 1 if issue_found
101
+
102
+ # Test 5: TSV processing (our new feature)
103
+ test_count += 1
104
+ issue_found = test_issue("TSV file processing") do
105
+ tsv_file = Tempfile.new(['test', '.tsv'])
106
+ tsv_file.write("Name\tAge\nJohn\t25\nJane\t30")
107
+ tsv_file.close
108
+
109
+ result = UniversalDocumentProcessor.process(tsv_file.path)
110
+ raise "Wrong format detected" unless result[:metadata][:format] == "tsv"
111
+ raise "Wrong delimiter" unless result[:metadata][:delimiter] == "tab"
112
+ raise "No tables extracted" unless result[:tables].length > 0
113
+
114
+ tsv_file.unlink
115
+ end
116
+ issue_count += 1 if issue_found
117
+
118
+ puts "\n🔒 Testing Dependency Issues"
119
+ puts "-" * 40
120
+
121
+ # Test 6: Optional dependency checking
122
+ test_count += 1
123
+ issue_found = test_issue("Optional dependency information") do
124
+ deps = UniversalDocumentProcessor.optional_dependencies
125
+ raise "No optional deps info" if deps.empty?
126
+
127
+ missing = UniversalDocumentProcessor.missing_dependencies
128
+ raise "Missing deps not array" unless missing.is_a?(Array)
129
+
130
+ instructions = UniversalDocumentProcessor.installation_instructions
131
+ raise "No installation instructions" if instructions.empty?
132
+ end
133
+ issue_count += 1 if issue_found
134
+
135
+ # Test 7: PDF processing without pdf-reader gem
136
+ test_count += 1
137
+ issue_found = test_issue("PDF processing dependency handling") do
138
+ # Create a fake PDF file (just for testing error handling)
139
+ pdf_file = Tempfile.new(['test', '.pdf'])
140
+ pdf_file.write("%PDF-1.4\nFake PDF content")
141
+ pdf_file.close
142
+
143
+ begin
144
+ result = UniversalDocumentProcessor.process(pdf_file.path)
145
+ # Should either work (if pdf-reader available) or give graceful error
146
+ rescue UniversalDocumentProcessor::DependencyMissingError => e
147
+ # This is expected and good
148
+ raise "Wrong error message" unless e.message.include?("pdf-reader")
149
+ end
150
+
151
+ pdf_file.unlink
152
+ end
153
+ issue_count += 1 if issue_found
154
+
155
+ puts "\n⚠️ Testing Edge Cases & Potential Warnings"
156
+ puts "-" * 40
157
+
158
+ # Warning 1: Large file handling
159
+ warning = check_warning("Large file memory usage") do
160
+ # Create a moderately large text file
161
+ large_file = Tempfile.new(['large_test', '.txt'])
162
+ content = "This is a test line.\n" * 10000 # ~200KB
163
+ large_file.write(content)
164
+ large_file.close
165
+
166
+ start_time = Time.now
167
+ result = UniversalDocumentProcessor.process(large_file.path)
168
+ end_time = Time.now
169
+
170
+ large_file.unlink
171
+
172
+ processing_time = end_time - start_time
173
+ if processing_time > 5.0
174
+ "Large file processing took #{processing_time.round(2)} seconds"
175
+ else
176
+ false
177
+ end
178
+ end
179
+ warnings << warning if warning
180
+
181
+ # Warning 2: Unicode/Japanese filename handling
182
+ warning = check_warning("Unicode filename handling") do
183
+ begin
184
+ japanese_content = "これは日本語のテストです。"
185
+ unicode_file = Tempfile.new(['テスト', '.txt'])
186
+ unicode_file.write(japanese_content)
187
+ unicode_file.close
188
+
189
+ result = UniversalDocumentProcessor.process(unicode_file.path)
190
+ unicode_file.unlink
191
+ false
192
+ rescue => e
193
+ "Unicode filename issue: #{e.message}"
194
+ end
195
+ end
196
+ warnings << warning if warning
197
+
198
+ # Warning 3: Empty file handling
199
+ warning = check_warning("Empty file handling") do
200
+ empty_file = Tempfile.new(['empty', '.txt'])
201
+ empty_file.close
202
+
203
+ begin
204
+ result = UniversalDocumentProcessor.process(empty_file.path)
205
+ empty_file.unlink
206
+
207
+ if result[:text_content].nil? || result[:text_content].empty?
208
+ false # This is expected
209
+ else
210
+ false # Also fine
211
+ end
212
+ rescue => e
213
+ empty_file.unlink
214
+ "Empty file processing issue: #{e.message}"
215
+ end
216
+ end
217
+ warnings << warning if warning
218
+
219
+ # Warning 4: Invalid file extension handling
220
+ warning = check_warning("Invalid file extension handling") do
221
+ invalid_file = Tempfile.new(['test', '.xyz'])
222
+ invalid_file.write("Test content")
223
+ invalid_file.close
224
+
225
+ begin
226
+ result = UniversalDocumentProcessor.process(invalid_file.path)
227
+ invalid_file.unlink
228
+ false # Processed successfully
229
+ rescue UniversalDocumentProcessor::UnsupportedFormatError
230
+ invalid_file.unlink
231
+ false # Expected error, good
232
+ rescue => e
233
+ invalid_file.unlink
234
+ "Unexpected error for unsupported format: #{e.message}"
235
+ end
236
+ end
237
+ warnings << warning if warning
238
+
239
+ # Warning 5: Memory usage with multiple files
240
+ warning = check_warning("Memory usage with batch processing") do
241
+ files = []
242
+ 5.times do |i|
243
+ file = Tempfile.new(["batch_#{i}", '.txt'])
244
+ file.write("Batch test content #{i}\n" * 1000)
245
+ file.close
246
+ files << file.path
247
+ end
248
+
249
+ begin
250
+ start_memory = `tasklist /FI "PID eq #{Process.pid}" /FO CSV`.split("\n")[1].split(",")[4].gsub('"', '').gsub(',', '').to_i rescue 0
251
+
252
+ results = UniversalDocumentProcessor.batch_process(files)
253
+
254
+ end_memory = `tasklist /FI "PID eq #{Process.pid}" /FO CSV`.split("\n")[1].split(",")[4].gsub('"', '').gsub(',', '').to_i rescue 0
255
+
256
+ files.each { |f| File.delete(f) if File.exist?(f) }
257
+
258
+ memory_increase = end_memory - start_memory
259
+ if memory_increase > 50000 # 50MB increase
260
+ "High memory usage: #{memory_increase}KB increase"
261
+ else
262
+ false
263
+ end
264
+ rescue => e
265
+ files.each { |f| File.delete(f) if File.exist?(f) }
266
+ "Batch processing memory test failed: #{e.message}"
267
+ end
268
+ end
269
+ warnings << warning if warning
270
+
271
+ puts "\n๐Ÿ” Testing AI Features (Without API Key)"
272
+ puts "-" * 40
273
+
274
+ # Test 8: AI methods should fail gracefully
275
+ test_count += 1
276
+ issue_found = test_issue("AI methods fail gracefully without API key") do
277
+ txt_file = Tempfile.new(['ai_test', '.txt'])
278
+ txt_file.write("Test content for AI")
279
+ txt_file.close
280
+
281
+ begin
282
+ UniversalDocumentProcessor.ai_analyze(txt_file.path)
283
+ raise "Should have raised DependencyMissingError"
284
+ rescue UniversalDocumentProcessor::DependencyMissingError => e
285
+ # Expected - this is good
286
+ raise "Wrong error message" unless e.message.include?("OpenAI API key")
287
+ end
288
+
289
+ txt_file.unlink
290
+ end
291
+ issue_count += 1 if issue_found
292
+
293
+ puts "\n📊 Testing Available Features"
294
+ puts "-" * 40
295
+
296
+ # Test 9: Feature detection
297
+ test_count += 1
298
+ issue_found = test_issue("Feature detection works correctly") do
299
+ features = UniversalDocumentProcessor.available_features
300
+ raise "No features detected" if features.empty?
301
+ raise "Missing basic features" unless features.include?(:text_processing)
302
+ raise "Missing TSV support" unless features.include?(:tsv_processing)
303
+
304
+ # AI should not be available without API key
305
+ raise "AI should not be available" if features.include?(:ai_processing)
306
+ end
307
+ issue_count += 1 if issue_found
308
+
309
+ puts "\n" + "=" * 70
310
+ puts "🎯 ISSUE ANALYSIS COMPLETE"
311
+ puts "=" * 70
312
+
313
+ puts "\n📈 SUMMARY:"
314
+ puts " Total tests run: #{test_count}"
315
+ puts " Issues found: #{issue_count}"
316
+ puts " Warnings: #{warnings.compact.length}"
317
+
318
+ if issue_count == 0
319
+ puts "\n✅ NO CRITICAL ISSUES FOUND!"
320
+ puts "The gem appears to be working correctly for basic usage."
321
+ else
322
+ puts "\n❌ CRITICAL ISSUES DETECTED!"
323
+ puts "The gem has #{issue_count} critical issues that need attention."
324
+ end
325
+
326
+ if warnings.compact.length > 0
327
+ puts "\n⚠️ WARNINGS TO CONSIDER:"
328
+ warnings.compact.each_with_index do |warning, i|
329
+ puts " #{i + 1}. #{warning}"
330
+ end
331
+ else
332
+ puts "\n✅ No significant warnings detected."
333
+ end
334
+
335
+ puts "\n🔮 POTENTIAL USER ISSUES TO WATCH FOR:"
336
+ puts "1. Users trying to use AI features without setting OPENAI_API_KEY"
337
+ puts "2. Users expecting PDF/Word processing without installing optional gems"
338
+ puts "3. Large file processing performance"
339
+ puts "4. Unicode filename handling on different systems"
340
+ puts "5. Memory usage with batch processing of many files"
341
+
342
+ puts "\n💡 RECOMMENDATIONS:"
343
+ puts "1. ✅ AI dependency handling is working correctly"
344
+ puts "2. ✅ TSV processing is functional"
345
+ puts "3. ✅ Error messages are helpful"
346
+ puts "4. 📚 Consider adding performance guidelines to documentation"
347
+ puts "5. 📚 Consider adding memory usage notes for large files"
348
+
349
+ exit issue_count
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: universal_document_processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vikas Patil
@@ -13,16 +13,22 @@ dependencies:
13
13
  name: activesupport
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
- - - "~>"
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '5.0'
19
+ - - "<"
17
20
  - !ruby/object:Gem::Version
18
- version: '7.0'
21
+ version: '9.0'
19
22
  type: :runtime
20
23
  prerelease: false
21
24
  version_requirements: !ruby/object:Gem::Requirement
22
25
  requirements:
23
- - - "~>"
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ version: '5.0'
29
+ - - "<"
24
30
  - !ruby/object:Gem::Version
25
- version: '7.0'
31
+ version: '9.0'
26
32
  - !ruby/object:Gem::Dependency
27
33
  name: marcel
28
34
  requirement: !ruby/object:Gem::Requirement
@@ -189,9 +195,13 @@ extra_rdoc_files: []
189
195
  files:
190
196
  - CHANGELOG.md
191
197
  - Gemfile
198
+ - ISSUES_ANALYSIS.md
192
199
  - LICENSE
200
+ - PERFORMANCE.md
193
201
  - README.md
194
202
  - Rakefile
203
+ - USER_GUIDE.md
204
+ - debug_test.rb
195
205
  - lib/universal_document_processor.rb
196
206
  - lib/universal_document_processor/ai_agent.rb
197
207
  - lib/universal_document_processor/document.rb
@@ -207,6 +217,10 @@ files:
207
217
  - lib/universal_document_processor/utils/file_detector.rb
208
218
  - lib/universal_document_processor/utils/japanese_filename_handler.rb
209
219
  - lib/universal_document_processor/version.rb
220
+ - test_ai_dependency.rb
221
+ - test_core_functionality.rb
222
+ - test_performance_memory.rb
223
+ - test_published_gem.rb
210
224
  homepage: https://github.com/vpatil160/universal_document_processor
211
225
  licenses:
212
226
  - MIT
@@ -227,7 +241,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
227
241
  requirements:
228
242
  - - ">="
229
243
  - !ruby/object:Gem::Version
230
- version: 2.7.0
244
+ version: 2.2.2
231
245
  required_rubygems_version: !ruby/object:Gem::Requirement
232
246
  requirements:
233
247
  - - ">="