universal_document_processor 1.0.2 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ require 'uri'
4
4
 
5
5
  module UniversalDocumentProcessor
6
6
  class AIAgent
7
- attr_reader :api_key, :model, :base_url, :conversation_history
7
+ attr_reader :api_key, :model, :base_url, :conversation_history, :ai_enabled
8
8
 
9
9
  def initialize(options = {})
10
10
  @api_key = options[:api_key] || ENV['OPENAI_API_KEY']
@@ -13,12 +13,15 @@ module UniversalDocumentProcessor
13
13
  @conversation_history = []
14
14
  @max_history = options[:max_history] || 10
15
15
  @temperature = options[:temperature] || 0.7
16
+ @ai_enabled = false
16
17
 
17
18
  validate_configuration
18
19
  end
19
20
 
20
21
  # Main document analysis with AI
21
22
  def analyze_document(document_result, query = nil)
23
+ ensure_ai_available!
24
+
22
25
  context = build_document_context(document_result)
23
26
 
24
27
  if query
@@ -63,6 +66,8 @@ Please provide:
63
66
 
64
67
  # Ask specific questions about a document
65
68
  def ask_document_question(document_result, question)
69
+ ensure_ai_available!
70
+
66
71
  context = build_document_context(document_result)
67
72
 
68
73
  prompt = build_question_prompt(context, question)
@@ -74,6 +79,8 @@ Please provide:
74
79
 
75
80
  # Summarize document content
76
81
  def summarize_document(document_result, length: :medium)
82
+ ensure_ai_available!
83
+
77
84
  context = build_document_context(document_result)
78
85
 
79
86
  length_instruction = case length
@@ -92,6 +99,8 @@ Please provide:
92
99
 
93
100
  # Extract key information from document
94
101
  def extract_key_information(document_result, categories = nil)
102
+ ensure_ai_available!
103
+
95
104
  context = build_document_context(document_result)
96
105
  categories ||= ['key_facts', 'important_dates', 'names', 'locations', 'numbers']
97
106
 
@@ -104,6 +113,8 @@ Please provide:
104
113
 
105
114
  # Translate document content
106
115
  def translate_document(document_result, target_language)
116
+ ensure_ai_available!
117
+
107
118
  context = build_document_context(document_result)
108
119
 
109
120
  prompt = build_translation_prompt(context, target_language)
@@ -115,6 +126,8 @@ Please provide:
115
126
 
116
127
  # Generate document insights and recommendations
117
128
  def generate_insights(document_result)
129
+ ensure_ai_available!
130
+
118
131
  context = build_document_context(document_result)
119
132
 
120
133
  prompt = build_insights_prompt(context)
@@ -126,6 +139,8 @@ Please provide:
126
139
 
127
140
  # Compare multiple documents
128
141
  def compare_documents(document_results, comparison_type = :content)
142
+ ensure_ai_available!
143
+
129
144
  contexts = document_results.map { |doc| build_document_context(doc) }
130
145
 
131
146
  prompt = build_comparison_prompt(contexts, comparison_type)
@@ -137,6 +152,8 @@ Please provide:
137
152
 
138
153
  # Classify document type and purpose
139
154
  def classify_document(document_result)
155
+ ensure_ai_available!
156
+
140
157
  context = build_document_context(document_result)
141
158
 
142
159
  prompt = build_classification_prompt(context)
@@ -148,6 +165,8 @@ Please provide:
148
165
 
149
166
  # Generate action items from document
150
167
  def extract_action_items(document_result)
168
+ ensure_ai_available!
169
+
151
170
  context = build_document_context(document_result)
152
171
 
153
172
  prompt = build_action_items_prompt(context)
@@ -159,6 +178,8 @@ Please provide:
159
178
 
160
179
  # Chat about the document
161
180
  def chat(message, document_result = nil)
181
+ ensure_ai_available!
182
+
162
183
  if document_result
163
184
  context = build_document_context(document_result)
164
185
  prompt = build_chat_prompt(context, message)
@@ -180,6 +201,10 @@ Please provide:
180
201
  def conversation_summary
181
202
  return "No conversation history" if @conversation_history.empty?
182
203
 
204
+ unless @ai_enabled
205
+ return "AI features are disabled. Cannot generate conversation summary."
206
+ end
207
+
183
208
  history_text = @conversation_history.map do |entry|
184
209
  "Q: #{entry[:question]}\nA: #{entry[:answer]}"
185
210
  end.join("\n\n")
@@ -188,11 +213,27 @@ Please provide:
188
213
  call_openai_api(prompt)
189
214
  end
190
215
 
216
+ # Check if AI features are available
217
+ def ai_available?
218
+ @ai_enabled
219
+ end
220
+
191
221
  private
192
222
 
193
223
  def validate_configuration
194
- raise ArgumentError, "OpenAI API key is required" unless @api_key
195
- raise ArgumentError, "OpenAI API key cannot be empty" if @api_key.empty?
224
+ if @api_key && !@api_key.empty?
225
+ @ai_enabled = true
226
+ else
227
+ @ai_enabled = false
228
+ warn "Warning: OpenAI API key not provided. AI features will be disabled. Set OPENAI_API_KEY environment variable or pass api_key option to enable AI features."
229
+ end
230
+ end
231
+
232
+ # Ensure AI is available before making API calls
233
+ def ensure_ai_available!
234
+ unless @ai_enabled
235
+ raise DependencyMissingError, "AI features are not available. Please provide an OpenAI API key to use AI functionality."
236
+ end
196
237
  end
197
238
 
198
239
  def build_document_context(document_result)
@@ -222,7 +222,11 @@ module UniversalDocumentProcessor
222
222
 
223
223
  def fallback_text_extraction
224
224
  begin
225
- Yomu.new(@file_path).text
225
+ if defined?(Yomu)
226
+ Yomu.new(@file_path).text
227
+ else
228
+ "Unable to extract text: Yomu gem not available. Please install 'yomu' gem for universal text extraction: gem install yomu"
229
+ end
226
230
  rescue => e
227
231
  "Unable to extract text: #{e.message}"
228
232
  end
@@ -10,7 +10,11 @@ module UniversalDocumentProcessor
10
10
 
11
11
  def extract_text
12
12
  # Fallback to universal text extraction
13
- Yomu.new(@file_path).text
13
+ if defined?(Yomu)
14
+ Yomu.new(@file_path).text
15
+ else
16
+ raise ProcessingError, "Universal text extraction requires the 'yomu' gem. Install it with: gem install yomu -v '~> 0.2'"
17
+ end
14
18
  rescue => e
15
19
  raise ProcessingError, "Failed to extract text: #{e.message}"
16
20
  end
@@ -2,14 +2,21 @@ module UniversalDocumentProcessor
2
2
  module Processors
3
3
  class PdfProcessor < BaseProcessor
4
4
  def extract_text
5
+ ensure_pdf_reader_available!
6
+
5
7
  with_error_handling do
6
8
  reader = PDF::Reader.new(@file_path)
7
9
  text = reader.pages.map(&:text).join("\n")
8
10
  text.strip.empty? ? "No text content found in PDF" : text
9
11
  end
12
+ rescue => e
13
+ # Fallback to Yomu if pdf-reader fails
14
+ fallback_text_extraction(e)
10
15
  end
11
16
 
12
17
  def extract_metadata
18
+ ensure_pdf_reader_available!
19
+
13
20
  with_error_handling do
14
21
  reader = PDF::Reader.new(@file_path)
15
22
  info = reader.info || {}
@@ -32,6 +39,8 @@ module UniversalDocumentProcessor
32
39
  end
33
40
 
34
41
  def extract_images
42
+ ensure_pdf_reader_available!
43
+
35
44
  with_error_handling do
36
45
  # Extract embedded images from PDF
37
46
  images = []
@@ -57,6 +66,8 @@ module UniversalDocumentProcessor
57
66
  end
58
67
 
59
68
  def extract_tables
69
+ ensure_pdf_reader_available!
70
+
60
71
  with_error_handling do
61
72
  # Basic table extraction from PDF text
62
73
  tables = []
@@ -87,6 +98,12 @@ module UniversalDocumentProcessor
87
98
 
88
99
  private
89
100
 
101
+ def ensure_pdf_reader_available!
102
+ unless defined?(PDF::Reader)
103
+ raise DependencyMissingError, "PDF processing requires the 'pdf-reader' gem. Install it with: gem install pdf-reader -v '~> 2.0'"
104
+ end
105
+ end
106
+
90
107
  def extract_form_fields(reader)
91
108
  # Extract PDF form fields if present
92
109
  []
@@ -100,6 +117,20 @@ module UniversalDocumentProcessor
100
117
  rescue
101
118
  []
102
119
  end
120
+
121
+ def fallback_text_extraction(original_error)
122
+ if defined?(Yomu)
123
+ begin
124
+ text = Yomu.new(@file_path).text
125
+ return text unless text.nil? || text.strip.empty?
126
+ "No text content found in PDF"
127
+ rescue => yomu_error
128
+ raise ProcessingError, "PDF text extraction failed. pdf-reader error: #{original_error.message}. Yomu fallback error: #{yomu_error.message}"
129
+ end
130
+ else
131
+ raise ProcessingError, "PDF text extraction failed: #{original_error.message}. Consider installing 'yomu' gem for fallback extraction: gem install yomu"
132
+ end
133
+ end
103
134
  end
104
135
  end
105
136
  end
@@ -30,6 +30,7 @@ module UniversalDocumentProcessor
30
30
  def extract_images
31
31
  with_error_handling do
32
32
  return [] unless @file_path.end_with?('.docx')
33
+ ensure_docx_available!
33
34
 
34
35
  images = []
35
36
  doc = Docx::Document.open(@file_path)
@@ -53,6 +54,7 @@ module UniversalDocumentProcessor
53
54
  def extract_tables
54
55
  with_error_handling do
55
56
  return [] unless @file_path.end_with?('.docx')
57
+ ensure_docx_available!
56
58
 
57
59
  tables = []
58
60
  doc = Docx::Document.open(@file_path)
@@ -88,7 +90,15 @@ module UniversalDocumentProcessor
88
90
 
89
91
  private
90
92
 
93
+ def ensure_docx_available!
94
+ unless defined?(Docx)
95
+ raise DependencyMissingError, "DOCX processing requires the 'docx' gem. Install it with: gem install docx -v '~> 0.8'"
96
+ end
97
+ end
98
+
91
99
  def extract_docx_text
100
+ ensure_docx_available!
101
+
92
102
  doc = Docx::Document.open(@file_path)
93
103
  text_content = []
94
104
 
@@ -109,6 +119,8 @@ module UniversalDocumentProcessor
109
119
  end
110
120
 
111
121
  def extract_docx_metadata
122
+ ensure_docx_available!
123
+
112
124
  doc = Docx::Document.open(@file_path)
113
125
  core_properties = doc.core_properties
114
126
 
@@ -1,3 +1,3 @@
1
1
  module UniversalDocumentProcessor
2
- VERSION = "1.0.2"
2
+ VERSION = "1.0.5"
3
3
  end
@@ -122,48 +122,72 @@ module UniversalDocumentProcessor
122
122
  def self.ai_analyze(file_path, options = {})
123
123
  document_result = process(file_path, options)
124
124
  ai_agent = AIAgent.new(options)
125
+ unless ai_agent.ai_available?
126
+ raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
127
+ end
125
128
  ai_agent.analyze_document(document_result, options[:query])
126
129
  end
127
130
 
128
131
  def self.ai_summarize(file_path, length: :medium, options: {})
129
132
  document_result = process(file_path, options)
130
133
  ai_agent = AIAgent.new(options)
134
+ unless ai_agent.ai_available?
135
+ raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
136
+ end
131
137
  ai_agent.summarize_document(document_result, length: length)
132
138
  end
133
139
 
134
140
  def self.ai_extract_info(file_path, categories = nil, options = {})
135
141
  document_result = process(file_path, options)
136
142
  ai_agent = AIAgent.new(options)
143
+ unless ai_agent.ai_available?
144
+ raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
145
+ end
137
146
  ai_agent.extract_key_information(document_result, categories)
138
147
  end
139
148
 
140
149
  def self.ai_translate(file_path, target_language, options = {})
141
150
  document_result = process(file_path, options)
142
151
  ai_agent = AIAgent.new(options)
152
+ unless ai_agent.ai_available?
153
+ raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
154
+ end
143
155
  ai_agent.translate_document(document_result, target_language)
144
156
  end
145
157
 
146
158
  def self.ai_classify(file_path, options = {})
147
159
  document_result = process(file_path, options)
148
160
  ai_agent = AIAgent.new(options)
161
+ unless ai_agent.ai_available?
162
+ raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
163
+ end
149
164
  ai_agent.classify_document(document_result)
150
165
  end
151
166
 
152
167
  def self.ai_insights(file_path, options = {})
153
168
  document_result = process(file_path, options)
154
169
  ai_agent = AIAgent.new(options)
170
+ unless ai_agent.ai_available?
171
+ raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
172
+ end
155
173
  ai_agent.generate_insights(document_result)
156
174
  end
157
175
 
158
176
  def self.ai_action_items(file_path, options = {})
159
177
  document_result = process(file_path, options)
160
178
  ai_agent = AIAgent.new(options)
179
+ unless ai_agent.ai_available?
180
+ raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
181
+ end
161
182
  ai_agent.extract_action_items(document_result)
162
183
  end
163
184
 
164
185
  def self.ai_compare(file_paths, comparison_type = :content, options = {})
165
186
  document_results = file_paths.map { |path| process(path, options) }
166
187
  ai_agent = AIAgent.new(options)
188
+ unless ai_agent.ai_available?
189
+ raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
190
+ end
167
191
  ai_agent.compare_documents(document_results, comparison_type)
168
192
  end
169
193
 
@@ -171,6 +195,12 @@ module UniversalDocumentProcessor
171
195
  AIAgent.new(options)
172
196
  end
173
197
 
198
+ # Check if AI features are available
199
+ def self.ai_available?(options = {})
200
+ ai_agent = AIAgent.new(options)
201
+ ai_agent.ai_available?
202
+ end
203
+
174
204
  # Convert document to different format
175
205
  def self.convert(file_path_or_io, target_format, options = {})
176
206
  Document.new(file_path_or_io, options).convert_to(target_format)
@@ -207,9 +237,54 @@ module UniversalDocumentProcessor
207
237
  end
208
238
  end
209
239
 
240
+ # Get list of optional dependencies
241
+ def self.optional_dependencies
242
+ {
243
+ 'pdf-reader' => '~> 2.0', # PDF text extraction
244
+ 'prawn' => '~> 2.4', # PDF generation
245
+ 'docx' => '~> 0.8', # Word document processing
246
+ 'roo' => '~> 2.8', # Excel/Spreadsheet processing
247
+ 'mini_magick' => '~> 4.11', # Image processing
248
+ 'yomu' => '~> 0.2' # Universal text extraction fallback
249
+ }
250
+ end
251
+
252
+ # Check which optional dependencies are missing
253
+ def self.missing_dependencies
254
+ missing = []
255
+ missing << 'pdf-reader' unless dependency_available?(:pdf_reader)
256
+ missing << 'prawn' unless dependency_available?(:prawn)
257
+ missing << 'docx' unless dependency_available?(:docx)
258
+ missing << 'roo' unless dependency_available?(:roo)
259
+ missing << 'mini_magick' unless dependency_available?(:mini_magick)
260
+ missing << 'yomu' unless dependency_available?(:yomu)
261
+ missing
262
+ end
263
+
264
+ # Generate installation instructions for missing dependencies
265
+ def self.installation_instructions
266
+ missing = missing_dependencies
267
+ return "All optional dependencies are installed!" if missing.empty?
268
+
269
+ instructions = ["To enable additional features, install these optional gems:"]
270
+ missing.each do |gem_name|
271
+ version = optional_dependencies[gem_name]
272
+ instructions << " gem install #{gem_name} -v '#{version}'"
273
+ end
274
+
275
+ instructions << ""
276
+ instructions << "Or add to your Gemfile:"
277
+ missing.each do |gem_name|
278
+ version = optional_dependencies[gem_name]
279
+ instructions << " gem '#{gem_name}', '#{version}'"
280
+ end
281
+
282
+ instructions.join("\n")
283
+ end
284
+
210
285
  # Get list of available features based on installed dependencies
211
286
  def self.available_features
212
- features = [:text_processing, :html_processing, :xml_processing, :csv_processing, :json_processing, :archive_processing]
287
+ features = [:text_processing, :html_processing, :xml_processing, :csv_processing, :json_processing, :archive_processing, :tsv_processing]
213
288
 
214
289
  features << :pdf_processing if dependency_available?(:pdf_reader)
215
290
  features << :word_processing if dependency_available?(:docx)
@@ -218,6 +293,14 @@ module UniversalDocumentProcessor
218
293
  features << :universal_text_extraction if dependency_available?(:yomu)
219
294
  features << :pdf_generation if dependency_available?(:prawn)
220
295
 
296
+ # Check AI availability without creating circular dependency
297
+ begin
298
+ ai_agent = AIAgent.new
299
+ features << :ai_processing if ai_agent.ai_enabled
300
+ rescue
301
+ # AI not available
302
+ end
303
+
221
304
  features
222
305
  end
223
306
  end
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Add lib directory to load path
4
+ $LOAD_PATH.unshift File.expand_path('lib', __dir__)
5
+
6
+ # Load the gem
7
+ require 'universal_document_processor'
8
+
9
+ puts "Testing AI Dependency Handling"
10
+ puts "=" * 50
11
+
12
+ # Test 1: Check AI availability without API key
13
+ puts "\n1. Testing AI availability without API key:"
14
+ ai_available = UniversalDocumentProcessor.ai_available?
15
+ puts " AI Available: #{ai_available}"
16
+
17
+ # Test 2: Create AI agent without API key
18
+ puts "\n2. Creating AI agent without API key:"
19
+ agent = UniversalDocumentProcessor.create_ai_agent
20
+ puts " Agent created: #{agent.class}"
21
+ puts " AI enabled: #{agent.ai_enabled}"
22
+ puts " AI available: #{agent.ai_available?}"
23
+
24
+ # Test 3: Try to use AI methods without API key
25
+ puts "\n3. Testing AI methods without API key:"
26
+
27
+ # Create a sample text file
28
+ require 'tempfile'
29
+ sample_file = Tempfile.new(['test', '.txt'])
30
+ sample_file.write("This is a test document for AI processing.")
31
+ sample_file.close
32
+
33
+ begin
34
+ result = UniversalDocumentProcessor.ai_analyze(sample_file.path)
35
+ puts " ERROR: Should have raised an exception!"
36
+ rescue UniversalDocumentProcessor::DependencyMissingError => e
37
+ puts " ✓ Correctly raised DependencyMissingError: #{e.message}"
38
+ rescue => e
39
+ puts " ✗ Unexpected error: #{e.class} - #{e.message}"
40
+ end
41
+
42
+ # Test 4: Check available features
43
+ puts "\n4. Available features:"
44
+ features = UniversalDocumentProcessor.available_features
45
+ puts " Features: #{features.join(', ')}"
46
+ puts " AI processing included: #{features.include?(:ai_processing)}"
47
+
48
+ # Test 5: Check optional dependencies
49
+ puts "\n5. Optional dependencies:"
50
+ optional_deps = UniversalDocumentProcessor.optional_dependencies
51
+ puts " Optional dependencies: #{optional_deps.keys.join(', ')}"
52
+
53
+ missing_deps = UniversalDocumentProcessor.missing_dependencies
54
+ puts " Missing dependencies: #{missing_deps.join(', ')}"
55
+
56
+ # Test 6: Installation instructions
57
+ puts "\n6. Installation instructions:"
58
+ instructions = UniversalDocumentProcessor.installation_instructions
59
+ puts instructions
60
+
61
+ # Test 7: Test with API key if provided
62
+ if ENV['OPENAI_API_KEY'] && !ENV['OPENAI_API_KEY'].empty?
63
+ puts "\n7. Testing with API key:"
64
+ ai_available_with_key = UniversalDocumentProcessor.ai_available?
65
+ puts " AI Available with key: #{ai_available_with_key}"
66
+
67
+ agent_with_key = UniversalDocumentProcessor.create_ai_agent
68
+ puts " Agent AI enabled: #{agent_with_key.ai_enabled}"
69
+ else
70
+ puts "\n7. Skipping API key test (OPENAI_API_KEY not set)"
71
+ end
72
+
73
+ # Clean up
74
+ sample_file.unlink
75
+
76
+ puts "\n" + "=" * 50
77
+ puts "AI Dependency Test Complete!"
78
+ puts "✓ AI features are properly optional"
79
+ puts "✓ Clear error messages when dependencies missing"
80
+ puts "✓ Graceful degradation when features unavailable"