universal_document_processor 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +100 -17
- data/lib/universal_document_processor/ai_agent.rb +44 -3
- data/lib/universal_document_processor/processors/pdf_processor.rb +14 -0
- data/lib/universal_document_processor/processors/word_processor.rb +12 -0
- data/lib/universal_document_processor/version.rb +1 -1
- data/lib/universal_document_processor.rb +84 -1
- metadata +34 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8ec66decfe8626354f9fe05b757dbdc11921b21fa6b5dccfdb4d8ce5deba2c3f
|
4
|
+
data.tar.gz: 19c2802d337d0517ab91cfe71bdb2b051213e17f0e1a76605c5bce895429eed4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2cbb1944e533a4a75d6248dd6df279219e4a4c7b77dac3b0e4d474b5b4375203d188bff5d388af30716b3dc5487fcd293955c8504565c4a1b56d552a8484993
|
7
|
+
data.tar.gz: 2d95c2f173de302d14cdfda6d3357b5d7d9a5cf82cabc2e5622bdb8f6d7e60c56bab75eccf85a312045be5f8e66b743b4344420847192a3b0268cd4a70c5f414
|
data/Rakefile
CHANGED
@@ -1,12 +1,105 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require "rubocop/rake_task"
|
1
|
+
require 'rake/testtask'
|
2
|
+
require 'bundler/gem_tasks'
|
4
3
|
|
5
|
-
|
6
|
-
|
4
|
+
# Default task
|
5
|
+
task default: :test
|
7
6
|
|
8
|
-
|
9
|
-
|
7
|
+
# Test task
|
8
|
+
Rake::TestTask.new(:test) do |t|
|
9
|
+
t.libs << 'test'
|
10
|
+
t.libs << 'lib'
|
11
|
+
t.test_files = FileList['test/test_*.rb']
|
12
|
+
t.verbose = true
|
13
|
+
end
|
14
|
+
|
15
|
+
# Individual test tasks
|
16
|
+
Rake::TestTask.new(:test_core) do |t|
|
17
|
+
t.libs << 'test'
|
18
|
+
t.libs << 'lib'
|
19
|
+
t.test_files = FileList['test/test_universal_document_processor.rb']
|
20
|
+
t.verbose = true
|
21
|
+
end
|
22
|
+
|
23
|
+
Rake::TestTask.new(:test_ai) do |t|
|
24
|
+
t.libs << 'test'
|
25
|
+
t.libs << 'lib'
|
26
|
+
t.test_files = FileList['test/test_ai_agent.rb']
|
27
|
+
t.verbose = true
|
28
|
+
end
|
29
|
+
|
30
|
+
Rake::TestTask.new(:test_processors) do |t|
|
31
|
+
t.libs << 'test'
|
32
|
+
t.libs << 'lib'
|
33
|
+
t.test_files = FileList['test/test_processors.rb']
|
34
|
+
t.verbose = true
|
35
|
+
end
|
36
|
+
|
37
|
+
# Coverage task (if simplecov is available)
|
38
|
+
desc "Run tests with coverage"
|
39
|
+
task :coverage do
|
40
|
+
ENV['COVERAGE'] = 'true'
|
41
|
+
Rake::Task[:test].invoke
|
42
|
+
end
|
43
|
+
|
44
|
+
# Lint task (if rubocop is available)
|
45
|
+
desc "Run RuboCop"
|
46
|
+
task :lint do
|
47
|
+
begin
|
48
|
+
require 'rubocop/rake_task'
|
49
|
+
RuboCop::RakeTask.new
|
50
|
+
rescue LoadError
|
51
|
+
puts "RuboCop not available. Install it with: gem install rubocop"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
# Documentation task
|
56
|
+
desc "Generate documentation"
|
57
|
+
task :doc do
|
58
|
+
system "yard doc"
|
59
|
+
end
|
60
|
+
|
61
|
+
# Clean task
|
62
|
+
desc "Clean up generated files"
|
63
|
+
task :clean do
|
64
|
+
FileUtils.rm_rf('coverage')
|
65
|
+
FileUtils.rm_rf('doc')
|
66
|
+
FileUtils.rm_rf('pkg')
|
67
|
+
FileUtils.rm_f('Gemfile.lock')
|
68
|
+
end
|
69
|
+
|
70
|
+
# Install dependencies
|
71
|
+
desc "Install dependencies"
|
72
|
+
task :install do
|
73
|
+
system "bundle install"
|
74
|
+
end
|
75
|
+
|
76
|
+
# Quality check task
|
77
|
+
desc "Run all quality checks"
|
78
|
+
task quality: [:test, :lint]
|
79
|
+
|
80
|
+
# CI task
|
81
|
+
desc "Run CI tasks"
|
82
|
+
task ci: [:install, :test]
|
83
|
+
|
84
|
+
# Development setup
|
85
|
+
desc "Setup development environment"
|
86
|
+
task :setup do
|
87
|
+
puts "Setting up development environment..."
|
88
|
+
Rake::Task[:install].invoke
|
89
|
+
puts "Development environment ready!"
|
90
|
+
puts ""
|
91
|
+
puts "Available tasks:"
|
92
|
+
puts " rake test - Run all tests"
|
93
|
+
puts " rake test_core - Run core functionality tests"
|
94
|
+
puts " rake test_ai - Run AI agent tests"
|
95
|
+
puts " rake test_processors - Run processor tests"
|
96
|
+
puts " rake coverage - Run tests with coverage"
|
97
|
+
puts " rake lint - Run RuboCop linting"
|
98
|
+
puts " rake doc - Generate documentation"
|
99
|
+
puts " rake clean - Clean up generated files"
|
100
|
+
puts ""
|
101
|
+
puts "To run tests with AI features, set OPENAI_API_KEY environment variable"
|
102
|
+
end
|
10
103
|
|
11
104
|
desc "Build the gem"
|
12
105
|
task :build do
|
@@ -23,14 +116,4 @@ desc "Install the gem locally"
|
|
23
116
|
task :install do
|
24
117
|
system "gem build universal_document_processor.gemspec"
|
25
118
|
system "gem install universal_document_processor-*.gem"
|
26
|
-
end
|
27
|
-
|
28
|
-
desc "Clean build artifacts"
|
29
|
-
task :clean do
|
30
|
-
system "rm -f *.gem"
|
31
|
-
end
|
32
|
-
|
33
|
-
desc "Generate documentation"
|
34
|
-
task :doc do
|
35
|
-
system "yard doc"
|
36
119
|
end
|
@@ -4,7 +4,7 @@ require 'uri'
|
|
4
4
|
|
5
5
|
module UniversalDocumentProcessor
|
6
6
|
class AIAgent
|
7
|
-
attr_reader :api_key, :model, :base_url, :conversation_history
|
7
|
+
attr_reader :api_key, :model, :base_url, :conversation_history, :ai_enabled
|
8
8
|
|
9
9
|
def initialize(options = {})
|
10
10
|
@api_key = options[:api_key] || ENV['OPENAI_API_KEY']
|
@@ -13,12 +13,15 @@ module UniversalDocumentProcessor
|
|
13
13
|
@conversation_history = []
|
14
14
|
@max_history = options[:max_history] || 10
|
15
15
|
@temperature = options[:temperature] || 0.7
|
16
|
+
@ai_enabled = false
|
16
17
|
|
17
18
|
validate_configuration
|
18
19
|
end
|
19
20
|
|
20
21
|
# Main document analysis with AI
|
21
22
|
def analyze_document(document_result, query = nil)
|
23
|
+
ensure_ai_available!
|
24
|
+
|
22
25
|
context = build_document_context(document_result)
|
23
26
|
|
24
27
|
if query
|
@@ -63,6 +66,8 @@ Please provide:
|
|
63
66
|
|
64
67
|
# Ask specific questions about a document
|
65
68
|
def ask_document_question(document_result, question)
|
69
|
+
ensure_ai_available!
|
70
|
+
|
66
71
|
context = build_document_context(document_result)
|
67
72
|
|
68
73
|
prompt = build_question_prompt(context, question)
|
@@ -74,6 +79,8 @@ Please provide:
|
|
74
79
|
|
75
80
|
# Summarize document content
|
76
81
|
def summarize_document(document_result, length: :medium)
|
82
|
+
ensure_ai_available!
|
83
|
+
|
77
84
|
context = build_document_context(document_result)
|
78
85
|
|
79
86
|
length_instruction = case length
|
@@ -92,6 +99,8 @@ Please provide:
|
|
92
99
|
|
93
100
|
# Extract key information from document
|
94
101
|
def extract_key_information(document_result, categories = nil)
|
102
|
+
ensure_ai_available!
|
103
|
+
|
95
104
|
context = build_document_context(document_result)
|
96
105
|
categories ||= ['key_facts', 'important_dates', 'names', 'locations', 'numbers']
|
97
106
|
|
@@ -104,6 +113,8 @@ Please provide:
|
|
104
113
|
|
105
114
|
# Translate document content
|
106
115
|
def translate_document(document_result, target_language)
|
116
|
+
ensure_ai_available!
|
117
|
+
|
107
118
|
context = build_document_context(document_result)
|
108
119
|
|
109
120
|
prompt = build_translation_prompt(context, target_language)
|
@@ -115,6 +126,8 @@ Please provide:
|
|
115
126
|
|
116
127
|
# Generate document insights and recommendations
|
117
128
|
def generate_insights(document_result)
|
129
|
+
ensure_ai_available!
|
130
|
+
|
118
131
|
context = build_document_context(document_result)
|
119
132
|
|
120
133
|
prompt = build_insights_prompt(context)
|
@@ -126,6 +139,8 @@ Please provide:
|
|
126
139
|
|
127
140
|
# Compare multiple documents
|
128
141
|
def compare_documents(document_results, comparison_type = :content)
|
142
|
+
ensure_ai_available!
|
143
|
+
|
129
144
|
contexts = document_results.map { |doc| build_document_context(doc) }
|
130
145
|
|
131
146
|
prompt = build_comparison_prompt(contexts, comparison_type)
|
@@ -137,6 +152,8 @@ Please provide:
|
|
137
152
|
|
138
153
|
# Classify document type and purpose
|
139
154
|
def classify_document(document_result)
|
155
|
+
ensure_ai_available!
|
156
|
+
|
140
157
|
context = build_document_context(document_result)
|
141
158
|
|
142
159
|
prompt = build_classification_prompt(context)
|
@@ -148,6 +165,8 @@ Please provide:
|
|
148
165
|
|
149
166
|
# Generate action items from document
|
150
167
|
def extract_action_items(document_result)
|
168
|
+
ensure_ai_available!
|
169
|
+
|
151
170
|
context = build_document_context(document_result)
|
152
171
|
|
153
172
|
prompt = build_action_items_prompt(context)
|
@@ -159,6 +178,8 @@ Please provide:
|
|
159
178
|
|
160
179
|
# Chat about the document
|
161
180
|
def chat(message, document_result = nil)
|
181
|
+
ensure_ai_available!
|
182
|
+
|
162
183
|
if document_result
|
163
184
|
context = build_document_context(document_result)
|
164
185
|
prompt = build_chat_prompt(context, message)
|
@@ -180,6 +201,10 @@ Please provide:
|
|
180
201
|
def conversation_summary
|
181
202
|
return "No conversation history" if @conversation_history.empty?
|
182
203
|
|
204
|
+
unless @ai_enabled
|
205
|
+
return "AI features are disabled. Cannot generate conversation summary."
|
206
|
+
end
|
207
|
+
|
183
208
|
history_text = @conversation_history.map do |entry|
|
184
209
|
"Q: #{entry[:question]}\nA: #{entry[:answer]}"
|
185
210
|
end.join("\n\n")
|
@@ -188,11 +213,27 @@ Please provide:
|
|
188
213
|
call_openai_api(prompt)
|
189
214
|
end
|
190
215
|
|
216
|
+
# Check if AI features are available
|
217
|
+
def ai_available?
|
218
|
+
@ai_enabled
|
219
|
+
end
|
220
|
+
|
191
221
|
private
|
192
222
|
|
193
223
|
def validate_configuration
|
194
|
-
|
195
|
-
|
224
|
+
if @api_key && !@api_key.empty?
|
225
|
+
@ai_enabled = true
|
226
|
+
else
|
227
|
+
@ai_enabled = false
|
228
|
+
warn "Warning: OpenAI API key not provided. AI features will be disabled. Set OPENAI_API_KEY environment variable or pass api_key option to enable AI features."
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
232
|
+
# Ensure AI is available before making API calls
|
233
|
+
def ensure_ai_available!
|
234
|
+
unless @ai_enabled
|
235
|
+
raise DependencyMissingError, "AI features are not available. Please provide an OpenAI API key to use AI functionality."
|
236
|
+
end
|
196
237
|
end
|
197
238
|
|
198
239
|
def build_document_context(document_result)
|
@@ -2,6 +2,8 @@ module UniversalDocumentProcessor
|
|
2
2
|
module Processors
|
3
3
|
class PdfProcessor < BaseProcessor
|
4
4
|
def extract_text
|
5
|
+
ensure_pdf_reader_available!
|
6
|
+
|
5
7
|
with_error_handling do
|
6
8
|
reader = PDF::Reader.new(@file_path)
|
7
9
|
text = reader.pages.map(&:text).join("\n")
|
@@ -10,6 +12,8 @@ module UniversalDocumentProcessor
|
|
10
12
|
end
|
11
13
|
|
12
14
|
def extract_metadata
|
15
|
+
ensure_pdf_reader_available!
|
16
|
+
|
13
17
|
with_error_handling do
|
14
18
|
reader = PDF::Reader.new(@file_path)
|
15
19
|
info = reader.info || {}
|
@@ -32,6 +36,8 @@ module UniversalDocumentProcessor
|
|
32
36
|
end
|
33
37
|
|
34
38
|
def extract_images
|
39
|
+
ensure_pdf_reader_available!
|
40
|
+
|
35
41
|
with_error_handling do
|
36
42
|
# Extract embedded images from PDF
|
37
43
|
images = []
|
@@ -57,6 +63,8 @@ module UniversalDocumentProcessor
|
|
57
63
|
end
|
58
64
|
|
59
65
|
def extract_tables
|
66
|
+
ensure_pdf_reader_available!
|
67
|
+
|
60
68
|
with_error_handling do
|
61
69
|
# Basic table extraction from PDF text
|
62
70
|
tables = []
|
@@ -87,6 +95,12 @@ module UniversalDocumentProcessor
|
|
87
95
|
|
88
96
|
private
|
89
97
|
|
98
|
+
def ensure_pdf_reader_available!
|
99
|
+
unless defined?(PDF::Reader)
|
100
|
+
raise DependencyMissingError, "PDF processing requires the 'pdf-reader' gem. Install it with: gem install pdf-reader -v '~> 2.0'"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
90
104
|
def extract_form_fields(reader)
|
91
105
|
# Extract PDF form fields if present
|
92
106
|
[]
|
@@ -30,6 +30,7 @@ module UniversalDocumentProcessor
|
|
30
30
|
def extract_images
|
31
31
|
with_error_handling do
|
32
32
|
return [] unless @file_path.end_with?('.docx')
|
33
|
+
ensure_docx_available!
|
33
34
|
|
34
35
|
images = []
|
35
36
|
doc = Docx::Document.open(@file_path)
|
@@ -53,6 +54,7 @@ module UniversalDocumentProcessor
|
|
53
54
|
def extract_tables
|
54
55
|
with_error_handling do
|
55
56
|
return [] unless @file_path.end_with?('.docx')
|
57
|
+
ensure_docx_available!
|
56
58
|
|
57
59
|
tables = []
|
58
60
|
doc = Docx::Document.open(@file_path)
|
@@ -88,7 +90,15 @@ module UniversalDocumentProcessor
|
|
88
90
|
|
89
91
|
private
|
90
92
|
|
93
|
+
def ensure_docx_available!
|
94
|
+
unless defined?(Docx)
|
95
|
+
raise DependencyMissingError, "DOCX processing requires the 'docx' gem. Install it with: gem install docx -v '~> 0.8'"
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
91
99
|
def extract_docx_text
|
100
|
+
ensure_docx_available!
|
101
|
+
|
92
102
|
doc = Docx::Document.open(@file_path)
|
93
103
|
text_content = []
|
94
104
|
|
@@ -109,6 +119,8 @@ module UniversalDocumentProcessor
|
|
109
119
|
end
|
110
120
|
|
111
121
|
def extract_docx_metadata
|
122
|
+
ensure_docx_available!
|
123
|
+
|
112
124
|
doc = Docx::Document.open(@file_path)
|
113
125
|
core_properties = doc.core_properties
|
114
126
|
|
@@ -122,48 +122,72 @@ module UniversalDocumentProcessor
|
|
122
122
|
def self.ai_analyze(file_path, options = {})
|
123
123
|
document_result = process(file_path, options)
|
124
124
|
ai_agent = AIAgent.new(options)
|
125
|
+
unless ai_agent.ai_available?
|
126
|
+
raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
|
127
|
+
end
|
125
128
|
ai_agent.analyze_document(document_result, options[:query])
|
126
129
|
end
|
127
130
|
|
128
131
|
def self.ai_summarize(file_path, length: :medium, options: {})
|
129
132
|
document_result = process(file_path, options)
|
130
133
|
ai_agent = AIAgent.new(options)
|
134
|
+
unless ai_agent.ai_available?
|
135
|
+
raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
|
136
|
+
end
|
131
137
|
ai_agent.summarize_document(document_result, length: length)
|
132
138
|
end
|
133
139
|
|
134
140
|
def self.ai_extract_info(file_path, categories = nil, options = {})
|
135
141
|
document_result = process(file_path, options)
|
136
142
|
ai_agent = AIAgent.new(options)
|
143
|
+
unless ai_agent.ai_available?
|
144
|
+
raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
|
145
|
+
end
|
137
146
|
ai_agent.extract_key_information(document_result, categories)
|
138
147
|
end
|
139
148
|
|
140
149
|
def self.ai_translate(file_path, target_language, options = {})
|
141
150
|
document_result = process(file_path, options)
|
142
151
|
ai_agent = AIAgent.new(options)
|
152
|
+
unless ai_agent.ai_available?
|
153
|
+
raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
|
154
|
+
end
|
143
155
|
ai_agent.translate_document(document_result, target_language)
|
144
156
|
end
|
145
157
|
|
146
158
|
def self.ai_classify(file_path, options = {})
|
147
159
|
document_result = process(file_path, options)
|
148
160
|
ai_agent = AIAgent.new(options)
|
161
|
+
unless ai_agent.ai_available?
|
162
|
+
raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
|
163
|
+
end
|
149
164
|
ai_agent.classify_document(document_result)
|
150
165
|
end
|
151
166
|
|
152
167
|
def self.ai_insights(file_path, options = {})
|
153
168
|
document_result = process(file_path, options)
|
154
169
|
ai_agent = AIAgent.new(options)
|
170
|
+
unless ai_agent.ai_available?
|
171
|
+
raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
|
172
|
+
end
|
155
173
|
ai_agent.generate_insights(document_result)
|
156
174
|
end
|
157
175
|
|
158
176
|
def self.ai_action_items(file_path, options = {})
|
159
177
|
document_result = process(file_path, options)
|
160
178
|
ai_agent = AIAgent.new(options)
|
179
|
+
unless ai_agent.ai_available?
|
180
|
+
raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
|
181
|
+
end
|
161
182
|
ai_agent.extract_action_items(document_result)
|
162
183
|
end
|
163
184
|
|
164
185
|
def self.ai_compare(file_paths, comparison_type = :content, options = {})
|
165
186
|
document_results = file_paths.map { |path| process(path, options) }
|
166
187
|
ai_agent = AIAgent.new(options)
|
188
|
+
unless ai_agent.ai_available?
|
189
|
+
raise DependencyMissingError, "AI features require an OpenAI API key. Set OPENAI_API_KEY environment variable or pass api_key in options."
|
190
|
+
end
|
167
191
|
ai_agent.compare_documents(document_results, comparison_type)
|
168
192
|
end
|
169
193
|
|
@@ -171,6 +195,12 @@ module UniversalDocumentProcessor
|
|
171
195
|
AIAgent.new(options)
|
172
196
|
end
|
173
197
|
|
198
|
+
# Check if AI features are available
|
199
|
+
def self.ai_available?(options = {})
|
200
|
+
ai_agent = AIAgent.new(options)
|
201
|
+
ai_agent.ai_available?
|
202
|
+
end
|
203
|
+
|
174
204
|
# Convert document to different format
|
175
205
|
def self.convert(file_path_or_io, target_format, options = {})
|
176
206
|
Document.new(file_path_or_io, options).convert_to(target_format)
|
@@ -207,9 +237,54 @@ module UniversalDocumentProcessor
|
|
207
237
|
end
|
208
238
|
end
|
209
239
|
|
240
|
+
# Get list of optional dependencies
|
241
|
+
def self.optional_dependencies
|
242
|
+
{
|
243
|
+
'pdf-reader' => '~> 2.0', # PDF text extraction
|
244
|
+
'prawn' => '~> 2.4', # PDF generation
|
245
|
+
'docx' => '~> 0.8', # Word document processing
|
246
|
+
'roo' => '~> 2.8', # Excel/Spreadsheet processing
|
247
|
+
'mini_magick' => '~> 4.11', # Image processing
|
248
|
+
'yomu' => '~> 0.2' # Universal text extraction fallback
|
249
|
+
}
|
250
|
+
end
|
251
|
+
|
252
|
+
# Check which optional dependencies are missing
|
253
|
+
def self.missing_dependencies
|
254
|
+
missing = []
|
255
|
+
missing << 'pdf-reader' unless dependency_available?(:pdf_reader)
|
256
|
+
missing << 'prawn' unless dependency_available?(:prawn)
|
257
|
+
missing << 'docx' unless dependency_available?(:docx)
|
258
|
+
missing << 'roo' unless dependency_available?(:roo)
|
259
|
+
missing << 'mini_magick' unless dependency_available?(:mini_magick)
|
260
|
+
missing << 'yomu' unless dependency_available?(:yomu)
|
261
|
+
missing
|
262
|
+
end
|
263
|
+
|
264
|
+
# Generate installation instructions for missing dependencies
|
265
|
+
def self.installation_instructions
|
266
|
+
missing = missing_dependencies
|
267
|
+
return "All optional dependencies are installed!" if missing.empty?
|
268
|
+
|
269
|
+
instructions = ["To enable additional features, install these optional gems:"]
|
270
|
+
missing.each do |gem_name|
|
271
|
+
version = optional_dependencies[gem_name]
|
272
|
+
instructions << " gem install #{gem_name} -v '#{version}'"
|
273
|
+
end
|
274
|
+
|
275
|
+
instructions << ""
|
276
|
+
instructions << "Or add to your Gemfile:"
|
277
|
+
missing.each do |gem_name|
|
278
|
+
version = optional_dependencies[gem_name]
|
279
|
+
instructions << " gem '#{gem_name}', '#{version}'"
|
280
|
+
end
|
281
|
+
|
282
|
+
instructions.join("\n")
|
283
|
+
end
|
284
|
+
|
210
285
|
# Get list of available features based on installed dependencies
|
211
286
|
def self.available_features
|
212
|
-
features = [:text_processing, :html_processing, :xml_processing, :csv_processing, :json_processing, :archive_processing]
|
287
|
+
features = [:text_processing, :html_processing, :xml_processing, :csv_processing, :json_processing, :archive_processing, :tsv_processing]
|
213
288
|
|
214
289
|
features << :pdf_processing if dependency_available?(:pdf_reader)
|
215
290
|
features << :word_processing if dependency_available?(:docx)
|
@@ -218,6 +293,14 @@ module UniversalDocumentProcessor
|
|
218
293
|
features << :universal_text_extraction if dependency_available?(:yomu)
|
219
294
|
features << :pdf_generation if dependency_available?(:prawn)
|
220
295
|
|
296
|
+
# Check AI availability without creating circular dependency
|
297
|
+
begin
|
298
|
+
ai_agent = AIAgent.new
|
299
|
+
features << :ai_processing if ai_agent.ai_enabled
|
300
|
+
rescue
|
301
|
+
# AI not available
|
302
|
+
end
|
303
|
+
|
221
304
|
features
|
222
305
|
end
|
223
306
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: universal_document_processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vikas Patil
|
@@ -80,19 +80,33 @@ dependencies:
|
|
80
80
|
- !ruby/object:Gem::Version
|
81
81
|
version: '3.2'
|
82
82
|
- !ruby/object:Gem::Dependency
|
83
|
-
name:
|
83
|
+
name: minitest
|
84
84
|
requirement: !ruby/object:Gem::Requirement
|
85
85
|
requirements:
|
86
86
|
- - "~>"
|
87
87
|
- !ruby/object:Gem::Version
|
88
|
-
version: '
|
88
|
+
version: '5.0'
|
89
89
|
type: :development
|
90
90
|
prerelease: false
|
91
91
|
version_requirements: !ruby/object:Gem::Requirement
|
92
92
|
requirements:
|
93
93
|
- - "~>"
|
94
94
|
- !ruby/object:Gem::Version
|
95
|
-
version: '
|
95
|
+
version: '5.0'
|
96
|
+
- !ruby/object:Gem::Dependency
|
97
|
+
name: minitest-reporters
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - "~>"
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '1.0'
|
103
|
+
type: :development
|
104
|
+
prerelease: false
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '1.0'
|
96
110
|
- !ruby/object:Gem::Dependency
|
97
111
|
name: rake
|
98
112
|
requirement: !ruby/object:Gem::Requirement
|
@@ -149,6 +163,20 @@ dependencies:
|
|
149
163
|
- - "~>"
|
150
164
|
- !ruby/object:Gem::Version
|
151
165
|
version: '2.0'
|
166
|
+
- !ruby/object:Gem::Dependency
|
167
|
+
name: simplecov
|
168
|
+
requirement: !ruby/object:Gem::Requirement
|
169
|
+
requirements:
|
170
|
+
- - "~>"
|
171
|
+
- !ruby/object:Gem::Version
|
172
|
+
version: '0.22'
|
173
|
+
type: :development
|
174
|
+
prerelease: false
|
175
|
+
version_requirements: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - "~>"
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: '0.22'
|
152
180
|
description: A comprehensive Ruby gem that handles document processing, text extraction,
|
153
181
|
and AI-powered analysis for PDF, Word, Excel, PowerPoint, images, archives, and
|
154
182
|
more with a unified API. Includes agentic AI features for document analysis, summarization,
|
@@ -190,6 +218,8 @@ metadata:
|
|
190
218
|
documentation_uri: https://github.com/vpatil160/universal_document_processor/blob/main/README.md
|
191
219
|
funding_uri: https://github.com/sponsors/vpatil160
|
192
220
|
rubygems_mfa_required: 'true'
|
221
|
+
optional_dependencies: pdf-reader ~> 2.0, prawn ~> 2.4, docx ~> 0.8, roo ~> 2.8,
|
222
|
+
mini_magick ~> 4.11, yomu ~> 0.2
|
193
223
|
rdoc_options: []
|
194
224
|
require_paths:
|
195
225
|
- lib
|