llm_chain 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,13 +46,15 @@ module LLMChain
46
46
  num_results = extract_num_results(prompt)
47
47
 
48
48
  begin
49
- results = perform_search(query, num_results)
49
+ results = perform_search_with_retry(query, num_results)
50
50
  format_search_results(query, results)
51
51
  rescue => e
52
+ log_error("Search failed for '#{query}'", e)
52
53
  {
53
54
  query: query,
54
55
  error: e.message,
55
- formatted: "Error searching for '#{query}': #{e.message}"
56
+ results: [],
57
+ formatted: "Search unavailable for '#{query}'. Please try again later or rephrase your query."
56
58
  }
57
59
  end
58
60
  end
@@ -97,6 +99,31 @@ module LLMChain
97
99
  5 # default
98
100
  end
99
101
 
102
# Runs perform_search, retrying transient failures with exponential backoff.
#
# A failed attempt is retried only while the retry budget lasts and
# retryable_error? accepts the exception. Otherwise the failure is logged and
# the hardcoded results for the query are returned when there are any; when
# there are none, the search error is re-raised.
#
# @param query search query, passed through to perform_search
# @param num_results result limit, passed through to perform_search
# @param max_retries [Integer] number of retries allowed after the first attempt
# @return the perform_search result, or the non-empty hardcoded results
# @raise [StandardError] the last search error when no fallback is available
def perform_search_with_retry(query, num_results, max_retries: 3)
  retries = 0

  begin
    perform_search(query, num_results)
  rescue => e
    retries += 1

    if retries <= max_retries && retryable_error?(e)
      # Exponential backoff: 0.5s, 1s, 2s, ... capped at 5 seconds.
      sleep_time = [0.5 * (2**(retries - 1)), 5.0].min
      log_retry("Retrying search (#{retries}/#{max_retries}) after #{sleep_time}s", e)
      sleep(sleep_time)
      retry
    end

    log_error("Search failed after #{retries} attempts", e)
    # Last resort: canned results for well-known queries.
    hardcoded = get_hardcoded_results(query)
    return hardcoded unless hardcoded.empty?

    raise e
  end
end
126
+
100
127
  def perform_search(query, num_results)
101
128
  case @search_engine
102
129
  when :google
@@ -115,25 +142,52 @@ module LLMChain
115
142
  def fallback_search(query, num_results)
116
143
  return [] if num_results <= 0
117
144
 
118
- # Если обычный поиск не работает, используем заранее заготовленные данные
119
- # для популярных запросов
145
+ # Сначала пробуем заранее заготовленные данные для популярных запросов
120
146
  hardcoded_results = get_hardcoded_results(query)
121
147
  return hardcoded_results unless hardcoded_results.empty?
122
148
 
123
- # Простой поиск по HTML странице DuckDuckGo
124
- uri = URI("https://html.duckduckgo.com/html/")
125
- uri.query = URI.encode_www_form(q: query)
126
-
127
- http = Net::HTTP.new(uri.host, uri.port)
128
- http.use_ssl = true
129
- http.open_timeout = 10
130
- http.read_timeout = 10
149
+ # Проверяем, доступен ли интернет
150
+ return offline_fallback_results(query) if offline_mode?
131
151
 
132
- response = http.get(uri.request_uri)
133
- return [] unless response.code == '200'
152
+ begin
153
+ results = search_duckduckgo_html(query, num_results)
154
+ return results unless results.empty?
155
+
156
+ # Если DuckDuckGo не дал результатов, возвращаем заглушку
157
+ offline_fallback_results(query)
158
+ rescue => e
159
+ log_error("Fallback search failed", e)
160
+ offline_fallback_results(query)
161
+ end
162
+ end
163
+
164
# Queries the DuckDuckGo HTML endpoint and parses the response body.
#
# The whole request runs under a 15-second overall deadline in addition to the
# per-phase open/read timeouts. Non-200 responses and timeouts are logged via
# log_error and reported as "no results" instead of being raised.
#
# @param query search text sent as the `q` parameter
# @param num_results passed through to parse_duckduckgo_results
# @return the parse_duckduckgo_results output, or [] on a non-200 response or timeout
def search_duckduckgo_html(query, num_results)
  require 'timeout'

  Timeout.timeout(15) do
    uri = URI("https://html.duckduckgo.com/html/")
    uri.query = URI.encode_www_form(q: query)

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    http.open_timeout = 8
    http.read_timeout = 10

    response = http.get(uri.request_uri)

    unless response.code == '200'
      log_error("DuckDuckGo returned #{response.code}", StandardError.new(response.body))
      return []
    end

    parse_duckduckgo_results(response.body, num_results)
  end
rescue Timeout::Error => e
  # Net::OpenTimeout and Net::ReadTimeout are Timeout::Error subclasses, so this
  # branch also handles the 8s/10s phase timeouts. Log the real exception rather
  # than always claiming the 15-second deadline was exceeded.
  log_error("DuckDuckGo search timeout", e)
  []
end
189
+
190
+ def parse_duckduckgo_results(html, num_results)
137
191
  results = []
138
192
 
139
193
  # Ищем различные паттерны результатов
@@ -147,9 +201,10 @@ module LLMChain
147
201
  html.scan(pattern) do |url, title|
148
202
  next if results.length >= num_results
149
203
  next if url.include?('duckduckgo.com/y.js') # Skip tracking links
204
+ next if title.strip.empty?
150
205
 
151
206
  results << {
152
- title: title.strip.gsub(/\s+/, ' '),
207
+ title: clean_html_text(title),
153
208
  url: clean_url(url),
154
209
  snippet: "Search result from DuckDuckGo"
155
210
  }
@@ -158,14 +213,37 @@ module LLMChain
158
213
  end
159
214
 
160
215
  results
161
- rescue => e
216
+ end
217
+
218
+ def offline_fallback_results(query)
162
219
  [{
163
220
  title: "Search unavailable",
164
221
  url: "",
165
- snippet: "Unable to perform web search at this time. Query: #{query}"
222
+ snippet: "Unable to perform web search at this time. Query: #{query}. Please check your internet connection."
166
223
  }]
167
224
  end
168
225
 
226
# Reports whether the machine appears to be offline.
#
# Probes connectivity by opening a TCP connection to 8.8.8.8 on port 53 with a
# 3-second connect timeout; any StandardError raised by the probe counts as
# being offline.
#
# @return [Boolean] true when the probe fails, false when it connects
def offline_mode?
  require 'socket'
  Socket.tcp("8.8.8.8", 53, connect_timeout: 3) { |_connection| nil }
  false
rescue
  true
end
236
+
237
# Normalizes a text fragment scraped from HTML.
#
# Trims both ends, decodes the entities &lt; &gt; &amp; &quot; &#39;, and
# collapses every whitespace run into a single space. The entities are decoded
# in a single pass so already-escaped text is unescaped exactly once
# ("&amp;quot;" becomes "&quot;", not a double quote).
#
# @param text [String] raw text
# @return [String] cleaned text
def clean_html_text(text)
  entities = {
    '&lt;' => '<',
    '&gt;' => '>',
    '&amp;' => '&',
    '&quot;' => '"',
    '&#39;' => "'"
  }

  text.strip
      .gsub(/&(?:lt|gt|amp|quot|#39);/, entities)
      .gsub(/\s+/, ' ')
end
246
+
169
247
  # Заранее заготовленные результаты для популярных запросов
170
248
  def get_hardcoded_results(query)
171
249
  ruby_version_queries = [
@@ -203,76 +281,151 @@ module LLMChain
203
281
  def search_google(query, num_results)
204
282
  # Google Custom Search API (требует API ключ)
205
283
  unless @api_key
284
+ log_error("Google API key not provided, using fallback", StandardError.new("No API key"))
206
285
  return fallback_search(query, num_results)
207
286
  end
208
287
 
209
- search_engine_id = ENV['GOOGLE_SEARCH_ENGINE_ID'] || ENV['GOOGLE_CX'] || 'your-search-engine-id'
210
-
211
- uri = URI("https://www.googleapis.com/customsearch/v1")
212
- params = {
213
- key: @api_key,
214
- cx: search_engine_id,
215
- q: query,
216
- num: [num_results, 10].min
217
- }
218
- uri.query = URI.encode_www_form(params)
219
-
220
- http = Net::HTTP.new(uri.host, uri.port)
221
- http.use_ssl = true
222
- http.open_timeout = 10
223
- http.read_timeout = 10
224
-
225
- response = http.get(uri.request_uri)
226
-
227
- unless response.code == '200'
288
+ search_engine_id = ENV['GOOGLE_SEARCH_ENGINE_ID'] || ENV['GOOGLE_CX']
289
+ unless search_engine_id && search_engine_id != 'your-search-engine-id'
290
+ log_error("Google Search Engine ID not configured", StandardError.new("Missing GOOGLE_SEARCH_ENGINE_ID"))
228
291
  return fallback_search(query, num_results)
229
292
  end
230
-
231
- data = JSON.parse(response.body)
232
293
 
233
- results = (data['items'] || []).map do |item|
234
- {
235
- title: item['title'],
236
- url: item['link'],
237
- snippet: item['snippet']
238
- }
294
+ begin
295
+ require 'timeout'
296
+
297
+ Timeout.timeout(20) do
298
+ uri = URI("https://www.googleapis.com/customsearch/v1")
299
+ params = {
300
+ key: @api_key,
301
+ cx: search_engine_id,
302
+ q: query,
303
+ num: [num_results, 10].min,
304
+ safe: 'active'
305
+ }
306
+ uri.query = URI.encode_www_form(params)
307
+
308
+ http = Net::HTTP.new(uri.host, uri.port)
309
+ http.use_ssl = true
310
+ http.open_timeout = 8
311
+ http.read_timeout = 12
312
+
313
+ response = http.get(uri.request_uri)
314
+
315
+ case response.code
316
+ when '200'
317
+ data = JSON.parse(response.body)
318
+
319
+ if data['error']
320
+ log_error("Google API error: #{data['error']['message']}", StandardError.new(data['error']['message']))
321
+ return fallback_search(query, num_results)
322
+ end
323
+
324
+ results = (data['items'] || []).map do |item|
325
+ {
326
+ title: item['title']&.strip || 'Untitled',
327
+ url: item['link'] || '',
328
+ snippet: item['snippet']&.strip || 'No description available'
329
+ }
330
+ end
331
+
332
+ # Если Google не вернул результатов, используем fallback
333
+ results.empty? ? fallback_search(query, num_results) : results
334
+ when '403'
335
+ log_error("Google API quota exceeded or invalid key", StandardError.new(response.body))
336
+ fallback_search(query, num_results)
337
+ when '400'
338
+ log_error("Google API bad request", StandardError.new(response.body))
339
+ fallback_search(query, num_results)
340
+ else
341
+ log_error("Google API returned #{response.code}", StandardError.new(response.body))
342
+ fallback_search(query, num_results)
343
+ end
344
+ end
345
+ rescue Timeout::Error
346
+ log_error("Google search timeout", Timeout::Error.new("Request took longer than 20 seconds"))
347
+ fallback_search(query, num_results)
348
+ rescue JSON::ParserError => e
349
+ log_error("Invalid JSON response from Google", e)
350
+ fallback_search(query, num_results)
351
+ rescue => e
352
+ log_error("Google search failed", e)
353
+ fallback_search(query, num_results)
239
354
  end
240
-
241
- # Если Google не вернул результатов, используем fallback
242
- results.empty? ? fallback_search(query, num_results) : results
243
- rescue => e
244
- fallback_search(query, num_results)
245
355
  end
246
356
 
247
357
  def search_bing(query, num_results)
248
358
  # Bing Web Search API (требует API ключ)
249
- raise "Bing API key required" unless @api_key
250
-
251
- uri = URI("https://api.bing.microsoft.com/v7.0/search")
252
- params = {
253
- q: query,
254
- count: [num_results, 20].min,
255
- responseFilter: 'Webpages'
256
- }
257
- uri.query = URI.encode_www_form(params)
359
+ unless @api_key
360
+ log_error("Bing API key not provided, using fallback", StandardError.new("No API key"))
361
+ return fallback_search(query, num_results)
362
+ end
258
363
 
259
- http = Net::HTTP.new(uri.host, uri.port)
260
- http.use_ssl = true
261
-
262
- request = Net::HTTP::Get.new(uri)
263
- request['Ocp-Apim-Subscription-Key'] = @api_key
264
-
265
- response = http.request(request)
266
- raise "Bing API error: #{response.code}" unless response.code == '200'
364
+ begin
365
+ require 'timeout'
366
+
367
+ Timeout.timeout(20) do
368
+ uri = URI("https://api.bing.microsoft.com/v7.0/search")
369
+ params = {
370
+ q: query,
371
+ count: [num_results, 20].min,
372
+ responseFilter: 'Webpages',
373
+ safeSearch: 'Moderate'
374
+ }
375
+ uri.query = URI.encode_www_form(params)
267
376
 
268
- data = JSON.parse(response.body)
269
-
270
- (data.dig('webPages', 'value') || []).map do |item|
271
- {
272
- title: item['name'],
273
- url: item['url'],
274
- snippet: item['snippet']
275
- }
377
+ http = Net::HTTP.new(uri.host, uri.port)
378
+ http.use_ssl = true
379
+ http.open_timeout = 8
380
+ http.read_timeout = 12
381
+
382
+ request = Net::HTTP::Get.new(uri)
383
+ request['Ocp-Apim-Subscription-Key'] = @api_key
384
+ request['User-Agent'] = 'LLMChain/1.0'
385
+
386
+ response = http.request(request)
387
+
388
+ case response.code
389
+ when '200'
390
+ data = JSON.parse(response.body)
391
+
392
+ if data['error']
393
+ log_error("Bing API error: #{data['error']['message']}", StandardError.new(data['error']['message']))
394
+ return fallback_search(query, num_results)
395
+ end
396
+
397
+ results = (data.dig('webPages', 'value') || []).map do |item|
398
+ {
399
+ title: item['name']&.strip || 'Untitled',
400
+ url: item['url'] || '',
401
+ snippet: item['snippet']&.strip || 'No description available'
402
+ }
403
+ end
404
+
405
+ results.empty? ? fallback_search(query, num_results) : results
406
+ when '401'
407
+ log_error("Bing API unauthorized - check your subscription key", StandardError.new(response.body))
408
+ fallback_search(query, num_results)
409
+ when '403'
410
+ log_error("Bing API quota exceeded", StandardError.new(response.body))
411
+ fallback_search(query, num_results)
412
+ when '429'
413
+ log_error("Bing API rate limit exceeded", StandardError.new(response.body))
414
+ fallback_search(query, num_results)
415
+ else
416
+ log_error("Bing API returned #{response.code}", StandardError.new(response.body))
417
+ fallback_search(query, num_results)
418
+ end
419
+ end
420
+ rescue Timeout::Error
421
+ log_error("Bing search timeout", Timeout::Error.new("Request took longer than 20 seconds"))
422
+ fallback_search(query, num_results)
423
+ rescue JSON::ParserError => e
424
+ log_error("Invalid JSON response from Bing", e)
425
+ fallback_search(query, num_results)
426
+ rescue => e
427
+ log_error("Bing search failed", e)
428
+ fallback_search(query, num_results)
276
429
  end
277
430
  end
278
431
 
@@ -300,6 +453,52 @@ module LLMChain
300
453
  def required_parameters
301
454
  ['query']
302
455
  end
456
+
457
+ private
458
+
459
# Decides whether a failed search attempt is worth retrying.
#
# @param error [Exception] the exception raised by the search attempt
# @return [Boolean] true for timeouts, DNS/connection failures and HTTP 5xx errors
def retryable_error?(error)
  case error
  when Timeout::Error
    # Covers Net::OpenTimeout and Net::ReadTimeout, which subclass
    # Timeout::Error. (Net::TimeoutError is not a defined constant; naming it
    # in a `when` clause raises NameError on every call.)
    true
  when SocketError
    # DNS failures are usually transient.
    true
  when Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::EHOSTUNREACH
    true
  when Net::HTTPError
    # Retry only server-side (5xx) failures; the status is read from the message.
    error.message.match?(/5\d\d/)
  else
    false
  end
end
476
+
477
# Writes an error line to the Rails logger when one is available, otherwise to
# standard error via warn. Does nothing unless should_log? allows logging.
#
# @param message [String] description of what failed
# @param error [Exception] its class and message are appended to the line
# @return [void]
def log_error(message, error)
  return unless should_log?

  line = "[WebSearch] #{message}: #{error.class} - #{error.message}"
  # `Rails` may exist as a bare namespace that has no .logger method, so check
  # for the method first; a logging helper must never raise from a rescue path.
  logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)

  if logger
    logger.error(line)
  else
    warn line
  end
end
486
+
487
# Writes a retry notice to the Rails logger at warn level when one is
# available, otherwise to standard error via warn. Does nothing unless
# should_log? allows logging.
#
# @param message [String] description of the retry being made
# @param error [Exception] its class and message are appended to the line
# @return [void]
def log_retry(message, error)
  return unless should_log?

  line = "[WebSearch] #{message}: #{error.class} - #{error.message}"
  # `Rails` may exist as a bare namespace that has no .logger method, so check
  # for the method first; a logging helper must never raise from a rescue path.
  logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)

  if logger
    logger.warn(line)
  else
    warn line
  end
end
496
+
497
# Whether diagnostic logging is enabled.
#
# Logging is on when LLM_CHAIN_DEBUG is "true", when RAILS_ENV is
# "development", or when Rails reports the development environment.
#
# @return [Boolean]
def should_log?
  return true if ENV['LLM_CHAIN_DEBUG'] == 'true'
  return true if ENV['RAILS_ENV'] == 'development'
  # `Rails` may exist as a bare namespace without .env; only ask when it can answer.
  return false unless defined?(Rails) && Rails.respond_to?(:env)

  Rails.env.development?
end
303
502
  end
304
503
  end
305
504
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LlmChain
4
- VERSION = "0.5.1"
4
+ VERSION = "0.5.3"
5
5
  end
data/lib/llm_chain.rb CHANGED
@@ -28,6 +28,12 @@ module LLMChain
28
28
  class ServerError < Error; end
29
29
  class TimeoutError < Error; end
30
30
  class MemoryError < Error; end
31
+ end
32
+
33
+ # Загружаем валидатор после определения базовых классов
34
+ require_relative "llm_chain/configuration_validator"
35
+
36
+ module LLMChain
31
37
 
32
38
  # Простая система конфигурации
33
39
  class Configuration
@@ -53,12 +59,13 @@ module LLMChain
53
59
  end
54
60
 
55
61
  # Быстрое создание цепочки с настройками по умолчанию
56
- def quick_chain(model: nil, tools: true, memory: true, **options)
62
+ def quick_chain(model: nil, tools: true, memory: true, validate_config: true, **options)
57
63
  model ||= configuration.default_model
58
64
 
59
65
  chain_options = {
60
66
  model: model,
61
67
  retriever: false,
68
+ validate_config: validate_config,
62
69
  **options
63
70
  }
64
71
 
@@ -73,5 +80,39 @@ module LLMChain
73
80
 
74
81
  Chain.new(**chain_options)
75
82
  end
83
+
84
+ # Диагностика системы
85
# Prints a human-readable diagnostics report and returns the raw results.
#
# Runs ConfigurationValidator.validate_environment, then writes the status of
# each system component, each API key and any warnings to standard output,
# followed by general recommendations.
#
# @return the value returned by ConfigurationValidator.validate_environment
def diagnose_system
  divider = "=" * 50
  mark = ->(ok) { ok ? '✅' : '❌' }

  puts "🔍 LLMChain System Diagnostics"
  puts divider

  report = ConfigurationValidator.validate_environment

  puts "\n📋 System Components:"
  puts " Ruby: #{mark.call(report[:ruby])} (#{RUBY_VERSION})"
  [["Python", :python], ["Node.js", :node], ["Internet", :internet], ["Ollama", :ollama]].each do |label, key|
    puts " #{label}: #{mark.call(report[key])}"
  end

  puts "\n🔑 API Keys:"
  report[:apis].each do |api, available|
    puts " #{api.to_s.capitalize}: #{mark.call(available)}"
  end

  warnings = report[:warnings]
  if warnings.any?
    puts "\n⚠️ Warnings:"
    warnings.each { |warning| puts " • #{warning}" }
  end

  puts "\n💡 Recommendations:"
  puts " • Install missing components for full functionality"
  puts " • Configure API keys for enhanced features"
  # Only suggest starting Ollama when the check reported it as unavailable.
  puts " • Start Ollama server: ollama serve" unless report[:ollama]

  puts "\n#{divider}"

  report
end
76
117
  end
77
118
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llm_chain
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - FuryCow
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-06-25 00:00:00.000000000 Z
11
+ date: 2025-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -111,7 +111,8 @@ dependencies:
111
111
  description:
112
112
  email:
113
113
  - dreamweaver0408@gmail.com
114
- executables: []
114
+ executables:
115
+ - llm-chain
115
116
  extensions: []
116
117
  extra_rdoc_files: []
117
118
  files:
@@ -124,6 +125,7 @@ files:
124
125
  - Rakefile
125
126
  - examples/quick_demo.rb
126
127
  - examples/tools_example.rb
128
+ - exe/llm-chain
127
129
  - lib/llm_chain.rb
128
130
  - lib/llm_chain/chain.rb
129
131
  - lib/llm_chain/client_registry.rb
@@ -133,6 +135,7 @@ files:
133
135
  - lib/llm_chain/clients/ollama_base.rb
134
136
  - lib/llm_chain/clients/openai.rb
135
137
  - lib/llm_chain/clients/qwen.rb
138
+ - lib/llm_chain/configuration_validator.rb
136
139
  - lib/llm_chain/embeddings/clients/local/ollama_client.rb
137
140
  - lib/llm_chain/embeddings/clients/local/weaviate_retriever.rb
138
141
  - lib/llm_chain/embeddings/clients/local/weaviate_vector_store.rb