deepsearch-rb 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +7 -0
  3. data/README.md +2 -8
  4. data/lib/deepsearch/configuration.rb +2 -1
  5. data/lib/deepsearch/engine/pipeline.rb +11 -14
  6. data/lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb +4 -4
  7. data/lib/deepsearch/engine/steps/data_aggregation/result.rb +1 -1
  8. data/lib/deepsearch/engine/steps/parallel_search/process.rb +3 -3
  9. data/lib/deepsearch/engine/steps/parallel_search/result.rb +1 -1
  10. data/lib/deepsearch/engine/steps/parallel_search/search.rb +5 -6
  11. data/lib/deepsearch/engine/steps/prepare_subqueries/process.rb +28 -32
  12. data/lib/deepsearch/engine/steps/prepare_subqueries/result.rb +1 -1
  13. data/lib/deepsearch/engine/steps/rag/chunker.rb +1 -1
  14. data/lib/deepsearch/engine/steps/rag/process.rb +68 -36
  15. data/lib/deepsearch/engine/steps/rag/similarity.rb +4 -5
  16. data/lib/deepsearch/engine/steps/rag/values/chunk.rb +2 -2
  17. data/lib/deepsearch/engine/steps/rag/values/query.rb +1 -1
  18. data/lib/deepsearch/engine/steps/rag/values/result.rb +1 -1
  19. data/lib/deepsearch/engine/steps/summarization/process.rb +9 -8
  20. data/lib/deepsearch/engine/steps/summarization/result.rb +29 -0
  21. data/lib/deepsearch/logger.rb +1 -1
  22. data/lib/deepsearch/prompts_config.rb +1 -1
  23. data/lib/deepsearch/version.rb +1 -1
  24. data/lib/deepsearch.rb +1 -1
  25. data/lib/search_adapters/mock_adapter.rb +3 -2
  26. data/lib/search_adapters/serper_adapter.rb +1 -1
  27. data/lib/search_adapters/tavily_adapter.rb +5 -9
  28. metadata +2 -2
  29. data/lib/deepsearch/engine/steps/summarization/values/result.rb +0 -31
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 435d04eb1b8c5a7d2d8e86cd17b863766559f82ffe4f194597e9b05a087b5ae0
- data.tar.gz: 1f394f33996d8f85b17e17709a3178debb8ccad84d8268bd7590b532ac0d9fa7
+ metadata.gz: 51ef2b5c1780ff68752d82960a651443f6b0d5f7de38f9d0313acd17bdaaa144
+ data.tar.gz: 16af241a4eff83e5a5290832dc4ad14f4f8ed724f7c91abbfa060b4bf8c44ddb
  SHA512:
- metadata.gz: 30634e66e50d377e755fe82354255d8b1e265578aca7e8f4c4833e8959ff33277a9a120c56b96c93efb13ea6e890cec8ddbf06d5952c0ef384efaceb3f8f8839
- data.tar.gz: fca34ac34b09e4a8c5558c8114853dfec7c982fdf4bb9f630068c8ebf60bc398b1f711d5443b4e30aedc04c78539de24dcbf8a33761a719eb9815d8b38e37499
+ metadata.gz: 3172fe8597368c0ba863f6593f6c080a41b73de0890a3a02ca49f8e1dbcbf33bc17ab4c4a06a1b557dc324a949e9cabb2ee307752b8af8afd16872398c1dcaeb
+ data.tar.gz: eb185bd46ca110d9bbcb9134872dae8dab0c76db52841d4c85e38ec4a98f1c41033ab620930c4f9a22ec2238f2fa6f6d421b65bb7811492fccd8097c03cc099a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
  # Changelog

+ ## [Pending]
+
+ ## [0.1.1] - 2025-07-12
+
+ ### Added
+ - Minor specs improvements and examples cleanup
+
  ## [Released]

  ## [0.1.0] - 2025-07-12
data/README.md CHANGED
@@ -13,14 +13,8 @@ The only runtime dependencies are [ruby_llm](https://github.com/crmne/ruby_llm?t

  ---

- **NOTE**: You can implement your own chains in the way it works for you, BFS/DFS search on any topic. A draft code might look like:
-
- ```
- Deepsearch.search(initial search) ->
- LLM(Generate additional queries) ->
- Async [for each additional query]{ Deepsearch.search(sub-query) } ->
- Aggregate()
- ```
+ **NOTE**: You can also implement your own chains in the way it works for you, BFS/DFS search on any topic.
+ See the draft implementation of multi-chain flow in `examples/multi-step-chain/script.rb`

  ## Installation

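The pseudo-chain dropped from the README maps onto plain Ruby fairly directly. Below is a rough sketch of one such multi-step flow, assuming `Deepsearch.search` returns an object responding to `#summary` (as the summarization `Result` later in this diff suggests) and reusing the `RubyLLM.chat` and `async` APIs that appear elsewhere in the gem; the `generate_follow_ups` helper is purely illustrative and not part of deepsearch-rb:

```ruby
require "deepsearch"
require "async"

# Illustrative helper, not part of the gem: pull follow-up queries out of an LLM reply.
def generate_follow_ups(text, limit: 3)
  text.split("\n").map(&:strip).reject(&:empty?).first(limit)
end

# RubyLLM and Deepsearch must be configured (API keys, adapter) before this will run.
initial = Deepsearch.search("ruby fiber scheduler internals")

follow_ups = generate_follow_ups(
  RubyLLM.chat.ask("Suggest follow-up search queries for: ruby fiber scheduler internals").content
)

# Fan the sub-queries out concurrently, then aggregate the summaries.
sub_results = Sync do
  follow_ups.map { |q| Async { Deepsearch.search(q) } }.map(&:wait)
end

puts ([initial] + sub_results).map(&:summary).join("\n\n---\n\n")
```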
data/lib/deepsearch/configuration.rb CHANGED
@@ -47,7 +47,8 @@ module Deepsearch
  # end
  # end
  # Deepsearch.configure { |c| c.listener = MyListener.new
- attr_accessor :tavily_api_key, :serper_api_key, :search_adapter, :custom_search_adapter_class, :logger, :listener, :prompts
+ attr_accessor :tavily_api_key, :serper_api_key, :search_adapter, :custom_search_adapter_class, :logger, :listener,
+ :prompts
  attr_reader :ruby_llm

  def initialize
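The truncated listener comment above pairs with the pipeline's `notify_listener(event, **payload)` guard shown in the next file. A minimal listener could look like the sketch below; payload keys other than `:step` and `:result` are assumptions, since only those two appear in this diff:

```ruby
# Any object responding to #on_deepsearch_event can be attached; otherwise the
# pipeline logs a debug message and skips the notification (see pipeline.rb below).
class MyListener
  def on_deepsearch_event(event, **payload)
    return unless event == :step_completed

    puts "step #{payload[:step]} finished (#{payload[:result].class})"
  end
end

Deepsearch.configure { |c| c.listener = MyListener.new }
```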
data/lib/deepsearch/engine/pipeline.rb CHANGED
@@ -31,7 +31,6 @@ module Deepsearch
  # - original_query [String] The unmodified input query
  # - sub_queries [Array<String>] Generated subqueries (empty array on error)
  # - error [String, nil] Error message if processing failed
-

  parallel_search_options = {
  initial_query: query_preprocessing_result.cleaned_query,
@@ -42,7 +41,7 @@ module Deepsearch

  parallel_search_result = with_retry { Steps::ParallelSearch::Process.new(**parallel_search_options).execute }
  notify_listener(:step_completed, step: :parallel_search, result: parallel_search_result)
- # [parallel_search_result] Contains:
+ # [parallel_search_result] Contains:
  # - websites [Array<ParallelSearch::Result>] Search results
  # - ParallelSearch::Result objects with:
  # - websites [Array<Hash#url>] Array of website URLs
@@ -51,18 +50,18 @@ module Deepsearch

  data_aggregation_result = with_retry do
  Steps::DataAggregation::Process.new(
- websites: parallel_search_result.websites,
+ websites: parallel_search_result.websites
  ).execute
  end
  notify_listener(:step_completed, step: :data_aggregation, result: data_aggregation_result)
- # [data_aggregation_result] Contains:
+ # [data_aggregation_result] Contains:
  # - parsed_websites [Array<DataAggregation::Result>]
  # - DataAggregation::Result objects with:
  # - url [String] Website URL
  # - content [String] Parsed content from the website
  # - success [Boolean] Whether search succeeded
  # - error [String, nil] Error message if search failed
-
+
  rag_result = with_retry do
  Steps::Rag::Process.new(
  query: query_preprocessing_result.cleaned_query,
@@ -92,7 +91,7 @@ module Deepsearch

  def notify_listener(event, **payload)
  listener = Deepsearch.configuration.listener
- if !listener.respond_to?(:on_deepsearch_event)
+ unless listener.respond_to?(:on_deepsearch_event)
  Deepsearch.configuration.logger.debug("Attached listener does not respond to on_deepsearch_event, skipping notification")
  return
  end
@@ -110,15 +109,13 @@ module Deepsearch
  result = block.call
  # Handle "soft" failures from steps that return a result object with a #failure? method
  raise "Operation failed: #{result.error}" if result.respond_to?(:failure?) && result.failure?
-
+
  result
- rescue => e
- if (retries += 1) <= 1
- Deepsearch.configuration.logger.debug("Retrying after error: #{e.message}")
- retry
- else
- raise e
- end
+ rescue StandardError => e
+ raise e unless (retries += 1) <= 1
+
+ Deepsearch.configuration.logger.debug("Retrying after error: #{e.message}")
+ retry
  end
  end
  end
data/lib/deepsearch/engine/steps/data_aggregation/parsed_website.rb CHANGED
@@ -42,7 +42,7 @@ module Deepsearch

  def fetch_content!
  uri = URI.parse(@url)
-
+
  unless %w[http https].include?(uri.scheme)
  @error = "Invalid URL scheme: #{uri.scheme}"
  return
@@ -65,7 +65,7 @@ module Deepsearch
  else
  @error = "HTTP #{response.code}"
  end
- rescue => e
+ rescue StandardError => e
  @error = e.message
  end

@@ -111,7 +111,7 @@ module Deepsearch
  rescue StandardError
  # Fallback if Nokogiri fails. The raw_content is the problem. Sanitize it from binary to UTF-8.
  fallback_text = content.to_s.encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
- fallback_text.gsub(/<script\b[^>]*>.*?<\/script>/mi, "").gsub(/<style\b[^>]*>.*?<\/style>/mi, "").gsub(
+ fallback_text.gsub(%r{<script\b[^>]*>.*?</script>}mi, "").gsub(%r{<style\b[^>]*>.*?</style>}mi, "").gsub(
  /[[:space:]]+/, " "
  ).strip
  end
@@ -119,4 +119,4 @@ module Deepsearch
  end
  end
  end
- end
+ end
data/lib/deepsearch/engine/steps/data_aggregation/result.rb CHANGED
@@ -25,4 +25,4 @@ module Deepsearch
  end
  end
  end
- end
+ end
data/lib/deepsearch/engine/steps/parallel_search/process.rb CHANGED
@@ -14,9 +14,9 @@ module Deepsearch
  attr_reader :initial_query, :sub_queries, :search_adapter, :options

  def initialize(initial_query:,
- sub_queries:,
- search_adapter:,
- **options)
+ sub_queries:,
+ search_adapter:,
+ **options)
  @initial_query = initial_query
  @sub_queries = sub_queries
  @search_adapter = search_adapter
data/lib/deepsearch/engine/steps/parallel_search/result.rb CHANGED
@@ -25,4 +25,4 @@ module Deepsearch
  end
  end
  end
- end
+ end
data/lib/deepsearch/engine/steps/parallel_search/search.rb CHANGED
@@ -44,11 +44,11 @@ module Deepsearch

  Sync do |task|
  semaphore = Async::Semaphore.new(MAX_CONCURRENCY, parent: task)
-
+
  tasks = @all_queries.each_with_index.map do |query, index|
  # Add a small delay for subsequent tasks to avoid overwhelming the search api
  sleep(1) if index > 0
-
+
  semaphore.async do |sub_task|
  sub_task.annotate("query ##{index + 1}: #{query}")
  perform_search_with_retries(query, index + 1)
@@ -62,15 +62,14 @@ module Deepsearch
  def perform_search_with_retries(query, query_number)
  (MAX_RETRIES + 1).times do |attempt|
  @logger.debug("Task #{query_number}: Searching '#{query}' (Attempt #{attempt + 1})")
-
+
  results = @search_adapter.search(query, @search_options)
  extracted = extract_results(results)
  @logger.debug("✓ Task #{query_number} completed with #{extracted.size} results for '#{query}'")
  return extracted
-
  rescue StandardError => e
  @logger.debug("✗ Task #{query_number} error for '#{query}': #{e.message}")
-
+
  break if attempt >= MAX_RETRIES

  sleep_duration = (INITIAL_BACKOFF * (2**attempt)) + rand(0.1..0.5)
@@ -92,4 +91,4 @@ module Deepsearch
  end
  end
  end
- end
+ end
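For a feel of the backoff line above: with the hypothetical values `INITIAL_BACKOFF = 1` and `MAX_RETRIES = 2` (neither constant's value is visible in this diff), the sleeps between failed attempts grow roughly as 1s, then 2s, plus a little jitter:

```ruby
# Hypothetical constants; the gem's real values are not shown in this diff.
INITIAL_BACKOFF = 1
MAX_RETRIES = 2

(0...MAX_RETRIES).each do |attempt|
  base   = INITIAL_BACKOFF * (2**attempt) # 1s, 2s, ...
  jitter = rand(0.1..0.5)                 # de-synchronizes concurrent tasks
  puts format("attempt %d failed -> sleeping %.2fs", attempt + 1, base + jitter)
end
```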
data/lib/deepsearch/engine/steps/prepare_subqueries/process.rb CHANGED
@@ -5,7 +5,7 @@ require_relative 'result'
  module Deepsearch
  class Engine
  module Steps
- module PrepareSubqueries 
+ module PrepareSubqueries
  class Process
  def initialize(original_query)
  @original_query = original_query
@@ -26,15 +26,15 @@ module Deepsearch
  private

  def validate_input
- unless @original_query && !@original_query.strip.empty?
- raise StandardError, "Original query is required for preprocessing"
- end
+ return if @original_query && !@original_query.strip.empty?
+
+ raise StandardError, "Original query is required for preprocessing"
  end

  def process_query
  cleaned_query = clean_query(@original_query)
  subqueries = generate_subqueries(cleaned_query)
-
+
  PrepareSubqueries::Result.new(
  cleaned_query: cleaned_query,
  original_query: @original_query,
@@ -47,38 +47,34 @@ module Deepsearch
  end

  def generate_subqueries(query)
- begin
- Deepsearch.configuration.logger.debug("Attempting to generate subqueries using LLM...")
- chat = RubyLLM.chat
-
- prompt = Deepsearch.configuration.prompts.subquery_prompt(query: query)
- Deepsearch.configuration.logger.debug("Sending prompt to LLM...")
- response = chat.ask(prompt)
-
- Deepsearch.configuration.logger.debug("Received response from LLM")
- subqueries = parse_subqueries(response.content)
- Deepsearch.configuration.logger.debug("Generated #{subqueries.size} subqueries")
- subqueries
- rescue StandardError => e
- Deepsearch.configuration.logger.debug("Error generating subqueries: #{e.message}")
- Deepsearch.configuration.logger.debug("Error class: #{e.class}")
- Deepsearch.configuration.logger.debug("Backtrace: #{e.backtrace.first(3).join('\n')}")
- []
- end
+ Deepsearch.configuration.logger.debug("Attempting to generate subqueries using LLM...")
+ chat = RubyLLM.chat
+
+ prompt = Deepsearch.configuration.prompts.subquery_prompt(query: query)
+ Deepsearch.configuration.logger.debug("Sending prompt to LLM...")
+ response = chat.ask(prompt)
+
+ Deepsearch.configuration.logger.debug("Received response from LLM")
+ subqueries = parse_subqueries(response.content)
+ Deepsearch.configuration.logger.debug("Generated #{subqueries.size} subqueries")
+ subqueries
+ rescue StandardError => e
+ Deepsearch.configuration.logger.debug("Error generating subqueries: #{e.message}")
+ Deepsearch.configuration.logger.debug("Error class: #{e.class}")
+ Deepsearch.configuration.logger.debug("Backtrace: #{e.backtrace.first(3).join('\n')}")
+ []
  end

  def parse_subqueries(response_content)
  return [] unless response_content

- subqueries = response_content.split("\n")
- .map(&:strip)
- .reject(&:empty?)
- .map { |line| line.gsub(/^\d+\.\s*|^[-*]\s*/, '') }
- .map { |query| query.gsub(/^["']|["']$/, '') }
- .reject(&:empty?)
- .first(5)
-
- subqueries
+ response_content.split("\n")
+ .map(&:strip)
+ .reject(&:empty?)
+ .map { |line| line.gsub(/^\d+\.\s*|^[-*]\s*/, '') }
+ .map { |query| query.gsub(/^["']|["']$/, '') }
+ .reject(&:empty?)
+ .first(5)
  end
  end
  end
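Worked through by hand, the `parse_subqueries` chain above strips list numbering, bullets, and surrounding quotes, drops blank lines, and keeps at most five queries. The snippet below replays that exact chain on a made-up LLM reply:

```ruby
# Sample LLM reply (invented) run through the same transformation chain as parse_subqueries.
response_content = <<~TEXT
  1. "ruby garbage collector tuning"
  2. ruby GC compaction
  - YJIT memory usage

  * "ruby 3.3 object shapes"
TEXT

subqueries = response_content.split("\n")
                             .map(&:strip)
                             .reject(&:empty?)
                             .map { |line| line.gsub(/^\d+\.\s*|^[-*]\s*/, '') }
                             .map { |query| query.gsub(/^["']|["']$/, '') }
                             .reject(&:empty?)
                             .first(5)

p subqueries
# => ["ruby garbage collector tuning", "ruby GC compaction", "YJIT memory usage", "ruby 3.3 object shapes"]
```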
data/lib/deepsearch/engine/steps/prepare_subqueries/result.rb CHANGED
@@ -27,4 +27,4 @@ module Deepsearch
  end
  end
  end
- end
+ end
data/lib/deepsearch/engine/steps/rag/chunker.rb CHANGED
@@ -28,4 +28,4 @@ module Deepsearch
  end
  end
  end
- end
+ end
data/lib/deepsearch/engine/steps/rag/process.rb CHANGED
@@ -1,5 +1,8 @@
  # frozen_string_literal: true

+ require 'async'
+ require 'async/semaphore'
+
  require_relative 'values/chunk'
  require_relative 'values/query'
  require_relative 'values/result'
@@ -13,63 +16,92 @@ module Deepsearch
  # Implements the core Retrieval-Augmented Generation (RAG) logic.
  # It takes a query and a set of parsed websites, then:
  # 1. Chunks the website content into smaller pieces.
- # 2. Generates embeddings for all text chunks in batches.
+ # 2. Generates embeddings for all text chunks concurrently in batches.
  # 3. Uses a similarity search to find the chunks most relevant to the query.
  # 4. Returns a result containing the relevant chunks.
  class Process
  CHUNK_BATCH_SIZE = 100
  MAX_TOTAL_CHUNKS = 500
  MAX_CHUNKS_PER_WEBSITE = 15
+ MAX_EMBEDDING_CONCURRENCY = 3

  def initialize(query:, parsed_websites:)
  @query = Values::Query.new(text: query)
  @documents = parsed_websites.map do |website|
  { url: website.url, content: website.content }
  end
+ @logger = Deepsearch.configuration.logger
  end

  def execute
- begin
- chunker = Chunker.new
- all_chunks = @documents.each_with_object([]) do |doc, chunks|
- next if doc[:content].to_s.strip.empty?
-
- doc_chunks = chunker.chunk(doc[:content])
- if doc_chunks.count > MAX_CHUNKS_PER_WEBSITE
- Deepsearch.configuration.logger.debug("Truncating chunks for #{doc[:url]} from #{doc_chunks.count} to #{MAX_CHUNKS_PER_WEBSITE}")
- doc_chunks = doc_chunks.first(MAX_CHUNKS_PER_WEBSITE)
- end
- doc_chunks.each { |chunk| chunk.document_url = doc[:url] }
- chunks.concat(doc_chunks)
+ chunker = Chunker.new
+ all_chunks = @documents.each_with_object([]) do |doc, chunks|
+ next if doc[:content].to_s.strip.empty?
+
+ doc_chunks = chunker.chunk(doc[:content])
+ if doc_chunks.count > MAX_CHUNKS_PER_WEBSITE
+ @logger.debug("Truncating chunks for #{doc[:url]} from #{doc_chunks.count} to #{MAX_CHUNKS_PER_WEBSITE}")
+ doc_chunks = doc_chunks.first(MAX_CHUNKS_PER_WEBSITE)
  end
+ doc_chunks.each { |chunk| chunk.document_url = doc[:url] }
+ chunks.concat(doc_chunks)
+ end

- Deepsearch.configuration.logger.debug("Chunked #{@documents.count} documents into #{all_chunks.count} chunks")
+ @logger.debug("Chunked #{@documents.count} documents into #{all_chunks.count} chunks")

- if all_chunks.count > MAX_TOTAL_CHUNKS
- Deepsearch.configuration.logger.debug("Chunk count (#{all_chunks.count}) exceeds limit of #{MAX_TOTAL_CHUNKS}. Truncating.")
- all_chunks = all_chunks.first(MAX_TOTAL_CHUNKS)
- end
+ if all_chunks.count > MAX_TOTAL_CHUNKS
+ @logger.debug("Chunk count (#{all_chunks.count}) exceeds limit of #{MAX_TOTAL_CHUNKS}. Truncating.")
+ all_chunks = all_chunks.first(MAX_TOTAL_CHUNKS)
+ end
+
+ generate_embeddings_in_parallel(all_chunks)
+
+ @logger.debug('Finished embedding generation, initiating similarity match..')
+ chunks_with_embeddings = all_chunks.select(&:embedding)
+ relevant_chunks = Similarity.new.find_relevant(@query, chunks_with_embeddings)
+ @logger.debug("Found #{relevant_chunks.count} relevant chunks for query: '#{@query.text}'")

- all_chunks.each_slice(CHUNK_BATCH_SIZE) do |batch|
- texts = batch.map(&:text)
- embeddings = RubyLLM.embed(texts).vectors
- batch.each_with_index { |chunk, i| chunk.embedding = embeddings[i] }
+ Values::Result.new(
+ query: @query,
+ relevant_chunks: relevant_chunks
+ )
+ rescue StandardError => e
+ Values::Result.new(
+ query: @query,
+ relevant_chunks: [],
+ error: e.message
+ )
+ end
+
+ private
+
+ def generate_embeddings_in_parallel(chunks)
+ return if chunks.empty?
+
+ num_batches = (chunks.count.to_f / CHUNK_BATCH_SIZE).ceil
+ @logger.debug("Starting parallel embedding generation for #{num_batches} batches with max concurrency of #{MAX_EMBEDDING_CONCURRENCY}")
+
+ Sync do |task|
+ semaphore = Async::Semaphore.new(MAX_EMBEDDING_CONCURRENCY, parent: task)
+
+ tasks = chunks.each_slice(CHUNK_BATCH_SIZE).with_index.map do |batch, index|
+ semaphore.async do |sub_task|
+ task_number = index + 1
+ sub_task.annotate("Embedding batch #{task_number}/#{num_batches}")
+ @logger.debug("Task #{task_number}: Generating embeddings for batch of #{batch.size} chunks")
+
+ begin
+ texts = batch.map(&:text)
+ embeddings = RubyLLM.embed(texts).vectors
+ batch.each_with_index { |chunk, i| chunk.embedding = embeddings[i] }
+ @logger.debug("✓ Task #{task_number} completed.")
+ rescue StandardError => e
+ @logger.error("✗ Task #{task_number} error: #{e.message}")
+ end
+ end
  end

- Deepsearch.configuration.logger.debug("Generated embeddings for #{all_chunks.count} chunks, initiating similarity match..")
- relevant_chunks = Similarity.new.find_relevant(@query, all_chunks)
- Deepsearch.configuration.logger.debug("Found #{relevant_chunks.count} relevant chunks for query: '#{@query.text}'")
-
- Values::Result.new(
- query: @query,
- relevant_chunks: relevant_chunks
- )
- rescue StandardError => e
- Values::Result.new(
- query: @query,
- relevant_chunks: [],
- error: e.message
- )
+ tasks.map(&:wait)
  end
  end
  end
data/lib/deepsearch/engine/steps/rag/similarity.rb CHANGED
@@ -24,10 +24,8 @@ module Deepsearch
  best_score = top_candidates.first.first
  cutoff_score = best_score * threshold

- relevant_chunks = top_candidates.select { |score, _| score >= cutoff_score }
- .map { |_, index| chunks[index] }
-
- relevant_chunks
+ top_candidates.select { |score, _| score >= cutoff_score }
+ .map { |_, index| chunks[index] }
  end

  private
@@ -51,10 +49,11 @@ module Deepsearch
  magnitude_b = Math.sqrt(vec_b.sum { |v| v**2 })

  return 0.0 if magnitude_a.zero? || magnitude_b.zero?
+
  dot_product / (magnitude_a * magnitude_b)
  end
  end
  end
  end
  end
- end
+ end
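Taken on its own, the math above amounts to cosine similarity plus a relative cutoff against the best-scoring chunk. A minimal standalone sketch under that reading (the `threshold` value and the shape of `find_relevant` are assumptions, not taken from this diff):

```ruby
# Illustrative only: mirrors the cosine-similarity / relative-cutoff logic from
# similarity.rb, outside the gem's class structure.
def cosine_similarity(vec_a, vec_b)
  dot   = vec_a.zip(vec_b).sum { |a, b| a * b }
  mag_a = Math.sqrt(vec_a.sum { |v| v**2 })
  mag_b = Math.sqrt(vec_b.sum { |v| v**2 })
  return 0.0 if mag_a.zero? || mag_b.zero?

  dot / (mag_a * mag_b)
end

query_vec  = [0.1, 0.9, 0.0]
chunk_vecs = [[0.1, 0.8, 0.1], [0.9, 0.1, 0.0], [0.2, 0.7, 0.1]]

scores    = chunk_vecs.map { |v| cosine_similarity(query_vec, v) }
best      = scores.max
threshold = 0.9                 # hypothetical relative threshold
cutoff    = best * threshold
relevant  = scores.each_index.select { |i| scores[i] >= cutoff }
puts "kept chunks at indices #{relevant.inspect}"
```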
data/lib/deepsearch/engine/steps/rag/values/chunk.rb CHANGED
@@ -9,7 +9,7 @@ module Deepsearch
  # This is the fundamental unit of data used in the RAG process.
  class Chunk
  attr_accessor :text, :embedding, :document_url
-
+
  def initialize(text:, embedding: nil, document_url: nil)
  @text = text
  @embedding = embedding
@@ -20,4 +20,4 @@ module Deepsearch
  end
  end
  end
- end
+ end
data/lib/deepsearch/engine/steps/rag/values/query.rb CHANGED
@@ -41,4 +41,4 @@ module Deepsearch
  end
  end
  end
- end
+ end
data/lib/deepsearch/engine/steps/rag/values/result.rb CHANGED
@@ -30,4 +30,4 @@ module Deepsearch
  end
  end
  end
- end
+ end
data/lib/deepsearch/engine/steps/summarization/process.rb CHANGED
@@ -1,6 +1,6 @@
  # frozen_string_literal: true

- require_relative 'values/result'
+ require_relative 'result'

  module Deepsearch
  class Engine
@@ -18,17 +18,17 @@ module Deepsearch
  end

  def execute
- return Values::Result.new(summary: "No relevant content found to summarize.") if relevant_chunks.empty?
+ return Result.new(summary: "No relevant content found to summarize.") if relevant_chunks.empty?

  prompt = build_summary_prompt
  Deepsearch.configuration.logger.debug("Summarizing content with LLM...")
  response = RubyLLM.chat.ask(prompt)
  Deepsearch.configuration.logger.debug("Summarization complete.")

- Values::Result.new(summary: response.content)
+ Result.new(summary: response.content)
  rescue StandardError => e
  Deepsearch.configuration.logger.debug("Error during summarization: #{e.message}")
- Values::Result.new(summary: nil, error: e.message)
+ Result.new(summary: nil, error: e.message)
  end

  private
@@ -36,18 +36,19 @@ module Deepsearch
  def build_summary_prompt
  chunks_by_url = relevant_chunks.group_by(&:document_url)
  citation_map = chunks_by_url.keys.each_with_index.to_h { |url, i| [url, i + 1] }
-
+
  context_text = chunks_by_url.map do |url, chunks|
  citation_number = citation_map[url]
  chunk_contents = chunks.map(&:text).join("\n\n")
  "Source [#{citation_number}]:\n#{chunk_contents}"
  end.join("\n\n---\n\n")
-
+
  sources_list = citation_map.map { |url, number| "[#{number}]: #{url}" }.join("\n")
- Deepsearch.configuration.prompts.summarization_prompt(query: @query.text, context_text: context_text, sources_list: sources_list)
+ Deepsearch.configuration.prompts.summarization_prompt(query: @query.text, context_text: context_text,
+ sources_list: sources_list)
  end
  end
  end
  end
  end
  end
- end
+ end
data/lib/deepsearch/engine/steps/summarization/result.rb ADDED
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ module Deepsearch
+ class Engine
+ module Steps
+ module Summarization
+ # Represents the result of the summarization step.
+ # It holds the final, synthesized summary and any potential error message.
+ class Result
+ attr_reader :summary, :error, :success
+
+ def initialize(summary: nil, error: nil)
+ @summary = summary
+ @success = error.nil?
+ @error = error
+ end
+
+ def success?
+ @success
+ end
+
+ def failure?
+ !success?
+ end
+ end
+ end
+ end
+ end
+ end
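Because the relocated `Result` exposes `#success?`/`#failure?`, it plugs straight into the "soft failure" check in the pipeline's `with_retry` shown earlier. A quick usage sketch based only on the class above:

```ruby
ok  = Deepsearch::Engine::Steps::Summarization::Result.new(summary: "…")
bad = Deepsearch::Engine::Steps::Summarization::Result.new(error: "LLM timeout")

ok.success?   # => true
bad.failure?  # => true; a result like this makes with_retry raise "Operation failed: LLM timeout" and retry once
```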
data/lib/deepsearch/logger.rb CHANGED
@@ -29,4 +29,4 @@ module Deepsearch
  end
  end
  end
- end
+ end
data/lib/deepsearch/prompts_config.rb CHANGED
@@ -79,4 +79,4 @@ module Deepsearch
  PROMPT
  end
  end
- end
+ end
data/lib/deepsearch/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module Deepsearch
- VERSION = "0.1.0"
+ VERSION = "0.1.1"
  end
data/lib/deepsearch.rb CHANGED
@@ -12,7 +12,7 @@ module Deepsearch
  # A generic error class for exceptions raised by the Deepsearch gem,
  # from which more specific errors can inherit.
  class Error < StandardError; end
-
+
  class << self
  def configuration
  @configuration ||= Configuration.new
data/lib/search_adapters/mock_adapter.rb CHANGED
@@ -9,7 +9,7 @@ module Deepsearch
  def initialize(api_key = nil); end

  def search(query, options = {})
- return mock_results(query, options)
+ mock_results(query, options)
  end

  private
@@ -63,7 +63,8 @@ module Deepsearch
  }

  if include_answer
- response["answer"] = "Ruby is a dynamic, open-source programming language with a focus on simplicity and productivity. It was created by Yukihiro Matsumoto in the mid-1990s and follows the principle that everything is an object. Ruby is particularly popular for web development, especially with the Ruby on Rails framework, but it's also used for automation, data processing, and various other applications."
+ response["answer"] =
+ "Ruby is a dynamic, open-source programming language with a focus on simplicity and productivity. It was created by Yukihiro Matsumoto in the mid-1990s and follows the principle that everything is an object. Ruby is particularly popular for web development, especially with the Ruby on Rails framework, but it's also used for automation, data processing, and various other applications."
  end

  response
data/lib/search_adapters/serper_adapter.rb CHANGED
@@ -103,4 +103,4 @@
  # Custom error class for exceptions raised by the SerperAdapter.
  class SerperError < StandardError; end
  end
- end
+ end
data/lib/search_adapters/tavily_adapter.rb CHANGED
@@ -40,13 +40,11 @@ module Deepsearch
  private

  def validate_api_key!
- if @api_key.nil? || @api_key.strip.empty?
- raise TavilyError, "API key is required"
- end
+ raise TavilyError, "API key is required" if @api_key.nil? || @api_key.strip.empty?

- unless @api_key.start_with?('tvly-')
- raise TavilyError, "Invalid API key format. Expected format: tvly-YOUR_API_KEY"
- end
+ return if @api_key.start_with?('tvly-')
+
+ raise TavilyError, "Invalid API key format. Expected format: tvly-YOUR_API_KEY"
  end

  def build_payload(query, options)
@@ -78,9 +76,7 @@

  response = http.request(request)

- unless response.is_a?(Net::HTTPSuccess)
- handle_error_response(response)
- end
+ handle_error_response(response) unless response.is_a?(Net::HTTPSuccess)

  response
  end
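Given the `tvly-` prefix check above and the `tavily_api_key` accessor from configuration.rb, wiring the adapter up presumably looks something like the sketch below; the `search_adapter` value is an assumption, since this diff only shows that the setting exists:

```ruby
Deepsearch.configure do |c|
  c.tavily_api_key = ENV.fetch("TAVILY_API_KEY") # TavilyAdapter expects a "tvly-..." key
  # c.search_adapter = :tavily                   # assumed selector; not confirmed by this diff
end
```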
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: deepsearch-rb
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.1
  platform: ruby
  authors:
  - Alexander Shagov
@@ -150,7 +150,7 @@ files:
  - lib/deepsearch/engine/steps/rag/values/query.rb
  - lib/deepsearch/engine/steps/rag/values/result.rb
  - lib/deepsearch/engine/steps/summarization/process.rb
- - lib/deepsearch/engine/steps/summarization/values/result.rb
+ - lib/deepsearch/engine/steps/summarization/result.rb
  - lib/deepsearch/logger.rb
  - lib/deepsearch/prompts_config.rb
  - lib/deepsearch/version.rb
data/lib/deepsearch/engine/steps/summarization/values/result.rb DELETED
@@ -1,31 +0,0 @@
- # frozen_string_literal: true
-
- module Deepsearch
- class Engine
- module Steps
- module Summarization
- module Values
- # Represents the result of the summarization step.
- # It holds the final, synthesized summary and any potential error message.
- class Result
- attr_reader :summary, :error, :success
-
- def initialize(summary: nil, error: nil)
- @summary = summary
- @success = error.nil?
- @error = error
- end
-
- def success?
- @success
- end
-
- def failure?
- !success?
- end
- end
- end
- end
- end
- end
- end