hokipoki 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +26 -0
- data/lib/generators/hive_mind/install_generator.rb +761 -0
- data/lib/hokipoki/claude/auto_loader.rb +162 -0
- data/lib/hokipoki/claude/connection_manager.rb +382 -0
- data/lib/hokipoki/claude/parasite.rb +333 -0
- data/lib/hokipoki/configuration.rb +187 -0
- data/lib/hokipoki/engine.rb +122 -0
- data/lib/hokipoki/feedback/ascii_banners.rb +108 -0
- data/lib/hokipoki/feedback/display_manager.rb +436 -0
- data/lib/hokipoki/intelligence/smart_retrieval_engine.rb +401 -0
- data/lib/hokipoki/intelligence/unified_orchestrator.rb +395 -0
- data/lib/hokipoki/license_validator.rb +296 -0
- data/lib/hokipoki/parasites/universal_generator.rb +662 -0
- data/lib/hokipoki/railtie.rb +34 -0
- data/lib/hokipoki/version.rb +3 -0
- data/lib/hokipoki.rb +174 -0
- metadata +271 -0
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Hokipoki
|
|
4
|
+
module Intelligence
|
|
5
|
+
# Smart Retrieval Engine with Real-time User Feedback
|
|
6
|
+
# Enhanced version of the original with comprehensive operation visibility
|
|
7
|
+
class SmartRetrievalEngine
|
|
8
|
+
include Singleton
|
|
9
|
+
|
|
10
|
+
# Captures the two collaborators every retrieval call talks to: the shared
# Feedback::DisplayManager instance and the Rails logger.
def initialize
  @feedback = Feedback::DisplayManager.instance
  @logger = Rails.logger
end
|
|
14
|
+
|
|
15
|
+
# PATTERN-003: Intent-based analysis with surgical precision + real vector similarity
|
|
16
|
+
# Entry point: retrieves context for a query, falling back to canned
# pattern guidance when retrieval yields nothing or raises.
#
# Progress and metrics are reported through the feedback display at each
# step. Errors raised during retrieval are reported and logged, never
# propagated, so the method always returns a String-like context.
#
# @param query [String] the user query
# @param token_budget [Integer] upper bound (in estimated tokens) handed to
#   the context builder
# @param intent [String, Symbol] 'auto' (or :auto) to detect the intent from
#   the query; any other String is converted to a Symbol because the
#   strategy table and the fallback patterns are keyed by Symbol
# @return [String] the retrieved context, or a fallback guidance string
def retrieve_targeted_facts(query, token_budget: 1500, intent: 'auto')
  # Monotonic clock: durations must not jump with wall-clock adjustments.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  elapsed_ms = lambda do
    ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round(2)
  end

  # Without this normalisation a caller passing intent: 'debugging' would
  # miss every Symbol-keyed lookup and silently get the general strategy.
  auto_detect = intent.to_s == 'auto'
  intent = intent.to_sym if intent.is_a?(String) && !auto_detect

  @feedback.performing_vector_retrieval(query, token_budget) do
    @logger.info "🎯 SMART RETRIEVAL: Processing query with vector similarity"
  end

  begin
    # Step 1: Analyze intent if auto
    if auto_detect
      @feedback.debug_info('intent_analysis', 'detecting query intent')
      intent = analyze_query_intent(query)
      @feedback.debug_info('intent_detected', intent.to_s)
    end

    # Step 2: Multi-stage retrieval
    @feedback.pulling_from_hive_mind('vectors')
    facts = perform_multi_stage_retrieval(query, token_budget, intent)

    # Step 3: Calculate processing metrics
    processing_time = elapsed_ms.call

    if facts.present?
      fact_count = facts.is_a?(Array) ? facts.length : 1
      total_tokens = estimate_total_tokens(facts)
      tokens_saved = calculate_tokens_saved(total_tokens, query)

      @feedback.vector_results_found(fact_count, processing_time)
      @feedback.vector_context_built(fact_count, total_tokens)
      @feedback.total_tokens_saved(tokens_saved, get_session_total_saved(tokens_saved))
      @feedback.token_usage(total_tokens, token_budget)

      return facts
    else
      @feedback.vector_results_found(0, processing_time)
      @feedback.operation_warning('vector_retrieval', 'falling back to pattern guidance')
    end
  rescue => e
    # Best effort by design: report the failure and continue with the fallback.
    @feedback.operation_error('vector_retrieval', e.message)
    @logger.warn "Vector retrieval failed: #{e.message}, falling back to patterns"
  end

  # Fallback to pattern-based retrieval
  @feedback.debug_info('fallback', 'using pattern-based retrieval')
  fallback_result = get_pattern_fallback(query, intent)

  if fallback_result
    @feedback.vector_results_found(1, elapsed_ms.call)
    fallback_result
  else
    @feedback.vector_results_found(0, elapsed_ms.call)
    "Context: No specific guidance available for query: #{query}"
  end
end
|
|
71
|
+
|
|
72
|
+
# Multi-stage retrieval with comprehensive feedback
|
|
73
|
+
# Runs the four retrieval stages in order, announcing each one on the
# feedback display before it starts: vector search, relevance ranking,
# quality filtering, then budget-limited context assembly.
#
# Returns whatever build_contextual_response produces for the candidates
# that survive filtering.
def perform_multi_stage_retrieval(query, token_budget, intent)
  @feedback.debug_info('retrieval_stages', 'starting multi-stage retrieval')

  @feedback.debug_info('stage_1', 'vector similarity search')
  pool = perform_vector_similarity_search(query, intent)

  @feedback.debug_info('stage_2', 'ranking by relevance')
  pool = rank_by_relevance(pool, query, intent)

  @feedback.debug_info('stage_3', 'filtering by quality')
  pool = filter_by_quality(pool)

  @feedback.debug_info('stage_4', 'building final context')
  build_contextual_response(pool, query, token_budget)
end
|
|
94
|
+
|
|
95
|
+
# Vector similarity search with detailed feedback
|
|
96
|
+
# Gathers candidates from three passes over the document store (strict
# content match, looser content match, intent-specific lookup), tags each
# hit with the pass that produced it, and returns the de-duplicated pool.
# Every pass reports its hit count on the feedback display.
def perform_vector_similarity_search(query, intent)
  @feedback.debug_info('vector_search', "searching for intent: #{intent}")

  pool = []

  # Pass 1: strict threshold
  @feedback.debug_info('search_exact', 'performing exact semantic match')
  exact_hits = search_documents_by_content(query, similarity_threshold: 0.9, limit: 10)
  pool += tag_results(exact_hits, :exact_match)
  @feedback.debug_info('exact_results', "found #{exact_hits.length} exact matches")

  # Pass 2: looser threshold, larger page
  @feedback.debug_info('search_conceptual', 'performing conceptual similarity search')
  conceptual_hits = search_documents_by_content(query, similarity_threshold: 0.7, limit: 15)
  pool += tag_results(conceptual_hits, :conceptual_match)
  @feedback.debug_info('conceptual_results', "found #{conceptual_hits.length} conceptual matches")

  # Pass 3: documents whose fact type suits the intent
  @feedback.debug_info('search_intent', "performing intent-based search for #{intent}")
  intent_hits = search_by_intent(query, intent)
  pool += tag_results(intent_hits, :intent_match)
  @feedback.debug_info('intent_results', "found #{intent_hits.length} intent matches")

  # The passes overlap, so collapse repeated documents before returning.
  survivors = deduplicate_candidates(pool)
  @feedback.debug_info('deduplication', "#{pool.length} → #{survivors.length} after deduplication")

  survivors
end
|
|
125
|
+
|
|
126
|
+
# Search documents with feedback
|
|
127
|
+
# Finds documents whose content contains the query text, scores each one
# and returns the best matches.
#
# Rows are fetched with a case-insensitive substring match (ILIKE), scored
# with calculate_content_similarity / assess_content_relevance, filtered by
# +similarity_threshold+ and ordered by 0.7 * similarity + 0.3 * relevance.
#
# @param query [String] text to look for
# @param similarity_threshold [Float] minimum similarity a row must reach
# @param limit [Integer] maximum number of results (twice as many rows are
#   fetched so that filtering still has material to work with)
# @return [Array<Hash>] hashes with :document, :content, :similarity,
#   :metadata, :keywords and :relevance, best first; [] when the Document
#   constant is not defined or the lookup raises
def search_documents_by_content(query, similarity_threshold: 0.7, limit: 10)
  @feedback.debug_info('db_query', "searching documents (threshold: #{similarity_threshold})")

  # Check if Document model is available
  unless defined?(Document)
    @feedback.operation_warning('db_search', 'Document model not available')
    return []
  end

  begin
    term = sanitize_query(query)
    # Escape LIKE wildcards so "%" and "_" typed by the user match literally.
    term = Document.sanitize_sql_like(term) if Document.respond_to?(:sanitize_sql_like)

    scope = Document.where("content ILIKE ?", "%#{term}%")
                    .limit(limit * 2) # Get extra for filtering

    # extract_keywords tolerates models without a keywords association, so
    # only ask for the eager load when the association is known to exist;
    # otherwise the whole search would fail.
    preload_keywords = !Document.respond_to?(:reflect_on_association) ||
                       Document.reflect_on_association(:keywords)
    scope = scope.includes(:keywords) if preload_keywords

    # Load once: calling #count on the relation would run a separate COUNT query.
    documents = scope.to_a
    @feedback.debug_info('db_results', "found #{documents.length} database matches")

    # Calculate similarity scores
    scored_results = documents.map do |doc|
      {
        document: doc,
        content: doc.content,
        similarity: calculate_content_similarity(query, doc.content),
        metadata: doc.metadata || {},
        keywords: extract_keywords(doc),
        relevance: assess_content_relevance(doc, query)
      }
    end

    # Filter by threshold and sort
    final_results = scored_results
                    .select { |r| r[:similarity] >= similarity_threshold }
                    .sort_by { |r| -(r[:similarity] * 0.7 + r[:relevance] * 0.3) }
                    .first(limit)

    @feedback.debug_info('filtering', "#{scored_results.length} → #{final_results.length} after filtering")

    final_results
  rescue => e
    # Best effort: a failing lookup is reported and treated as "no matches".
    @feedback.operation_error('db_search', e.message)
    []
  end
end
|
|
172
|
+
|
|
173
|
+
# Intent-based search with feedback
|
|
174
|
+
# Looks up documents whose metadata fact_type suits the given intent and
# whose content contains the query text.
#
# Each hit carries its plain relevance plus a similarity equal to that
# relevance multiplied by the intent strategy's boost.
#
# @param query [String] text to look for
# @param intent [Symbol] intent used to pick the search strategy
# @return [Array<Hash>] hashes with :document, :content, :similarity,
#   :relevance, :metadata, :keywords, :intent and :fact_type; [] when the
#   Document constant is not defined or the lookup raises
def search_by_intent(query, intent)
  @feedback.debug_info('intent_search', "searching for #{intent} intent")

  # Map intent to search strategies
  search_strategy = get_search_strategy_for_intent(intent)
  @feedback.debug_info('search_strategy', "using #{search_strategy[:fact_types].join(', ')} fact types")

  return [] unless defined?(Document)

  begin
    term = sanitize_query(query)
    # Escape LIKE wildcards so "%" and "_" typed by the user match literally.
    term = Document.sanitize_sql_like(term) if Document.respond_to?(:sanitize_sql_like)

    # Load once: calling #count on the relation would run a separate COUNT query.
    intent_docs = Document.where("metadata->>'fact_type' IN (?)", search_strategy[:fact_types])
                          .where("content ILIKE ?", "%#{term}%")
                          .limit(10)
                          .to_a

    @feedback.debug_info('intent_results', "found #{intent_docs.length} intent-specific documents")

    intent_docs.map do |doc|
      base_relevance = calculate_content_relevance(doc, query)

      {
        document: doc,
        content: doc.content,
        similarity: base_relevance * search_strategy[:boost],
        # build_contextual_response multiplies :relevance, so it must be
        # present on intent hits just as it is on content hits.
        relevance: base_relevance,
        metadata: doc.metadata || {},
        keywords: extract_keywords(doc),
        intent: intent,
        fact_type: doc.metadata&.dig('fact_type')
      }
    end
  rescue => e
    # Best effort: a failing lookup is reported and treated as "no matches".
    @feedback.operation_error('intent_search', e.message)
    []
  end
end
|
|
210
|
+
|
|
211
|
+
# Build final context with token tracking
|
|
212
|
+
# Assembles the final context string from the best candidates that fit in
# the token budget.
#
# Candidates are ordered by 0.6 * similarity + 0.4 * relevance and appended
# while the running token estimate stays within +token_budget+. Assembly
# stops at the first candidate that would exceed the budget, so later,
# smaller candidates are not considered.
#
# @param candidates [Array<Hash>] hashes with :content and, optionally,
#   :similarity / :relevance scores
# @param query [String] currently unused
# @param token_budget [Integer] maximum estimated tokens for the context
# @return [String] selected contents joined with " | ", or "" when there
#   are no candidates or nothing fits
def build_contextual_response(candidates, query, token_budget)
  return "" if candidates.empty?

  @feedback.debug_info('context_building', "processing #{candidates.length} candidates")

  # Sort by overall quality score. A missing score falls back to the same
  # neutral 0.5 that rank_by_relevance uses; without it, candidates lacking
  # :relevance would raise NoMethodError on nil.
  sorted_candidates = candidates.sort_by do |candidate|
    similarity = candidate[:similarity] || 0.5
    relevance = candidate[:relevance] || 0.5
    -(similarity * 0.6 + relevance * 0.4)
  end

  # Build context within token budget
  context_parts = []
  used_tokens = 0

  sorted_candidates.each_with_index do |candidate, index|
    content = candidate[:content]
    content_tokens = estimate_tokens(content)

    if used_tokens + content_tokens <= token_budget
      context_parts << content
      used_tokens += content_tokens
      @feedback.debug_info('context_add', "added part #{index + 1} (#{content_tokens} tokens)")
    else
      @feedback.debug_info('context_skip', "skipped part #{index + 1} (would exceed budget)")
      break
    end
  end

  if context_parts.any?
    final_context = context_parts.join(" | ")
    @feedback.vector_context_built(context_parts.length, used_tokens)
    final_context
  else
    @feedback.operation_warning('context_building', 'no content fit within token budget')
    ""
  end
end
|
|
249
|
+
|
|
250
|
+
private
|
|
251
|
+
|
|
252
|
+
# Classifies a query by keyword. Patterns are tried in the order listed and
# the first case-insensitive match wins; :general is returned when none
# matches.
def analyze_query_intent(query)
  intent_patterns = [
    [:implementation, /how to|implement|create|build|fix|debug|error/i],
    [:definition, /what is|define|explain|meaning|concept/i],
    [:frontend, /css|style|html|frontend|design/i],
    [:commands, /command|run|execute|bash|terminal/i],
    [:reference, /example|show me|sample|demo/i]
  ]

  matched = intent_patterns.find { |(_name, pattern)| query.match?(pattern) }
  matched ? matched.first : :general
end
|
|
260
|
+
|
|
261
|
+
# Returns the search strategy for an intent: the metadata fact types to
# look for and the score boost applied to their relevance. Intents without
# a dedicated strategy get the 'general' fact type with no boost.
def get_search_strategy_for_intent(intent)
  case intent
  when :implementation
    { fact_types: ['code_example', 'tutorial', 'implementation'], boost: 1.2 }
  when :debugging
    { fact_types: ['error_solution', 'debugging', 'troubleshooting'], boost: 1.1 }
  when :definition
    { fact_types: ['explanation', 'tutorial', 'definition'], boost: 1.0 }
  when :reference
    { fact_types: ['api_reference', 'documentation', 'specification'], boost: 1.3 }
  when :frontend
    { fact_types: ['css_example', 'html_example', 'frontend'], boost: 1.1 }
  when :commands
    { fact_types: ['command', 'terminal', 'script'], boost: 1.2 }
  else
    { fact_types: ['general'], boost: 1.0 }
  end
end
|
|
273
|
+
|
|
274
|
+
# Stores a :combined_score on every candidate (mutating the hashes in
# place) and returns the candidates ordered from highest to lowest score.
#
# score = similarity * 0.5 + relevance * 0.4, plus 0.1 when the candidate's
# :intent equals the requested intent. Missing similarity or relevance
# counts as 0.5. The query argument is not consulted.
def rank_by_relevance(candidates, query, intent)
  candidates.each do |entry|
    similarity = entry[:similarity] || 0.5
    relevance = entry[:relevance] || 0.5
    bonus = entry[:intent] == intent ? 0.1 : 0.0

    entry[:combined_score] = similarity * 0.5 + relevance * 0.4 + bonus
  end

  candidates.sort_by { |entry| -entry[:combined_score] }
end
|
|
285
|
+
|
|
286
|
+
# Keeps only candidates whose :combined_score is at least 0.3; a candidate
# without a score is treated as scoring 0 and therefore dropped.
def filter_by_quality(candidates)
  minimum_score = 0.3

  candidates.select do |entry|
    score = entry[:combined_score] || 0
    score >= minimum_score
  end
end
|
|
290
|
+
|
|
291
|
+
# Stamps every result hash with the search pass that produced it. The
# hashes are modified in place; the returned array is a new one holding
# the same hash objects.
def tag_results(results, source_type)
  results.map do |entry|
    entry.tap { |tagged| tagged[:source_type] = source_type }
  end
end
|
|
297
|
+
|
|
298
|
+
# Drops repeated candidates, keeping the first occurrence of each.
#
# A candidate is identified by its document's id; when there is no document
# (or no id) the hash of its :content is used instead.
#
# The document may be a Hash-like object or a model instance. Reading the id
# through Hash#dig alone raises TypeError for objects that do not implement
# #dig, so objects that only expose an #id reader are asked directly.
#
# @param candidates [Array<Hash>] candidates with :document and/or :content
# @return [Array<Hash>] the first candidate seen for each identity, in
#   their original order
def deduplicate_candidates(candidates)
  seen_keys = Set.new

  candidates.select do |candidate|
    document = candidate[:document]
    doc_id =
      if document.respond_to?(:dig)
        document.dig(:id)
      elsif document.respond_to?(:id)
        document.id
      end

    # Set#add? returns nil when the key is already present, so only the
    # first candidate with a given key is selected.
    seen_keys.add?(doc_id || candidate[:content]&.hash)
  end
end
|
|
310
|
+
|
|
311
|
+
# Word-overlap similarity between a query and a piece of content: the
# number of distinct lower-cased words they share divided by the number of
# distinct words across both. Returns 0.0 when either side has no words.
def calculate_content_similarity(query, content)
  tokenize = ->(text) { text.downcase.split(/\W+/).reject(&:blank?) }

  query_terms = tokenize.call(query)
  content_terms = tokenize.call(content)
  return 0.0 if query_terms.empty? || content_terms.empty?

  shared = query_terms & content_terms
  combined = query_terms | content_terms

  shared.size.to_f / combined.size
end
|
|
323
|
+
|
|
324
|
+
# Scores how well a document covers the query: the fraction of query words
# that occur (as substrings) in the lower-cased document content, plus 0.1
# when the document has metadata, capped at 1.0.
def calculate_content_relevance(document, query)
  terms = query.downcase.split(/\W+/).reject(&:blank?)
  haystack = document.content.downcase

  hits = terms.count { |term| haystack.include?(term) }
  # An empty query divides by 1 rather than 0.
  divisor = [terms.length, 1].max
  score = hits.to_f / divisor
  score += document.metadata.present? ? 0.1 : 0.0

  score > 1.0 ? 1.0 : score
end
|
|
338
|
+
|
|
339
|
+
# Alternate name for calculate_content_relevance; forwards both arguments
# unchanged and returns its result.
def assess_content_relevance(document, query)
  relevance = calculate_content_relevance(document, query)
  relevance
end
|
|
342
|
+
|
|
343
|
+
# Returns the :name values plucked from a document's keywords collection,
# or [] when the document has no keywords reader or the collection cannot
# be plucked.
def extract_keywords(document)
  return [] unless document.respond_to?(:keywords)
  return [] unless document.keywords.respond_to?(:pluck)

  document.keywords.pluck(:name)
end
|
|
350
|
+
|
|
351
|
+
# Normalises a query for use in a search pattern: converts it to a String,
# removes single quotes, double quotes, semicolons and backslashes, and
# trims surrounding whitespace.
def sanitize_query(query)
  text = query.to_s
  without_specials = text.gsub(/['";\\]/, '')
  without_specials.strip
end
|
|
354
|
+
|
|
355
|
+
# Rough token estimate for a piece of text: one token per four characters
# of its String form, rounded up.
def estimate_tokens(text)
  character_count = text.to_s.length
  (character_count / 4.0).ceil
end
|
|
358
|
+
|
|
359
|
+
# Token estimate for a set of facts: the sum of the per-element estimates
# for an Array, otherwise the estimate for the value's String form.
def estimate_total_tokens(facts)
  return facts.sum { |fact| estimate_tokens(fact) } if facts.is_a?(Array)

  text = facts.is_a?(String) ? facts : facts.to_s
  estimate_tokens(text)
end
|
|
369
|
+
|
|
370
|
+
# Estimates tokens saved by supplying specific context: a baseline of 0.8
# tokens per query character minus the context's token count, rounded to an
# Integer and never below zero.
def calculate_tokens_saved(context_tokens, query)
  generic_baseline = query.length * 0.8 # Rough estimate of generic response
  difference = generic_baseline - context_tokens

  difference > 0 ? difference.round : 0
end
|
|
376
|
+
|
|
377
|
+
# Adds the given amount to the running total kept in @session_tokens_saved
# (starting from zero on first use) and returns the new total. The total
# lives only in this instance variable.
def get_session_total_saved(current_saved)
  @session_tokens_saved = (@session_tokens_saved || 0) + current_saved
end
|
|
383
|
+
|
|
384
|
+
# Canned guidance used when retrieval produced nothing: picks the message
# template for the intent and fills in the query. Intents without a
# dedicated template get the general guidance message.
def get_pattern_fallback(query, intent)
  # Lambdas keep the lookup lazy: only the chosen template is built.
  templates = {
    implementation: -> { "Context: Implementation guidance for #{query} | Use best practices and error handling" },
    debugging: -> { "Context: Debug #{query} | Check logs, verify configuration, test incrementally" },
    definition: -> { "Context: #{query} definition | Core concepts and usage patterns" },
    reference: -> { "Context: #{query} reference | API documentation and examples" }
  }

  template = templates[intent] || -> { "Context: General guidance for #{query}" }
  template.call
end
|
|
399
|
+
end
|
|
400
|
+
end
|
|
401
|
+
end
|