hokipoki 0.3.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/generators/hive_mind/install_generator.rb +18 -2
- data/lib/generators/hive_mind/templates/hokipoki_claude.rb +45 -0
- data/lib/generators/hokipoki/attach_parasite_generator.rb +355 -0
- data/lib/generators/hokipoki/install_generator.rb +515 -0
- data/lib/generators/hokipoki/scan_project_generator.rb +279 -0
- data/lib/generators/parasite/install_generator.rb +458 -0
- data/lib/hokipoki/atomic_fact_extractor.rb +524 -0
- data/lib/hokipoki/claude/parasite.rb +62 -10
- data/lib/hokipoki/claude/thought_interceptor.rb +385 -0
- data/lib/hokipoki/claude_auto_loader.rb +28 -11
- data/lib/hokipoki/template_store.rb +425 -0
- data/lib/hokipoki/vector_engine.rb +525 -0
- data/lib/hokipoki/version.rb +1 -1
- data/lib/hokipoki.rb +260 -6
- metadata +80 -1
@@ -0,0 +1,525 @@
# frozen_string_literal: true

require 'singleton'
require 'fileutils'
require 'sqlite3'
require 'json'
require 'digest'

module Hokipoki
  # Core Vector Engine - The brain behind parasitic intelligence
  # Uses template-based compression for 75% storage reduction
  class VectorEngine
    include Singleton

    def initialize
      @db_path = File.expand_path('~/.hokipoki/vectors.db')
      @template_store = nil
      @keyword_index = nil
      @vector_cache = {}
      @learning_patterns = {}
      @stats = {
        total_vectors: 0,
        successful_retrievals: 0,
        failed_retrievals: 0,
        compression_ratio: 0.0
      }

      ensure_db_directory
      initialize_database
      display_startup_message
    end

    # Main API - Retrieve intelligent facts with template generation
    def retrieve_facts(query, token_budget: 1500)
      $stdout.puts "VECTOR ENGINE: Analyzing query intent..."

      begin
        # 1. Analyze query intent with multiple dimensions
        intent = analyze_query_intent(query)
        $stdout.puts "  Intent detected: #{intent}"

        # 2. Extract technical keywords
        keywords = extract_technical_keywords(query)
        $stdout.puts "  Keywords: #{keywords.join(', ')}"

        # 3. Find matching template vectors
        matching_vectors = find_matching_vectors(keywords, intent)
        $stdout.puts "  Found #{matching_vectors.length} matching vectors"

        if matching_vectors.any?
          # 4. Generate content from templates
          generated_content = generate_content_from_vectors(matching_vectors, intent, keywords)

          # 5. Apply token budget management
          final_content = apply_token_budget(generated_content, token_budget)

          @stats[:successful_retrievals] += 1
          $stdout.puts "  Content generated (#{estimate_tokens(final_content)} tokens)"

          # Learn from successful retrieval
          learn_from_success(query, keywords, intent, final_content)

          return [final_content]
        else
          # Fallback to template-based generation
          $stdout.puts "  No vectors found, using template fallback"
          fallback_content = generate_fallback_content(query, intent, keywords)

          @stats[:failed_retrievals] += 1
          return [fallback_content]
        end

      rescue => e
        $stdout.puts "  Vector engine error: #{e.message}"
        @stats[:failed_retrievals] += 1
        return generate_emergency_fallback(query, intent)
      end
    end

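    # Illustrative usage (a sketch, assuming the singleton API above and a
    # populated ~/.hokipoki/vectors.db; query string is made up):
    #
    #   facts = Hokipoki::VectorEngine.instance.retrieve_facts(
    #     "debug failing rspec test for user model", token_budget: 800
    #   )
    #   facts.first  # one string of generated (or fallback) context, at most ~800 tokens
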
    # Store content as template-based vector (75% compression)
    def store_template_vector(content, source_file, metadata = {})
      $stdout.puts "STORING VECTOR: #{File.basename(source_file || 'unknown')}"

      # 1. Detect content template type
      template_type = detect_template_type(content)
      $stdout.puts "  Template type: #{template_type}"

      # 2. Extract atomic keywords
      keywords = extract_atomic_keywords(content)
      $stdout.puts "  Extracted #{keywords.length} keywords"

      # 3. Calculate keyword weights (TF-IDF style)
      keyword_weights = calculate_keyword_weights(keywords, content)

      # 4. Determine generation parameters
      generation_params = {
        complexity: assess_content_complexity(content),
        style: detect_content_style(content),
        domain: detect_content_domain(content),
        original_length: content.length
      }

      # 5. Store compressed representation
      vector_id = store_vector_record(
        template_type: template_type,
        keywords: keywords,
        keyword_weights: keyword_weights,
        generation_params: generation_params,
        source_file: source_file,
        metadata: metadata
      )

      # 6. Update statistics
      original_size = content.bytesize
      compressed_size = calculate_compressed_size(keywords, template_type, generation_params)
      compression_ratio = ((original_size - compressed_size).to_f / original_size * 100).round(2)

      @stats[:total_vectors] += 1
      @stats[:compression_ratio] = ((@stats[:compression_ratio] * (@stats[:total_vectors] - 1)) + compression_ratio) / @stats[:total_vectors]

      $stdout.puts "  Stored with #{compression_ratio}% compression"

      vector_id
    end

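    # Illustrative arithmetic for the compression figure above (sizes are made
    # up): a 2,000-byte file whose compressed representation (keywords +
    # template type + params + 100 bytes of overhead) comes to 500 bytes yields
    # ((2000 - 500).to_f / 2000 * 100).round(2) => 75.0.
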
    # Get engine statistics
    def statistics
      success_rate = @stats[:successful_retrievals] + @stats[:failed_retrievals] > 0 ?
        (@stats[:successful_retrievals].to_f / (@stats[:successful_retrievals] + @stats[:failed_retrievals]) * 100).round(1) : 0

      {
        total_vectors: @stats[:total_vectors],
        success_rate: success_rate,
        average_compression: @stats[:compression_ratio].round(2),
        cache_size: @vector_cache.size,
        learning_patterns: @learning_patterns.size,
        database_size: File.exist?(@db_path) ? File.size(@db_path) : 0
      }
    end

    # Display status for Claude visibility
    def display_status
      stats = statistics

      message = "\nVECTOR ENGINE STATUS\n"
      message += "=" * 40 + "\n"
      message += "Total Vectors: #{stats[:total_vectors]}\n"
      message += "Success Rate: #{stats[:success_rate]}%\n"
      message += "Avg Compression: #{stats[:average_compression]}%\n"
      message += "Cache Size: #{stats[:cache_size]} entries\n"
      message += "Learning Patterns: #{stats[:learning_patterns]}\n"
      message += "Database Size: #{(stats[:database_size] / 1024.0).round(2)} KB\n"
      message += "=" * 40 + "\n"

      $stdout.puts message

      message
    end

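    # Shape of the hash returned by #statistics (values are illustrative only):
    #
    #   { total_vectors: 42, success_rate: 87.5, average_compression: 74.31,
    #     cache_size: 0, learning_patterns: 12, database_size: 53248 }
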
    # Scan and vectorize project files
    def scan_project(project_path = Dir.pwd)
      $stdout.puts "SCANNING PROJECT: #{File.basename(project_path)}"

      # Find relevant files
      file_patterns = %w[**/*.rb **/*.js **/*.erb **/*.yml **/*.md]
      files = file_patterns.flat_map { |pattern| Dir.glob(File.join(project_path, pattern)) }

      # Filter out excluded paths
      excluded_patterns = %w[node_modules vendor log tmp .git]
      files = files.reject { |file| excluded_patterns.any? { |pattern| file.include?(pattern) } }

      $stdout.puts "Found #{files.length} files to process"

      processed = 0
      files.each_with_index do |file, index|
        begin
          $stdout.puts "  [#{index + 1}/#{files.length}] Processing: #{File.basename(file)}"

          content = File.read(file)
          next if content.strip.empty?

          store_template_vector(content, file, {
            file_type: File.extname(file),
            processed_at: Time.now.iso8601
          })

          processed += 1

        rescue => e
          $stdout.puts "  Error processing #{file}: #{e.message}"
        end
      end

      $stdout.puts "PROJECT SCAN COMPLETE: #{processed}/#{files.length} files processed"
      processed
    end

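    # Illustrative usage (a sketch; the path is made up):
    #
    #   engine = Hokipoki::VectorEngine.instance
    #   engine.scan_project('/path/to/app')  # vectorize *.rb, *.js, *.erb, *.yml, *.md
    #   engine.display_status                # print totals and average compression
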
    private

    def ensure_db_directory
      db_dir = File.dirname(@db_path)
      FileUtils.mkdir_p(db_dir) unless Dir.exist?(db_dir)
    end

    def initialize_database
      @db = SQLite3::Database.new(@db_path)
      @db.results_as_hash = true

      create_tables_if_needed
    end

    def create_tables_if_needed
      @db.execute <<~SQL
        CREATE TABLE IF NOT EXISTS vectors (
          id TEXT PRIMARY KEY,
          template_type TEXT NOT NULL,
          keywords TEXT NOT NULL,
          keyword_weights TEXT NOT NULL,
          generation_params TEXT NOT NULL,
          source_file TEXT,
          metadata TEXT,
          created_at TEXT DEFAULT CURRENT_TIMESTAMP,
          updated_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
      SQL

      @db.execute <<~SQL
        CREATE INDEX IF NOT EXISTS idx_template_type ON vectors(template_type)
      SQL

      @db.execute <<~SQL
        CREATE INDEX IF NOT EXISTS idx_keywords ON vectors(keywords)
      SQL
    end

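    # Illustrative row in the vectors table (values are examples; keywords,
    # keyword_weights, generation_params and metadata are JSON-encoded strings):
    #
    #   id                "9f2c...e1"  (SHA256 hex from store_vector_record)
    #   template_type     "active_record_model"
    #   keywords          '["class_User","method_full_name","tech_rails"]'
    #   generation_params '{"complexity":"moderate","style":"standard","domain":"backend","original_length":842}'
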
    def analyze_query_intent(query)
      # Multi-dimensional intent analysis
      intents = []

      # Primary intent detection
      case query.downcase
      when /\b(implement|create|build|make|add|generate|write)\b/
        intents << :implementation
      when /\b(error|bug|fix|debug|troubleshoot|issue|problem)\b/
        intents << :debugging
      when /\b(how|what|why|when|where|explain|understand|learn)\b/
        intents << :learning
      when /\b(optimize|improve|enhance|better|faster|performance)\b/
        intents << :optimization
      when /\b(example|show|demo|sample|tutorial|guide)\b/
        intents << :reference
      when /\b(test|testing|spec|rspec|jest|unit|integration)\b/
        intents << :testing
      end

      # Secondary intent detection
      intents << :css if query.match?(/\b(css|style|stylesheet|tailwind|bootstrap)\b/i)
      intents << :javascript if query.match?(/\b(js|javascript|node|npm|react|vue)\b/i)
      intents << :ruby if query.match?(/\b(ruby|rails|gem|bundler|rake)\b/i)
      intents << :database if query.match?(/\b(database|sql|migration|model|table)\b/i)

      # Default to general if no specific intent detected
      intents << :general if intents.empty?

      intents.first || :general
    end

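    # Example mappings (illustrative): "fix the flaky login spec" hits the
    # debugging branch first, so the method returns :debugging; "show me a
    # tailwind example" returns :reference; a query with no trigger words falls
    # through to :general.
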
    def extract_technical_keywords(query)
      # Remove stop words and extract meaningful terms
      stop_words = %w[
        the a an and or but in on at to for of with by from
        how do i can you please help me show get make create
        what is are was were will would should could might
        this that these those here there where when why
      ]

      # Extract words, filter stop words, keep technical terms
      words = query.downcase
                   .gsub(/[^\w\s]/, ' ')
                   .split(/\s+/)
                   .reject { |word| stop_words.include?(word) || word.length < 2 }
                   .select { |word| word.length > 2 || technical_term?(word) }

      # Remove duplicates and limit to most relevant
      words.uniq.first(10)
    end

    def technical_term?(word)
      # Short technical terms that should be preserved
      technical_terms = %w[css js sql api url ui ux db id]
      technical_terms.include?(word.downcase)
    end

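    # Example (illustrative): "How do I add JS validation to the signup form?"
    # drops the stop words and short noise, keeping roughly
    # ["add", "js", "validation", "signup", "form"] -- "js" survives the length
    # filter because technical_term? whitelists it.
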
    def find_matching_vectors(keywords, intent)
      # Build SQL query to find matching vectors
      keyword_conditions = keywords.map { |keyword| "keywords LIKE '%#{keyword}%'" }.join(' OR ')

      sql = if keyword_conditions.empty?
        "SELECT * FROM vectors ORDER BY created_at DESC LIMIT 5"
      else
        "SELECT * FROM vectors WHERE (#{keyword_conditions}) ORDER BY created_at DESC LIMIT 10"
      end

      @db.execute(sql)
    end

    def generate_content_from_vectors(vectors, intent, keywords)
      template_store = get_template_store

      generated_parts = vectors.map do |vector|
        template_type = vector['template_type']
        stored_keywords = JSON.parse(vector['keywords'])
        generation_params = JSON.parse(vector['generation_params'])

        template_store.generate_content(
          template_type,
          keywords: keywords + stored_keywords,
          intent: intent,
          params: generation_params
        )
      end

      # Combine and deduplicate content
      combined_content = generated_parts.compact.join(' | ')
      combined_content.empty? ? generate_fallback_content(keywords.join(' '), intent, keywords) : combined_content
    end

    def get_template_store
      @template_store ||= TemplateStore.new
    end

    def apply_token_budget(content, budget)
      estimated_tokens = estimate_tokens(content)

      if estimated_tokens <= budget
        content
      else
        # Truncate content to fit budget
        chars_per_token = content.length.to_f / estimated_tokens
        max_chars = (budget * chars_per_token * 0.9).to_i # 90% safety margin

        content[0..max_chars].strip + "..."
      end
    end

    def estimate_tokens(text)
      return 0 if text.nil? || text.empty?
      # Conservative estimation: ~4 characters per token
      (text.length / 4.0).ceil
    end

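    # Worked example for the budget math above (illustrative): a 10,000-character
    # string estimates to (10_000 / 4.0).ceil = 2,500 tokens; with budget = 1,500
    # and 4.0 chars per token, max_chars = (1500 * 4.0 * 0.9).to_i = 5400, so the
    # content is cut to roughly its first 5,400 characters plus "...".
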
    def generate_fallback_content(query, intent, keywords)
      case intent
      when :implementation
        "Implementation context: #{keywords.join(', ')}. Consider Rails conventions, security best practices, and maintainable code patterns."
      when :debugging
        "Debugging context: #{keywords.join(', ')}. Check logs, verify configurations, test incrementally. Common issues: environment variables, dependencies, permissions."
      when :learning
        "Learning context: #{keywords.join(', ')}. Focus on fundamentals, official documentation, and proven patterns. Practice with simple examples first."
      when :optimization
        "Optimization context: #{keywords.join(', ')}. Profile before optimizing, focus on bottlenecks, consider caching, database queries, and algorithmic improvements."
      else
        "Context: #{keywords.join(', ')}. Use best practices, follow conventions, prioritize readability and maintainability."
      end
    end

    def generate_emergency_fallback(query, intent)
      ["Emergency context: Basic guidance available. Check documentation and verify system configuration."]
    end

    def detect_template_type(content)
      # Analyze content to determine template type
      case content
      when /class\s+\w+.*< ApplicationRecord/, /belongs_to/, /has_many/, /validates/
        'active_record_model'
      when /class\s+\w+.*< ApplicationController/, /def\s+\w+/, /render/, /redirect_to/
        'rails_controller'
      when /def\s+\w+/, /class\s+\w+/, /module\s+\w+/
        'ruby_class'
      when /function\s+\w+/, /const\s+\w+/, /=>\s*/, /async\s+function/
        'javascript_code'
      when /\.css/, /background:/, /color:/, /font-size:/, /margin:/, /padding:/
        'css_styles'
      when /<%=/, /<% /, /<%# /, /<html/, /<div/, /<span/
        'erb_template'
      when /describe/, /it\s+["']/, /expect/, /test/, /spec/
        'test_spec'
      when /#\s+[A-Z]/, /##\s+/, /###\s+/, /\*\s+/, /-\s+/
        'documentation'
      when /migration/, /create_table/, /add_column/, /drop_table/
        'database_migration'
      when /config/, /settings/, /environment/, /secrets/
        'configuration'
      else
        'general_code'
      end
    end

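    # Example classifications (illustrative): a file containing
    # "class User < ApplicationRecord" plus "validates :email" matches the first
    # branch and is stored as 'active_record_model'; a plain stylesheet with
    # "color:" and "margin:" lands on 'css_styles'. Branch order matters -- the
    # first matching pattern wins.
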
    def extract_atomic_keywords(content)
      keywords = []

      # Extract Ruby methods
      content.scan(/def\s+(\w+)/) { |match| keywords << "method_#{match[0]}" }

      # Extract class names
      content.scan(/class\s+(\w+)/) { |match| keywords << "class_#{match[0]}" }

      # Extract constants
      content.scan(/([A-Z][A-Z_]+)\s*=/) { |match| keywords << "constant_#{match[0]}" }

      # Extract CSS classes
      content.scan(/class=["\']([^"\']+)["\']/) { |match|
        match[0].split(/\s+/).each { |cls| keywords << "css_#{cls}" }
      }

      # Extract technical terms
      technical_terms = content.scan(/\b(rails|react|vue|angular|bootstrap|tailwind|postgres|mysql|redis|docker|kubernetes|aws|api|rest|graphql|jwt|oauth|json|xml|html|css|javascript|ruby|python|sql)\b/i)
      technical_terms.flatten.each { |term| keywords << "tech_#{term.downcase}" }

      # Extract file extensions and types
      content.scan(/\.(\w+)$/) { |match| keywords << "filetype_#{match[0]}" }

      keywords.uniq.first(20) # Limit to most relevant keywords
    end

    def calculate_keyword_weights(keywords, content)
      weights = {}
      total_words = content.split(/\s+/).length

      keywords.each do |keyword|
        # Simple TF-IDF approximation
        term_frequency = content.scan(/#{Regexp.escape(keyword)}/i).length
        # Inverse document frequency approximation (assuming 1000 documents)
        inverse_doc_freq = Math.log(1000.0 / [@stats[:total_vectors] + 1, 1].max)

        weights[keyword] = (term_frequency.to_f / total_words) * inverse_doc_freq
      end

      weights
    end

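    # Worked example of the approximation above (illustrative): with 3 stored
    # vectors, a 200-word file, and a keyword that literally appears twice in
    # the text, weight = (2.0 / 200) * Math.log(1000.0 / 4) ~= 0.01 * 5.52 ~= 0.055.
    # Prefixed keywords such as "method_foo" rarely occur verbatim in the source,
    # so their term frequency is often zero.
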
    def assess_content_complexity(content)
      # Simple complexity assessment
      factors = 0
      factors += 1 if content.length > 1000
      factors += 1 if content.scan(/def\s+\w+/).length > 5
      factors += 1 if content.scan(/class\s+\w+/).length > 1
      factors += 1 if content.include?('module')
      factors += 1 if content.scan(/\b(async|await|promise|callback)\b/i).any?

      case factors
      when 0..1 then 'simple'
      when 2..3 then 'moderate'
      else 'complex'
      end
    end

    def detect_content_style(content)
      return 'tutorial' if content.match?(/step\s+\d+|first|then|next|finally/i)
      return 'reference' if content.match?(/api|documentation|docs|spec/i)
      return 'example' if content.match?(/example|demo|sample|usage/i)
      return 'guide' if content.match?(/guide|how\s+to|walkthrough/i)
      'standard'
    end

    def detect_content_domain(content)
      return 'frontend' if content.match?(/css|html|javascript|react|vue|angular/i)
      return 'backend' if content.match?(/rails|ruby|controller|model|database/i)
      return 'devops' if content.match?(/docker|kubernetes|aws|deployment|server/i)
      return 'testing' if content.match?(/test|spec|rspec|jest|unit|integration/i)
      'general'
    end

    def store_vector_record(template_type:, keywords:, keyword_weights:, generation_params:, source_file:, metadata:)
      vector_id = Digest::SHA256.hexdigest("#{template_type}_#{keywords.join('_')}_#{Time.now.to_f}")

      @db.execute(
        "INSERT INTO vectors (id, template_type, keywords, keyword_weights, generation_params, source_file, metadata) VALUES (?, ?, ?, ?, ?, ?, ?)",
        [
          vector_id,
          template_type,
          JSON.generate(keywords),
          JSON.generate(keyword_weights),
          JSON.generate(generation_params),
          source_file,
          JSON.generate(metadata)
        ]
      )

      vector_id
    end

    def calculate_compressed_size(keywords, template_type, generation_params)
      # Estimate compressed representation size
      keywords_size = keywords.join('').bytesize
      template_size = template_type.bytesize
      params_size = JSON.generate(generation_params).bytesize

      keywords_size + template_size + params_size + 100 # Base overhead
    end

    def learn_from_success(query, keywords, intent, content)
      # Store successful pattern for future improvement
      pattern_key = "#{intent}_#{keywords.first(3).join('_')}"

      @learning_patterns[pattern_key] ||= { successes: 0, failures: 0, last_success: nil }
      @learning_patterns[pattern_key][:successes] += 1
      @learning_patterns[pattern_key][:last_success] = Time.now

      # Keep only recent patterns to avoid memory bloat
      if @learning_patterns.size > 1000
        oldest_patterns = @learning_patterns.sort_by { |k, v| v[:last_success] || Time.at(0) }.first(200)
        oldest_patterns.each { |pattern, _| @learning_patterns.delete(pattern) }
      end
    end

    def display_startup_message
      $stdout.puts "\nVECTOR ENGINE: Initialized successfully"
      $stdout.puts "Database: #{@db_path}"
      $stdout.puts "Ready for parasitic intelligence operations"
    end
  end
end

# Load template store dependency
require_relative 'template_store'

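Taken together, the new vector_engine.rb exposes a small public surface: scan a project into the local SQLite store under ~/.hokipoki, then answer queries against it. A minimal end-to-end sketch, assuming that requiring the gem loads the engine (the full hokipoki.rb changes are not reproduced here) and using a made-up path and query:

    require 'hokipoki'

    engine = Hokipoki::VectorEngine.instance          # Singleton: one DB handle per process
    engine.scan_project('/path/to/my_app')            # vectorize *.rb, *.js, *.erb, *.yml, *.md
    facts = engine.retrieve_facts('add validation to the signup form', token_budget: 1000)
    puts facts.first                                  # generated or fallback context string
    engine.display_status                             # totals, success rate, avg compression
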
data/lib/hokipoki/version.rb
CHANGED