claude-flow-novice 2.18.38 → 2.18.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/cfn-loop-task.md +227 -291
- package/.claude/skills/cfn-local-ruvector-accelerator/.claude/hooks/SessionStart-cfn-build-ruvector.sh +10 -9
- package/.claude/skills/cfn-local-ruvector-accelerator/index-code.sh +33 -387
- package/.claude/skills/cfn-local-ruvector-accelerator/index_all.sh +1 -1
- package/.claude/skills/cfn-local-ruvector-accelerator/init-local-ruvector.sh +56 -95
- package/.claude/skills/cfn-local-ruvector-accelerator/query-local.sh +66 -108
- package/package.json +1 -1
- package/tsconfig.tsbuildinfo +0 -0
|
@@ -1,25 +1,17 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
|
-
# index-code.sh - Index codebase
|
|
2
|
+
# index-code.sh - Index codebase using local-ruvector Rust binary
|
|
3
|
+
# This is a wrapper around the Rust binary for convenience
|
|
3
4
|
|
|
4
5
|
set -e
|
|
5
6
|
|
|
6
7
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
7
|
-
STORAGE_PATH="${HOME}/.local-ruvector"
|
|
8
|
-
DEFAULT_PATH="."
|
|
9
8
|
|
|
10
|
-
#
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
SUSPICIOUS_PATTERNS=("node_modules" ".git" "/dist/" "/build/" "/target/" ".svn" ".hg" "__pycache__" ".pytest_cache")
|
|
14
|
-
|
|
15
|
-
# Parse arguments
|
|
16
|
-
PATH_TO_INDEX=""
|
|
17
|
-
FILE_TYPES=("")
|
|
18
|
-
PATTERNS=""
|
|
19
|
-
VERBOSE=false
|
|
9
|
+
# Default values
|
|
10
|
+
PATH_TO_INDEX="."
|
|
11
|
+
FILE_TYPES="ts,tsx,js,jsx,py,sh,sql,rs"
|
|
20
12
|
HELP=false
|
|
21
|
-
MAX_FILES=${MAX_FILES_TO_PROCESS}
|
|
22
13
|
|
|
14
|
+
# Parse arguments
|
|
23
15
|
while [[ $# -gt 0 ]]; do
|
|
24
16
|
case $1 in
|
|
25
17
|
--path)
|
|
@@ -27,27 +19,15 @@ while [[ $# -gt 0 ]]; do
|
|
|
27
19
|
shift 2
|
|
28
20
|
;;
|
|
29
21
|
--types)
|
|
30
|
-
|
|
31
|
-
shift 2
|
|
32
|
-
;;
|
|
33
|
-
--patterns)
|
|
34
|
-
PATTERNS="$2"
|
|
22
|
+
FILE_TYPES="$2"
|
|
35
23
|
shift 2
|
|
36
24
|
;;
|
|
37
|
-
--max-files)
|
|
38
|
-
MAX_FILES="$2"
|
|
39
|
-
shift 2
|
|
40
|
-
;;
|
|
41
|
-
--verbose)
|
|
42
|
-
VERBOSE=true
|
|
43
|
-
shift
|
|
44
|
-
;;
|
|
45
25
|
--help|-h)
|
|
46
26
|
HELP=true
|
|
47
27
|
shift
|
|
48
28
|
;;
|
|
49
29
|
*)
|
|
50
|
-
if [[ -z "$PATH_TO_INDEX" ]]; then
|
|
30
|
+
if [[ -z "$PATH_TO_INDEX" || "$PATH_TO_INDEX" == "." ]]; then
|
|
51
31
|
PATH_TO_INDEX="$1"
|
|
52
32
|
fi
|
|
53
33
|
shift
|
|
@@ -55,391 +35,57 @@ while [[ $# -gt 0 ]]; do
|
|
|
55
35
|
esac
|
|
56
36
|
done
|
|
57
37
|
|
|
58
|
-
# Security validation functions
|
|
59
|
-
validate_path() {
|
|
60
|
-
local path="$1"
|
|
61
|
-
|
|
62
|
-
# Reject null bytes and control characters
|
|
63
|
-
if [[ "$path" =~ $'\0' ]]; then
|
|
64
|
-
echo "❌ Error: Path contains null bytes" >&2
|
|
65
|
-
exit 1
|
|
66
|
-
fi
|
|
67
|
-
|
|
68
|
-
# Reject absolute paths that aren't under the base
|
|
69
|
-
if [[ "$path" == /* && "$path" != "$PATH_TO_INDEX"/* ]]; then
|
|
70
|
-
echo "❌ Error: Absolute path outside base directory: $path" >&2
|
|
71
|
-
exit 1
|
|
72
|
-
fi
|
|
73
|
-
|
|
74
|
-
# Reject suspicious patterns
|
|
75
|
-
for pattern in "${SUSPICIOUS_PATTERNS[@]}"; do
|
|
76
|
-
if [[ "$path" == *"$pattern"* ]]; then
|
|
77
|
-
echo "❌ Error: Path contains suspicious pattern '$pattern': $path" >&2
|
|
78
|
-
exit 1
|
|
79
|
-
fi
|
|
80
|
-
done
|
|
81
|
-
|
|
82
|
-
# Reject paths with too many ..
|
|
83
|
-
if [[ $(echo "$path" | grep -o '\.\.' | wc -l) -gt 5 ]]; then
|
|
84
|
-
echo "❌ Error: Too many parent directory references: $path" >&2
|
|
85
|
-
exit 1
|
|
86
|
-
fi
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
validate_file_type() {
|
|
90
|
-
local ft="$1"
|
|
91
|
-
|
|
92
|
-
# Only allow alphanumeric and common extensions
|
|
93
|
-
if [[ ! "$ft" =~ ^[a-zA-Z0-9._-]+$ ]]; then
|
|
94
|
-
echo "❌ Error: Invalid file type format: $ft" >&2
|
|
95
|
-
exit 1
|
|
96
|
-
fi
|
|
97
|
-
}
|
|
98
|
-
|
|
99
38
|
# Show help
|
|
100
39
|
if [[ "$HELP" == true ]]; then
|
|
101
40
|
cat << EOF
|
|
102
41
|
Usage: index-code [OPTIONS] [PATH]
|
|
103
42
|
|
|
104
|
-
Index codebase patterns
|
|
43
|
+
Index codebase patterns using local-ruvector Rust binary
|
|
105
44
|
|
|
106
45
|
Arguments:
|
|
107
46
|
PATH Path to directory to index (default: current directory)
|
|
108
47
|
|
|
109
48
|
Options:
|
|
110
49
|
--path PATH Path to directory to index
|
|
111
|
-
--types TYPES Comma-separated file types (default:
|
|
112
|
-
--patterns PATTERNS Comma-separated patterns to focus on
|
|
113
|
-
--max-files N Maximum files to process (default: 10000)
|
|
114
|
-
--verbose Show detailed progress
|
|
50
|
+
--types TYPES Comma-separated file types (default: ts,tsx,js,jsx,py,sh,sql,rs)
|
|
115
51
|
--help, -h Show this help
|
|
116
52
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
- File size limits (10MB per file)
|
|
120
|
-
- Maximum files limit
|
|
121
|
-
- Suspicious pattern detection
|
|
122
|
-
- Input sanitization
|
|
53
|
+
Database Location:
|
|
54
|
+
~/.local/share/ruvector/index_v2.db
|
|
123
55
|
|
|
124
56
|
Examples:
|
|
125
|
-
index-code --path ~/projects/my-
|
|
126
|
-
index-code --types rs,py --
|
|
127
|
-
index-code --max-files 5000 --verbose
|
|
57
|
+
index-code --path ~/projects/my-app
|
|
58
|
+
index-code --types rs,py --path /path/to/project
|
|
128
59
|
|
|
129
60
|
EOF
|
|
130
61
|
exit 0
|
|
131
62
|
fi
|
|
132
63
|
|
|
133
|
-
#
|
|
134
|
-
if
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
# Validate max_files
|
|
143
|
-
if ! [[ "$MAX_FILES" =~ ^[0-9]+$ ]] || [[ "$MAX_FILES" -lt 1 ]] || [[ "$MAX_FILES" -gt 100000 ]]; then
|
|
144
|
-
echo "❌ Error: Invalid max-files value: $MAX_FILES (must be 1-100000)" >&2
|
|
64
|
+
# Find the binary
|
|
65
|
+
if command -v local-ruvector &>/dev/null; then
|
|
66
|
+
BINARY="local-ruvector"
|
|
67
|
+
elif [[ -x "$HOME/.local/bin/local-ruvector" ]]; then
|
|
68
|
+
BINARY="$HOME/.local/bin/local-ruvector"
|
|
69
|
+
else
|
|
70
|
+
echo "❌ Error: local-ruvector binary not found"
|
|
71
|
+
echo " Install with: ./scripts/install-ruvector-global.sh"
|
|
145
72
|
exit 1
|
|
146
73
|
fi
|
|
147
74
|
|
|
148
|
-
#
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
# Convert to absolute path and canonicalize
|
|
152
|
-
PATH_TO_INDEX="$(cd "$PATH_TO_INDEX" && pwd -P)"
|
|
153
|
-
|
|
154
|
-
# Additional security check - ensure we're not in a system directory
|
|
155
|
-
if [[ "$PATH_TO_INDEX" == /etc/* ]] || [[ "$PATH_TO_INDEX" == /usr/bin/* ]] || [[ "$PATH_TO_INDEX" == /bin/* ]] || [[ "$PATH_TO_INDEX" == /sbin/* ]]; then
|
|
156
|
-
echo "❌ Error: Cannot index system directory: $PATH_TO_INDEX" >&2
|
|
75
|
+
# Convert to absolute path
|
|
76
|
+
PATH_TO_INDEX="$(cd "$PATH_TO_INDEX" 2>/dev/null && pwd)" || {
|
|
77
|
+
echo "❌ Error: Directory not found: $PATH_TO_INDEX"
|
|
157
78
|
exit 1
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
# Validate file types
|
|
161
|
-
for ft in "${FILE_TYPES[@]}"; do
|
|
162
|
-
validate_file_type "$ft"
|
|
163
|
-
done
|
|
164
|
-
|
|
165
|
-
# Convert array to comma-separated string for Python
|
|
166
|
-
FILE_TYPES_STR=$(IFS=','; echo "${FILE_TYPES[*]}")
|
|
79
|
+
}
|
|
167
80
|
|
|
168
81
|
echo "🔍 Indexing code patterns in: $PATH_TO_INDEX"
|
|
169
|
-
echo "📄 File types: $
|
|
170
|
-
echo "
|
|
171
|
-
echo "
|
|
172
|
-
if [[ -n "$PATTERNS" ]]; then
|
|
173
|
-
echo "🎯 Focusing on patterns: $PATTERNS"
|
|
174
|
-
fi
|
|
175
|
-
|
|
176
|
-
# Initialize search engine
|
|
177
|
-
cd "$SCRIPT_DIR"
|
|
178
|
-
python3 -c "
|
|
179
|
-
import sys
|
|
180
|
-
import os
|
|
181
|
-
import json
|
|
182
|
-
import hashlib
|
|
183
|
-
import stat
|
|
184
|
-
from pathlib import Path
|
|
185
|
-
from search_engine_v2 import SearchEngine
|
|
186
|
-
from security import SecurityError, PathValidator, ResourceMonitor, safe_file_read
|
|
187
|
-
|
|
188
|
-
# Security checks
|
|
189
|
-
def is_safe_file(file_path):
|
|
190
|
-
\"\"\"Check if file is safe to process\"\"\"
|
|
191
|
-
try:
|
|
192
|
-
# Check file permissions
|
|
193
|
-
file_stat = file_path.stat()
|
|
194
|
-
|
|
195
|
-
# Reject world-writable files
|
|
196
|
-
if file_stat.st_mode & stat.S_IWOTH:
|
|
197
|
-
return False, \"File is world-writable\"
|
|
198
|
-
|
|
199
|
-
# Reject files with suspicious permissions
|
|
200
|
-
if file_stat.st_mode & 0o7777 != 0o644 and file_stat.st_mode & 0o7777 != 0o755:
|
|
201
|
-
return False, f\"Suspicious file permissions: {oct(file_stat.st_mode & 0o7777)}\"
|
|
202
|
-
|
|
203
|
-
# Check if file is owned by current user (on Unix systems)
|
|
204
|
-
if hasattr(os, 'getuid') and file_stat.st_uid != os.getuid():
|
|
205
|
-
return False, \"File not owned by current user\"
|
|
206
|
-
|
|
207
|
-
return True, None
|
|
208
|
-
|
|
209
|
-
except Exception as e:
|
|
210
|
-
return False, f\"Error checking file: {e}\"
|
|
211
|
-
|
|
212
|
-
def generate_pattern_id(file_path, content):
|
|
213
|
-
\"\"\"Generate unique pattern ID with security\"\"\"
|
|
214
|
-
# Use SHA-256 for better security
|
|
215
|
-
hash_input = f'{file_path}:{len(content)}:{hashlib.sha256(content.encode()).hexdigest()}'
|
|
216
|
-
hash_obj = hashlib.md5(hash_input.encode('utf-8'))
|
|
217
|
-
return f'pattern_{hash_obj.hexdigest()[:16]}'
|
|
218
|
-
|
|
219
|
-
def extract_functions(content, file_type):
|
|
220
|
-
\"\"\"Extract functions/classes from code safely\"\"\"
|
|
221
|
-
patterns = []
|
|
222
|
-
|
|
223
|
-
# Limit content size for processing
|
|
224
|
-
if len(content) > 50000:
|
|
225
|
-
content = content[:50000]
|
|
226
|
-
|
|
227
|
-
# Simple extraction based on language
|
|
228
|
-
if file_type == 'rs':
|
|
229
|
-
import re
|
|
230
|
-
# Limit regex operations
|
|
231
|
-
lines = content.split('\\n')[:1000] # Limit to first 1000 lines
|
|
232
|
-
|
|
233
|
-
fn_pattern = r'^(pub\\s+)?(async\\s+)?(unsafe\\s+)?fn\\s+(\\w+)'
|
|
234
|
-
struct_pattern = r'^(pub\\s+)?struct\\s+(\\w+)'
|
|
235
|
-
impl_pattern = r'^impl\\s+(\\w+)\\s*(for\\s+(\\w+))?'
|
|
236
|
-
|
|
237
|
-
for i, line in enumerate(lines):
|
|
238
|
-
for pattern, type_name in [(fn_pattern, 'function'),
|
|
239
|
-
(struct_pattern, 'struct'),
|
|
240
|
-
(impl_pattern, 'impl')]:
|
|
241
|
-
match = re.match(pattern, line)
|
|
242
|
-
if match:
|
|
243
|
-
patterns.append({
|
|
244
|
-
'type': type_name,
|
|
245
|
-
'name': match.groups()[-1][:100], # Limit name length
|
|
246
|
-
'line': i + 1,
|
|
247
|
-
'signature': line.strip()[:200] # Limit signature length
|
|
248
|
-
})
|
|
249
|
-
|
|
250
|
-
elif file_type == 'py':
|
|
251
|
-
import re
|
|
252
|
-
lines = content.split('\\n')[:1000]
|
|
253
|
-
|
|
254
|
-
fn_pattern = r'^\\s*def\\s+(\\w+)'
|
|
255
|
-
class_pattern = r'^\\s*class\\s+(\\w+)'
|
|
256
|
-
|
|
257
|
-
for i, line in enumerate(lines):
|
|
258
|
-
if re.match(fn_pattern, line):
|
|
259
|
-
patterns.append({
|
|
260
|
-
'type': 'function',
|
|
261
|
-
'name': re.match(fn_pattern, line).group(1)[:100],
|
|
262
|
-
'line': i + 1,
|
|
263
|
-
'signature': line.strip()[:200]
|
|
264
|
-
})
|
|
265
|
-
elif re.match(class_pattern, line):
|
|
266
|
-
patterns.append({
|
|
267
|
-
'type': 'class',
|
|
268
|
-
'name': re.match(class_pattern, line).group(1)[:100],
|
|
269
|
-
'line': i + 1,
|
|
270
|
-
'signature': line.strip()[:200]
|
|
271
|
-
})
|
|
272
|
-
|
|
273
|
-
return patterns[:50] # Limit number of patterns per file
|
|
274
|
-
|
|
275
|
-
# Initialize security
|
|
276
|
-
try:
|
|
277
|
-
validator = PathValidator('${PATH_TO_INDEX}')
|
|
278
|
-
monitor = ResourceMonitor()
|
|
279
|
-
|
|
280
|
-
# Initialize engine with security
|
|
281
|
-
engine = SearchEngine('${STORAGE_PATH}/storage')
|
|
282
|
-
|
|
283
|
-
print('📂 Scanning files with security checks...')
|
|
284
|
-
|
|
285
|
-
# Parse file types from shell
|
|
286
|
-
file_types = '${FILE_TYPES_STR}'.split(',')
|
|
287
|
-
verbose = '${VERBOSE}' == 'true'
|
|
288
|
-
max_files = ${MAX_FILES}
|
|
289
|
-
|
|
290
|
-
# Index files with security
|
|
291
|
-
indexed_count = 0
|
|
292
|
-
pattern_count = 0
|
|
293
|
-
skipped_count = 0
|
|
294
|
-
error_count = 0
|
|
295
|
-
|
|
296
|
-
for file_type in file_types:
|
|
297
|
-
file_type = file_type.strip()
|
|
298
|
-
if not file_type:
|
|
299
|
-
continue
|
|
300
|
-
|
|
301
|
-
pattern = f'**/*.{file_type}'
|
|
302
|
-
|
|
303
|
-
# Use pathlib's rglob with depth limit
|
|
304
|
-
try:
|
|
305
|
-
files = list(Path('${PATH_TO_INDEX}').rglob(pattern))
|
|
306
|
-
except Exception as e:
|
|
307
|
-
print(f'⚠️ Error scanning for {file_type} files: {e}')
|
|
308
|
-
continue
|
|
309
|
-
|
|
310
|
-
# Limit total files
|
|
311
|
-
if indexed_count >= max_files:
|
|
312
|
-
print(f'⚠️ Reached maximum file limit ({max_files}), stopping')
|
|
313
|
-
break
|
|
314
|
-
|
|
315
|
-
for file_path in files:
|
|
316
|
-
# Check file limit
|
|
317
|
-
if indexed_count >= max_files:
|
|
318
|
-
break
|
|
319
|
-
|
|
320
|
-
if file_path.is_file():
|
|
321
|
-
try:
|
|
322
|
-
# Validate path
|
|
323
|
-
safe_path = validator.validate_path(file_path)
|
|
324
|
-
|
|
325
|
-
# Additional safety checks
|
|
326
|
-
is_safe, reason = is_safe_file(safe_path)
|
|
327
|
-
if not is_safe:
|
|
328
|
-
if verbose:
|
|
329
|
-
print(f' ⚠️ Skipped {file_path}: {reason}')
|
|
330
|
-
skipped_count += 1
|
|
331
|
-
continue
|
|
332
|
-
|
|
333
|
-
# Check file size before reading
|
|
334
|
-
monitor.check_file_size(safe_path)
|
|
335
|
-
|
|
336
|
-
# Safely read file
|
|
337
|
-
content = safe_file_read(safe_path)
|
|
338
|
-
|
|
339
|
-
# Skip very small files
|
|
340
|
-
if len(content) < 50:
|
|
341
|
-
continue
|
|
342
|
-
|
|
343
|
-
# Limit content size
|
|
344
|
-
if len(content) > 1000000: # 1MB limit
|
|
345
|
-
content = content[:1000000]
|
|
346
|
-
if verbose:
|
|
347
|
-
print(f' ⚠️ Truncated large file: {file_path}')
|
|
348
|
-
|
|
349
|
-
# Generate pattern ID
|
|
350
|
-
pattern_id = generate_pattern_id(str(safe_path), content)
|
|
351
|
-
|
|
352
|
-
# Extract functions/classes safely
|
|
353
|
-
patterns = extract_functions(content, file_type)
|
|
354
|
-
|
|
355
|
-
# Prepare metadata with security
|
|
356
|
-
metadata = {
|
|
357
|
-
'file_size': len(content),
|
|
358
|
-
'line_count': len(content.split('\\n')),
|
|
359
|
-
'patterns': patterns,
|
|
360
|
-
'last_modified': safe_path.stat().st_mtime,
|
|
361
|
-
'indexed_by': 'secure-index-code',
|
|
362
|
-
'security_verified': True
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
# Store pattern with security
|
|
366
|
-
success = engine.add_pattern(
|
|
367
|
-
pattern_id=pattern_id,
|
|
368
|
-
file_path=str(safe_path),
|
|
369
|
-
file_type=file_type,
|
|
370
|
-
content=content,
|
|
371
|
-
metadata=metadata
|
|
372
|
-
)
|
|
373
|
-
|
|
374
|
-
if success:
|
|
375
|
-
indexed_count += 1
|
|
376
|
-
pattern_count += len(patterns)
|
|
377
|
-
monitor.record_file_processed(len(content))
|
|
378
|
-
|
|
379
|
-
if verbose and indexed_count % 100 == 0:
|
|
380
|
-
stats = monitor.get_stats()
|
|
381
|
-
print(f' 📊 Processed {indexed_count} files, {stats[\"bytes_processed\"]:,} bytes')
|
|
382
|
-
else:
|
|
383
|
-
error_count += 1
|
|
384
|
-
|
|
385
|
-
except SecurityError as e:
|
|
386
|
-
if verbose:
|
|
387
|
-
print(f' 🔒 Security violation: {file_path} - {e}')
|
|
388
|
-
skipped_count += 1
|
|
389
|
-
continue
|
|
390
|
-
except Exception as e:
|
|
391
|
-
if verbose:
|
|
392
|
-
print(f' ⚠️ Skipped {file_path}: {e}')
|
|
393
|
-
error_count += 1
|
|
394
|
-
continue
|
|
395
|
-
|
|
396
|
-
print(f'\\n🎉 Indexing completed with security controls!')
|
|
397
|
-
print(f' ✅ Successfully indexed: {indexed_count} files')
|
|
398
|
-
print(f' 📋 Total patterns: {pattern_count}')
|
|
399
|
-
print(f' ⚠️ Skipped files: {skipped_count}')
|
|
400
|
-
print(f' ❌ Errors: {error_count}')
|
|
401
|
-
print(f' 📍 Storage: ${STORAGE_PATH}')
|
|
402
|
-
|
|
403
|
-
# Show resource usage
|
|
404
|
-
stats = monitor.get_stats()
|
|
405
|
-
print(f'\\n📊 Resource Usage:')
|
|
406
|
-
print(f' Files processed: {stats[\"files_processed\"]:,}')
|
|
407
|
-
print(f' Bytes processed: {stats[\"bytes_processed\"]:,}')
|
|
408
|
-
print(f' Processing time: {stats[\"elapsed_seconds\"]:.2f}s')
|
|
409
|
-
print(f' Throughput: {stats[\"bytes_per_second\"]:,.0f} bytes/s')
|
|
410
|
-
|
|
411
|
-
# Show stats
|
|
412
|
-
stats = engine.get_stats()
|
|
413
|
-
print(f'\\n📈 Database Stats:')
|
|
414
|
-
print(f' Total patterns: {stats[\"total_patterns\"]:,}')
|
|
415
|
-
print(f' Average success rate: {stats[\"avg_success_rate\"]:.2f}')
|
|
416
|
-
print(f' Total usage: {stats[\"total_usage\"]:,}')
|
|
417
|
-
print(f' Unique file types: {stats[\"unique_file_types\"]}')
|
|
418
|
-
print(f' Database size: {stats[\"database_size_bytes\"] / (1024*1024):.1f} MB')
|
|
419
|
-
print(f' Total embeddings: {stats[\"total_embeddings\"]:,}')
|
|
420
|
-
|
|
421
|
-
# Security summary
|
|
422
|
-
print(f'\\n🔒 Security Summary:')
|
|
423
|
-
print(f' ✅ Path traversal protection: Active')
|
|
424
|
-
print(f' ✅ File size limits: Enforced')
|
|
425
|
-
print(f' ✅ Input sanitization: Active')
|
|
426
|
-
print(f' ✅ Permission checks: Active')
|
|
82
|
+
echo "📄 File types: $FILE_TYPES"
|
|
83
|
+
echo "🗄️ Database: ~/.local/share/ruvector/index_v2.db"
|
|
84
|
+
echo ""
|
|
427
85
|
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
sys.exit(1)
|
|
431
|
-
except Exception as e:
|
|
432
|
-
print(f'\\n❌ Unexpected error: {e}')
|
|
433
|
-
import traceback
|
|
434
|
-
traceback.print_exc()
|
|
435
|
-
sys.exit(1)
|
|
436
|
-
"
|
|
86
|
+
# Run the indexer
|
|
87
|
+
$BINARY index --path "$PATH_TO_INDEX" --types "$FILE_TYPES"
|
|
437
88
|
|
|
438
89
|
echo ""
|
|
439
|
-
echo "✅ Indexing complete
|
|
440
|
-
echo "
|
|
441
|
-
echo "💡 Query patterns with: query-local --pattern 'your search term'"
|
|
442
|
-
echo "📊 View stats: query-local --stats"
|
|
443
|
-
|
|
444
|
-
# Make script executable
|
|
445
|
-
chmod +x "${BASH_SOURCE[0]}"
|
|
90
|
+
echo "✅ Indexing complete!"
|
|
91
|
+
echo "💡 Query patterns with: sqlite3 ~/.local/share/ruvector/index_v2.db \"SELECT * FROM entities WHERE name LIKE '%keyword%';\""
|
|
@@ -34,7 +34,7 @@ for ext in "${file_types[@]}"; do
|
|
|
34
34
|
fi
|
|
35
35
|
|
|
36
36
|
# Index this file type
|
|
37
|
-
|
|
37
|
+
local-ruvector index --path ../.. --types ${ext} --force
|
|
38
38
|
|
|
39
39
|
if [ $? -eq 0 ]; then
|
|
40
40
|
echo "✅ Successfully indexed .${ext} files"
|
|
@@ -1,112 +1,73 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
|
-
# init-local-ruvector.sh -
|
|
2
|
+
# init-local-ruvector.sh - Verify and setup local RuVector
|
|
3
|
+
# Ensures the Rust binary is installed and PATH is configured
|
|
3
4
|
|
|
4
5
|
set -e
|
|
5
6
|
|
|
6
|
-
STORAGE_PATH="${HOME}/.local-ruvector"
|
|
7
7
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
8
|
+
BINARY_PATH="$HOME/.local/bin/local-ruvector"
|
|
9
|
+
DB_PATH="$HOME/.local/share/ruvector/index_v2.db"
|
|
8
10
|
|
|
9
|
-
echo "🚀
|
|
10
|
-
|
|
11
|
-
# Create storage directory structure
|
|
12
|
-
echo "📁 Creating storage structure at ${STORAGE_PATH}..."
|
|
13
|
-
mkdir -p "${STORAGE_PATH}/storage"
|
|
14
|
-
mkdir -p "${STORAGE_PATH}/indexes"
|
|
15
|
-
mkdir -p "${STORAGE_PATH}/config"
|
|
16
|
-
|
|
17
|
-
# Create default config
|
|
18
|
-
echo "⚙️ Creating default configuration..."
|
|
19
|
-
cat > "${STORAGE_PATH}/config/settings.json" << EOF
|
|
20
|
-
{
|
|
21
|
-
"version": "1.0",
|
|
22
|
-
"embedding_dimension": 1536,
|
|
23
|
-
"similarity_threshold": 0.7,
|
|
24
|
-
"max_patterns_per_query": 100,
|
|
25
|
-
"cache_size": 1000,
|
|
26
|
-
"auto_cleanup": {
|
|
27
|
-
"enabled": true,
|
|
28
|
-
"days_old": 30,
|
|
29
|
-
"min_usage": 5
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
EOF
|
|
11
|
+
echo "🚀 Checking Local RuVector Installation..."
|
|
12
|
+
echo ""
|
|
33
13
|
|
|
34
|
-
#
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
14
|
+
# Check if binary exists in PATH
|
|
15
|
+
if command -v local-ruvector &>/dev/null; then
|
|
16
|
+
BINARY_LOCATION=$(command -v local-ruvector)
|
|
17
|
+
VERSION=$(local-ruvector --version 2>/dev/null || echo "unknown")
|
|
18
|
+
echo "✅ local-ruvector found in PATH"
|
|
19
|
+
echo " Location: $BINARY_LOCATION"
|
|
20
|
+
echo " Version: $VERSION"
|
|
21
|
+
elif [[ -x "$BINARY_PATH" ]]; then
|
|
22
|
+
VERSION=$($BINARY_PATH --version 2>/dev/null || echo "unknown")
|
|
23
|
+
echo "✅ local-ruvector found at $BINARY_PATH"
|
|
24
|
+
echo " Version: $VERSION"
|
|
25
|
+
echo ""
|
|
26
|
+
echo "⚠️ Binary not in PATH. Add to your shell profile:"
|
|
27
|
+
echo " export PATH=\"\$HOME/.local/bin:\$PATH\""
|
|
40
28
|
else
|
|
41
|
-
echo "❌
|
|
29
|
+
echo "❌ local-ruvector binary not found"
|
|
30
|
+
echo ""
|
|
31
|
+
echo "📦 Installation options:"
|
|
32
|
+
echo ""
|
|
33
|
+
echo " Option 1 - Use install script (if available):"
|
|
34
|
+
echo " ./scripts/install-ruvector-global.sh"
|
|
35
|
+
echo ""
|
|
36
|
+
echo " Option 2 - Build from source (requires Rust):"
|
|
37
|
+
echo " cd $SCRIPT_DIR"
|
|
38
|
+
echo " cargo build --release"
|
|
39
|
+
echo " cp target/release/local-ruvector ~/.local/bin/"
|
|
40
|
+
echo ""
|
|
42
41
|
exit 1
|
|
43
42
|
fi
|
|
44
43
|
|
|
45
|
-
|
|
46
|
-
echo "📦 Installing required packages..."
|
|
47
|
-
${PYTHON} -m pip install --quiet numpy scikit-learn sqlite3 2>/dev/null || {
|
|
48
|
-
echo "⚠️ Installing required packages system-wide..."
|
|
49
|
-
${PYTHON} -m pip install numpy scikit-learn
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
# Test imports
|
|
53
|
-
echo "🧪 Testing dependencies..."
|
|
54
|
-
${PYTHON} -c "
|
|
55
|
-
import numpy as np
|
|
56
|
-
from sklearn.metrics.pairwise import cosine_similarity
|
|
57
|
-
import sqlite3
|
|
58
|
-
print('✅ All dependencies imported successfully')
|
|
59
|
-
" || {
|
|
60
|
-
echo "❌ Error: Failed to import required dependencies"
|
|
61
|
-
exit 1
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
# Create CLI links
|
|
65
|
-
echo "🔗 Creating CLI commands..."
|
|
66
|
-
chmod +x "${SCRIPT_DIR}/index-code.sh"
|
|
67
|
-
chmod +x "${SCRIPT_DIR}/query-local.sh"
|
|
68
|
-
|
|
69
|
-
# Create convenience symlinks
|
|
70
|
-
BIN_DIR="${HOME}/.local/bin"
|
|
71
|
-
mkdir -p "${BIN_DIR}"
|
|
72
|
-
|
|
73
|
-
# Remove old symlinks if they exist
|
|
74
|
-
rm -f "${BIN_DIR}/index-code" "${BIN_DIR}/query-local"
|
|
75
|
-
|
|
76
|
-
# Create new symlinks
|
|
77
|
-
ln -s "${SCRIPT_DIR}/index-code.sh" "${BIN_DIR}/index-code"
|
|
78
|
-
ln -s "${SCRIPT_DIR}/query-local.sh" "${BIN_DIR}/query-local"
|
|
44
|
+
echo ""
|
|
79
45
|
|
|
80
|
-
# Check
|
|
81
|
-
if [[
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
46
|
+
# Check database
|
|
47
|
+
if [[ -f "$DB_PATH" ]]; then
|
|
48
|
+
DB_SIZE=$(du -h "$DB_PATH" | cut -f1)
|
|
49
|
+
ENTITY_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM entities;" 2>/dev/null || echo "0")
|
|
50
|
+
PROJECT_COUNT=$(sqlite3 "$DB_PATH" "SELECT COUNT(DISTINCT project_root) FROM entities WHERE project_root != '';" 2>/dev/null || echo "0")
|
|
51
|
+
|
|
52
|
+
echo "✅ Database found"
|
|
53
|
+
echo " Location: $DB_PATH"
|
|
54
|
+
echo " Size: $DB_SIZE"
|
|
55
|
+
echo " Entities: $ENTITY_COUNT"
|
|
56
|
+
echo " Projects: $PROJECT_COUNT"
|
|
57
|
+
else
|
|
58
|
+
echo "⚠️ No database found at $DB_PATH"
|
|
59
|
+
echo " Index a project with: local-ruvector index --path /your/project"
|
|
86
60
|
fi
|
|
87
61
|
|
|
88
|
-
# Initialize the database
|
|
89
|
-
echo "🗄️ Initializing database..."
|
|
90
|
-
${PYTHON} -c "
|
|
91
|
-
import sys
|
|
92
|
-
sys.path.append('${SCRIPT_DIR}')
|
|
93
|
-
from search_engine_v2 import SearchEngine
|
|
94
|
-
engine = SearchEngine('${STORAGE_PATH}/storage')
|
|
95
|
-
print('✅ Database initialized')
|
|
96
|
-
"
|
|
97
|
-
|
|
98
62
|
echo ""
|
|
99
|
-
echo "
|
|
63
|
+
echo "📖 Quick Start:"
|
|
64
|
+
echo " # Index a project"
|
|
65
|
+
echo " local-ruvector index --path ~/projects/my-app --types ts,tsx,js,jsx,py"
|
|
100
66
|
echo ""
|
|
101
|
-
echo "
|
|
102
|
-
echo "
|
|
103
|
-
echo " index-code --path /path/to/project"
|
|
104
|
-
echo " query-local --pattern 'authentication middleware'"
|
|
67
|
+
echo " # Query via SQL"
|
|
68
|
+
echo " sqlite3 $DB_PATH \"SELECT file_path, name FROM entities WHERE name LIKE '%auth%' LIMIT 10;\""
|
|
105
69
|
echo ""
|
|
106
|
-
echo "
|
|
107
|
-
echo "
|
|
108
|
-
echo "
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
# Make script executable
|
|
112
|
-
chmod +x "${BASH_SOURCE[0]}"
|
|
70
|
+
echo " # Semantic search"
|
|
71
|
+
echo " local-ruvector query \"authentication middleware\""
|
|
72
|
+
echo ""
|
|
73
|
+
echo "🎉 Local RuVector is ready!"
|