ruvector 0.1.46 → 0.1.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/bin/cli.js +279 -1
  2. package/package.json +1 -1
package/bin/cli.js CHANGED
@@ -2353,7 +2353,8 @@ hooksCmd.command('init')
2353
2353
  'Bash(rm -rf /)',
2354
2354
  'Bash(sudo rm:*)',
2355
2355
  'Bash(chmod 777:*)',
2356
- 'Bash(:(){ :|:& };:)'
2356
+ 'Bash(mkfs:*)',
2357
+ 'Bash(dd if=/dev/zero:*)'
2357
2358
  ];
2358
2359
  console.log(chalk.blue(' ✓ Permissions configured (project-specific)'));
2359
2360
  }
@@ -2996,4 +2997,281 @@ hooksCmd.command('import')
2996
2997
  }
2997
2998
  });
2998
2999
 
3000
+ // Pretrain - analyze repo and bootstrap learning with agent swarm
3001
+ hooksCmd.command('pretrain')
3002
+ .description('Pretrain intelligence by analyzing the repository with agent swarm')
3003
+ .option('--depth <n>', 'Git history depth to analyze', '100')
3004
+ .option('--workers <n>', 'Number of parallel analysis workers', '4')
3005
+ .option('--skip-git', 'Skip git history analysis')
3006
+ .option('--skip-files', 'Skip file structure analysis')
3007
+ .option('--verbose', 'Show detailed progress')
3008
+ .action(async (opts) => {
3009
+ const { execSync, spawn } = require('child_process');
3010
+ console.log(chalk.bold.cyan('\n🧠 RuVector Pretrain - Repository Intelligence Bootstrap\n'));
3011
+
3012
+ const intel = new Intelligence();
3013
+ const startTime = Date.now();
3014
+ const stats = { files: 0, patterns: 0, memories: 0, coedits: 0 };
3015
+
3016
+ // Agent types for different file patterns
3017
+ const agentMapping = {
3018
+ // Rust
3019
+ '.rs': 'rust-developer',
3020
+ 'Cargo.toml': 'rust-developer',
3021
+ 'Cargo.lock': 'rust-developer',
3022
+ // JavaScript/TypeScript
3023
+ '.js': 'javascript-developer',
3024
+ '.jsx': 'react-developer',
3025
+ '.ts': 'typescript-developer',
3026
+ '.tsx': 'react-developer',
3027
+ '.mjs': 'javascript-developer',
3028
+ '.cjs': 'javascript-developer',
3029
+ 'package.json': 'node-developer',
3030
+ // Python
3031
+ '.py': 'python-developer',
3032
+ 'requirements.txt': 'python-developer',
3033
+ 'pyproject.toml': 'python-developer',
3034
+ 'setup.py': 'python-developer',
3035
+ // Go
3036
+ '.go': 'go-developer',
3037
+ 'go.mod': 'go-developer',
3038
+ // Web
3039
+ '.html': 'frontend-developer',
3040
+ '.css': 'frontend-developer',
3041
+ '.scss': 'frontend-developer',
3042
+ '.vue': 'vue-developer',
3043
+ '.svelte': 'svelte-developer',
3044
+ // Config
3045
+ '.json': 'config-specialist',
3046
+ '.yaml': 'config-specialist',
3047
+ '.yml': 'config-specialist',
3048
+ '.toml': 'config-specialist',
3049
+ // Docs
3050
+ '.md': 'documentation-specialist',
3051
+ '.mdx': 'documentation-specialist',
3052
+ // Tests
3053
+ '.test.js': 'test-engineer',
3054
+ '.test.ts': 'test-engineer',
3055
+ '.spec.js': 'test-engineer',
3056
+ '.spec.ts': 'test-engineer',
3057
+ '_test.go': 'test-engineer',
3058
+ '_test.rs': 'test-engineer',
3059
+ // DevOps
3060
+ 'Dockerfile': 'devops-engineer',
3061
+ 'docker-compose.yml': 'devops-engineer',
3062
+ '.github/workflows': 'cicd-engineer',
3063
+ 'Makefile': 'devops-engineer',
3064
+ // SQL
3065
+ '.sql': 'database-specialist',
3066
+ };
3067
+
3068
+ // Phase 1: Analyze file structure
3069
+ if (!opts.skipFiles) {
3070
+ console.log(chalk.yellow('📁 Phase 1: Analyzing file structure...\n'));
3071
+
3072
+ try {
3073
+ // Get all files in repo
3074
+ const files = execSync('git ls-files 2>/dev/null || find . -type f -not -path "./.git/*" -not -path "./node_modules/*" -not -path "./target/*"',
3075
+ { encoding: 'utf-8', maxBuffer: 50 * 1024 * 1024 }).trim().split('\n').filter(f => f);
3076
+
3077
+ const filesByType = {};
3078
+ const dirPatterns = {};
3079
+
3080
+ files.forEach(file => {
3081
+ stats.files++;
3082
+ const ext = path.extname(file);
3083
+ const basename = path.basename(file);
3084
+ const dir = path.dirname(file);
3085
+
3086
+ // Determine agent for this file
3087
+ let agent = 'coder'; // default
3088
+ if (agentMapping[basename]) {
3089
+ agent = agentMapping[basename];
3090
+ } else if (agentMapping[ext]) {
3091
+ agent = agentMapping[ext];
3092
+ } else if (file.includes('.test.') || file.includes('.spec.') || file.includes('_test.')) {
3093
+ agent = 'test-engineer';
3094
+ } else if (file.includes('.github/workflows')) {
3095
+ agent = 'cicd-engineer';
3096
+ }
3097
+
3098
+ // Track file types
3099
+ filesByType[ext] = (filesByType[ext] || 0) + 1;
3100
+
3101
+ // Track directory patterns
3102
+ const parts = dir.split('/');
3103
+ if (parts[0]) {
3104
+ dirPatterns[parts[0]] = dirPatterns[parts[0]] || { count: 0, agents: {} };
3105
+ dirPatterns[parts[0]].count++;
3106
+ dirPatterns[parts[0]].agents[agent] = (dirPatterns[parts[0]].agents[agent] || 0) + 1;
3107
+ }
3108
+
3109
+ // Create Q-learning pattern for this file type
3110
+ const state = `edit:${ext || 'unknown'}`;
3111
+ if (!intel.data.patterns[state]) {
3112
+ intel.data.patterns[state] = {};
3113
+ }
3114
+ intel.data.patterns[state][agent] = (intel.data.patterns[state][agent] || 0) + 0.5;
3115
+ stats.patterns++;
3116
+ });
3117
+
3118
+ // Log summary
3119
+ if (opts.verbose) {
3120
+ console.log(chalk.dim(' File types found:'));
3121
+ Object.entries(filesByType).sort((a, b) => b[1] - a[1]).slice(0, 10).forEach(([ext, count]) => {
3122
+ console.log(chalk.dim(` ${ext || '(no ext)'}: ${count} files`));
3123
+ });
3124
+ }
3125
+ console.log(chalk.green(` ✓ Analyzed ${stats.files} files`));
3126
+ console.log(chalk.green(` ✓ Created ${Object.keys(intel.data.patterns).length} routing patterns`));
3127
+
3128
+ } catch (e) {
3129
+ console.log(chalk.yellow(` ⚠ File analysis skipped: ${e.message}`));
3130
+ }
3131
+ }
3132
+
3133
+ // Phase 2: Analyze git history for co-edit patterns
3134
+ if (!opts.skipGit) {
3135
+ console.log(chalk.yellow('\n📜 Phase 2: Analyzing git history for co-edit patterns...\n'));
3136
+
3137
+ try {
3138
+ // Get commits with files changed
3139
+ const gitLog = execSync(
3140
+ `git log --name-only --pretty=format:"COMMIT:%H" -n ${opts.depth} 2>/dev/null`,
3141
+ { encoding: 'utf-8', maxBuffer: 50 * 1024 * 1024 }
3142
+ );
3143
+
3144
+ const commits = gitLog.split('COMMIT:').filter(c => c.trim());
3145
+ const coEditMap = {};
3146
+
3147
+ commits.forEach(commit => {
3148
+ const lines = commit.trim().split('\n').filter(l => l && !l.startsWith('COMMIT:'));
3149
+ const files = lines.slice(1).filter(f => f.trim()); // Skip the hash
3150
+
3151
+ // Track which files are edited together
3152
+ files.forEach(file1 => {
3153
+ files.forEach(file2 => {
3154
+ if (file1 !== file2) {
3155
+ const key = [file1, file2].sort().join('|');
3156
+ coEditMap[key] = (coEditMap[key] || 0) + 1;
3157
+ }
3158
+ });
3159
+ });
3160
+ });
3161
+
3162
+ // Find strong co-edit patterns (files edited together 3+ times)
3163
+ const strongPatterns = Object.entries(coEditMap)
3164
+ .filter(([, count]) => count >= 3)
3165
+ .sort((a, b) => b[1] - a[1]);
3166
+
3167
+ // Store as sequence patterns
3168
+ strongPatterns.slice(0, 100).forEach(([key, count]) => {
3169
+ const [file1, file2] = key.split('|');
3170
+ if (!intel.data.sequences) intel.data.sequences = {};
3171
+ if (!intel.data.sequences[file1]) intel.data.sequences[file1] = [];
3172
+
3173
+ const existing = intel.data.sequences[file1].find(s => s.file === file2);
3174
+ if (existing) {
3175
+ existing.score += count;
3176
+ } else {
3177
+ intel.data.sequences[file1].push({ file: file2, score: count });
3178
+ }
3179
+ stats.coedits++;
3180
+ });
3181
+
3182
+ console.log(chalk.green(` ✓ Analyzed ${commits.length} commits`));
3183
+ console.log(chalk.green(` ✓ Found ${strongPatterns.length} co-edit patterns`));
3184
+
3185
+ if (opts.verbose && strongPatterns.length > 0) {
3186
+ console.log(chalk.dim(' Top co-edit patterns:'));
3187
+ strongPatterns.slice(0, 5).forEach(([key, count]) => {
3188
+ const [f1, f2] = key.split('|');
3189
+ console.log(chalk.dim(` ${path.basename(f1)} ↔ ${path.basename(f2)}: ${count} times`));
3190
+ });
3191
+ }
3192
+
3193
+ } catch (e) {
3194
+ console.log(chalk.yellow(` ⚠ Git analysis skipped: ${e.message}`));
3195
+ }
3196
+ }
3197
+
3198
+ // Phase 3: Create vector memories from important files
3199
+ console.log(chalk.yellow('\n💾 Phase 3: Creating vector memories from key files...\n'));
3200
+
3201
+ try {
3202
+ const importantFiles = [
3203
+ 'README.md', 'CLAUDE.md', 'package.json', 'Cargo.toml',
3204
+ 'pyproject.toml', 'go.mod', '.claude/settings.json'
3205
+ ];
3206
+
3207
+ for (const filename of importantFiles) {
3208
+ const filePath = path.join(process.cwd(), filename);
3209
+ if (fs.existsSync(filePath)) {
3210
+ try {
3211
+ const content = fs.readFileSync(filePath, 'utf-8').slice(0, 2000); // First 2KB
3212
+ intel.data.memories = intel.data.memories || [];
3213
+ intel.data.memories.push({
3214
+ content: `[${filename}] ${content.replace(/\n/g, ' ').slice(0, 500)}`,
3215
+ type: 'project',
3216
+ created: new Date().toISOString(),
3217
+ embedding: intel.simpleEmbed ? intel.simpleEmbed(content) : null
3218
+ });
3219
+ stats.memories++;
3220
+ if (opts.verbose) console.log(chalk.dim(` ✓ ${filename}`));
3221
+ } catch (e) { /* skip unreadable files */ }
3222
+ }
3223
+ }
3224
+
3225
+ console.log(chalk.green(` ✓ Created ${stats.memories} memory entries`));
3226
+
3227
+ } catch (e) {
3228
+ console.log(chalk.yellow(` ⚠ Memory creation skipped: ${e.message}`));
3229
+ }
3230
+
3231
+ // Phase 4: Analyze directory structure for agent recommendations
3232
+ console.log(chalk.yellow('\n🗂️ Phase 4: Building directory-agent mappings...\n'));
3233
+
3234
+ try {
3235
+ const dirs = execSync('find . -type d -maxdepth 2 -not -path "./.git*" -not -path "./node_modules*" -not -path "./target*" 2>/dev/null || echo "."',
3236
+ { encoding: 'utf-8' }).trim().split('\n');
3237
+
3238
+ const dirAgentMap = {};
3239
+ dirs.forEach(dir => {
3240
+ const name = path.basename(dir);
3241
+ // Infer agent from directory name
3242
+ if (['src', 'lib', 'core'].includes(name)) dirAgentMap[dir] = 'coder';
3243
+ else if (['test', 'tests', '__tests__', 'spec'].includes(name)) dirAgentMap[dir] = 'test-engineer';
3244
+ else if (['docs', 'documentation'].includes(name)) dirAgentMap[dir] = 'documentation-specialist';
3245
+ else if (['scripts', 'bin'].includes(name)) dirAgentMap[dir] = 'devops-engineer';
3246
+ else if (['components', 'views', 'pages'].includes(name)) dirAgentMap[dir] = 'frontend-developer';
3247
+ else if (['api', 'routes', 'handlers'].includes(name)) dirAgentMap[dir] = 'backend-developer';
3248
+ else if (['models', 'entities', 'schemas'].includes(name)) dirAgentMap[dir] = 'database-specialist';
3249
+ else if (['.github', '.gitlab', 'ci'].includes(name)) dirAgentMap[dir] = 'cicd-engineer';
3250
+ });
3251
+
3252
+ // Store directory patterns
3253
+ intel.data.dirPatterns = dirAgentMap;
3254
+ console.log(chalk.green(` ✓ Mapped ${Object.keys(dirAgentMap).length} directories to agents`));
3255
+
3256
+ } catch (e) {
3257
+ console.log(chalk.yellow(` ⚠ Directory analysis skipped: ${e.message}`));
3258
+ }
3259
+
3260
+ // Save all learning data
3261
+ intel.data.pretrained = {
3262
+ date: new Date().toISOString(),
3263
+ stats: stats
3264
+ };
3265
+ intel.save();
3266
+
3267
+ const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
3268
+ console.log(chalk.bold.green(`\n✅ Pretrain complete in ${elapsed}s!\n`));
3269
+ console.log(chalk.cyan('Summary:'));
3270
+ console.log(` 📁 ${stats.files} files analyzed`);
3271
+ console.log(` 🧠 ${stats.patterns} agent routing patterns`);
3272
+ console.log(` 🔗 ${stats.coedits} co-edit patterns`);
3273
+ console.log(` 💾 ${stats.memories} memory entries`);
3274
+ console.log(chalk.dim('\nThe intelligence layer will now provide better recommendations.'));
3275
+ });
3276
+
2999
3277
  program.parse();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ruvector",
3
- "version": "0.1.46",
3
+ "version": "0.1.47",
4
4
  "description": "High-performance vector database for Node.js with automatic native/WASM fallback",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",