jstar-reviewer 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +147 -0
- package/bin/jstar.js +170 -0
- package/package.json +64 -0
- package/scripts/config.ts +21 -0
- package/scripts/dashboard.ts +227 -0
- package/scripts/detective.ts +137 -0
- package/scripts/gemini-embedding.ts +97 -0
- package/scripts/indexer.ts +103 -0
- package/scripts/local-embedding.ts +55 -0
- package/scripts/mock-llm.ts +18 -0
- package/scripts/reviewer.ts +295 -0
- package/scripts/types.ts +61 -0
- package/setup.js +364 -0
package/scripts/detective.ts
@@ -0,0 +1,137 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import chalk from 'chalk';
+
+interface Violation {
+  file: string;
+  line: number;
+  message: string;
+  severity: 'high' | 'medium' | 'low';
+  code: string;
+}
+
+interface Rule {
+  id: string;
+  severity: 'high' | 'medium' | 'low';
+  message: string;
+  pattern: RegExp;
+  filePattern?: RegExp; // Only check files matching this pattern
+}
+
+const RULES: Rule[] = [
+  {
+    id: 'SEC-001',
+    severity: 'high',
+    message: 'Possible Hardcoded Secret detected',
+    pattern: /(api_key|secret|password|token)\s*[:=]\s*['"`][a-zA-Z0-9_\-\.]{10,}['"`]/i
+  },
+  {
+    id: 'ARCH-001',
+    severity: 'medium',
+    message: 'Avoid using console.log in production code',
+    pattern: /console\.log\(/
+  },
+];
+
+// File-level rules that check the whole content
+const FILE_RULES: Rule[] = [
+  {
+    id: 'ARCH-002',
+    severity: 'high',
+    message: 'Next.js "use client" must be at the very top of the file',
+    pattern: /^(?!['"]use client['"]).*['"]use client['"]/s,
+    filePattern: /\.tsx?$/
+  }
+];
+
+export class Detective {
+  violations: Violation[] = [];
+
+  constructor(private directory: string) { }
+
+  async scan(): Promise<Violation[]> {
+    this.walk(this.directory);
+    return this.violations;
+  }
+
+  private walk(dir: string) {
+    if (!fs.existsSync(dir)) return;
+    const files = fs.readdirSync(dir);
+    for (const file of files) {
+      const filePath = path.join(dir, file);
+      const stat = fs.statSync(filePath);
+
+      if (stat.isDirectory()) {
+        if (file !== 'node_modules' && file !== '.git' && file !== '.jstar') {
+          this.walk(filePath);
+        }
+      } else {
+        this.checkFile(filePath);
+      }
+    }
+  }
+
+  private checkFile(filePath: string) {
+    if (!filePath.match(/\.(ts|tsx|js|jsx)$/)) return;
+
+    const content = fs.readFileSync(filePath, 'utf-8');
+    const lines = content.split('\n');
+
+    // Line-based rules
+    for (const rule of RULES) {
+      if (rule.filePattern && !filePath.match(rule.filePattern)) continue;
+
+      lines.forEach((line, index) => {
+        if (rule.pattern.test(line)) {
+          this.addViolation(filePath, index + 1, rule);
+        }
+      });
+    }
+
+    // File-based rules
+    for (const rule of FILE_RULES) {
+      if (rule.filePattern && !filePath.match(rule.filePattern)) continue;
+      if (rule.pattern.test(content)) {
+        this.addViolation(filePath, 1, rule);
+      }
+    }
+  }
+
+  private addViolation(filePath: string, line: number, rule: Rule) {
+    this.violations.push({
+      file: path.relative(process.cwd(), filePath),
+      line,
+      message: rule.message,
+      severity: rule.severity,
+      code: rule.id
+    });
+  }
+
+  report() {
+    if (this.violations.length === 0) {
+      console.log(chalk.green("✅ Detective Engine: No violations found."));
+      return;
+    }
+
+    console.log(chalk.red(`🚨 Detective Engine found ${this.violations.length} violations:`));
+    // Only show first 10 to avoid wall of text
+    const total = this.violations.length;
+    const toShow = this.violations.slice(0, 10);
+
+    toShow.forEach(v => {
+      const color = v.severity === 'high' ? chalk.red : chalk.yellow;
+      console.log(color(`[${v.code}] ${v.file}:${v.line} - ${v.message}`));
+    });
+
+    if (total > 10) {
+      console.log(chalk.dim(`... and ${total - 10} more.`));
+    }
+  }
+}
+
+// CLI Integration
+if (require.main === module) {
+  const detective = new Detective(path.join(process.cwd(), 'src'));
+  detective.scan();
+  detective.report();
+}
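For orientation, a minimal sketch of driving the exported Detective class from a script of your own, assuming the file above is compiled or run via ts-node. The CI gate at the end is our illustration, not part of the package:

import * as path from 'path';
import { Detective } from './detective';

async function runInCI() {
  // Scan the same directory the package's own CLI entry point targets.
  const detective = new Detective(path.join(process.cwd(), 'src'));
  const violations = await detective.scan();
  detective.report();
  // Hypothetical CI convention: fail the build on any high-severity finding.
  if (violations.some(v => v.severity === 'high')) process.exit(1);
}

runInCI();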
package/scripts/gemini-embedding.ts
@@ -0,0 +1,97 @@
+import { GoogleGenerativeAI } from "@google/generative-ai";
+
+export class GeminiEmbedding {
+  private genAI: GoogleGenerativeAI;
+  private model: any;
+
+  constructor() {
+    const apiKey = process.env.GOOGLE_API_KEY;
+    if (!apiKey) {
+      throw new Error("GOOGLE_API_KEY is missing from environment variables.");
+    }
+    this.genAI = new GoogleGenerativeAI(apiKey);
+    // User requested 'text-embedding-004', which has better rate limits
+    this.model = this.genAI.getGenerativeModel({ model: "text-embedding-004" });
+  }
+
+  async getTextEmbedding(text: string): Promise<number[]> {
+    // Retry logic for transient network errors
+    let retries = 0;
+    const maxRetries = 3;
+    while (retries < maxRetries) {
+      try {
+        const result = await this.model.embedContent(text);
+        return result.embedding.values;
+      } catch (e: any) {
+        if (e.message.includes("fetch failed") || e.message.includes("network")) {
+          retries++;
+          const waitTime = Math.pow(2, retries) * 1000;
+          console.warn(`⚠️ Network error. Retrying in ${waitTime / 1000}s... (${retries}/${maxRetries})`);
+          await new Promise(resolve => setTimeout(resolve, waitTime));
+        } else {
+          throw e;
+        }
+      }
+    }
+    throw new Error("Max retries exceeded for embedding request.");
+  }
+
+  async getQueryEmbedding(query: string): Promise<number[]> {
+    return this.getTextEmbedding(query);
+  }
+
+  async getTextEmbeddings(texts: string[]): Promise<number[][]> {
+    const embeddings: number[][] = [];
+    console.log(`Creating embeddings for ${texts.length} chunks (Batching to avoid rate limits)...`);
+
+    // Process in smaller batches with delay
+    const BATCH_SIZE = 1; // Strict serial for safety on free tier
+    const DELAY_MS = 1000; // 1s delay between calls
+
+    for (let i = 0; i < texts.length; i += BATCH_SIZE) {
+      const batch = texts.slice(i, i + BATCH_SIZE);
+      for (const text of batch) {
+        let retries = 0;
+        let success = false;
+        while (!success && retries < 5) {
+          try {
+            const embedding = await this.getTextEmbedding(text);
+            embeddings.push(embedding);
+            success = true;
+            // Standard delay between calls
+            await new Promise(resolve => setTimeout(resolve, DELAY_MS));
+            process.stdout.write("."); // Progress indicator
+          } catch (e: any) {
+            if (e.message.includes("429") || e.message.includes("quota")) {
+              retries++;
+              const waitTime = Math.pow(2, retries) * 2000; // 4s, 8s, 16s, 32s...
+              console.warn(`\n⚠️ Rate limit hit. Retrying in ${waitTime / 1000}s...`);
+              await new Promise(resolve => setTimeout(resolve, waitTime));
+            } else {
+              console.error("\n❌ Embedding failed irreversibly:", e.message);
+              throw e;
+            }
+          }
+        }
+        if (!success) {
+          throw new Error("Max retries exceeded for rate limits.");
+        }
+      }
+    }
+    console.log("\n✅ Done embedding.");
+    return embeddings;
+  }
+
+  // Stubs for BaseEmbedding compliance
+  embedBatchSize = 10;
+  similarity(embedding1: number[], embedding2: number[]): number {
+    return embedding1.reduce((sum, val, i) => sum + val * embedding2[i], 0);
+  }
+  async transform(nodes: any[], _options?: any): Promise<any[]> {
+    for (const node of nodes) {
+      node.embedding = await this.getTextEmbedding(node.getContent("text"));
+    }
+    return nodes;
+  }
+  async getTextEmbeddingsBatch(texts: string[]): Promise<number[][]> { return this.getTextEmbeddings(texts); }
+}
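The file above hard-codes two exponential-backoff loops: one for network errors in getTextEmbedding, one for 429/quota errors in getTextEmbeddings. As a sketch only, the same pattern can be factored into a single helper; the helper name and the transient-error test below are our assumptions, not part of the package:

// Retry fn() with exponential backoff on errors that look transient.
async function withBackoff<T>(fn: () => Promise<T>, maxRetries = 3, baseMs = 1000): Promise<T> {
  for (let attempt = 1; ; attempt++) {
    try {
      return await fn();
    } catch (e: any) {
      const transient = /fetch failed|network|429|quota/i.test(e?.message ?? "");
      if (!transient || attempt >= maxRetries) throw e;
      const waitMs = Math.pow(2, attempt) * baseMs; // 2s, 4s, 8s for baseMs = 1000
      await new Promise(resolve => setTimeout(resolve, waitMs));
    }
  }
}

// Usage sketch: const values = await withBackoff(() => model.embedContent(text));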
package/scripts/indexer.ts
@@ -0,0 +1,103 @@
+import {
+  VectorStoreIndex,
+  storageContextFromDefaults,
+  SimpleDirectoryReader,
+  serviceContextFromDefaults
+} from "llamaindex";
+import { GeminiEmbedding } from "./gemini-embedding";
+import { MockLLM } from "./mock-llm";
+import * as path from "path";
+import * as fs from "fs";
+import chalk from "chalk";
+import dotenv from "dotenv";
+
+// Load .env.local first, then .env
+dotenv.config({ path: ".env.local" });
+dotenv.config();
+
+// Configuration
+const STORAGE_DIR = path.join(process.cwd(), ".jstar", "storage");
+const SOURCE_DIR = path.join(process.cwd(), "scripts"); // Changed from src/ to scripts/
+
+// Ensure an OpenAI key exists (the LlamaIndex default) or fall back if something else is configured
+// (We are using Gemini embeddings now, so this check is less critical, but good to have if we revert)
+// if (!process.env.OPENAI_API_KEY) {
+//   console.warn(chalk.yellow("⚠️ OPENAI_API_KEY not found. Embeddings may fail unless you have configured a local model."));
+// }
+
+async function main() {
+  const args = process.argv.slice(2);
+  const isWatch = args.includes("--watch");
+
+  console.log(chalk.blue("🧠 J-Star Indexer: Scanning codebase..."));
+
+  // 1. Load documents (your code)
+  if (!fs.existsSync(SOURCE_DIR)) {
+    console.error(chalk.red(`❌ Source directory not found: ${SOURCE_DIR}`));
+    process.exit(1);
+  }
+
+  const reader = new SimpleDirectoryReader();
+  const documents = await reader.loadData({ directoryPath: SOURCE_DIR });
+
+  console.log(chalk.yellow(`📄 Found ${documents.length} files to index.`));
+
+  const isInit = args.includes("--init");
+
+  try {
+    // 2. Set up the service context with Google Gemini embeddings,
+    // using 'text-embedding-004', a strong, recent model
    const embedModel = new GeminiEmbedding();
+    const llm = new MockLLM();
+    const serviceContext = serviceContextFromDefaults({
+      embedModel,
+      llm: llm as any
+    });
+
+    // 3. Create the storage context
+    let storageContext;
+    if (isInit) {
+      console.log(chalk.blue("✨ Initializing fresh Local Brain..."));
+      storageContext = await storageContextFromDefaults({});
+    } else {
+      // Try to load the existing storage
+      if (!fs.existsSync(STORAGE_DIR)) {
+        console.log(chalk.yellow("⚠️ Storage not found. Running fresh init..."));
+        storageContext = await storageContextFromDefaults({});
+      } else {
+        storageContext = await storageContextFromDefaults({
+          persistDir: STORAGE_DIR,
+        });
+      }
+    }
+
+    // 4. Generate the index
+    const index = await VectorStoreIndex.fromDocuments(documents, {
+      storageContext,
+      serviceContext,
+    });
+
+    // 5. Persist (save the brain)
+    // Manual persistence for LlamaIndex TS compatibility
+    const ctxToPersist: any = index.storageContext;
+    if (ctxToPersist.docStore) await ctxToPersist.docStore.persist(path.join(STORAGE_DIR, "doc_store.json"));
+    if (ctxToPersist.vectorStore) await ctxToPersist.vectorStore.persist(path.join(STORAGE_DIR, "vector_store.json"));
+    if (ctxToPersist.indexStore) await ctxToPersist.indexStore.persist(path.join(STORAGE_DIR, "index_store.json"));
+    if (ctxToPersist.propStore) await ctxToPersist.propStore.persist(path.join(STORAGE_DIR, "property_store.json"));
+
+    console.log(chalk.green("✅ Indexing Complete. Brain is updated."));
+
+    if (isWatch) {
+      console.log(chalk.blue("👀 Watch mode enabled."));
+    }
+
+  } catch (e: any) {
+    console.error(chalk.red("❌ Indexing Failed:"), e.message);
+    if (e.message.includes("OpenAI")) {
+      console.log(chalk.yellow("👉 Tip: Make sure you have OPENAI_API_KEY in your .env file (or configure a local embedding model)."));
+    }
+    process.exit(1);
+  }
+}
+
+main().catch(console.error);
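Nothing in this indexer reads the persisted stores back; presumably reviewer.ts (not shown here) does. A hedged sketch of reloading them for querying, assuming the same llamaindex version and that VectorStoreIndex.init accepts a storage context (this API has shifted between releases):

import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
import * as path from "path";

async function loadBrain() {
  const storageContext = await storageContextFromDefaults({
    persistDir: path.join(process.cwd(), ".jstar", "storage"),
  });
  // Assumption: the doc/vector/index stores persisted above are picked up from persistDir.
  return VectorStoreIndex.init({ storageContext });
}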
package/scripts/local-embedding.ts
@@ -0,0 +1,55 @@
+import { pipeline, env } from "@xenova/transformers";
+
+// Skip local model checks if needed, or let it download
+env.allowLocalModels = false;
+env.useBrowserCache = false;
+
+export class LocalEmbedding {
+  private pipe: any;
+  private modelName: string;
+
+  constructor() {
+    this.modelName = "Xenova/bge-small-en-v1.5";
+  }
+
+  async init() {
+    if (!this.pipe) {
+      console.log("📥 Loading local embedding model (Xenova/bge-small-en-v1.5)...");
+      this.pipe = await pipeline("feature-extraction", this.modelName);
+    }
+  }
+
+  async getTextEmbedding(text: string): Promise<number[]> {
+    await this.init();
+    const result = await this.pipe(text, { pooling: "mean", normalize: true });
+    return Array.from(result.data);
+  }
+
+  async getQueryEmbedding(query: string): Promise<number[]> {
+    return this.getTextEmbedding(query);
+  }
+
+  // Batch method (required by LlamaIndex)
+  async getTextEmbeddings(texts: string[]): Promise<number[][]> {
+    await this.init();
+    const embeddings: number[][] = [];
+    for (const text of texts) {
+      embeddings.push(await this.getTextEmbedding(text));
+    }
+    return embeddings;
+  }
+
+  // Stubs for BaseEmbedding interface compliance
+  embedBatchSize = 10;
+  similarity(embedding1: number[], embedding2: number[]): number {
+    // Simple dot product for normalized vectors
+    return embedding1.reduce((sum, val, i) => sum + val * embedding2[i], 0);
+  }
+  async transform(nodes: any[], _options?: any): Promise<any[]> {
+    for (const node of nodes) {
+      node.embedding = await this.getTextEmbedding(node.getContent("text"));
+    }
+    return nodes;
+  }
+  async getTextEmbeddingsBatch(texts: string[]): Promise<number[][]> { return this.getTextEmbeddings(texts); }
+}
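A small usage sketch of LocalEmbedding, using only the methods defined above; note the model weights are fetched from the Hugging Face hub on first use because allowLocalModels is false:

import { LocalEmbedding } from "./local-embedding";

async function demo() {
  const embed = new LocalEmbedding();
  const [a, b] = await embed.getTextEmbeddings(["hello world", "goodbye world"]);
  // Outputs are mean-pooled and normalized, so the dot product is cosine similarity.
  console.log("similarity:", embed.similarity(a, b).toFixed(3));
}

demo().catch(console.error);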
package/scripts/mock-llm.ts
@@ -0,0 +1,18 @@
+export class MockLLM {
+  hasStreaming = false;
+  metadata = {
+    model: "mock",
+    temperature: 0,
+    topP: 1,
+    contextWindow: 1024,
+    tokenizer: undefined,
+  };
+
+  async chat(messages: any[], parentEvent?: any): Promise<any> {
+    return { message: { content: "Mock response" } };
+  }
+
+  async complete(prompt: string, parentEvent?: any): Promise<any> {
+    return { text: "Mock response" };
+  }
+}