jstar-reviewer 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,137 @@
+ import * as fs from 'fs';
+ import * as path from 'path';
+ import chalk from 'chalk';
+
+ interface Violation {
+   file: string;
+   line: number;
+   message: string;
+   severity: 'high' | 'medium' | 'low';
+   code: string;
+ }
+
+ interface Rule {
+   id: string;
+   severity: 'high' | 'medium' | 'low';
+   message: string;
+   pattern: RegExp;
+   filePattern?: RegExp; // Only check files matching this pattern
+ }
+
+ const RULES: Rule[] = [
+   {
+     id: 'SEC-001',
+     severity: 'high',
+     message: 'Possible hardcoded secret detected',
+     pattern: /(api_key|secret|password|token)\s*[:=]\s*['"`][a-zA-Z0-9_\-\.]{10,}['"`]/i
+   },
+   {
+     id: 'ARCH-001',
+     severity: 'medium',
+     message: 'Avoid using console.log in production code',
+     pattern: /console\.log\(/
+   },
+ ];
+
+ // File-level rules that check the whole content
+ const FILE_RULES: Rule[] = [
+   {
+     id: 'ARCH-002',
+     severity: 'high',
+     message: 'Next.js "use client" must be at the very top of the file',
+     pattern: /^(?!['"]use client['"]).*['"]use client['"]/s,
+     filePattern: /\.tsx?$/
+   }
+ ];
+
+ export class Detective {
+   violations: Violation[] = [];
+
+   constructor(private directory: string) { }
+
+   async scan(): Promise<Violation[]> {
+     this.walk(this.directory);
+     return this.violations;
+   }
+
+   private walk(dir: string) {
+     if (!fs.existsSync(dir)) return;
+     const files = fs.readdirSync(dir);
+     for (const file of files) {
+       const filePath = path.join(dir, file);
+       const stat = fs.statSync(filePath);
+
+       if (stat.isDirectory()) {
+         if (file !== 'node_modules' && file !== '.git' && file !== '.jstar') {
+           this.walk(filePath);
+         }
+       } else {
+         this.checkFile(filePath);
+       }
+     }
+   }
+
+   private checkFile(filePath: string) {
+     if (!filePath.match(/\.(ts|tsx|js|jsx)$/)) return;
+
+     const content = fs.readFileSync(filePath, 'utf-8');
+     const lines = content.split('\n');
+
+     // Line-based rules
+     for (const rule of RULES) {
+       if (rule.filePattern && !filePath.match(rule.filePattern)) continue;
+
+       lines.forEach((line, index) => {
+         if (rule.pattern.test(line)) {
+           this.addViolation(filePath, index + 1, rule);
+         }
+       });
+     }
+
+     // File-based rules
+     for (const rule of FILE_RULES) {
+       if (rule.filePattern && !filePath.match(rule.filePattern)) continue;
+       if (rule.pattern.test(content)) {
+         this.addViolation(filePath, 1, rule);
+       }
+     }
+   }
+
+   private addViolation(filePath: string, line: number, rule: Rule) {
+     this.violations.push({
+       file: path.relative(process.cwd(), filePath),
+       line,
+       message: rule.message,
+       severity: rule.severity,
+       code: rule.id
+     });
+   }
+
+   report() {
+     if (this.violations.length === 0) {
+       console.log(chalk.green("✅ Detective Engine: No violations found."));
+       return;
+     }
+
+     console.log(chalk.red(`🚨 Detective Engine found ${this.violations.length} violations:`));
+     // Only show the first 10 to avoid a wall of text
+     const total = this.violations.length;
+     const toShow = this.violations.slice(0, 10);
+
+     toShow.forEach(v => {
+       const color = v.severity === 'high' ? chalk.red : chalk.yellow;
+       console.log(color(`[${v.code}] ${v.file}:${v.line} - ${v.message}`));
+     });
+
+     if (total > 10) {
+       console.log(chalk.dim(`... and ${total - 10} more.`));
+     }
+   }
+ }
+
+ // CLI Integration
+ if (require.main === module) {
+   const detective = new Detective(path.join(process.cwd(), 'src'));
+   // scan() is async; wait for the walk to finish before reporting
+   detective.scan().then(() => detective.report());
+ }
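
The Detective class above is a self-contained scanner, so it can also be driven programmatically rather than through the CLI block at the bottom of the file. A minimal usage sketch, not part of the published package; the import path "./detective" and the CI exit behavior are assumptions, since the diff does not show file names:

import { Detective } from "./detective"; // hypothetical path; the diff omits file names
import * as path from "path";

async function run() {
  const detective = new Detective(path.join(process.cwd(), "src"));
  const violations = await detective.scan(); // walks the tree and collects violations
  detective.report();                        // prints at most the first 10 violations
  // Fail a hypothetical CI step only on high-severity findings
  process.exitCode = violations.some(v => v.severity === "high") ? 1 : 0;
}

run();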
@@ -0,0 +1,97 @@
+ import { GoogleGenerativeAI } from "@google/generative-ai";
+
+ export class GeminiEmbedding {
+   private genAI: GoogleGenerativeAI;
+   private model: any;
+
+   constructor() {
+     const apiKey = process.env.GOOGLE_API_KEY;
+     if (!apiKey) {
+       throw new Error("GOOGLE_API_KEY is missing from environment variables.");
+     }
+     this.genAI = new GoogleGenerativeAI(apiKey);
+     // User requested 'text-embedding-004', which has better rate limits
+     this.model = this.genAI.getGenerativeModel({ model: "text-embedding-004" });
+   }
+
+   async getTextEmbedding(text: string): Promise<number[]> {
+     // Retry logic for transient network errors
+     let retries = 0;
+     const maxRetries = 3;
+     while (retries < maxRetries) {
+       try {
+         const result = await this.model.embedContent(text);
+         return result.embedding.values;
+       } catch (e: any) {
+         const msg = String(e?.message ?? e);
+         if (msg.includes("fetch failed") || msg.includes("network")) {
+           retries++;
+           const waitTime = Math.pow(2, retries) * 1000;
+           console.warn(`⚠️ Network error. Retrying in ${waitTime / 1000}s... (${retries}/${maxRetries})`);
+           await new Promise(resolve => setTimeout(resolve, waitTime));
+         } else {
+           throw e;
+         }
+       }
+     }
+     throw new Error("Max retries exceeded for embedding request.");
+   }
+
+   async getQueryEmbedding(query: string): Promise<number[]> {
+     return this.getTextEmbedding(query);
+   }
+
+   async getTextEmbeddings(texts: string[]): Promise<number[][]> {
+     const embeddings: number[][] = [];
+     console.log(`Creating embeddings for ${texts.length} chunks (batching to avoid rate limits)...`);
+
+     // Process in smaller batches with a delay between calls
+     const BATCH_SIZE = 1; // Strictly serial for safety on the free tier
+     const DELAY_MS = 1000; // 1s delay between calls
+
+     for (let i = 0; i < texts.length; i += BATCH_SIZE) {
+       const batch = texts.slice(i, i + BATCH_SIZE);
+       for (const text of batch) {
+         let retries = 0;
+         let success = false;
+         while (!success && retries < 5) {
+           try {
+             const embedding = await this.getTextEmbedding(text);
+             embeddings.push(embedding);
+             success = true;
+             // Standard delay between calls
+             await new Promise(resolve => setTimeout(resolve, DELAY_MS));
+             process.stdout.write("."); // Progress indicator
+           } catch (e: any) {
+             const msg = String(e?.message ?? e);
+             if (msg.includes("429") || msg.includes("quota")) {
+               retries++;
+               const waitTime = Math.pow(2, retries) * 2000; // 4s, 8s, 16s, 32s...
+               console.warn(`\n⚠️ Rate limit hit. Retrying in ${waitTime / 1000}s...`);
+               await new Promise(resolve => setTimeout(resolve, waitTime));
+             } else {
+               console.error("\n❌ Embedding failed irreversibly:", msg);
+               throw e;
+             }
+           }
+         }
+         if (!success) {
+           throw new Error("Max retries exceeded for rate limits.");
+         }
+       }
+     }
+     console.log("\n✅ Done embedding.");
+     return embeddings;
+   }
+
+   // Stubs for BaseEmbedding compliance
+   embedBatchSize = 10;
+
+   similarity(embedding1: number[], embedding2: number[]): number {
+     // Dot product; equals cosine similarity for normalized vectors
+     return embedding1.reduce((sum, val, i) => sum + val * embedding2[i], 0);
+   }
+
+   async transform(nodes: any[], _options?: any): Promise<any[]> {
+     for (const node of nodes) {
+       node.embedding = await this.getTextEmbedding(node.getContent("text"));
+     }
+     return nodes;
+   }
+
+   async getTextEmbeddingsBatch(texts: string[]): Promise<number[][]> { return this.getTextEmbeddings(texts); }
+ }
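
A minimal usage sketch for the embedder above, not part of the published package; it assumes GOOGLE_API_KEY is set, and the import path "./gemini-embedding" is the same one the indexer script in the next hunk uses:

import { GeminiEmbedding } from "./gemini-embedding";

async function demo() {
  const embedder = new GeminiEmbedding(); // throws if GOOGLE_API_KEY is missing
  const [a, b] = await embedder.getTextEmbeddings([
    "How does the detective engine report violations?",
    "Detective.report() prints up to ten violations to the console.",
  ]);
  // similarity() is a dot product, i.e. cosine similarity for normalized vectors
  console.log("similarity:", embedder.similarity(a, b));
}

demo().catch(console.error);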
@@ -0,0 +1,103 @@
+ import {
+   VectorStoreIndex,
+   storageContextFromDefaults,
+   SimpleDirectoryReader,
+   serviceContextFromDefaults
+ } from "llamaindex";
+ import { GeminiEmbedding } from "./gemini-embedding";
+ import { MockLLM } from "./mock-llm";
+ import * as path from "path";
+ import * as fs from "fs";
+ import chalk from "chalk";
+ import dotenv from "dotenv";
+
+ // Load .env.local first, then .env
+ dotenv.config({ path: ".env.local" });
+ dotenv.config();
+
+ // Configuration
+ const STORAGE_DIR = path.join(process.cwd(), ".jstar", "storage");
+ const SOURCE_DIR = path.join(process.cwd(), "scripts"); // Changed from src/ to scripts/
+
+ // Ensure an OpenAI key exists (the LlamaIndex default) or fall back if something else is configured.
+ // (We use Gemini embeddings now, so this check is less critical, but useful if we revert.)
+ // if (!process.env.OPENAI_API_KEY) {
+ //   console.warn(chalk.yellow("⚠️ OPENAI_API_KEY not found. Embeddings may fail unless you have configured a local model."));
+ // }
+
+ async function main() {
+   const args = process.argv.slice(2);
+   const isWatch = args.includes("--watch");
+
+   console.log(chalk.blue("🧠 J-Star Indexer: Scanning codebase..."));
+
+   // 1. Load documents (your code)
+   if (!fs.existsSync(SOURCE_DIR)) {
+     console.error(chalk.red(`❌ Source directory not found: ${SOURCE_DIR}`));
+     process.exit(1);
+   }
+
+   const reader = new SimpleDirectoryReader();
+   const documents = await reader.loadData({ directoryPath: SOURCE_DIR });
+
+   console.log(chalk.yellow(`📄 Found ${documents.length} files to index.`));
+
+   const isInit = args.includes("--init");
+
+   try {
+     // 2. Set up the service context with Google Gemini embeddings,
+     //    using 'text-embedding-004', a strong, recent model
+     const embedModel = new GeminiEmbedding();
+     const llm = new MockLLM();
+     const serviceContext = serviceContextFromDefaults({
+       embedModel,
+       llm: llm as any
+     });
+
+     // 3. Create the storage context
+     let storageContext;
+     if (isInit) {
+       console.log(chalk.blue("✨ Initializing fresh Local Brain..."));
+       storageContext = await storageContextFromDefaults({});
+     } else {
+       // Try to load the existing store
+       if (!fs.existsSync(STORAGE_DIR)) {
+         console.log(chalk.yellow("⚠️ Storage not found. Running fresh init..."));
+         storageContext = await storageContextFromDefaults({});
+       } else {
+         storageContext = await storageContextFromDefaults({
+           persistDir: STORAGE_DIR,
+         });
+       }
+     }
+
+     // 4. Generate the index
+     const index = await VectorStoreIndex.fromDocuments(documents, {
+       storageContext,
+       serviceContext,
+     });
+
+     // 5. Persist (save the Brain)
+     // Manual persistence for LlamaIndex TS compatibility
+     fs.mkdirSync(STORAGE_DIR, { recursive: true }); // make sure the target directory exists
+     const ctxToPersist: any = index.storageContext;
+     if (ctxToPersist.docStore) await ctxToPersist.docStore.persist(path.join(STORAGE_DIR, "doc_store.json"));
+     if (ctxToPersist.vectorStore) await ctxToPersist.vectorStore.persist(path.join(STORAGE_DIR, "vector_store.json"));
+     if (ctxToPersist.indexStore) await ctxToPersist.indexStore.persist(path.join(STORAGE_DIR, "index_store.json"));
+     if (ctxToPersist.propStore) await ctxToPersist.propStore.persist(path.join(STORAGE_DIR, "property_store.json"));
+
+     console.log(chalk.green("✅ Indexing Complete. Brain is updated."));
+
+     if (isWatch) {
+       console.log(chalk.blue("👀 Watch mode enabled."));
+     }
+
+   } catch (e: any) {
+     console.error(chalk.red("❌ Indexing Failed:"), e.message);
+     if (e.message.includes("OpenAI")) {
+       console.log(chalk.yellow("👉 Tip: Make sure you have OPENAI_API_KEY in your .env file (or configure a local embedding model)."));
+     }
+     process.exit(1);
+   }
+ }
+
+ main().catch(console.error);
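
The indexer above is hard-wired to GeminiEmbedding, but the package also ships a local embedder (next hunk). A hedged sketch of how the service context could point at it instead, assuming the local class lives at "./local-embedding" (the diff does not show file names):

import { serviceContextFromDefaults } from "llamaindex";
import { LocalEmbedding } from "./local-embedding"; // hypothetical path; the diff omits file names
import { MockLLM } from "./mock-llm";

// Same shape as the serviceContext built in main(), with the Google-backed
// embedder swapped for the on-device one; no GOOGLE_API_KEY required.
const localServiceContext = serviceContextFromDefaults({
  embedModel: new LocalEmbedding() as any,
  llm: new MockLLM() as any,
});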
@@ -0,0 +1,55 @@
+ import { pipeline, env } from "@xenova/transformers";
+
+ // Skip local model checks if needed, or let it download
+ env.allowLocalModels = false;
+ env.useBrowserCache = false;
+
+ export class LocalEmbedding {
+   private pipe: any;
+   private modelName: string;
+
+   constructor() {
+     this.modelName = "Xenova/bge-small-en-v1.5";
+   }
+
+   async init() {
+     if (!this.pipe) {
+       console.log("📥 Loading local embedding model (Xenova/bge-small-en-v1.5)...");
+       this.pipe = await pipeline("feature-extraction", this.modelName);
+     }
+   }
+
+   async getTextEmbedding(text: string): Promise<number[]> {
+     await this.init();
+     const result = await this.pipe(text, { pooling: "mean", normalize: true });
+     return Array.from(result.data);
+   }
+
+   async getQueryEmbedding(query: string): Promise<number[]> {
+     return this.getTextEmbedding(query);
+   }
+
+   // Batch method (required by LlamaIndex)
+   async getTextEmbeddings(texts: string[]): Promise<number[][]> {
+     await this.init();
+     const embeddings: number[][] = [];
+     for (const text of texts) {
+       embeddings.push(await this.getTextEmbedding(text));
+     }
+     return embeddings;
+   }
+
+   // Stubs for BaseEmbedding interface compliance
+   embedBatchSize = 10;
+
+   similarity(embedding1: number[], embedding2: number[]): number {
+     // Simple dot product for normalized vectors
+     return embedding1.reduce((sum, val, i) => sum + val * embedding2[i], 0);
+   }
+
+   async transform(nodes: any[], _options?: any): Promise<any[]> {
+     for (const node of nodes) {
+       node.embedding = await this.getTextEmbedding(node.getContent("text"));
+     }
+     return nodes;
+   }
+
+   async getTextEmbeddingsBatch(texts: string[]): Promise<number[][]> { return this.getTextEmbeddings(texts); }
+ }
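
A minimal usage sketch for the local embedder, not part of the published package; the Xenova/bge-small-en-v1.5 weights are downloaded by @xenova/transformers on first use, and the import path "./local-embedding" is an assumption:

import { LocalEmbedding } from "./local-embedding"; // hypothetical path

async function demo() {
  const embedder = new LocalEmbedding();
  const [a, b] = await embedder.getTextEmbeddings([
    "index the scripts directory",
    "scan source files for hardcoded secrets",
  ]);
  // Vectors are mean-pooled and normalized, so the dot product in
  // similarity() is the cosine similarity.
  console.log(embedder.similarity(a, b));
}

demo().catch(console.error);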
@@ -0,0 +1,18 @@
+ export class MockLLM {
+   hasStreaming = false;
+   metadata = {
+     model: "mock",
+     temperature: 0,
+     topP: 1,
+     contextWindow: 1024,
+     tokenizer: undefined,
+   };
+
+   async chat(messages: any[], parentEvent?: any): Promise<any> {
+     return { message: { content: "Mock response" } };
+   }
+
+   async complete(prompt: string, parentEvent?: any): Promise<any> {
+     return { text: "Mock response" };
+   }
+ }