create-rag-app 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -0
- package/bin/cli.js +8 -0
- package/package.json +35 -0
- package/src/index.js +65 -0
- package/src/prompts.js +66 -0
- package/src/utils.js +62 -0
- package/templates/nextjs-rag/.env.example +13 -0
- package/templates/nextjs-rag/documents/readme.txt +2 -0
- package/templates/nextjs-rag/jsconfig.json +15 -0
- package/templates/nextjs-rag/next.config.mjs +7 -0
- package/templates/nextjs-rag/package.json +33 -0
- package/templates/nextjs-rag/postcss.config.js +6 -0
- package/templates/nextjs-rag/scripts/ingest.js +25 -0
- package/templates/nextjs-rag/src/app/api/chat/route.js +52 -0
- package/templates/nextjs-rag/src/app/api/ingest/route.js +51 -0
- package/templates/nextjs-rag/src/app/globals.css +44 -0
- package/templates/nextjs-rag/src/app/layout.js +17 -0
- package/templates/nextjs-rag/src/app/page.js +352 -0
- package/templates/nextjs-rag/src/lib/ingest.js +123 -0
- package/templates/nextjs-rag/src/lib/llm.js +54 -0
- package/templates/nextjs-rag/tailwind.config.js +18 -0
package/README.md
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Create RAG App
|
|
2
|
+
|
|
3
|
+
Scaffold a production-ready RAG (Retrieval Augmented Generation) application in seconds.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🚀 **Next.js & React**: Modern frontend with Tailwind setup.
|
|
8
|
+
- 🦜 **LangChain**: Best-in-class RAG pipeline.
|
|
9
|
+
- 🗄️ **ChromaDB**: Built-in local vector database.
|
|
10
|
+
- 🤖 **Multi-LLM Support**: Switch between OpenAI, Groq, and Ollama.
|
|
11
|
+
- 📄 **Document Ingestion**: Simple script to index PDFs and Text files.
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# Run directly with npx
|
|
17
|
+
npx create-rag-app my-ai-project
|
|
18
|
+
|
|
19
|
+
# Or install globally
|
|
20
|
+
npm install -g create-rag-app
|
|
21
|
+
create-rag-app my-ai-project
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Resulting Structure
|
|
25
|
+
|
|
26
|
+
The created app will have:
|
|
27
|
+
|
|
28
|
+
- `src/app`: User Interface (Chat)
|
|
29
|
+
- `src/lib`: RAG Utilities
|
|
30
|
+
- `scripts/ingest.js`: Document processor
|
|
31
|
+
- `documents/`: Folder to drop your knowledge base
|
|
32
|
+
|
|
33
|
+
## Development
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# Clone the repo
|
|
37
|
+
git clone https://github.com/your-username/create-rag-app.git
|
|
38
|
+
|
|
39
|
+
# Install dependencies
|
|
40
|
+
npm install
|
|
41
|
+
|
|
42
|
+
# Test locally
|
|
43
|
+
npm link
|
|
44
|
+
create-rag-app test-project
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## License
|
|
48
|
+
|
|
49
|
+
MIT
|
package/bin/cli.js
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "create-rag-app",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Scaffold a new RAG application",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"create-rag-app": "./bin/cli.js"
|
|
8
|
+
},
|
|
9
|
+
"main": "src/index.js",
|
|
10
|
+
"files": [
|
|
11
|
+
"bin",
|
|
12
|
+
"src",
|
|
13
|
+
"templates"
|
|
14
|
+
],
|
|
15
|
+
"scripts": {
|
|
16
|
+
"start": "node bin/cli.js"
|
|
17
|
+
},
|
|
18
|
+
"keywords": [
|
|
19
|
+
"rag",
|
|
20
|
+
"cli",
|
|
21
|
+
"scaffold",
|
|
22
|
+
"create-rag-app",
|
|
23
|
+
"langchain",
|
|
24
|
+
"chroma",
|
|
25
|
+
"ai"
|
|
26
|
+
],
|
|
27
|
+
"license": "MIT",
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"chalk": "^5.6.2",
|
|
30
|
+
"commander": "^14.0.3",
|
|
31
|
+
"fs-extra": "^11.3.3",
|
|
32
|
+
"inquirer": "^13.2.2",
|
|
33
|
+
"ora": "^9.3.0"
|
|
34
|
+
}
|
|
35
|
+
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import fs from "fs-extra";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import { Command } from "commander";
|
|
5
|
+
import { getOptions } from "./prompts.js";
|
|
6
|
+
import { checkDir, copyTemplate, installDependencies } from "./utils.js";
|
|
7
|
+
|
|
8
|
+
const packageJson = JSON.parse(
|
|
9
|
+
await fs.readFile(new URL("../package.json", import.meta.url))
|
|
10
|
+
);
|
|
11
|
+
|
|
12
|
+
/**
 * CLI entry point: parses the optional positional argument, gathers
 * scaffolding options interactively, copies the template, seeds the .env
 * file, installs dependencies, and prints next steps.
 */
export async function main() {
  console.log(chalk.bold.cyan("\n🚀 Welcome to Create RAG App!\n"));

  // Optional positional argument: the target directory name from the CLI.
  let cliProjectName;
  new Command(packageJson.name)
    .version(packageJson.version)
    .arguments("[project-directory]")
    .usage(`${chalk.green("[project-directory]")} [options]`)
    .action((name) => {
      cliProjectName = name;
    })
    .parse(process.argv);

  const options = await getOptions(cliProjectName);
  const projectPath = path.resolve(process.cwd(), options.projectName);

  console.log(`\nCreating a new RAG app in: ${chalk.green(projectPath)}\n`);

  // Abort immediately if the target directory already exists.
  checkDir(projectPath);

  // Materialize the chosen template; options.template is the folder name
  // under templates/ (e.g. "nextjs-rag"), as declared in prompts.js.
  await copyTemplate(options.template, projectPath);

  // Seed .env from the template's .env.example, then append the selected
  // provider/vector-DB configuration.
  const envExamplePath = path.join(projectPath, ".env.example");
  const envPath = path.join(projectPath, ".env");

  if (await fs.pathExists(envExamplePath)) {
    await fs.copy(envExamplePath, envPath);
    console.log(chalk.green("✔ Created .env file from example"));

    const baseEnv = await fs.readFile(envPath, "utf-8");
    const generated = [
      "",
      "# Auto-generated config",
      `LLM_PROVIDER=${options.provider}`,
      `VECTOR_DB=${options.vectorDb}`,
      "",
    ].join("\n");
    await fs.writeFile(envPath, baseEnv + generated);
  }

  // Install dependencies automatically in the new project directory.
  await installDependencies(projectPath);

  // Final success message with follow-up commands.
  console.log(chalk.bold.green("\n🎉 Success! Your RAG app is ready."));
  console.log(chalk.yellow("\nNext steps:"));
  console.log(chalk.cyan(`  cd ${options.projectName}`));
  console.log(chalk.cyan(`  npm run dev`));
  console.log("");
}
|
package/src/prompts.js
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
|
|
2
|
+
import inquirer from "inquirer";
|
|
3
|
+
|
|
4
|
+
/**
 * Collects all scaffolding options from the user.
 *
 * Prompts for any value not supplied up front: project name (skipped when a
 * valid name was already passed on the CLI), template, LLM provider, and
 * vector database.
 *
 * @param {string|undefined} initialProjectName - Project name from the CLI args, if any.
 * @returns {Promise<{projectName: string, template: string, provider: string, vectorDb: string}>}
 */
export async function getOptions(initialProjectName) {
  // Single source of truth for name validation, used for both the CLI
  // argument and the interactive prompt (previously duplicated).
  const validNamePattern = /^[a-z0-9\-_.]+$/;

  let questions = [
    {
      type: "input",
      name: "projectName",
      message: "What is your project named?",
      default: "my-rag-app",
      validate: (input) => {
        if (validNamePattern.test(input)) return true;
        // Message now matches the regex: lowercase only, and dots allowed.
        return "Project name may only include lowercase letters, numbers, dashes, underscores, and dots.";
      },
    },
    {
      type: "list",
      name: "template",
      message: "Which template would you like to use?",
      choices: [
        { name: "Next.js + LangChain + ChromaDB (Recommended)", value: "nextjs-rag" },
        // NOTE(review): future scope — no "express-rag" folder ships in
        // templates/ yet, so selecting this fails in copyTemplate.
        { name: "Express API + PDF Processor (Backend Only)", value: "express-rag" },
      ],
      default: "nextjs-rag",
    },
    {
      type: "list",
      name: "provider",
      message: "Which LLM provider do you want configured?",
      choices: [
        { name: "OpenAI (GPT-4o, GPT-3.5)", value: "openai" },
        { name: "Groq (Llama 3, Mixtral - Fast!)", value: "groq" },
        { name: "Ollama (Local Models)", value: "ollama" },
        { name: "Gemini (Google DeepMind)", value: "gemini" },
      ],
      default: "openai",
    },
    {
      type: "list",
      name: "vectorDb",
      message: "Which Vector Database do you want to use?",
      choices: [
        { name: "ChromaDB (Local)", value: "chroma" },
        { name: "Supabase pgvector (Cloud)", value: "supabase" },
      ],
      default: "chroma",
    },
  ];

  if (initialProjectName) {
    if (validNamePattern.test(initialProjectName)) {
      // Valid project name provided via CLI, skip the prompt
      questions = questions.filter((q) => q.name !== "projectName");
    } else {
      console.log("⚠ Invalid project name provided via CLI.");
      initialProjectName = null; // Force prompt
    }
  }

  const answers = await inquirer.prompt(questions);

  return {
    projectName: initialProjectName || answers.projectName,
    ...answers,
  };
}
|
package/src/utils.js
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
|
|
2
|
+
import fs from "fs-extra";
|
|
3
|
+
import path from "path";
|
|
4
|
+
import chalk from "chalk";
|
|
5
|
+
import { exec } from "child_process";
|
|
6
|
+
import util from "util";
|
|
7
|
+
import ora from "ora";
|
|
8
|
+
import { fileURLToPath } from 'url';
|
|
9
|
+
|
|
10
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
11
|
+
const __dirname = path.dirname(__filename);
|
|
12
|
+
const execAsync = util.promisify(exec);
|
|
13
|
+
|
|
14
|
+
/**
 * Guard against clobbering an existing directory: prints an error and
 * terminates the process if targetPath already exists; otherwise a no-op.
 *
 * @param {string} targetPath - Absolute path of the directory to create.
 */
export function checkDir(targetPath) {
  const alreadyExists = fs.existsSync(targetPath);
  if (!alreadyExists) return;

  const dirName = path.basename(targetPath);
  console.error(chalk.red(`\n❌ Error: Directory ${dirName} already exists.`));
  process.exit(1);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Copies the chosen template directory into the target project path,
 * with spinner feedback. Exits the process if the template folder is
 * missing or the copy fails.
 *
 * @param {string} templateName - Folder name under ../templates (e.g. "nextjs-rag").
 * @param {string} targetPath - Absolute destination path for the new project.
 */
export async function copyTemplate(templateName, targetPath) {
  const spinner = ora("Copying project files...").start();

  // Templates ship with the package one level above src/.
  const sourceDir = path.resolve(__dirname, "../templates", templateName);

  const templateExists = fs.existsSync(sourceDir);
  if (!templateExists) {
    spinner.fail(chalk.red(`Template ${templateName} not found at ${sourceDir}`));
    process.exit(1);
  }

  try {
    await fs.copy(sourceDir, targetPath);
    spinner.succeed(chalk.green("Project files created successfully."));
  } catch (copyError) {
    spinner.fail(chalk.red("Failed to copy files."));
    console.error(copyError);
    process.exit(1);
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Runs `npm install` in the freshly-created project directory, retrying
 * once with --legacy-peer-deps (peer-dependency conflicts are common in
 * AI/LangChain stacks). On total failure it reports the underlying npm
 * error and prints manual-install instructions; it does not throw, so the
 * caller continues (matching existing behavior).
 *
 * @param {string} targetPath - Absolute path of the project to install into.
 */
export async function installDependencies(targetPath) {
  const spinner = ora("Installing dependencies... This might take a moment.").start();

  try {
    // Run npm install in the new directory
    await execAsync("npm install", { cwd: targetPath });
    spinner.succeed(chalk.green("Dependencies installed via npm."));
  } catch (err) {
    // Fallback for peer dependency conflicts common in AI libraries
    try {
      spinner.text = "Retrying with --legacy-peer-deps...";
      await execAsync("npm install --legacy-peer-deps", { cwd: targetPath });
      spinner.succeed(chalk.green("Dependencies installed with legacy peer deps."));
    } catch (retryErr) {
      spinner.fail(chalk.red("Failed to install dependencies."));
      // Fix: surface the underlying npm error instead of swallowing it
      // silently — the user previously got no hint of what went wrong.
      console.error(chalk.red(retryErr.message));
      console.log(chalk.yellow("You can try installing manually:"));
      console.log(chalk.cyan(`  cd ${path.basename(targetPath)}`));
      console.log(chalk.cyan("  npm install --legacy-peer-deps"));
    }
  }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
|
|
2
|
+
# AI Provider Keys
|
|
3
|
+
OPENAI_API_KEY=sk-your-key-here
|
|
4
|
+
GROQ_API_KEY=your-groq-key
|
|
5
|
+
GOOGLE_API_KEY=your-gemini-key-here
|
|
6
|
+
|
|
7
|
+
# Vector DB
|
|
8
|
+
# Default to local persistent client
|
|
9
|
+
CHROMA_URL=http://localhost:8000
|
|
10
|
+
COLLECTION_NAME=rag-docs
|
|
11
|
+
|
|
12
|
+
# App Config
|
|
13
|
+
NEXT_PUBLIC_APP_NAME="My RAG App"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"baseUrl": ".",
|
|
4
|
+
"paths": {
|
|
5
|
+
"@/*": ["./src/*"]
|
|
6
|
+
},
|
|
7
|
+
// Useful for Next.js strict mode and modern JS features
|
|
8
|
+
"target": "ESNext",
|
|
9
|
+
"module": "ESNext",
|
|
10
|
+
"jsx": "preserve",
|
|
11
|
+
"strict": false
|
|
12
|
+
},
|
|
13
|
+
"include": ["next-env.d.ts", "**/*.js", "**/*.jsx", ".next/types/**/*.ts"],
|
|
14
|
+
"exclude": ["node_modules"]
|
|
15
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "my-rag-app",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"scripts": {
|
|
6
|
+
"dev": "next dev",
|
|
7
|
+
"build": "next build",
|
|
8
|
+
"start": "next start",
|
|
9
|
+
"lint": "next lint",
|
|
10
|
+
"ingest": "node scripts/ingest.js"
|
|
11
|
+
},
|
|
12
|
+
"dependencies": {
|
|
13
|
+
"next": "15.0.3",
|
|
14
|
+
"react": "19.0.0-rc-66855b96-20241106",
|
|
15
|
+
"react-dom": "19.0.0-rc-66855b96-20241106",
|
|
16
|
+
"langchain": "^0.3.5",
|
|
17
|
+
"@langchain/openai": "^0.3.11",
|
|
18
|
+
"@langchain/ollama": "^0.1.2",
|
|
19
|
+
"@langchain/google-genai": "^0.1.3",
|
|
20
|
+
"@langchain/community": "^0.3.11",
|
|
21
|
+
"@langchain/core": "^0.3.15",
|
|
22
|
+
"@langchain/textsplitters": "^0.1.0",
|
|
23
|
+
"chromadb": "^1.9.4",
|
|
24
|
+
"pdf-parse": "1.1.1",
|
|
25
|
+
"openai": "^4.71.1",
|
|
26
|
+
"dotenv": "^16.4.5"
|
|
27
|
+
},
|
|
28
|
+
"devDependencies": {
|
|
29
|
+
"postcss": "^8.4.47",
|
|
30
|
+
"tailwindcss": "^3.4.14",
|
|
31
|
+
"autoprefixer": "^10.4.20"
|
|
32
|
+
}
|
|
33
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import "dotenv/config";
|
|
2
|
+
import { ingestDocuments } from "../src/lib/ingest.js";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Script entry point for manual document ingestion.
|
|
6
|
+
* Run this via `npm run ingest` to index all files in the ./documents folder.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* npm run ingest
|
|
10
|
+
*
|
|
11
|
+
* This script ensures that all documents in the local directory are
|
|
12
|
+
* processed, split, and embedded into the ChromaDB vector store.
|
|
13
|
+
*/
|
|
14
|
+
// Entry point: ingest everything under ./documents, exiting non-zero on any
// failure so CI / shell callers can detect a broken index build.
async function run() {
  try {
    console.log("🚀 Starting document ingestion process...");
    await ingestDocuments("./documents");
  } catch (error) {
    console.error("❌ Document ingestion failed:");
    console.error(error);
    process.exit(1);
  }
}

run();
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { getLLM, getEmbeddings } from "@/lib/llm";
|
|
2
|
+
import { Chroma } from "@langchain/community/vectorstores/chroma";
|
|
3
|
+
import { PromptTemplate } from "@langchain/core/prompts";
|
|
4
|
+
|
|
5
|
+
/**
 * Chat endpoint: answers a user question using retrieval-augmented
 * generation over the existing Chroma collection.
 *
 * Request body: { message: string }
 * Response: { response: string } on success, { error } with 400/500 otherwise.
 */
export async function POST(req) {
  try {
    const body = await req.json();
    const message = body?.message;

    // Reject missing or non-string messages with a 400.
    if (!message || typeof message !== "string") {
      return Response.json(
        { error: "Invalid request. 'message' must be a string." },
        { status: 400 }
      );
    }

    // Connect to the already-populated collection; throws if it does not
    // exist yet (i.e. nothing has been ingested).
    const vectorStore = await Chroma.fromExistingCollection(getEmbeddings(), {
      collectionName: process.env.COLLECTION_NAME || "rag-docs",
      url: process.env.CHROMA_URL || "http://localhost:8000"
    });

    const retriever = vectorStore.asRetriever();
    const llm = getLLM();

    // 🔥 manually retrieve documents
    const docs = await retriever.invoke(message);

    // Concatenate the retrieved chunks into a single context string.
    const context = docs.map((doc) => doc.pageContent).join("\n\n");

    const prompt = PromptTemplate.fromTemplate(`
      Answer the question based only on the following context:

      {context}

      Question: {question}
    `);

    const formattedPrompt = await prompt.format({
      context,
      question: message
    });

    // Single non-streaming completion; result.content holds the answer text.
    const result = await llm.invoke(formattedPrompt);

    return Response.json({
      response: result.content
    });
  } catch (error) {
    console.error(error);
    return Response.json({ error: error.message }, { status: 500 });
  }
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { NextResponse } from "next/server";
|
|
2
|
+
import { ingestDocuments } from "@/lib/ingest";
|
|
3
|
+
import fs from "fs/promises";
|
|
4
|
+
import path from "path";
|
|
5
|
+
|
|
6
|
+
/**
 * Handle document uploads and immediate ingestion into ChromaDB.
 * This route accepts a file upload, saves it locally, and triggers the
 * vector store ingestion for that file.
 *
 * @param {Request} req
 * @returns {NextResponse}
 */
export async function POST(req) {
  try {
    const formData = await req.formData();
    const file = formData.get("file");

    // formData.get() can also return a plain string field; require a real
    // file (anything exposing arrayBuffer()).
    if (!file || typeof file.arrayBuffer !== "function") {
      return NextResponse.json({ error: "No file uploaded" }, { status: 400 });
    }

    // SECURITY FIX: strip any directory components from the client-supplied
    // filename to prevent path traversal (e.g. "../../etc/cron.d/x" would
    // previously be written outside the documents folder).
    const safeName = path.basename(file.name);

    // Validate file type (basic)
    const allowedExtensions = [".pdf", ".txt"];
    const ext = path.extname(safeName).toLowerCase();
    if (!allowedExtensions.includes(ext)) {
      return NextResponse.json({ error: "Unsupported file type. Only PDF and TXT are allowed." }, { status: 400 });
    }

    const buffer = Buffer.from(await file.arrayBuffer());
    const uploadDir = path.resolve(process.cwd(), "documents");

    // Ensure the upload directory exists; { recursive: true } is a no-op
    // when it already does, so no try/catch is needed.
    await fs.mkdir(uploadDir, { recursive: true });

    const filePath = path.join(uploadDir, safeName);
    await fs.writeFile(filePath, buffer);

    // Run ingestion for this specific file
    console.log(`Starting ingestion for: ${safeName}`);
    await ingestDocuments(`documents/${safeName}`, true);

    return NextResponse.json({ success: true, message: "File uploaded and ingested successfully." });
  } catch (error) {
    console.error("Upload/Ingest Error:", error);
    return NextResponse.json({ error: error.message }, { status: 500 });
  }
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
@tailwind base;
|
|
2
|
+
@tailwind components;
|
|
3
|
+
@tailwind utilities;
|
|
4
|
+
|
|
5
|
+
@layer base {
|
|
6
|
+
body {
|
|
7
|
+
@apply bg-zinc-950 text-zinc-100 antialiased;
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
@layer utilities {
|
|
12
|
+
.scrollbar-thin::-webkit-scrollbar {
|
|
13
|
+
width: 6px;
|
|
14
|
+
height: 6px;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
.scrollbar-thin::-webkit-scrollbar-track {
|
|
18
|
+
background: transparent;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
.scrollbar-thin::-webkit-scrollbar-thumb {
|
|
22
|
+
@apply bg-zinc-800 rounded-full;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
.scrollbar-thin::-webkit-scrollbar-thumb:hover {
|
|
26
|
+
@apply bg-zinc-700;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
@keyframes fadeIn {
|
|
31
|
+
from {
|
|
32
|
+
opacity: 0;
|
|
33
|
+
transform: translateY(10px);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
to {
|
|
37
|
+
opacity: 1;
|
|
38
|
+
transform: translateY(0);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
.animate-fade-in {
|
|
43
|
+
animation: fadeIn 0.3s ease-out forwards;
|
|
44
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { Inter } from "next/font/google";
|
|
2
|
+
import "./globals.css";
|
|
3
|
+
|
|
4
|
+
// Load the Inter font (Latin subset); next/font self-hosts it at build time.
const inter = Inter({ subsets: ["latin"] });

// Default <head> metadata applied to every page in the app.
export const metadata = {
  title: "Create RAG App",
  description: "Generated by create-rag-app",
};

/**
 * Root layout: wraps every page in the shared <html>/<body> shell and
 * applies the Inter font class to the body.
 */
export default function RootLayout({ children }) {
  return (
    <html lang="en">
      <body className={inter.className}>{children}</body>
    </html>
  );
}
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
"use client";
|
|
2
|
+
import React, { useState, useRef, useEffect } from "react";
|
|
3
|
+
|
|
4
|
+
/**
 * Main Chat Interface Component.
 * Features:
 * - Real-time chat with RAG backend (/api/chat)
 * - Document upload and ingestion (/api/ingest)
 * - Modern, responsive UI with Tailwind CSS
 */
export default function Home() {
  // Chat transcript; each entry is { role: "user" | "bot", content: string }.
  const [messages, setMessages] = useState([
    {
      role: "bot",
      content: "Hello! I'm your AI assistant. How can I help you with your documents today?",
    },
  ]);
  // Current text in the input box.
  const [query, setQuery] = useState("");
  // True while awaiting a /api/chat response.
  const [loading, setLoading] = useState(false);
  // True while a document upload/ingestion is in flight.
  const [uploading, setUploading] = useState(false);
  const [isUploadModalOpen, setIsUploadModalOpen] = useState(false);

  // Sentinel element at the end of the message list, used for auto-scroll.
  const messagesEndRef = useRef(null);
  // Hidden file input, triggered from the drop-zone click handler.
  const fileInputRef = useRef(null);

  /**
   * Auto-scroll to the bottom of the chat when new messages arrive.
   */
  const scrollToBottom = () => {
    messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
  };

  useEffect(() => {
    scrollToBottom();
  }, [messages]);

  /**
   * Handle sending a query to the chat API.
   */
  const handleSubmit = async (e) => {
    e.preventDefault();
    if (!query.trim()) return;

    const currentQuery = query;
    setQuery("");
    setLoading(true);

    // Optimistically add user message
    setMessages((prev) => [
      ...prev,
      { role: "user", content: currentQuery },
    ]);

    try {
      const res = await fetch("/api/chat", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ message: currentQuery }),
      });

      // NOTE(review): assumes the response body is { response: string };
      // a non-2xx JSON body would render `undefined` here — confirm.
      const data = await res.json();
      setMessages((prev) => [
        ...prev,
        { role: "bot", content: data.response },
      ]);
    } catch (err) {
      console.error("Chat Error:", err);
      setMessages((prev) => [
        ...prev,
        { role: "bot", content: "Sorry, I encountered an error processing your request." },
      ]);
    } finally {
      setLoading(false);
    }
  };

  /**
   * Handle file selection and upload to the ingestion API.
   */
  const handleFileUpload = async (e) => {
    const file = e.target.files?.[0];
    if (!file) return;

    // Validate file size (e.g., max 5MB)
    if (file.size > 5 * 1024 * 1024) {
      alert("File size exceeds 5MB limit.");
      return;
    }

    setUploading(true);
    const formData = new FormData();
    formData.append("file", file);

    try {
      const res = await fetch("/api/ingest", {
        method: "POST",
        body: formData,
      });
      const data = await res.json();

      if (data.success) {
        // Success feedback
        setIsUploadModalOpen(false);
        setMessages((prev) => [
          ...prev,
          { role: "bot", content: `✅ I have processed the document: **${file.name}**. You can now ask questions about it.` },
        ]);
      } else {
        alert("Error uploading file: " + (data.error || "Unknown error"));
      }
    } catch (err) {
      console.error("Upload Error:", err);
      alert("Failed to upload file. Please try again.");
    } finally {
      setUploading(false);
      // Reset input so same file can be selected again if needed
      if (fileInputRef.current) fileInputRef.current.value = "";
    }
  };

  return (
    <div className="flex flex-col h-screen bg-zinc-950 text-zinc-100 font-sans selection:bg-indigo-500/30">
      {/* Header */}
      <header className="fixed top-0 inset-x-0 z-50 bg-zinc-950/80 backdrop-blur-md border-b border-zinc-900 h-16 flex items-center justify-between px-4">
        <div className="flex items-center gap-2">
          <div className="w-8 h-8 rounded-lg bg-indigo-600 flex items-center justify-center shadow-lg shadow-indigo-500/20">
            <svg
              className="w-5 h-5 text-white"
              fill="none"
              stroke="currentColor"
              viewBox="0 0 24 24"
            >
              <path
                strokeLinecap="round"
                strokeLinejoin="round"
                strokeWidth={2}
                d="M13 10V3L4 14h7v7l9-11h-7z"
              />
            </svg>
          </div>
          <h1 className="text-xl font-bold bg-gradient-to-r from-white to-zinc-400 bg-clip-text text-transparent hidden sm:block">
            RAG Assistant
          </h1>
        </div>

        <button
          onClick={() => setIsUploadModalOpen(true)}
          className="flex items-center gap-2 px-3 py-1.5 text-sm font-medium text-zinc-300 bg-zinc-900 border border-zinc-800 rounded-lg hover:bg-zinc-800 hover:text-white transition-all focus:ring-2 focus:ring-indigo-500/50 outline-none"
          aria-label="Upload Document"
        >
          <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-8l-4-4m0 0L8 8m4-4v12" />
          </svg>
          Upload Doc
        </button>
      </header>

      {/* Main Chat Area */}
      <main className="flex-1 overflow-y-auto pt-24 pb-32 px-4 scroll-smooth scrollbar-thin scrollbar-thumb-zinc-800 scrollbar-track-transparent">
        <div className="max-w-3xl mx-auto space-y-8">
          {messages.map((msg, idx) => (
            <div
              key={idx}
              className={`flex items-start gap-4 animate-fade-in ${msg.role === "user" ? "flex-row-reverse" : ""
                }`}
            >
              {/* Avatar */}
              <div
                className={`w-8 h-8 rounded-full flex items-center justify-center flex-shrink-0 ${msg.role === "user"
                  ? "bg-indigo-600 text-white shadow-lg shadow-indigo-500/20"
                  : "bg-zinc-800 text-zinc-400 border border-zinc-700"
                  }`}
              >
                {msg.role === "user" ? (
                  <svg
                    className="w-5 h-5"
                    fill="none"
                    stroke="currentColor"
                    viewBox="0 0 24 24"
                  >
                    <path
                      strokeLinecap="round"
                      strokeLinejoin="round"
                      strokeWidth={2}
                      d="M16 7a4 4 0 11-8 0 4 4 0 018 0zM12 14a7 7 0 00-7 7h14a7 7 0 00-7-7z"
                    />
                  </svg>
                ) : (
                  <svg
                    className="w-5 h-5"
                    fill="none"
                    stroke="currentColor"
                    viewBox="0 0 24 24"
                  >
                    <path
                      strokeLinecap="round"
                      strokeLinejoin="round"
                      strokeWidth={2}
                      d="M9.75 17L9 20l-1 1h8l-1-1-.75-3M3 13h18M5 17h14a2 2 0 002-2V5a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z"
                    />
                  </svg>
                )}
              </div>

              {/* Message Bubble */}
              <div
                className={`group relative max-w-[85%] px-5 py-3 rounded-2xl shadow-sm leading-relaxed text-sm sm:text-base ${msg.role === "user"
                  ? "bg-indigo-600 text-white rounded-tr-none"
                  : "bg-zinc-900 border border-zinc-800 text-zinc-200 rounded-tl-none"
                  }`}
              >
                <div className="bg-transparent whitespace-pre-wrap break-words">
                  {msg.content}
                </div>
              </div>
            </div>
          ))}

          {/* Loading Indicator */}
          {loading && (
            <div className="flex items-start gap-4 animate-fade-in">
              <div className="w-8 h-8 rounded-full bg-zinc-800 text-zinc-400 flex items-center justify-center flex-shrink-0 border border-zinc-700">
                <svg
                  className="w-5 h-5 animate-spin"
                  fill="none"
                  stroke="currentColor"
                  viewBox="0 0 24 24"
                >
                  <circle
                    className="opacity-25"
                    cx="12"
                    cy="12"
                    r="10"
                    stroke="currentColor"
                    strokeWidth="4"
                  ></circle>
                  <path
                    className="opacity-75"
                    fill="currentColor"
                    d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
                  ></path>
                </svg>
              </div>
              <div className="bg-zinc-900 border border-zinc-800 px-5 py-3 rounded-2xl rounded-tl-none text-zinc-400 text-sm">
                Reasoning...
              </div>
            </div>
          )}
          {/* Scroll anchor for scrollToBottom(). */}
          <div ref={messagesEndRef} />
        </div>
      </main>

      {/* Input Area */}
      <div className="fixed bottom-0 inset-x-0 bg-gradient-to-t from-zinc-950 pt-20 pb-8 z-40 pointer-events-none">
        <div className="max-w-3xl mx-auto px-4 pointer-events-auto">
          <form
            onSubmit={handleSubmit}
            className="relative flex items-center gap-2 bg-zinc-900/90 backdrop-blur-xl border border-zinc-800 p-2 rounded-2xl shadow-2xl shadow-black/50 transition-all focus-within:ring-2 focus-within:ring-indigo-500/20 focus-within:border-indigo-500/50"
            autoComplete="off"
          >
            <input
              type="text"
              name="message"
              value={query}
              onChange={(e) => setQuery(e.target.value)}
              placeholder="Ask anything about your documents..."
              className="flex-1 bg-transparent text-zinc-100 placeholder-zinc-500 px-4 py-3 outline-none min-w-0"
            />
            <button
              type="submit"
              disabled={loading || !query.trim()}
              className="p-3 mr-1 rounded-xl bg-indigo-600 text-white hover:bg-indigo-500 disabled:opacity-50 disabled:hover:bg-indigo-600 transition-all duration-200 shadow-lg shadow-indigo-900/20 focus:ring-2 focus:ring-offset-2 focus:ring-indigo-600"
              aria-label="Send message"
            >
              <svg
                className="w-5 h-5"
                fill="none"
                stroke="currentColor"
                viewBox="0 0 24 24"
              >
                <path
                  strokeLinecap="round"
                  strokeLinejoin="round"
                  strokeWidth={2}
                  d="M5 12h14M12 5l7 7-7 7"
                />
              </svg>
            </button>
          </form>
          <div className="mt-3 text-center">
            <p className="text-xs text-zinc-600">
              AI can make mistakes. Please verify important information.
            </p>
          </div>
        </div>
      </div>

      {/* Upload Modal Overlay */}
      {isUploadModalOpen && (
        <div className="fixed inset-0 z-[60] flex items-center justify-center p-4 bg-black/60 backdrop-blur-sm animate-fade-in">
          <div className="bg-zinc-900 border border-zinc-800 rounded-2xl max-w-md w-full p-6 shadow-2xl relative">
            <div className="flex justify-between items-center mb-6">
              <h2 className="text-xl font-bold text-white">Upload Document</h2>
              <button
                onClick={() => setIsUploadModalOpen(false)}
                className="text-zinc-500 hover:text-white transition-colors"
                aria-label="Close modal"
              >
                <svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
                </svg>
              </button>
            </div>

            <div className="space-y-4">
              {/* Drag & Drop Zone */}
              <div
                className="border-2 border-dashed border-zinc-800 rounded-xl p-8 hover:bg-zinc-800/50 hover:border-zinc-600 transition-all text-center cursor-pointer group"
                onClick={() => fileInputRef.current?.click()}>
                <input
                  type="file"
                  ref={fileInputRef}
                  className="hidden"
                  onChange={handleFileUpload}
                  accept=".txt,.pdf"
                  disabled={uploading}
                />
                <div className="w-12 h-12 rounded-full bg-indigo-500/20 text-indigo-400 mx-auto flex items-center justify-center mb-3 group-hover:scale-110 transition-transform">
                  <svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-8l-4-4m0 0L8 8m4-4v12" />
                  </svg>
                </div>
                <p className="text-sm text-zinc-400 font-medium group-hover:text-zinc-200 transition-colors">Click to upload or drag and drop</p>
                <p className="text-xs text-zinc-600 mt-1">Supported formats: PDF, TXT (Max 5MB)</p>
              </div>

              {uploading && (
                <div className="flex items-center justify-center gap-3 text-sm text-zinc-400 animate-pulse">
                  <span className="relative flex h-3 w-3">
                    <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-indigo-400 opacity-75"></span>
                    <span className="relative inline-flex rounded-full h-3 w-3 bg-indigo-500"></span>
                  </span>
                  Processing document... This may take a moment.
                </div>
              )}
            </div>
          </div>
        </div>
      )}
    </div>
  );
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { DirectoryLoader } from "langchain/document_loaders/fs/directory";
|
|
2
|
+
import { TextLoader } from "langchain/document_loaders/fs/text";
|
|
3
|
+
import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf";
|
|
4
|
+
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
|
|
5
|
+
import { Chroma } from "@langchain/community/vectorstores/chroma";
|
|
6
|
+
import { GoogleGenerativeAIEmbeddings } from "@langchain/google-genai";
|
|
7
|
+
import { OpenAIEmbeddings } from "@langchain/openai";
|
|
8
|
+
import path from "path";
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Sanitizes metadata values to ensure they are compatible with vector database storage (e.g., ChromaDB).
|
|
12
|
+
* Vector stores often only support string, number, or boolean values for metadata.
|
|
13
|
+
*
|
|
14
|
+
* @param {Object} metadata - The original metadata object.
|
|
15
|
+
* @returns {Object} - The clean metadata object.
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Sanitizes metadata values so they are compatible with vector-store
 * metadata constraints (e.g. ChromaDB only accepts string, number,
 * boolean, or null values).
 *
 * Complex values (objects, arrays) are JSON-stringified. Values that
 * cannot be serialized (undefined, functions, symbols) are dropped
 * entirely — previously they were stored as a literal `undefined`,
 * which defeats the purpose of the sanitizer.
 *
 * @param {Object} metadata - The original metadata object.
 * @returns {Object} - The clean metadata object.
 */
function cleanMetadata(metadata = {}) {
  const clean = {};

  for (const [key, value] of Object.entries(metadata)) {
    if (
      typeof value === "string" ||
      typeof value === "number" ||
      typeof value === "boolean" ||
      value === null
    ) {
      clean[key] = value;
    } else {
      // Convert complex objects/arrays to strings to prevent DB injection errors.
      // JSON.stringify returns undefined for undefined/function/symbol inputs;
      // skip those keys instead of writing `undefined` into the store.
      const serialized = JSON.stringify(value);
      if (serialized !== undefined) {
        clean[key] = serialized;
      }
    }
  }

  return clean;
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Ingests documents into the vector database.
|
|
39
|
+
* Supports loading from a directory or a specific single file.
|
|
40
|
+
*
|
|
41
|
+
* @param {string} source - The path to the document directory or single file.
|
|
42
|
+
* @param {boolean} isSingleFile - Flag to indicate if the source is a single file path.
|
|
43
|
+
* @returns {Promise<Object>} - Result object indicating success/failure and chunk count.
|
|
44
|
+
*/
|
|
45
|
+
/**
 * Ingests documents into the Chroma vector database.
 * Supports loading from a directory or a specific single file.
 *
 * Pipeline: load -> split into chunks -> sanitize metadata -> embed -> store.
 *
 * @param {string} source - Path to the document directory or single file,
 *   resolved relative to process.cwd().
 * @param {boolean} isSingleFile - Flag to indicate if the source is a single file path.
 * @returns {Promise<Object>} - `{ success: true, chunks }` on success, or
 *   `{ success: false, message }` when no documents were found.
 * @throws {Error} - On an unsupported file extension, or any loader,
 *   embedding, or database failure (logged, then rethrown).
 */
export async function ingestDocuments(source = "./documents", isSingleFile = false) {
  try {
    const absoluteSource = path.resolve(process.cwd(), source);
    let loader;

    // Determine the loader strategy: one explicit file vs. a whole directory.
    if (isSingleFile) {
      const ext = path.extname(source).toLowerCase();
      if (ext === ".pdf") {
        loader = new PDFLoader(absoluteSource);
      } else if (ext === ".txt") {
        loader = new TextLoader(absoluteSource);
      } else {
        throw new Error(`Unsupported file extension: ${ext}`);
      }
      console.log(`📥 Loading document: ${source}...`);
    } else {
      // Load every supported file in the directory.
      // (absoluteSource already holds the resolved directory path; the
      // previous duplicate path.resolve call has been removed.)
      loader = new DirectoryLoader(absoluteSource, {
        ".txt": (filePath) => new TextLoader(filePath),
        ".pdf": (filePath) => new PDFLoader(filePath)
      });
      console.log(`📥 Loading documents from ${source}...`);
    }

    const docs = await loader.load();

    if (docs.length === 0) {
      console.log("⚠ No documents found.");
      return { success: false, message: "No documents found." };
    }

    // Split text into manageable chunks for the LLM context window.
    const splitter = new RecursiveCharacterTextSplitter({
      chunkSize: 1000,
      chunkOverlap: 200
    });

    console.log("✂ Splitting documents...");
    const splitDocs = await splitter.splitDocuments(docs);

    // Clean metadata to avoid database errors (Chroma accepts only primitives).
    const cleanDocs = splitDocs.map((doc) => ({
      ...doc,
      metadata: cleanMetadata(doc.metadata),
    }));

    console.log(`✅ Split into ${splitDocs.length} chunks.`);
    console.log("📌 Generating embeddings and storing in Chroma...");

    // Initialize the embeddings provider.
    // NOTE(review): only "gemini" and the OpenAI fallback are handled here;
    // an "ollama" provider (supported elsewhere in this template) falls
    // through to OpenAI embeddings — confirm this is intended.
    const provider = process.env.LLM_PROVIDER || "openai";
    let embeddings;

    if (provider === "gemini") {
      embeddings = new GoogleGenerativeAIEmbeddings({
        apiKey: process.env.GOOGLE_API_KEY,
        modelName: "gemini-embedding-001"
      });
    } else {
      embeddings = new OpenAIEmbeddings({
        apiKey: process.env.OPENAI_API_KEY
      });
    }

    // Store vectors in ChromaDB.
    await Chroma.fromDocuments(cleanDocs, embeddings, {
      collectionName: process.env.COLLECTION_NAME || "rag-docs",
      url: process.env.CHROMA_URL || "http://localhost:8000"
    });

    console.log("🎉 ✅ Ingestion Complete!");
    return { success: true, chunks: splitDocs.length };
  } catch (error) {
    console.error("❌ Ingestion Failed:", error);
    throw error;
  }
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai";
|
|
2
|
+
import { ChatOllama, OllamaEmbeddings } from "@langchain/ollama";
|
|
3
|
+
import {
|
|
4
|
+
GoogleGenerativeAIEmbeddings,
|
|
5
|
+
ChatGoogleGenerativeAI
|
|
6
|
+
} from "@langchain/google-genai";
|
|
7
|
+
|
|
8
|
+
/**
 * Builds the embeddings client for the configured provider.
 * The provider is read from the LLM_PROVIDER environment variable and
 * defaults to "openai" when unset.
 *
 * @returns {OllamaEmbeddings|GoogleGenerativeAIEmbeddings|OpenAIEmbeddings}
 *   A ready-to-use embeddings instance for the selected provider.
 */
export function getEmbeddings() {
  const provider = process.env.LLM_PROVIDER || "openai";

  switch (provider) {
    case "ollama":
      // Embeddings served by a local Ollama instance.
      return new OllamaEmbeddings({
        baseUrl: "http://localhost:11434",
        model: "nomic-embed-text"
      });

    case "gemini":
      return new GoogleGenerativeAIEmbeddings({
        apiKey: process.env.GOOGLE_API_KEY,
        modelName: "gemini-embedding-001"
      });

    default:
      // Fallback: OpenAI hosted embeddings.
      return new OpenAIEmbeddings({
        apiKey: process.env.OPENAI_API_KEY,
        model: "text-embedding-3-small"
      });
  }
}
|
|
30
|
+
|
|
31
|
+
/**
 * Builds the chat model client for the configured provider.
 * The provider is read from the LLM_PROVIDER environment variable and
 * defaults to "openai" when unset.
 *
 * @returns {ChatOllama|ChatGoogleGenerativeAI|ChatOpenAI}
 *   A ready-to-use chat model instance for the selected provider.
 */
export function getLLM() {
  const provider = process.env.LLM_PROVIDER || "openai";

  switch (provider) {
    case "ollama":
      // Chat completions served by a local Ollama instance.
      return new ChatOllama({
        baseUrl: "http://localhost:11434",
        model: "llama3"
      });

    case "gemini":
      return new ChatGoogleGenerativeAI({
        apiKey: process.env.GOOGLE_API_KEY,
        modelName: "gemini-3-flash-preview",
        maxOutputTokens: 2048
      });

    default:
      // Fallback: OpenAI hosted chat model, deterministic output.
      return new ChatOpenAI({
        apiKey: process.env.OPENAI_API_KEY,
        modelName: "gpt-4o-mini",
        temperature: 0
      });
  }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/** @type {import('tailwindcss').Config} */
export default {
  // Globs Tailwind scans for class names; unmatched classes are purged
  // from the generated CSS.
  content: [
    "./src/pages/**/*.{js,ts,jsx,tsx,mdx}",
    "./src/components/**/*.{js,ts,jsx,tsx,mdx}",
    "./src/app/**/*.{js,ts,jsx,tsx,mdx}",
  ],
  theme: {
    extend: {
      // Extra gradient utilities (bg-gradient-radial / bg-gradient-conic)
      // driven by Tailwind's --tw-gradient-stops custom property.
      backgroundImage: {
        "gradient-radial": "radial-gradient(var(--tw-gradient-stops))",
        "gradient-conic":
          "conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))",
      },
    },
  },
  plugins: [],
};
|