future-lang 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +424 -0
- package/MIGRATION.md +365 -0
- package/README.md +370 -0
- package/ROADMAP.md +263 -0
- package/examples/adult.future +8 -0
- package/examples/api.future +11 -0
- package/examples/assistant.future +8 -0
- package/examples/browser-demo.html +164 -0
- package/examples/greet.future +7 -0
- package/examples/hello.future +1 -0
- package/examples/math.future +8 -0
- package/examples/mini-app.html +301 -0
- package/examples/smarthome.future +10 -0
- package/future-browser.js +102 -0
- package/future-playground.html +650 -0
- package/package.json +27 -0
- package/runtime/ai.js +92 -0
- package/runtime/browser.js +458 -0
- package/runtime/device.js +36 -0
- package/runtime/home.js +19 -0
- package/runtime/http.js +32 -0
- package/runtime/index.js +403 -0
- package/runtime/lsp-metadata.js +104 -0
- package/runtime/math.js +16 -0
- package/runtime/memory.js +61 -0
- package/runtime/mqtt.js +49 -0
- package/runtime/providers/anthropic.js +59 -0
- package/runtime/providers/index.js +93 -0
- package/runtime/providers/openai-compat.js +85 -0
- package/runtime/providers/util.js +70 -0
- package/runtime/rag/chunker.js +65 -0
- package/runtime/rag/pipeline.js +86 -0
- package/runtime/rag/vector-store.js +119 -0
- package/runtime/rag.js +94 -0
- package/runtime/schedule.js +77 -0
- package/runtime/system.js +101 -0
- package/runtime/tts.js +38 -0
- package/runtime/vision.js +85 -0
- package/server.js +42 -0
- package/src/ast.js +202 -0
- package/src/cli.js +391 -0
- package/src/errors.js +21 -0
- package/src/formatter.js +48 -0
- package/src/generator.js +457 -0
- package/src/index.js +48 -0
- package/src/lexer.js +248 -0
- package/src/parser.js +469 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
// runtime/providers/index.js — Provider factory and resolution.
|
|
2
|
+
//
|
|
3
|
+
// Resolution order (first match wins):
|
|
4
|
+
// 1. ai.configure() called from Future code → highest priority
|
|
5
|
+
// 2. FUTURE_AI_PROVIDER + FUTURE_AI_API_KEY → named preset
|
|
6
|
+
// 3. FUTURE_AI_BASE_URL + FUTURE_AI_API_KEY → custom OpenAI-compat endpoint
|
|
7
|
+
// 4. ANTHROPIC_API_KEY → Anthropic (original default)
|
|
8
|
+
// 5. Nothing found → null (offline stub)
|
|
9
|
+
//
|
|
10
|
+
// Supported FUTURE_AI_PROVIDER values:
|
|
11
|
+
// anthropic | openai | ollama | openrouter | gemini | venice | groq | together
|
|
12
|
+
|
|
13
|
+
import process from 'node:process';
|
|
14
|
+
import * as anthropic from './anthropic.js';
|
|
15
|
+
import * as openaiCompat from './openai-compat.js';
|
|
16
|
+
|
|
17
|
+
// Holds a config set by ai.configure() from within a Future program.
|
|
18
|
+
let _runtimeConfig = null;
|
|
19
|
+
|
|
20
|
+
/**
 * Store the provider configuration supplied programmatically.
 *
 * resolveProvider() checks this value before looking at any environment
 * variable, so whatever is stored here wins while it is truthy.
 *
 * @param {object|null} config Provider config object; a falsy value clears the override.
 * @returns {void}
 */
export function setRuntimeConfig(config) {
  _runtimeConfig = config;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Read back the config most recently passed to setRuntimeConfig().
 *
 * @returns {object|null} The stored config, or null when none has been set.
 */
export function getRuntimeConfig() {
  return _runtimeConfig;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Resolve and instantiate the active AI provider.
 *
 * Resolution order (first match wins):
 *   1. Config stored via setRuntimeConfig()
 *   2. FUTURE_AI_PROVIDER + FUTURE_AI_API_KEY  → named preset
 *   3. FUTURE_AI_BASE_URL + FUTURE_AI_API_KEY  → custom OpenAI-compatible endpoint
 *   4. ANTHROPIC_API_KEY                       → Anthropic
 *   5. Nothing found                           → null (offline)
 *
 * Environment variables that are set but empty (e.g. `FUTURE_AI_BASE_URL=` in a
 * .env file) are treated as unset.
 *
 * @returns {{ name, ask, chat, stream, embed } | null}
 */
export function resolveProvider() {
  // 1. Programmatic config always wins.
  if (_runtimeConfig) return buildProvider(_runtimeConfig);

  const providerName = process.env.FUTURE_AI_PROVIDER?.toLowerCase();
  const apiKey = process.env.FUTURE_AI_API_KEY;
  // `||` rather than `??`: an empty string must fall through to the defaults,
  // otherwise a blank FUTURE_AI_BASE_URL hides the preset URL and a blank
  // FUTURE_AI_MODEL is forwarded as the model name "".
  const model = process.env.FUTURE_AI_MODEL || undefined;
  const customBaseUrl = process.env.FUTURE_AI_BASE_URL || undefined;

  // 2. Named preset via FUTURE_AI_PROVIDER
  if (providerName && apiKey) {
    if (providerName === 'anthropic') {
      return anthropic.create({ apiKey, model });
    }
    const preset = openaiCompat.PRESETS[providerName];
    const baseUrl = customBaseUrl ?? preset?.baseUrl;
    if (!baseUrl) {
      console.warn(`[ai] Unknown FUTURE_AI_PROVIDER "${providerName}". Set FUTURE_AI_BASE_URL.`);
      return null;
    }
    return openaiCompat.create({
      baseUrl,
      apiKey,
      model,
      embedModel: preset?.embedModel ?? undefined,
    });
  }

  // 3. Custom OpenAI-compatible endpoint via env
  if (customBaseUrl && apiKey) {
    return openaiCompat.create({ baseUrl: customBaseUrl, apiKey, model });
  }

  // 4. Anthropic legacy default
  if (process.env.ANTHROPIC_API_KEY) {
    return anthropic.create({ apiKey: process.env.ANTHROPIC_API_KEY, model });
  }

  return null; // offline
}
|
|
80
|
+
|
|
81
|
+
/**
 * Build a provider from an explicit config object.
 *
 * - `provider: "anthropic"`, or neither `provider` nor `baseUrl` given → Anthropic
 *   (the whole config object is handed to anthropic.create()).
 * - Otherwise an OpenAI-compatible provider, using `config.baseUrl` or the
 *   preset registered for `config.provider`.
 *
 * The provider name is matched case-insensitively, as resolveProvider() does
 * for FUTURE_AI_PROVIDER.
 *
 * @param {{ provider?: string, baseUrl?: string, apiKey?: string, model?: string, embedModel?: string }} config
 * @returns {{ name, ask, chat, stream, embed } | null} null when no base URL can be determined.
 */
function buildProvider(config) {
  const providerName = String(config.provider ?? '').toLowerCase();

  if (providerName === 'anthropic' || (!config.baseUrl && !config.provider)) {
    return anthropic.create(config);
  }

  const preset = openaiCompat.PRESETS[providerName] ?? {};
  const baseUrl = config.baseUrl ?? preset.baseUrl;
  if (!baseUrl) {
    // Without this guard openaiCompat.create() fails on an undefined baseUrl.
    console.warn(`[ai] Unknown provider "${config.provider}". Pass baseUrl to ai.configure().`);
    return null;
  }

  return openaiCompat.create({
    baseUrl,
    apiKey: config.apiKey,
    model: config.model ?? undefined,
    embedModel: config.embedModel ?? preset.embedModel ?? undefined,
  });
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
// runtime/providers/openai-compat.js
|
|
2
|
+
// Handles ALL OpenAI-compatible APIs with a single implementation:
|
|
3
|
+
// OpenAI, Ollama, OpenRouter, Venice, Groq, Together, and
|
|
4
|
+
// Gemini (via Google's official OpenAI-compatible endpoint).
|
|
5
|
+
//
|
|
6
|
+
// Gemini note: Google exposes /v1beta/openai/ which is fully compatible.
|
|
7
|
+
// No separate SDK or special casing needed.
|
|
8
|
+
|
|
9
|
+
import { parseSSE, keywordVector } from './util.js';
|
|
10
|
+
|
|
11
|
+
/**
 * Base URL and default embedding model for each well-known OpenAI-compatible
 * provider, keyed by lower-case provider name.
 * An `embedModel` of null means no embedding model is preset for that provider.
 */
export const PRESETS = Object.fromEntries([
  ['openai', { baseUrl: 'https://api.openai.com/v1', embedModel: 'text-embedding-3-small' }],
  ['ollama', { baseUrl: 'http://localhost:11434/v1', embedModel: 'nomic-embed-text' }],
  ['openrouter', { baseUrl: 'https://openrouter.ai/api/v1', embedModel: null }],
  ['gemini', { baseUrl: 'https://generativelanguage.googleapis.com/v1beta/openai', embedModel: 'text-embedding-004' }],
  ['venice', { baseUrl: 'https://api.venice.ai/api/v1', embedModel: null }],
  ['groq', { baseUrl: 'https://api.groq.com/openai/v1', embedModel: null }],
  ['together', { baseUrl: 'https://api.together.xyz/v1', embedModel: 'togethercomputer/m2-bert-80M-8k-retrieval' }],
]);
|
|
24
|
+
|
|
25
|
+
/**
 * Create an OpenAI-compatible provider instance.
 *
 * @param {{ baseUrl: string, apiKey?: string, model?: string, embedModel?: string }} config
 *   - baseUrl: API root; trailing slashes are removed.
 *   - apiKey: sent as a Bearer token; the Authorization header is omitted when absent.
 *   - model: chat model, defaults to "gpt-4o-mini".
 *   - embedModel: embedding model; when absent, embed() uses keywordVector().
 * @returns {{ name: string, ask: Function, chat: Function, stream: Function, embed: Function }}
 * @throws {TypeError} when baseUrl is missing or not a non-empty string.
 */
export function create(config) {
  if (typeof config?.baseUrl !== 'string' || config.baseUrl === '') {
    throw new TypeError('[ai] openai-compat provider requires a non-empty "baseUrl".');
  }

  const baseUrl = config.baseUrl.replace(/\/+$/, '');
  const apiKey = config.apiKey;
  const model = config.model ?? 'gpt-4o-mini';
  const embedModel = config.embedModel ?? null;

  const headers = { 'content-type': 'application/json' };
  // Avoid sending the literal string "Bearer undefined" when no key is configured.
  if (apiKey) headers.authorization = `Bearer ${apiKey}`;

  /** POST a JSON payload to `${baseUrl}${path}` and return the raw Response. */
  function post(path, payload) {
    return fetch(`${baseUrl}${path}`, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload),
    });
  }

  /**
   * Send a chat completion request.
   * @param {{ role: string, content: string }[]} messages
   * @returns {Promise<string>} Trimmed content of the first choice, or '' if absent.
   */
  async function chat(messages) {
    const res = await post('/chat/completions', { model, messages, max_tokens: 1024 });
    if (!res.ok) throw new Error(`[ai/${baseUrl}] HTTP ${res.status}: ${await res.text()}`);
    const data = await res.json();
    return data.choices?.[0]?.message?.content?.trim() ?? '';
  }

  /** Single-prompt convenience wrapper around chat(). */
  async function ask(prompt) {
    return chat([{ role: 'user', content: String(prompt) }]);
  }

  /**
   * Stream a chat completion, calling onChunk with each non-empty content delta.
   * @param {{ role: string, content: string }[]} messages
   * @param {(chunk: string) => void} onChunk
   */
  async function stream(messages, onChunk) {
    const res = await post('/chat/completions', { model, messages, max_tokens: 1024, stream: true });
    // Include the response body, as chat() does, so failures are diagnosable.
    if (!res.ok) throw new Error(`[ai/${baseUrl}] stream HTTP ${res.status}: ${await res.text()}`);
    for await (const { data } of parseSSE(res.body)) {
      const chunk = data.choices?.[0]?.delta?.content;
      if (chunk) onChunk(chunk);
    }
  }

  /**
   * Embed text with the configured embedding model.
   * Best effort: any failure (no model, HTTP error, network error, unexpected
   * payload) falls back to keywordVector() instead of throwing.
   * @param {string} text
   * @returns {Promise<number[]>}
   */
  async function embed(text) {
    const input = String(text);
    if (!embedModel) return keywordVector(input);
    try {
      const res = await post('/embeddings', { model: embedModel, input });
      if (!res.ok) return keywordVector(input);
      const data = await res.json();
      const vector = data.data?.[0]?.embedding;
      return Array.isArray(vector) ? vector : keywordVector(input);
    } catch {
      return keywordVector(input);
    }
  }

  return { name: `openai-compat(${baseUrl})`, ask, chat, stream, embed };
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
// runtime/providers/util.js — Shared utilities for AI providers.
|
|
2
|
+
|
|
3
|
+
/**
 * Async generator that yields parsed JSON objects from an SSE stream.
 *
 * Recognises `event:` and `data:` lines (the space after the colon is
 * optional). Each `data:` line whose payload parses as JSON is yielded together
 * with the most recent `event:` value (null when none preceded it). A
 * `data: [DONE]` line ends the stream. Lines whose payload is not valid JSON
 * are skipped.
 *
 * A final line without a trailing newline is still processed, and the
 * underlying reader is cancelled and released when the generator finishes or
 * the consumer stops iterating early.
 *
 * @param {ReadableStream} body
 * @yields {{ event: string|null, data: object }}
 */
export async function* parseSSE(body) {
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buf = '';
  let pendingEvent = null;

  try {
    while (true) {
      const { done, value } = await reader.read();
      // On the final read, flush any bytes the decoder is still holding.
      buf += done ? decoder.decode() : decoder.decode(value, { stream: true });

      const lines = buf.split('\n');
      // Keep an incomplete trailing line for the next read; at end of stream
      // there is no next read, so every remaining line is processed.
      buf = done ? '' : lines.pop() ?? '';

      for (const line of lines) {
        if (line.startsWith('event:')) {
          pendingEvent = line.slice(6).trim();
        } else if (line.startsWith('data:')) {
          const raw = line.slice(5).trim();
          if (raw === '[DONE]') return;

          const event = pendingEvent;
          pendingEvent = null;

          let data;
          try {
            data = JSON.parse(raw);
          } catch {
            continue; // skip malformed lines
          }
          yield { event, data };
        }
      }

      if (done) return;
    }
  } finally {
    try {
      // Drop whatever is left unread so the underlying source can be freed.
      await reader.cancel();
    } catch {
      // The stream had already errored; there is nothing left to cancel.
    }
    reader.releaseLock();
  }
}
|
|
36
|
+
|
|
37
|
+
/**
 * Build a deterministic 256-dimensional bag-of-words vector for a text.
 *
 * The text is lower-cased and split into runs of two or more ASCII letters.
 * Each word is hashed (seed 5381, `h = (h * 33) ^ charCode` in 32-bit
 * arithmetic) into one of 256 buckets, and the bucket counts are scaled to
 * unit length. Text without any matching word yields the all-zero vector.
 *
 * @param {string} text
 * @returns {number[]} Array of length 256.
 */
export function keywordVector(text) {
  const buckets = new Array(256).fill(0);
  const tokens = String(text).toLowerCase().match(/\b[a-z]{2,}\b/g) ?? [];

  tokens.forEach((token) => {
    let hash = 5381;
    for (let pos = 0; pos < token.length; pos += 1) {
      hash = ((hash << 5) + hash) ^ token.charCodeAt(pos);
    }
    // `>>> 0` reinterprets the signed 32-bit hash as unsigned before bucketing.
    buckets[(hash >>> 0) % 256] += 1;
  });

  let sumOfSquares = 0;
  for (const count of buckets) {
    sumOfSquares += count * count;
  }
  // Guard against dividing by zero for the all-zero vector.
  const magnitude = Math.sqrt(sumOfSquares) || 1;

  return buckets.map((count) => count / magnitude);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Cosine similarity between two numeric vectors.
 *
 * The result lies in [-1, 1] and is 0 when either vector has zero magnitude.
 * If the vectors differ in length, the shorter one is treated as zero-padded,
 * so the result is always a finite number and does not depend on argument
 * order.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number}
 */
export function cosineSim(a, b) {
  const length = Math.max(a.length, b.length);
  let dot = 0;
  let na = 0;
  let nb = 0;
  for (let i = 0; i < length; i++) {
    // Missing components count as 0 instead of producing NaN.
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dot += x * y;
    na += x * x;
    nb += y * y;
  }
  // `|| 1` avoids 0/0 when either vector is all zeros.
  return dot / (Math.sqrt(na) * Math.sqrt(nb) || 1);
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// runtime/rag/chunker.js — Document chunking for RAG pipelines.
|
|
2
|
+
// Splits text into overlapping chunks so long documents don't exceed embedding context windows.
|
|
3
|
+
|
|
4
|
+
const DEFAULT_CHUNK_SIZE = 512; // characters
|
|
5
|
+
const DEFAULT_CHUNK_OVERLAP = 64; // characters of overlap between chunks
|
|
6
|
+
|
|
7
|
+
/**
 * Reduce a document of any supported shape to a plain string.
 *
 * - string → returned unchanged
 * - object → the first of `text`, `content`, `body` that is not null/undefined,
 *            otherwise the object's JSON serialisation
 * - null / undefined → ''
 * - anything else → String(value)
 *
 * @param {any} doc
 * @returns {string}
 */
export function docToText(doc) {
  if (typeof doc === 'string') return doc;

  const isObject = Boolean(doc) && typeof doc === 'object';
  if (!isObject) return String(doc ?? '');

  for (const field of ['text', 'content', 'body']) {
    const value = doc[field];
    if (value !== null && value !== undefined) return String(value);
  }
  return String(JSON.stringify(doc));
}
|
|
20
|
+
|
|
21
|
+
/**
 * Split text into overlapping character chunks.
 *
 * Each chunk is at most `size` characters long. When a ". " sentence boundary
 * lies in the second half of the window, the chunk is cut just after it.
 * Consecutive chunks overlap by up to `overlap` characters. Chunks are trimmed
 * and empty ones are dropped.
 *
 * Chunking stops as soon as a chunk reaches the end of the text, so the tail
 * is emitted exactly once.
 *
 * @param {string} text
 * @param {{ size?: number, overlap?: number }} [opts]
 *   size: maximum chunk length in characters (default DEFAULT_CHUNK_SIZE);
 *   overlap: characters shared between neighbouring chunks (default DEFAULT_CHUNK_OVERLAP).
 * @returns {string[]}
 */
export function chunk(text, opts = {}) {
  const size = opts.size ?? DEFAULT_CHUNK_SIZE;
  const overlap = opts.overlap ?? DEFAULT_CHUNK_OVERLAP;
  const source = String(text ?? '');
  const chunks = [];
  let start = 0;

  while (start < source.length) {
    let end = Math.min(start + size, source.length);

    // Try to break at a sentence boundary if we're not at the end.
    if (end < source.length) {
      // Search from `end - 2` so the two-character ". " fits inside the window
      // and the adjusted chunk never exceeds `size`.
      const lastPeriod = source.lastIndexOf('. ', end - 2);
      if (lastPeriod > start + size / 2) end = lastPeriod + 2;
    }

    const part = source.slice(start, end).trim();
    if (part) chunks.push(part);

    // The text is fully covered. Continuing would only re-emit ever-shorter
    // suffixes of the final chunk.
    if (end >= source.length) break;

    // Step back by `overlap`, but always advance at least one character so the
    // loop terminates even when overlap >= size.
    start = Math.max(start + 1, end - overlap);
  }

  return chunks;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Chunk every document in `docs` and flatten the result into one list.
 *
 * Each document is converted to text with docToText() and split with chunk().
 * Every resulting chunk records the original document it came from and its
 * position within that document.
 *
 * @param {any[]} docs
 * @param {{ size?: number, overlap?: number }} [opts] Forwarded to chunk().
 * @returns {{ text: string, source: any, chunkIndex: number }[]}
 */
export function chunkDocs(docs, opts = {}) {
  const collected = [];
  for (const source of docs) {
    const pieces = chunk(docToText(source), opts);
    for (let chunkIndex = 0; chunkIndex < pieces.length; chunkIndex += 1) {
      collected.push({ text: pieces[chunkIndex], source, chunkIndex });
    }
  }
  return collected;
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
// runtime/rag/pipeline.js — Full RAG pipeline.
|
|
2
|
+
//
|
|
3
|
+
// Architecture:
|
|
4
|
+
// Documents → Chunking → Embedding → Vector Store → Similarity Search → LLM → Answer
|
|
5
|
+
//
|
|
6
|
+
// Each Knowledge Base is an isolated pipeline instance.
|
|
7
|
+
// The default KB used by rag.index() / rag.query() is a singleton.
|
|
8
|
+
|
|
9
|
+
import { chunkDocs, docToText } from './chunker.js';
|
|
10
|
+
import { createVectorStore } from './vector-store.js';
|
|
11
|
+
import { resolveProvider } from '../providers/index.js';
|
|
12
|
+
|
|
13
|
+
let _idCounter = 0;
|
|
14
|
+
|
|
15
|
+
/**
 * Create a named RAG pipeline (Knowledge Base).
 *
 * The pipeline owns one vector store (created from `opts`) and exposes
 * index() to add documents and query() to retrieve context and, when a
 * provider is available, generate an answer from it.
 *
 * @param {string} name  Identifier used as the prefix of stored chunk ids.
 * @param {object} [opts] Passed to createVectorStore() and to chunkDocs().
 * @param {string} [opts.adapter]  Vector store adapter: "memory" | "file" | "qdrant" | …
 * @returns {{ name: string, index: Function, query: Function, store: object, stats: Function }}
 */
export function createPipeline(name = 'default', opts = {}) {
  const store = createVectorStore(opts);
  let totalChunks = 0;

  /** Embed text with the provider, or with the keyword fallback when offline. */
  async function embedText(provider, text) {
    if (provider) return provider.embed(text);
    const { keywordVector } = await import('../providers/util.js');
    return keywordVector(text);
  }

  /**
   * Short label (max 200 chars) describing where a chunk came from.
   * Object documents use their `source` field when present, otherwise their
   * text; stringifying the object directly would yield "[object Object]".
   */
  function describeSource(source) {
    const label = source !== null && typeof source === 'object'
      ? source.source ?? docToText(source)
      : source;
    return String(label).slice(0, 200);
  }

  /**
   * Chunk, embed and store documents.
   * @param {any|any[]} docs  A document or array of documents (file loading is the caller's responsibility).
   * @returns {Promise<{ indexed: number, total: number }>} Chunks added by this call, and by all calls so far.
   */
  async function index(docs) {
    const provider = resolveProvider();
    const normalised = Array.isArray(docs) ? docs : [docs];
    const chunks = chunkDocs(normalised, opts);

    for (const c of chunks) {
      const id = `${name}:${_idCounter++}`;
      const vector = await embedText(provider, c.text);
      store.add(id, vector, { text: c.text, source: describeSource(c.source), chunkIndex: c.chunkIndex });
      totalChunks++;
    }
    return { indexed: chunks.length, total: totalChunks };
  }

  /**
   * Query the knowledge base.
   * @param {string} question
   * @param {{ topK?: number, answerWithAI?: boolean }} [opts]
   *   topK: number of chunks to retrieve; answerWithAI: set false to get the raw context back.
   * @returns {Promise<string>} The provider's answer, the raw context, or a "[rag] …" notice.
   */
  async function query(question, { topK = 5, answerWithAI = true } = {}) {
    if (store.size() === 0) {
      return '[rag] No documents indexed. Call rag.index(docs) first.';
    }

    const provider = resolveProvider();
    const qVector = await embedText(provider, question);
    const hits = store.search(qVector, topK);

    if (hits.length === 0) return '[rag] No relevant documents found.';

    // Build context from top-k chunks
    const context = hits
      .map((h, i) => `[${i + 1}] ${h.metadata.text}`)
      .join('\n\n');

    if (!answerWithAI || !provider) {
      // Return raw context when no LLM is available
      return `Context:\n${context}`;
    }

    const prompt =
      `You are a helpful assistant. Answer the question using only the context below.\n\n` +
      `Context:\n${context}\n\nQuestion: ${question}\n\nAnswer:`;

    return provider.ask(prompt);
  }

  return {
    name,
    index,
    query,
    store,
    stats: () => ({ name, chunks: totalChunks, vectors: store.size() }),
  };
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
// runtime/rag/vector-store.js — Vector store adapters.
|
|
2
|
+
//
|
|
3
|
+
// Adapter interface (all adapters implement):
|
|
4
|
+
// add(id, vector, metadata) → void
|
|
5
|
+
// search(vector, topK) → [{ id, score, metadata }]
|
|
6
|
+
// delete(id) → void
|
|
7
|
+
// clear() → void
|
|
8
|
+
// size() → number
|
|
9
|
+
// persist() → Promise<void> (no-op for memory)
|
|
10
|
+
// load() → Promise<void> (no-op for memory)
|
|
11
|
+
//
|
|
12
|
+
// FUTURE_VECTOR_DB selects the adapter:
|
|
13
|
+
// memory (default) — in-process, fast, no deps
|
|
14
|
+
// file — memory + JSON file persistence (no native deps)
|
|
15
|
+
// qdrant — stub: set FUTURE_VECTOR_DB=qdrant and implement runtime/rag/qdrant.js
|
|
16
|
+
// pinecone — stub
|
|
17
|
+
// weaviate — stub
|
|
18
|
+
|
|
19
|
+
import process from 'node:process';
|
|
20
|
+
import { readFile, writeFile, mkdir } from 'node:fs/promises';
|
|
21
|
+
import path from 'node:path';
|
|
22
|
+
import { cosineSim } from '../providers/util.js';
|
|
23
|
+
|
|
24
|
+
// ─── In-Memory Adapter ─────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
/**
 * Create an in-process vector store backed by a Map.
 *
 * Ids are coerced to strings. search() scores every stored vector against the
 * query with cosineSim() and returns the `topK` best matches, highest score
 * first. persist() and load() do nothing.
 *
 * @returns {{ name: string, add: Function, search: Function, delete: Function,
 *             clear: Function, size: Function, persist: Function, load: Function }}
 */
export function createMemoryStore() {
  // id (string) → { vector, metadata }
  const records = new Map();

  return {
    name: 'memory',

    add: (id, vector, metadata = {}) => {
      records.set(String(id), { vector, metadata });
    },

    search: (queryVector, topK = 5) =>
      Array.from(records, ([id, { vector, metadata }]) => ({
        id,
        score: cosineSim(queryVector, vector),
        metadata,
      }))
        .sort((left, right) => right.score - left.score)
        .slice(0, topK),

    delete: (id) => {
      records.delete(String(id));
    },
    clear: () => {
      records.clear();
    },
    size: () => records.size,
    persist: async () => { /* no-op */ },
    load: async () => { /* no-op */ },
  };
}
|
|
53
|
+
|
|
54
|
+
// ─── File-Backed Adapter (memory + JSON persistence) ───────────────────────
|
|
55
|
+
|
|
56
|
+
/**
 * Create a vector store that behaves like the memory store and can also be
 * saved to, and restored from, a JSON file.
 *
 * The file holds one object mapping each id to `{ vector, metadata }`.
 *
 * @param {string} [filePath] Target file; defaults to `.future-vector-store.json` in the current working directory.
 * @returns {object} Store with the memory store's methods; persist() writes the file, load() reads it.
 */
export function createFileStore(filePath) {
  const store = createMemoryStore();
  const fpath = filePath ?? path.join(process.cwd(), '.future-vector-store.json');

  // The memory store does not expose its entries, so every write is mirrored
  // here to give persist() something to serialise.
  const snapshot = new Map();

  function add(id, vector, metadata = {}) {
    store.add(id, vector, metadata);
    snapshot.set(String(id), { vector, metadata });
  }

  return {
    ...store,
    name: 'file',

    add,

    delete(id) {
      store.delete(id);
      snapshot.delete(String(id));
    },

    clear() {
      store.clear();
      snapshot.clear();
    },

    /** Write all entries to the JSON file, creating its directory if needed. */
    async persist() {
      await mkdir(path.dirname(fpath), { recursive: true });
      await writeFile(fpath, JSON.stringify(Object.fromEntries(snapshot)));
    },

    /**
     * Read entries from the JSON file into the store.
     * A missing file is not an error. An unreadable or corrupt file is
     * reported with a warning and otherwise ignored.
     */
    async load() {
      let data;
      try {
        data = JSON.parse(await readFile(fpath, 'utf8'));
      } catch (err) {
        if (err?.code !== 'ENOENT') {
          console.warn(`[vector/file] Could not load "${fpath}": ${err.message}`);
        }
        return; // file doesn't exist yet, or is unusable
      }
      for (const [id, entry] of Object.entries(data ?? {})) {
        add(id, entry?.vector, entry?.metadata);
      }
    },
  };
}
|
|
88
|
+
|
|
89
|
+
// ─── Cloud Adapter Stubs ─────────────────────────────────────────────────────
|
|
90
|
+
// Implement these by creating runtime/rag/qdrant.js, etc. and importing here.
|
|
91
|
+
|
|
92
|
+
/**
 * Placeholder adapter for a vector database that has no implementation yet.
 *
 * add() and search() log a warning (search() also returns an empty list);
 * every other method does nothing, and size() is always 0.
 *
 * @param {string} name Adapter name used in the warnings.
 * @returns {object} Object with the vector store adapter methods.
 */
function cloudStub(name) {
  const stub = { name };

  stub.add = () => {
    console.warn(`[vector/${name}] not implemented — add runtime/rag/${name}.js`);
  };
  stub.search = () => {
    console.warn(`[vector/${name}] not implemented`);
    return [];
  };
  stub.delete = () => {};
  stub.clear = () => {};
  stub.size = () => 0;
  stub.persist = async () => {};
  stub.load = async () => {};

  return stub;
}
|
|
104
|
+
|
|
105
|
+
// ─── Factory ──────────────────────────────────────────────────────────────────
|
|
106
|
+
|
|
107
|
+
/**
 * Pick and construct a vector store adapter.
 *
 * The adapter name comes from `options.adapter`, then the FUTURE_VECTOR_DB
 * environment variable, then "memory". Matching is case-insensitive. An
 * unrecognised name logs a warning and yields a memory store.
 *
 * @param {{ adapter?: string, filePath?: string }} [options]
 *   filePath is only used by the "file" adapter.
 * @returns {object} A vector store adapter.
 */
export function createVectorStore(options = {}) {
  const adapter = options.adapter ?? process.env.FUTURE_VECTOR_DB ?? 'memory';
  const key = adapter.toLowerCase();

  if (key === 'memory') return createMemoryStore();
  if (key === 'file') return createFileStore(options.filePath);
  if (key === 'qdrant' || key === 'pinecone' || key === 'weaviate') return cloudStub(key);

  console.warn(`[vector] Unknown adapter "${adapter}", falling back to memory.`);
  return createMemoryStore();
}
|
package/runtime/rag.js
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
// runtime/rag.js — Public RAG module for Future programs.
|
|
2
|
+
//
|
|
3
|
+
// Existing syntax continues to work unchanged:
|
|
4
|
+
// rag.index(docs)
|
|
5
|
+
// answer = rag.query(question)
|
|
6
|
+
//
|
|
7
|
+
// New: named Knowledge Bases
|
|
8
|
+
// kb = rag.create("company")
|
|
9
|
+
// kb.index(["Contract clause A...", "Contract clause B..."])
|
|
10
|
+
// answer = kb.query("What are the payment terms?")
|
|
11
|
+
//
|
|
12
|
+
// The pipeline is powered by runtime/rag/pipeline.js which handles:
|
|
13
|
+
// Chunking → Embeddings → Vector Store → Similarity Search → LLM Answer
|
|
14
|
+
|
|
15
|
+
import { createPipeline } from './rag/pipeline.js';
|
|
16
|
+
|
|
17
|
+
// Default singleton pipeline used by rag.index() / rag.query()
|
|
18
|
+
const _default = createPipeline('default');
|
|
19
|
+
|
|
20
|
+
/**
 * Add documents to the default knowledge base.
 *
 * @param {any|any[]} docs A single document or an array of documents; a
 *   non-array value is wrapped in a one-element array.
 * @returns {Promise<{ indexed: number, total: number }>}
 */
export async function index(docs) {
  return _default.index(Array.isArray(docs) ? docs : [docs]);
}
|
|
29
|
+
|
|
30
|
+
/**
 * Ask a question against the default knowledge base.
 *
 * @param {string} question Coerced to a string before being passed on.
 * @returns {Promise<string>} Whatever the default pipeline's query() resolves to.
 */
export async function query(question) {
  const asText = String(question);
  return _default.query(asText);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Create a named Knowledge Base: a separate pipeline built by createPipeline().
 *
 * Future example:
 *   kb = rag.create("legal")
 *   kb.index(["Contract clause A...", "Contract clause B..."])
 *   answer = kb.query("What are the payment terms?")
 *
 * @param {string} name    Identifier for this knowledge base (coerced to a string).
 * @param {object} [opts]  Options forwarded to createPipeline(): { adapter, size, overlap }.
 * @returns {object} The pipeline object returned by createPipeline().
 */
export function create(name, opts = {}) {
  const kbName = String(name);
  return createPipeline(kbName, opts);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Read a local file as UTF-8 text and index it into the default pipeline.
 *
 * The file is indexed as one document of the form `{ text, source }`, with the
 * path as its source. Binary formats such as PDF must be converted to text
 * first, e.g. `system.exec("pdftotext manual.pdf manual.txt")`.
 *
 * Future example:
 *   rag.indexFile("manual.txt")
 *   answer = rag.query("How do I reset the device?")
 *
 * @param {string} filePath Path to the file (coerced to a string).
 * @returns {Promise<{ indexed: number, total: number }>}
 */
export async function indexFile(filePath) {
  const source = String(filePath);
  const fs = await import('node:fs/promises');
  const text = await fs.readFile(source, 'utf8');
  return _default.index([{ text, source }]);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Fetch a URL and index its text content into the default pipeline.
 *
 * `<script>` and `<style>` elements are removed together with their contents,
 * the remaining tags are stripped, and whitespace is collapsed before indexing.
 *
 * Future example:
 *   rag.indexUrl("https://docs.example.com/api")
 *   answer = rag.query("authentication")
 *
 * @param {string} url
 * @returns {Promise<{ indexed: number, total: number }>}
 * @throws {Error} when the response status is not 2xx, so that error pages are
 *   never indexed as content.
 */
export async function indexUrl(url) {
  const target = String(url);
  const res = await fetch(target);
  if (!res.ok) {
    throw new Error(`[rag.indexUrl] HTTP ${res.status} while fetching ${target}`);
  }

  const html = await res.text();
  const clean = html
    // Script and style bodies are code, not prose; drop them entirely.
    .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
    // Strip the remaining HTML tags for cleaner indexing.
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();

  return _default.index([{ text: clean, source: target }]);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Report statistics for the default pipeline.
 *
 * @returns {object} The value returned by the default pipeline's stats().
 */
export function stats() {
  const snapshot = _default.stats();
  return snapshot;
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
// runtime/schedule.js — Scheduling utilities for Future programs.
|
|
2
|
+
// Intervals can be a number (milliseconds) or a human-readable string: "5s", "30m", "2h".
|
|
3
|
+
// Cron support requires the optional `node-cron` package.
|
|
4
|
+
|
|
5
|
+
const handles = [];
|
|
6
|
+
|
|
7
|
+
/**
 * Invoke `callback` repeatedly, once per `interval`.
 *
 * Errors thrown (or rejected) by the callback are logged and do not stop the
 * timer. The timer handle is also recorded in the module's `handles` list.
 *
 * @param {number|string} interval  Milliseconds, or a string such as "30m", "5s", "1h".
 * @param {Function} callback       May be async.
 * @returns {Promise<NodeJS.Timeout>} The handle returned by setInterval.
 */
export async function every(interval, callback) {
  const periodMs = parseInterval(interval);

  const tick = async () => {
    try {
      await callback();
    } catch (e) {
      console.error('[schedule.every]', e.message);
    }
  };

  const timer = setInterval(tick, periodMs);
  handles.push(timer);
  return timer;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Invoke `callback` a single time after `delay` has elapsed.
 *
 * @param {number|string} delay  Milliseconds, or a string such as "30m", "5s", "1h".
 * @param {Function} callback    May be async.
 * @returns {Promise<any>} Resolves with the callback's result, or with null
 *   (after logging the error) if the callback throws.
 */
export async function once(delay, callback) {
  const waitMs = parseInterval(delay);

  await new Promise((wake) => {
    setTimeout(wake, waitMs);
  });

  try {
    return await callback();
  } catch (e) {
    console.error('[schedule.once]', e.message);
    return null;
  }
}
|
|
37
|
+
|
|
38
|
+
/**
 * Run `callback` on a cron schedule using the optional `node-cron` package.
 *
 * Never throws: if `node-cron` cannot be loaded, or it rejects the expression,
 * a warning naming the actual problem is logged and null is returned.
 *
 * @param {string} expression  Standard 5-field cron expression, e.g. "* * * * *".
 * @param {Function} callback
 * @returns {Promise<any>} The task returned by node-cron's schedule(), or null.
 */
export async function cron(expression, callback) {
  let nodeCron;
  try {
    const mod = await import('node-cron');
    nodeCron = mod.default ?? mod;
  } catch {
    console.warn(
      `[schedule.cron] node-cron is not installed — run: npm install node-cron\n` +
      `Expression "${expression}" will not fire.`,
    );
    return null;
  }

  // Kept separate from the import above so that a bad expression is not
  // misreported as a missing package.
  try {
    const task = nodeCron.schedule(String(expression), callback);
    handles.push(task);
    return task;
  } catch (err) {
    console.warn(
      `[schedule.cron] Could not schedule "${expression}": ${err.message}\n` +
      `Expression "${expression}" will not fire.`,
    );
    return null;
  }
}
|
|
59
|
+
|
|
60
|
+
// --- helpers ---
|
|
61
|
+
|
|
62
|
+
/**
 * Convert an interval into milliseconds.
 *
 * Accepts a non-negative finite number (already milliseconds) or a string made
 * of a non-negative decimal number followed by an optional unit: "ms", "s",
 * "m", "h" or "d" (case-insensitive, default "ms"). Examples: "500ms", "5s",
 * "30m", "1.5h", "2d", "250".
 *
 * @param {number|string} interval
 * @returns {number} Duration in milliseconds.
 * @throws {Error} when the value is NaN, infinite, negative, or not in the format above.
 */
function parseInterval(interval) {
  const invalid = (shown) =>
    new Error(`Invalid interval: "${shown}" — use a number or a string like "30m", "5s", "2h".`);

  if (typeof interval === 'number') {
    // NaN, negative and infinite delays would otherwise reach the timer
    // functions unchecked instead of failing loudly here.
    if (!Number.isFinite(interval) || interval < 0) throw invalid(interval);
    return interval;
  }

  const str = String(interval).trim();
  const match = str.match(/^(\d+(?:\.\d+)?)\s*(ms|s|m|h|d)?$/i);
  if (!match) throw invalid(str);

  const [, num, unit = 'ms'] = match;
  const n = parseFloat(num);
  switch (unit.toLowerCase()) {
    case 'd': return n * 86_400_000;
    case 'h': return n * 3_600_000;
    case 'm': return n * 60_000;
    case 's': return n * 1_000;
    default: return n;
  }
}
|