ragpipe 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,166 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty; // Short aliases for the reflection built-ins used by the helpers below.
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => { // Defines an enumerable getter on `target` for every key of `all`.
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => { // Mirrors the own properties of `from` onto `to` as forwarding getters, then returns `to`.
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except) // Skip keys `to` already owns and the excluded key.
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); // Enumerable when no descriptor is found, otherwise mirrors the source descriptor.
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // Fresh object flagged `__esModule: true` that forwards to the properties of `mod`.
19
+
20
+ // src/index.ts
21
+ var index_exports = {};
22
+ __export(index_exports, { // Each export is a getter, so the function bindings are only read on access.
23
+ createPipeline: () => createPipeline,
24
+ createRateLimitedEmbedder: () => createRateLimitedEmbedder,
25
+ defaultChunker: () => defaultChunker,
26
+ defineConfig: () => defineConfig,
27
+ loadConfig: () => loadConfig
28
+ });
29
+ module.exports = __toCommonJS(index_exports); // Publish the wrapped export object as this module's CommonJS exports.
30
+
31
+ // src/config.ts
32
+ var import_c12 = require("c12");
33
/**
 * Identity helper for authoring a ragpipe configuration.
 *
 * @param {object} config - The configuration object.
 * @returns {object} The very same `config` reference, untouched.
 */
function defineConfig(config) {
  return config;
}
36
/**
 * Resolves the ragpipe configuration through c12 (config name "ragpipe") and
 * verifies that the three required plugins are present.
 *
 * NOTE(review): the `overrides` argument is forwarded under c12's `defaults`
 * key — confirm that this precedence is what callers expect from a parameter
 * called "overrides".
 *
 * @param {object} [overrides] - Partial configuration forwarded to c12.
 * @returns {Promise<object>} The loaded configuration.
 * @throws {Error} When no configuration is resolved, or when `embedding`,
 *   `vectorStore` or `generation` is missing (checked in that order).
 */
async function loadConfig(overrides) {
  const loaded = await (0, import_c12.loadConfig)({
    name: "ragpipe",
    defaults: overrides
  });
  const cfg = loaded.config;
  if (!cfg) {
    throw new Error(
      "No ragpipe config found. Create a ragpipe.config.ts or pass config directly."
    );
  }

  // Required plugin slots paired with the error raised when one is absent.
  const requiredPlugins = [
    ["embedding", "ragpipe config is missing 'embedding' plugin."],
    ["vectorStore", "ragpipe config is missing 'vectorStore' plugin."],
    ["generation", "ragpipe config is missing 'generation' plugin."]
  ];
  for (const [slot, message] of requiredPlugins) {
    if (!cfg[slot]) {
      throw new Error(message);
    }
  }
  return cfg;
}
58
+
59
+ // src/chunker.ts
60
// Defaults applied when the corresponding option is not supplied.
var DEFAULT_CHUNK_SIZE = 400;
var DEFAULT_OVERLAP = 50;

/**
 * Creates the built-in paragraph-based chunker.
 *
 * Text is split on blank lines, every paragraph is trimmed, and paragraphs are
 * packed greedily (joined with "\n") while the running chunk stays within
 * `chunkSize` characters. When the next paragraph would not fit, the running
 * chunk is emitted and the next one starts with the last `overlap` characters
 * of the emitted chunk followed directly by that paragraph.
 *
 * The size limit is soft: a single paragraph longer than `chunkSize` is kept
 * whole, and the overlap prefix may push a chunk past the limit.
 *
 * @param {{ chunkSize?: number, overlap?: number }} [options]
 *   `chunkSize` defaults to 400 and `overlap` to 50. An `overlap` of 0 (or any
 *   non-positive value) disables the carried-over prefix.
 * @returns {{ name: string, chunk(text: string, source: string): Array<{ source: string, content: string }> }}
 *   A chunker plugin named "default".
 */
function defaultChunker(options) {
  const chunkSize = options?.chunkSize ?? DEFAULT_CHUNK_SIZE;
  const overlap = options?.overlap ?? DEFAULT_OVERLAP;
  return {
    name: "default",
    chunk(text, source) {
      const paragraphs = text.split(/\n\s*\n/).map((p) => p.trim()).filter(Boolean);
      const chunks = [];
      let current = "";
      for (const paragraph of paragraphs) {
        if (current && current.length + paragraph.length + 1 > chunkSize) {
          chunks.push({ source, content: current });
          // `slice(-0)` is `slice(0)` and would carry the WHOLE previous chunk
          // forward; a negative overlap would drop a prefix instead of keeping
          // a tail. Only a positive overlap yields a carried-over tail.
          const overlapSlice = overlap > 0 ? current.slice(-overlap) : "";
          // NOTE(review): the tail is concatenated without a separator, as before.
          current = overlapSlice + paragraph;
        } else {
          current = current ? `${current}\n${paragraph}` : paragraph;
        }
      }
      // `current` always ends with a trimmed, non-empty paragraph when it is
      // non-empty, so a plain truthiness check is sufficient here. Any text
      // containing a non-whitespace character yields at least one paragraph,
      // so no further fallback is needed.
      if (current) {
        chunks.push({ source, content: current });
      }
      return chunks;
    }
  };
}
91
+
92
+ // src/rate-limiter.ts
93
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Wraps an embedding plugin so that consecutive calls start at least `delay`
 * milliseconds apart.
 *
 * The delay is `delayMs` when given, otherwise `plugin.rateLimit.delayMs`,
 * otherwise 0 (calls pass straight through). Callers that arrive concurrently
 * are queued into successive time slots, so `Promise.all` over several
 * `embed` calls is paced just like sequential awaits.
 *
 * @param {object} plugin - Embedding plugin exposing `name`, `dimensions`,
 *   `embed(text)` and optionally `embedMany(texts)` / `rateLimit`.
 * @param {number} [delayMs] - Minimum spacing between calls, in milliseconds.
 * @returns {object} A plugin with the same `name`, `dimensions` and
 *   `rateLimit`, plus throttled `embed` and `embedMany` methods. `embedMany`
 *   delegates to the plugin's batch method (one throttled call) when it
 *   exists, and otherwise embeds the texts one by one, throttling each call.
 */
function createRateLimitedEmbedder(plugin, delayMs) {
  const delay = delayMs ?? plugin.rateLimit?.delayMs ?? 0;
  // Earliest timestamp (Date.now() scale) at which the next call may start.
  let nextSlot = 0;

  async function throttle() {
    // Also covers NaN, for which no meaningful slot can be computed.
    if (!(delay > 0)) return;
    const now = Date.now();
    const startAt = Math.max(now, nextSlot);
    // Reserve the slot synchronously, BEFORE awaiting, so callers that arrive
    // while this one sleeps are queued behind it instead of sharing its slot.
    nextSlot = startAt + delay;
    if (startAt > now) {
      await sleep(startAt - now);
    }
  }

  return {
    name: plugin.name,
    dimensions: plugin.dimensions,
    rateLimit: plugin.rateLimit,
    async embed(text) {
      await throttle();
      return plugin.embed(text);
    },
    async embedMany(texts) {
      if (plugin.embedMany) {
        await throttle();
        return plugin.embedMany(texts);
      }
      const results = [];
      for (const text of texts) {
        await throttle();
        results.push(await plugin.embed(text));
      }
      return results;
    }
  };
}
130
+
131
+ // src/pipeline.ts
132
/**
 * Assembles a RAG pipeline from the configured plugins.
 *
 * The chunker falls back to `defaultChunker()` when `config.chunker` is not
 * set, and the embedding plugin is wrapped with `createRateLimitedEmbedder`
 * when it declares a `rateLimit`.
 *
 * @param {object} config - Must provide `embedding`, `vectorStore` and
 *   `generation`; `chunker` and `systemPrompt` are optional.
 * @returns {{ ingest: Function, search: Function, ask: Function }}
 *   - `ingest(text, source)`: chunks `text`, embeds and upserts every chunk in
 *     order, and resolves to the number of chunks.
 *   - `search(query, topK = 5)`: embeds `query` and resolves to the vector
 *     store's results.
 *   - `ask(query, topK = 5)`: searches, joins the hits into a context string
 *     and resolves to `{ answer, sources }`.
 */
function createPipeline(config) {
  const chunker = config.chunker ?? defaultChunker();
  const embedder = config.embedding.rateLimit
    ? createRateLimitedEmbedder(config.embedding)
    : config.embedding;

  // Declared in the closure so `ask` can reach it without relying on `this`.
  async function search(query, topK = 5) {
    const vector = await embedder.embed(query);
    return config.vectorStore.search(vector, topK);
  }

  return {
    async ingest(text, source) {
      const chunks = chunker.chunk(text, source);
      for (const chunk of chunks) {
        const vector = await embedder.embed(chunk.content);
        await config.vectorStore.upsert(chunk.source, chunk.content, vector);
      }
      return chunks.length;
    },
    search,
    async ask(query, topK = 5) {
      // `this` is undefined when `ask` is destructured or passed as a bare
      // callback; fall back to the closure's search in that case. A `search`
      // found on the receiver is still honoured, so objects that override it
      // keep working as before.
      const runSearch = typeof this?.search === "function" ? this.search : search;
      const chunks = await runSearch.call(this, query, topK);
      const context = chunks.map((c) => `[${c.source}]\n${c.content}`).join("\n\n---\n\n");
      const answer = await config.generation.generate(query, context, {
        systemPrompt: config.systemPrompt
      });
      return { answer, sources: chunks };
    }
  };
}
159
+ // Annotate the CommonJS export names for ESM import in node:
160
+ 0 && (module.exports = { // `0 &&` short-circuits, so this assignment never executes; it only spells out the export names statically.
161
+ createPipeline,
162
+ createRateLimitedEmbedder,
163
+ defaultChunker,
164
+ defineConfig,
165
+ loadConfig
166
+ });
@@ -0,0 +1,79 @@
1
+ interface SearchResult { // Element type resolved by VectorStorePlugin.search() and Pipeline.search().
2
+ source: string;
3
+ content: string;
4
+ score: number; // NOTE(review): scale and ordering of the score are not defined here — confirm per vector store.
5
+ }
6
+ interface DocumentChunk { // Element type returned by ChunkerPlugin.chunk().
7
+ source: string;
8
+ content: string;
9
+ }
10
+ interface AskResult { // Value resolved by Pipeline.ask().
11
+ answer: string;
12
+ sources: SearchResult[]; // Search hits returned alongside the answer.
13
+ }
14
+ interface EmbeddingPlugin { // Contract for the `embedding` slot of RagpipeConfig.
15
+ readonly name: string;
16
+ readonly dimensions: number; // NOTE(review): presumably the length of the vectors produced by embed() — confirm.
17
+ embed(text: string): Promise<number[]>;
18
+ embedMany?(texts: string[]): Promise<number[][]>; // Optional batch variant.
19
+ rateLimit?: { // Optional; see createRateLimitedEmbedder, which reads `delayMs` from here.
20
+ delayMs: number;
21
+ };
22
+ }
23
+ interface VectorStorePlugin { // Contract for the `vectorStore` slot of RagpipeConfig.
24
+ readonly name: string;
25
+ search(vector: number[], topK: number): Promise<SearchResult[]>; // NOTE(review): `topK` presumably caps the number of results — confirm per store.
26
+ upsert(source: string, content: string, vector: number[]): Promise<void>;
27
+ clear?(): Promise<void>; // Optional.
28
+ disconnect?(): Promise<void>; // Optional.
29
+ }
30
+ interface GenerationPlugin { // Contract for the `generation` slot of RagpipeConfig.
31
+ readonly name: string;
32
+ generate(question: string, context: string, options?: { // Resolves to a single string.
33
+ history?: string;
34
+ systemPrompt?: string;
35
+ }): Promise<string>;
36
+ generateStream?(question: string, context: string, options?: { // Optional variant that yields strings asynchronously.
37
+ history?: string;
38
+ systemPrompt?: string;
39
+ }): AsyncIterable<string>;
40
+ }
41
+ interface ChunkerPlugin { // Contract for the optional `chunker` slot of RagpipeConfig.
42
+ readonly name: string;
43
+ chunk(text: string, source: string): DocumentChunk[]; // Synchronous: returns the chunks directly, not a Promise.
44
+ }
45
+ interface RagpipeConfig { // Configuration accepted by defineConfig() and createPipeline(), and resolved by loadConfig().
46
+ embedding: EmbeddingPlugin;
47
+ vectorStore: VectorStorePlugin;
48
+ generation: GenerationPlugin;
49
+ chunker?: ChunkerPlugin; // Optional.
50
+ systemPrompt?: string; // Optional.
51
+ }
52
+
53
+ declare function defineConfig(config: RagpipeConfig): RagpipeConfig; // Takes and returns a RagpipeConfig.
54
+ declare function loadConfig(overrides?: Partial<RagpipeConfig>): Promise<RagpipeConfig>; // Accepts an optional partial config; resolves to a complete RagpipeConfig.
55
+
56
+ interface Pipeline { // Object returned by createPipeline().
57
+ ingest(text: string, source: string): Promise<number>;
58
+ search(query: string, topK?: number): Promise<SearchResult[]>; // `topK` is optional.
59
+ ask(query: string, topK?: number): Promise<AskResult>; // `topK` is optional.
60
+ }
61
+ declare function createPipeline(config: RagpipeConfig): Pipeline;
62
+
63
+ interface DefaultChunkerOptions { // Options accepted by defaultChunker(); both fields are optional.
64
+ chunkSize?: number;
65
+ overlap?: number;
66
+ }
67
+ /**
68
+ * Splits text into paragraphs on blank lines and packs them into chunks that target `chunkSize` characters. The limit is soft: a single longer paragraph is kept whole, and the overlap prefix can push a chunk past it.
69
+ * Each chunk after the first starts with the last `overlap` characters of the previous chunk to preserve context at boundaries.
70
+ */
71
+ declare function defaultChunker(options?: DefaultChunkerOptions): ChunkerPlugin;
72
+
73
+ /**
74
+ * Wraps an EmbeddingPlugin to enforce rate limiting between calls.
75
+ * Uses `delayMs` when it is given, otherwise the plugin's `rateLimit.delayMs`; with neither, calls are not delayed.
76
+ */
77
+ declare function createRateLimitedEmbedder(plugin: EmbeddingPlugin, delayMs?: number): EmbeddingPlugin;
78
+
79
+ export { type AskResult, type ChunkerPlugin, type DefaultChunkerOptions, type DocumentChunk, type EmbeddingPlugin, type GenerationPlugin, type Pipeline, type RagpipeConfig, type SearchResult, type VectorStorePlugin, createPipeline, createRateLimitedEmbedder, defaultChunker, defineConfig, loadConfig }; // Ten type-only exports followed by the five function exports.
@@ -0,0 +1,79 @@
1
+ interface SearchResult { // Element type resolved by VectorStorePlugin.search() and Pipeline.search().
2
+ source: string;
3
+ content: string;
4
+ score: number; // NOTE(review): scale and ordering of the score are not defined here — confirm per vector store.
5
+ }
6
+ interface DocumentChunk { // Element type returned by ChunkerPlugin.chunk().
7
+ source: string;
8
+ content: string;
9
+ }
10
+ interface AskResult { // Value resolved by Pipeline.ask().
11
+ answer: string;
12
+ sources: SearchResult[]; // Search hits returned alongside the answer.
13
+ }
14
+ interface EmbeddingPlugin { // Contract for the `embedding` slot of RagpipeConfig.
15
+ readonly name: string;
16
+ readonly dimensions: number; // NOTE(review): presumably the length of the vectors produced by embed() — confirm.
17
+ embed(text: string): Promise<number[]>;
18
+ embedMany?(texts: string[]): Promise<number[][]>; // Optional batch variant.
19
+ rateLimit?: { // Optional; see createRateLimitedEmbedder, which reads `delayMs` from here.
20
+ delayMs: number;
21
+ };
22
+ }
23
+ interface VectorStorePlugin { // Contract for the `vectorStore` slot of RagpipeConfig.
24
+ readonly name: string;
25
+ search(vector: number[], topK: number): Promise<SearchResult[]>; // NOTE(review): `topK` presumably caps the number of results — confirm per store.
26
+ upsert(source: string, content: string, vector: number[]): Promise<void>;
27
+ clear?(): Promise<void>; // Optional.
28
+ disconnect?(): Promise<void>; // Optional.
29
+ }
30
+ interface GenerationPlugin { // Contract for the `generation` slot of RagpipeConfig.
31
+ readonly name: string;
32
+ generate(question: string, context: string, options?: { // Resolves to a single string.
33
+ history?: string;
34
+ systemPrompt?: string;
35
+ }): Promise<string>;
36
+ generateStream?(question: string, context: string, options?: { // Optional variant that yields strings asynchronously.
37
+ history?: string;
38
+ systemPrompt?: string;
39
+ }): AsyncIterable<string>;
40
+ }
41
+ interface ChunkerPlugin { // Contract for the optional `chunker` slot of RagpipeConfig.
42
+ readonly name: string;
43
+ chunk(text: string, source: string): DocumentChunk[]; // Synchronous: returns the chunks directly, not a Promise.
44
+ }
45
+ interface RagpipeConfig { // Configuration accepted by defineConfig() and createPipeline(), and resolved by loadConfig().
46
+ embedding: EmbeddingPlugin;
47
+ vectorStore: VectorStorePlugin;
48
+ generation: GenerationPlugin;
49
+ chunker?: ChunkerPlugin; // Optional.
50
+ systemPrompt?: string; // Optional.
51
+ }
52
+
53
+ declare function defineConfig(config: RagpipeConfig): RagpipeConfig; // Takes and returns a RagpipeConfig.
54
+ declare function loadConfig(overrides?: Partial<RagpipeConfig>): Promise<RagpipeConfig>; // Accepts an optional partial config; resolves to a complete RagpipeConfig.
55
+
56
+ interface Pipeline { // Object returned by createPipeline().
57
+ ingest(text: string, source: string): Promise<number>;
58
+ search(query: string, topK?: number): Promise<SearchResult[]>; // `topK` is optional.
59
+ ask(query: string, topK?: number): Promise<AskResult>; // `topK` is optional.
60
+ }
61
+ declare function createPipeline(config: RagpipeConfig): Pipeline;
62
+
63
+ interface DefaultChunkerOptions { // Options accepted by defaultChunker(); both fields are optional.
64
+ chunkSize?: number;
65
+ overlap?: number;
66
+ }
67
+ /**
68
+ * Splits text into paragraphs on blank lines and packs them into chunks that target `chunkSize` characters. The limit is soft: a single longer paragraph is kept whole, and the overlap prefix can push a chunk past it.
69
+ * Each chunk after the first starts with the last `overlap` characters of the previous chunk to preserve context at boundaries.
70
+ */
71
+ declare function defaultChunker(options?: DefaultChunkerOptions): ChunkerPlugin;
72
+
73
+ /**
74
+ * Wraps an EmbeddingPlugin to enforce rate limiting between calls.
75
+ * Uses `delayMs` when it is given, otherwise the plugin's `rateLimit.delayMs`; with neither, calls are not delayed.
76
+ */
77
+ declare function createRateLimitedEmbedder(plugin: EmbeddingPlugin, delayMs?: number): EmbeddingPlugin;
78
+
79
+ export { type AskResult, type ChunkerPlugin, type DefaultChunkerOptions, type DocumentChunk, type EmbeddingPlugin, type GenerationPlugin, type Pipeline, type RagpipeConfig, type SearchResult, type VectorStorePlugin, createPipeline, createRateLimitedEmbedder, defaultChunker, defineConfig, loadConfig }; // Ten type-only exports followed by the five function exports.
package/dist/index.js ADDED
@@ -0,0 +1,135 @@
1
+ // src/config.ts
2
+ import { loadConfig as c12LoadConfig } from "c12";
3
/**
 * Identity helper for authoring a ragpipe configuration.
 *
 * @param {object} config - The configuration object.
 * @returns {object} The very same `config` reference, untouched.
 */
function defineConfig(config) {
  return config;
}
6
/**
 * Resolves the ragpipe configuration through c12 (config name "ragpipe") and
 * verifies that the three required plugins are present.
 *
 * NOTE(review): the `overrides` argument is forwarded under c12's `defaults`
 * key — confirm that this precedence is what callers expect from a parameter
 * called "overrides".
 *
 * @param {object} [overrides] - Partial configuration forwarded to c12.
 * @returns {Promise<object>} The loaded configuration.
 * @throws {Error} When no configuration is resolved, or when `embedding`,
 *   `vectorStore` or `generation` is missing (checked in that order).
 */
async function loadConfig(overrides) {
  const loaded = await c12LoadConfig({
    name: "ragpipe",
    defaults: overrides
  });
  const cfg = loaded.config;
  if (!cfg) {
    throw new Error(
      "No ragpipe config found. Create a ragpipe.config.ts or pass config directly."
    );
  }

  // Required plugin slots paired with the error raised when one is absent.
  const requiredPlugins = [
    ["embedding", "ragpipe config is missing 'embedding' plugin."],
    ["vectorStore", "ragpipe config is missing 'vectorStore' plugin."],
    ["generation", "ragpipe config is missing 'generation' plugin."]
  ];
  for (const [slot, message] of requiredPlugins) {
    if (!cfg[slot]) {
      throw new Error(message);
    }
  }
  return cfg;
}
28
+
29
+ // src/chunker.ts
30
// Defaults applied when the corresponding option is not supplied.
var DEFAULT_CHUNK_SIZE = 400;
var DEFAULT_OVERLAP = 50;

/**
 * Creates the built-in paragraph-based chunker.
 *
 * Text is split on blank lines, every paragraph is trimmed, and paragraphs are
 * packed greedily (joined with "\n") while the running chunk stays within
 * `chunkSize` characters. When the next paragraph would not fit, the running
 * chunk is emitted and the next one starts with the last `overlap` characters
 * of the emitted chunk followed directly by that paragraph.
 *
 * The size limit is soft: a single paragraph longer than `chunkSize` is kept
 * whole, and the overlap prefix may push a chunk past the limit.
 *
 * @param {{ chunkSize?: number, overlap?: number }} [options]
 *   `chunkSize` defaults to 400 and `overlap` to 50. An `overlap` of 0 (or any
 *   non-positive value) disables the carried-over prefix.
 * @returns {{ name: string, chunk(text: string, source: string): Array<{ source: string, content: string }> }}
 *   A chunker plugin named "default".
 */
function defaultChunker(options) {
  const chunkSize = options?.chunkSize ?? DEFAULT_CHUNK_SIZE;
  const overlap = options?.overlap ?? DEFAULT_OVERLAP;
  return {
    name: "default",
    chunk(text, source) {
      const paragraphs = text.split(/\n\s*\n/).map((p) => p.trim()).filter(Boolean);
      const chunks = [];
      let current = "";
      for (const paragraph of paragraphs) {
        if (current && current.length + paragraph.length + 1 > chunkSize) {
          chunks.push({ source, content: current });
          // `slice(-0)` is `slice(0)` and would carry the WHOLE previous chunk
          // forward; a negative overlap would drop a prefix instead of keeping
          // a tail. Only a positive overlap yields a carried-over tail.
          const overlapSlice = overlap > 0 ? current.slice(-overlap) : "";
          // NOTE(review): the tail is concatenated without a separator, as before.
          current = overlapSlice + paragraph;
        } else {
          current = current ? `${current}\n${paragraph}` : paragraph;
        }
      }
      // `current` always ends with a trimmed, non-empty paragraph when it is
      // non-empty, so a plain truthiness check is sufficient here. Any text
      // containing a non-whitespace character yields at least one paragraph,
      // so no further fallback is needed.
      if (current) {
        chunks.push({ source, content: current });
      }
      return chunks;
    }
  };
}
61
+
62
+ // src/rate-limiter.ts
63
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Wraps an embedding plugin so that consecutive calls start at least `delay`
 * milliseconds apart.
 *
 * The delay is `delayMs` when given, otherwise `plugin.rateLimit.delayMs`,
 * otherwise 0 (calls pass straight through). Callers that arrive concurrently
 * are queued into successive time slots, so `Promise.all` over several
 * `embed` calls is paced just like sequential awaits.
 *
 * @param {object} plugin - Embedding plugin exposing `name`, `dimensions`,
 *   `embed(text)` and optionally `embedMany(texts)` / `rateLimit`.
 * @param {number} [delayMs] - Minimum spacing between calls, in milliseconds.
 * @returns {object} A plugin with the same `name`, `dimensions` and
 *   `rateLimit`, plus throttled `embed` and `embedMany` methods. `embedMany`
 *   delegates to the plugin's batch method (one throttled call) when it
 *   exists, and otherwise embeds the texts one by one, throttling each call.
 */
function createRateLimitedEmbedder(plugin, delayMs) {
  const delay = delayMs ?? plugin.rateLimit?.delayMs ?? 0;
  // Earliest timestamp (Date.now() scale) at which the next call may start.
  let nextSlot = 0;

  async function throttle() {
    // Also covers NaN, for which no meaningful slot can be computed.
    if (!(delay > 0)) return;
    const now = Date.now();
    const startAt = Math.max(now, nextSlot);
    // Reserve the slot synchronously, BEFORE awaiting, so callers that arrive
    // while this one sleeps are queued behind it instead of sharing its slot.
    nextSlot = startAt + delay;
    if (startAt > now) {
      await sleep(startAt - now);
    }
  }

  return {
    name: plugin.name,
    dimensions: plugin.dimensions,
    rateLimit: plugin.rateLimit,
    async embed(text) {
      await throttle();
      return plugin.embed(text);
    },
    async embedMany(texts) {
      if (plugin.embedMany) {
        await throttle();
        return plugin.embedMany(texts);
      }
      const results = [];
      for (const text of texts) {
        await throttle();
        results.push(await plugin.embed(text));
      }
      return results;
    }
  };
}
100
+
101
+ // src/pipeline.ts
102
/**
 * Assembles a RAG pipeline from the configured plugins.
 *
 * The chunker falls back to `defaultChunker()` when `config.chunker` is not
 * set, and the embedding plugin is wrapped with `createRateLimitedEmbedder`
 * when it declares a `rateLimit`.
 *
 * @param {object} config - Must provide `embedding`, `vectorStore` and
 *   `generation`; `chunker` and `systemPrompt` are optional.
 * @returns {{ ingest: Function, search: Function, ask: Function }}
 *   - `ingest(text, source)`: chunks `text`, embeds and upserts every chunk in
 *     order, and resolves to the number of chunks.
 *   - `search(query, topK = 5)`: embeds `query` and resolves to the vector
 *     store's results.
 *   - `ask(query, topK = 5)`: searches, joins the hits into a context string
 *     and resolves to `{ answer, sources }`.
 */
function createPipeline(config) {
  const chunker = config.chunker ?? defaultChunker();
  const embedder = config.embedding.rateLimit
    ? createRateLimitedEmbedder(config.embedding)
    : config.embedding;

  // Declared in the closure so `ask` can reach it without relying on `this`.
  async function search(query, topK = 5) {
    const vector = await embedder.embed(query);
    return config.vectorStore.search(vector, topK);
  }

  return {
    async ingest(text, source) {
      const chunks = chunker.chunk(text, source);
      for (const chunk of chunks) {
        const vector = await embedder.embed(chunk.content);
        await config.vectorStore.upsert(chunk.source, chunk.content, vector);
      }
      return chunks.length;
    },
    search,
    async ask(query, topK = 5) {
      // `this` is undefined when `ask` is destructured or passed as a bare
      // callback; fall back to the closure's search in that case. A `search`
      // found on the receiver is still honoured, so objects that override it
      // keep working as before.
      const runSearch = typeof this?.search === "function" ? this.search : search;
      const chunks = await runSearch.call(this, query, topK);
      const context = chunks.map((c) => `[${c.source}]\n${c.content}`).join("\n\n---\n\n");
      const answer = await config.generation.generate(query, context, {
        systemPrompt: config.systemPrompt
      });
      return { answer, sources: chunks };
    }
  };
}
129
+ export { // Named exports of the ESM build; the same five functions the CommonJS build exposes.
130
+ createPipeline,
131
+ createRateLimitedEmbedder,
132
+ defaultChunker,
133
+ defineConfig,
134
+ loadConfig
135
+ };
package/package.json ADDED
@@ -0,0 +1,60 @@
1
+ {
2
+ "name": "ragpipe",
3
+ "version": "0.0.1",
4
+ "description": "Pluggable TypeScript RAG toolkit — defineConfig() one file, embed → search → generate.",
5
+ "type": "module",
6
+ "license": "MIT",
7
+ "author": {
8
+ "name": "yungblud",
9
+ "url": "https://github.com/yungblud"
10
+ },
11
+ "repository": {
12
+ "type": "git",
13
+ "url": "https://github.com/yungblud/ragpipe",
14
+ "directory": "packages/ragpipe"
15
+ },
16
+ "bugs": {
17
+ "url": "https://github.com/yungblud/ragpipe/issues"
18
+ },
19
+ "homepage": "https://github.com/yungblud/ragpipe#readme",
20
+ "publishConfig": {
21
+ "access": "public"
22
+ },
23
+ "exports": {
24
+ ".": {
25
+ "types": "./dist/index.d.ts",
26
+ "import": "./dist/index.js",
27
+ "require": "./dist/index.cjs"
28
+ }
29
+ },
30
+ "main": "./dist/index.cjs",
31
+ "module": "./dist/index.js",
32
+ "types": "./dist/index.d.ts",
33
+ "files": ["dist"],
34
+ "scripts": {
35
+ "build": "tsup",
36
+ "dev": "tsup --watch",
37
+ "typecheck": "tsc --noEmit",
38
+ "test": "vitest run",
39
+ "test:watch": "vitest",
40
+ "test:coverage": "vitest run --coverage"
41
+ },
42
+ "keywords": [
43
+ "rag",
44
+ "retrieval-augmented-generation",
45
+ "embeddings",
46
+ "vector-search",
47
+ "llm",
48
+ "ai",
49
+ "typescript",
50
+ "plugin"
51
+ ],
52
+ "devDependencies": {
53
+ "tsup": "^8.4.0",
54
+ "typescript": "^5.8.3",
55
+ "vitest": "^3.1.1"
56
+ },
57
+ "dependencies": {
58
+ "c12": "^2.0.4"
59
+ }
60
+ }