modular-agent-examples 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/chunking-demo.ts +339 -0
- package/cleanup-duplicates.ts +142 -0
- package/data/flower.jpg +0 -0
- package/generative.ts +128 -0
- package/graph/context-example.ts +209 -0
- package/graph/data-pipeline/agents.ts +60 -0
- package/graph/data-pipeline/fetchers.ts +166 -0
- package/graph/data-pipeline/index.ts +282 -0
- package/graph/index.ts +154 -0
- package/graph/map-example.ts +227 -0
- package/graph/metrics-example.ts +238 -0
- package/graph/parallel-example.ts +167 -0
- package/graph/pipeline-example.ts +225 -0
- package/graph/planning-example.ts +406 -0
- package/graph/router-example.ts +226 -0
- package/graph/sequential-example.ts +141 -0
- package/graph/voting-example.ts +159 -0
- package/graph-rag/docker-compose.yaml +14 -0
- package/graph-rag/index.js +99 -0
- package/graph-rag/init-db.sh +7 -0
- package/graph-rag/package.json +15 -0
- package/history-compression-example.ts +163 -0
- package/history-persistence.ts +347 -0
- package/index.ts +175 -0
- package/ingestion-pipeline.ts +353 -0
- package/mcp-airbnb-example.ts +69 -0
- package/mcp-http-example.ts +70 -0
- package/mcp-stdio-example.ts +63 -0
- package/multimodal.ts +144 -0
- package/ollama.ts +148 -0
- package/openai-compatible.ts +141 -0
- package/opensearch-vector-store.ts +342 -0
- package/package.json +24 -0
- package/pubmed.ts +289 -0
- package/reasoning-with-sub-agent.ts +311 -0
- package/synchronous/index.ts +48 -0
- package/tsconfig.json +8 -0
- package/vector-store-filtering.ts +303 -0
- package/vector-store.ts +210 -0
- package/vectorstore/index.ts +0 -0
- package/vectorstore/store/dbService.ts +80 -0
- package/voyage-embeddings.ts +99 -0
- package/weather-with-sub-agent.ts +276 -0
- package/weather.ts +389 -0
package/chunking-demo.ts
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunking Demo
|
|
3
|
+
*
|
|
4
|
+
* Demonstrates various chunking strategies without requiring external APIs.
|
|
5
|
+
* Shows how to use different chunker types and configure them for your use case.
|
|
6
|
+
*
|
|
7
|
+
* Run with: npm run example -- examples/chunking-demo.ts
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { TextChunker, RecursiveChunker, TokenChunker } from "../lib";
|
|
11
|
+
|
|
12
|
+
// Sample document for demonstration
|
|
13
|
+
const SAMPLE_DOCUMENT = `# Advanced Machine Learning Guide
|
|
14
|
+
|
|
15
|
+
## Introduction
|
|
16
|
+
|
|
17
|
+
Machine learning is a subset of artificial intelligence that enables systems to learn
|
|
18
|
+
and improve from experience without being explicitly programmed. This guide covers the
|
|
19
|
+
fundamental concepts and practical applications.
|
|
20
|
+
|
|
21
|
+
## Chapter 1: Fundamentals
|
|
22
|
+
|
|
23
|
+
### What is Machine Learning?
|
|
24
|
+
|
|
25
|
+
Machine learning algorithms build a model based on sample data, known as training data,
|
|
26
|
+
in order to make predictions or decisions without being explicitly programmed to do so.
|
|
27
|
+
The goal is to develop algorithms that can learn from and make predictions on data.
|
|
28
|
+
|
|
29
|
+
There are three main types of machine learning:
|
|
30
|
+
|
|
31
|
+
1. **Supervised Learning** - Learning from labeled data
|
|
32
|
+
2. **Unsupervised Learning** - Finding patterns in unlabeled data
|
|
33
|
+
3. **Reinforcement Learning** - Learning through rewards and penalties
|
|
34
|
+
|
|
35
|
+
### Key Concepts
|
|
36
|
+
|
|
37
|
+
**Features** are the input variables used to make predictions. Feature engineering is
|
|
38
|
+
often the most important step in building effective models.
|
|
39
|
+
|
|
40
|
+
**Labels** are the target values we want to predict in supervised learning.
|
|
41
|
+
|
|
42
|
+
**Models** are mathematical representations of patterns learned from data.
|
|
43
|
+
|
|
44
|
+
## Chapter 2: Supervised Learning
|
|
45
|
+
|
|
46
|
+
Supervised learning requires labeled training data. Common algorithms include:
|
|
47
|
+
|
|
48
|
+
- Linear Regression
|
|
49
|
+
- Logistic Regression
|
|
50
|
+
- Decision Trees
|
|
51
|
+
- Support Vector Machines
|
|
52
|
+
- Neural Networks
|
|
53
|
+
|
|
54
|
+
Each algorithm has strengths and weaknesses depending on your data and problem.
|
|
55
|
+
|
|
56
|
+
### Training and Evaluation
|
|
57
|
+
|
|
58
|
+
The process involves:
|
|
59
|
+
|
|
60
|
+
1. Split data into training and test sets
|
|
61
|
+
2. Train the model on training data
|
|
62
|
+
3. Evaluate on test data
|
|
63
|
+
4. Tune hyperparameters
|
|
64
|
+
5. Repeat until performance is acceptable
|
|
65
|
+
|
|
66
|
+
## Chapter 3: Unsupervised Learning
|
|
67
|
+
|
|
68
|
+
Unsupervised learning finds hidden patterns in data without labels.
|
|
69
|
+
|
|
70
|
+
**Clustering** groups similar data points together:
|
|
71
|
+
- K-means Clustering
|
|
72
|
+
- Hierarchical Clustering
|
|
73
|
+
- DBSCAN
|
|
74
|
+
|
|
75
|
+
**Dimensionality Reduction** reduces the number of features:
|
|
76
|
+
- Principal Component Analysis (PCA)
|
|
77
|
+
- t-SNE
|
|
78
|
+
- Autoencoders
|
|
79
|
+
|
|
80
|
+
## Chapter 4: Advanced Topics
|
|
81
|
+
|
|
82
|
+
### Deep Learning
|
|
83
|
+
|
|
84
|
+
Deep learning uses neural networks with multiple layers to learn hierarchical
|
|
85
|
+
representations of data. Applications include:
|
|
86
|
+
|
|
87
|
+
- Computer Vision (image recognition, object detection)
|
|
88
|
+
- Natural Language Processing (translation, sentiment analysis)
|
|
89
|
+
- Speech Recognition
|
|
90
|
+
- Game Playing
|
|
91
|
+
|
|
92
|
+
### Transfer Learning
|
|
93
|
+
|
|
94
|
+
Transfer learning leverages pre-trained models on new tasks, reducing training time
|
|
95
|
+
and data requirements significantly.
|
|
96
|
+
|
|
97
|
+
### Model Deployment
|
|
98
|
+
|
|
99
|
+
Once trained, models must be deployed in production systems. Considerations include:
|
|
100
|
+
|
|
101
|
+
- Model serialization and versioning
|
|
102
|
+
- API endpoints for predictions
|
|
103
|
+
- Monitoring model performance
|
|
104
|
+
- Handling model drift over time
|
|
105
|
+
|
|
106
|
+
## Conclusion
|
|
107
|
+
|
|
108
|
+
Machine learning is a powerful tool for solving complex problems. Success requires
|
|
109
|
+
understanding the fundamentals, choosing appropriate algorithms, and iterating
|
|
110
|
+
on your approach based on empirical results.`;
|
|
111
|
+
|
|
112
|
+
/**
 * Runs the chunking demo end to end, printing every example to stdout.
 *
 * Six scenarios are shown in order: character-based chunking, recursive
 * (separator-driven) chunking, token-aware chunking, custom chunk
 * post-processing, prev/next chunk navigation, and a side-by-side comparison
 * of the three chunkers on a short text.
 *
 * The two TokenChunker sections are best-effort: a failure there is reported
 * together with the underlying error message and the demo carries on.
 *
 * @returns A promise that resolves once all examples have been printed.
 */
async function main() {
  // Rounded arithmetic mean; yields 0 for an empty list so the output never shows NaN.
  const roundedMean = (values: number[]): number =>
    values.length === 0
      ? 0
      : Math.round(
          values.reduce((sum, value) => sum + value, 0) / values.length
        );

  // Turns whatever was thrown into a printable message.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Prints one row of the comparison table in Example 6.
  const printRow = (
    label: string,
    chunkCount: number,
    average: number,
    comment: string
  ): void => {
    console.log(
      `${label}| ${String(chunkCount).padEnd(6)}| ${String(average).padEnd(
        8
      )}| ${comment}`
    );
  };

  console.log("╔════════════════════════════════════════════════════════════╗");
  console.log("║ Machine Learning Document Chunking Demo ║");
  console.log(
    "╚════════════════════════════════════════════════════════════╝\n"
  );

  // ==================== Example 1: TextChunker ====================
  console.log("📝 Example 1: TextChunker (Character-based)\n");
  console.log("The TextChunker splits text by a fixed number of characters.");
  console.log("Good for: Uniform chunks, simple use cases\n");

  const textChunker = new TextChunker({
    chunkSize: 400,
    chunkOverlap: 50,
  });

  const textChunks = await textChunker.chunk(SAMPLE_DOCUMENT, {
    sourceId: "ml-guide-v1",
    sourcePath: "/docs/ml-guide.md",
    metadata: {
      category: "educational",
      difficulty: "intermediate",
    },
  });

  console.log(`Created ${textChunks.length} chunks:\n`);
  // Only the first three chunks are previewed to keep the output short.
  textChunks.slice(0, 3).forEach((chunk, index) => {
    console.log(`Chunk ${index + 1}:`);
    console.log(` ID: ${chunk.id}`);
    console.log(` Size: ${chunk.metadata.char_count} characters`);
    console.log(` Preview: ${chunk.content.substring(0, 80)}...`);
    console.log();
  });

  // ==================== Example 2: RecursiveChunker ====================
  console.log("\n🔍 Example 2: RecursiveChunker (Semantic boundaries)\n");
  console.log(
    "The RecursiveChunker splits on semantic boundaries (paragraphs, sections)"
  );
  console.log("Good for: Markdown, documents with clear structure\n");

  const recursiveChunker = new RecursiveChunker({
    chunkSize: 500,
    chunkOverlap: 75,
    separators: ["\n\n", "\n### ", "\n## ", "\n# ", ". ", " "],
  });

  const recursiveChunks = await recursiveChunker.chunk(SAMPLE_DOCUMENT, {
    sourceId: "ml-guide-v1",
    sourcePath: "/docs/ml-guide.md",
    metadata: {
      category: "educational",
      difficulty: "intermediate",
    },
  });

  console.log(`Created ${recursiveChunks.length} chunks:\n`);
  recursiveChunks.slice(0, 4).forEach((chunk, index) => {
    console.log(`Chunk ${index + 1}:`);
    console.log(` ID: ${chunk.id}`);
    console.log(` Section: ${chunk.metadata.section || "Unknown"}`);
    console.log(` Size: ${chunk.metadata.char_count} characters`);
    console.log(` Level: ${chunk.metadata.n_level || 0}`);
    console.log();
  });

  // ==================== Example 3: TokenChunker ====================
  console.log("\n⚙️ Example 3: TokenChunker (Token-aware)\n");
  console.log("The TokenChunker respects token limits for LLM compatibility");
  console.log("Good for: LLM processing, consistent token counts\n");

  try {
    const tokenChunker = new TokenChunker({
      chunkSize: 250,
      chunkOverlap: 25,
    });

    const tokenChunks = await tokenChunker.chunk(SAMPLE_DOCUMENT, {
      sourceId: "ml-guide-v1",
    });

    console.log(`Created ${tokenChunks.length} chunks:\n`);
    console.log("Token distribution:");
    const tokenCounts = tokenChunks.map((c) => c.metadata.token_count || 0);
    const hasCounts = tokenCounts.length > 0;
    const avgTokens = roundedMean(tokenCounts);
    // Math.min/Math.max over an empty list give +/-Infinity; report 0 instead.
    const minTokens = hasCounts ? Math.min(...tokenCounts) : 0;
    const maxTokens = hasCounts ? Math.max(...tokenCounts) : 0;

    console.log(` Average: ${avgTokens} tokens`);
    console.log(` Min: ${minTokens} tokens`);
    console.log(` Max: ${maxTokens} tokens`);
    console.log();
  } catch (error) {
    // Best-effort section: show the real cause first, then the usual hint.
    console.log(`TokenChunker example failed: ${describeError(error)}`);
    console.log(
      "Note: TokenChunker requires building with --loader ts-node/esm"
    );
    console.log(
      "For now, use: node --loader ts-node/esm examples/chunking-demo.ts\n"
    );
  }

  // ==================== Example 4: Custom Processing ====================
  console.log("\n🛠️ Example 4: Custom Chunk Processing\n");
  console.log("Apply custom transformations to chunks during processing.\n");

  const processingChunker = new TextChunker({
    chunkSize: 300,
    chunkOverlap: 30,
    chunkProcessor: (chunk) => {
      // Returning null for chunks under 50 trimmed characters filters them out.
      if (chunk.content.trim().length < 50) {
        return null;
      }

      // Rough sentence count: split on runs of terminators, ignore blank pieces.
      const sentences = chunk.content.split(/[.!?]+/).filter((s) => s.trim());

      return {
        ...chunk,
        metadata: {
          ...chunk.metadata,
          sentenceCount: sentences.length,
          processedAt: new Date().toISOString(),
        },
      };
    },
  });

  const processedChunks = await processingChunker.chunk(SAMPLE_DOCUMENT);

  console.log(`Created ${processedChunks.length} chunks after processing:\n`);
  processedChunks.slice(0, 3).forEach((chunk, index) => {
    console.log(`Chunk ${index + 1}:`);
    console.log(` Sentences: ${chunk.metadata.sentenceCount}`);
    console.log(` Size: ${chunk.metadata.char_count} characters`);
    console.log(` Processed: ${chunk.metadata.processedAt}`);
    console.log();
  });

  // ==================== Example 5: Chunk Navigation ====================
  console.log("\n🔗 Example 5: Chunk Navigation\n");
  console.log("Chunks maintain references for sequential navigation.\n");

  const navChunks = await textChunker.chunk(
    "Chunk 1. Chunk 2. Chunk 3. Chunk 4.",
    {
      sourceId: "simple-doc",
    }
  );

  console.log(`Chain of ${navChunks.length} chunks:\n`);
  navChunks.forEach((chunk, index) => {
    // Missing prev/next ids mark the ends of the chain.
    const prevLabel = chunk.metadata.prev_id
      ? chunk.metadata.prev_id.substring(0, 8)
      : "START";
    const nextLabel = chunk.metadata.next_id
      ? chunk.metadata.next_id.substring(0, 8)
      : "END";

    console.log(`[${prevLabel}] → Chunk ${index + 1} → [${nextLabel}]`);
  });
  console.log();

  // ==================== Example 6: Comparison ====================
  console.log("\n📊 Example 6: Chunker Comparison\n");

  const testText = `Introduction to AI. Machine learning is powerful. Deep learning uses neural networks.
Transformers revolutionized NLP. Vision models process images. Multimodal models combine text and images.
Large language models show emergent capabilities. Fine-tuning adapts models to tasks.`;

  console.log("Input text length: " + testText.length + " characters\n");
  console.log("Strategy | Chunks | Avg Size | Comments");
  console.log("─".repeat(65));

  // TextChunker comparison
  const tc = new TextChunker({ chunkSize: 150, chunkOverlap: 0 });
  const tcChunks = await tc.chunk(testText);
  printRow(
    "TextChunker (150) ",
    tcChunks.length,
    roundedMean(tcChunks.map((c) => c.metadata.char_count)),
    "Uniform chunks"
  );

  // RecursiveChunker comparison
  const rc = new RecursiveChunker({ chunkSize: 150, chunkOverlap: 0 });
  const rcChunks = await rc.chunk(testText);
  printRow(
    "RecursiveChunker (150)",
    rcChunks.length,
    roundedMean(rcChunks.map((c) => c.metadata.char_count)),
    "Respects sentences"
  );

  // TokenChunker comparison (row degrades to N/A if the chunker fails)
  try {
    const tk = new TokenChunker({ chunkSize: 40, chunkOverlap: 0 });
    const tkChunks = await tk.chunk(testText);
    printRow(
      "TokenChunker (40 toks)",
      tkChunks.length,
      roundedMean(tkChunks.map((c) => c.metadata.token_count || 0)),
      "Token-aware"
    );
  } catch (error) {
    console.log(
      `TokenChunker (40 toks)| N/A | N/A | (requires ESM loader)`
    );
    // Surface the actual failure so a different root cause is not hidden.
    console.log(` TokenChunker error: ${describeError(error)}`);
  }

  console.log("\n✅ Chunking demo complete!\n");
}
|
|
338
|
+
|
|
339
|
+
main().catch(console.error);
|
|
package/cleanup-duplicates.ts
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Utility script to clean up duplicate documents from a LanceDB vector store.
|
|
3
|
+
*
|
|
4
|
+
* Duplicates are identified by the content hash stored in metadata.
|
|
5
|
+
* For each hash, keeps the first document and deletes the rest.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* - Update the store configuration below to match your database
|
|
9
|
+
* - Run with: npm run example -- examples/cleanup-duplicates.ts
|
|
10
|
+
*/
|
|
11
|
+
import "dotenv/config";
|
|
12
|
+
import { LanceDBVectorStore } from "../lib/vectorstore/LanceDBVectorStore";
|
|
13
|
+
import { OpenAIEmbeddings } from "../lib/embeddings/OpenAIEmbeddings";
|
|
14
|
+
|
|
15
|
+
/**
 * Removes duplicate documents from the configured LanceDB table.
 *
 * Documents are grouped by `chunk_metadata.hash`; for every hash that occurs
 * more than once, the first document encountered is kept and the others are
 * deleted. Documents without a hash are ignored. After deleting, the table is
 * re-read to compare the remaining count with the expected one, and the store
 * is optimized.
 *
 * The process exits with code 1 when OPENAI_API_KEY is missing or any step
 * throws, and with code 0 when no duplicates are found.
 *
 * @returns A promise that resolves when the cleanup has finished.
 */
async function cleanupDuplicates() {
  console.log("=== Vector Store Duplicate Cleanup ===\n");

  // Bail out early when the API key is not configured.
  if (!process.env.OPENAI_API_KEY) {
    console.error("Error: OPENAI_API_KEY is required");
    process.exit(1);
  }

  try {
    // 1. Connect to the vector store
    console.log("1. Connecting to vector store...");
    const embeddings = new OpenAIEmbeddings({
      model: "text-embedding-3-small",
    });

    // Note: No metadataFields needed — this connects to a pre-existing table
    const store = await LanceDBVectorStore.create({
      name: "knowledge_base",
      uri: "./examples/data/vectors",
      tableName: "agention_docs",
      embeddings,
    });
    console.log(" Connected\n");

    // 2. Load every document
    console.log("2. Fetching all documents...");
    const table = store.getTable();
    if (!table) throw new Error("Table not found — has data been ingested?");
    const allDocs = await table.query().toArray();
    console.log(` Found ${allDocs.length} total documents\n`);

    // 3. Group document ids by content hash (hash -> ids in encounter order)
    console.log("3. Identifying duplicates by content hash...");
    const hashGroups = new Map<string, string[]>();

    for (const doc of allDocs) {
      // Rows come back untyped here; only the two fields below are read.
      const docRecord = doc as unknown as {
        id: string;
        chunk_metadata?: { hash?: string };
      };

      const hash = docRecord.chunk_metadata?.hash;
      if (!hash) {
        continue; // documents without a hash cannot be compared
      }

      const idsForHash = hashGroups.get(hash);
      if (idsForHash) {
        idsForHash.push(docRecord.id);
      } else {
        hashGroups.set(hash, [docRecord.id]);
      }
    }

    // Only hashes shared by more than one document are duplicates.
    const duplicateGroups = Array.from(hashGroups.entries()).filter(
      ([, ids]) => ids.length > 1
    );

    // The unique-hash count is the number of groups, not of duplicate groups.
    console.log(` Found ${hashGroups.size} unique hashes`);
    console.log(` Found ${duplicateGroups.length} groups with duplicates\n`);

    if (duplicateGroups.length === 0) {
      console.log("✓ No duplicates found! Database is clean.\n");
      process.exit(0);
    }

    // 4. Summarize what will be deleted (every copy except the first per hash)
    const totalDuplicates = duplicateGroups.reduce(
      (count, [, ids]) => count + ids.length - 1,
      0
    );

    console.log("4. Duplicate summary:");
    console.log(` - Total duplicate documents: ${totalDuplicates}`);
    console.log(
      ` - Documents to keep (one per hash): ${duplicateGroups.length}`
    );
    console.log(` - Documents to delete: ${totalDuplicates}\n`);

    // Show at most five groups as examples
    console.log(" Example duplicates:");
    for (const [hash, ids] of duplicateGroups.slice(0, 5)) {
      console.log(
        ` - Hash ${hash.substring(0, 16)}...: ${ids.length} copies`
      );
    }
    console.log();

    // 5. Delete the duplicates
    console.log("5. Deleting duplicates (keeping first occurrence)...");

    let deletedCount = 0;
    for (const [, ids] of duplicateGroups) {
      // Every group here has at least two ids, so slice(1) is never empty.
      deletedCount += await store.delete(ids.slice(1));
    }

    console.log(` Deleted ${deletedCount} duplicate documents\n`);

    // 6. Verify by re-reading the table
    console.log("6. Verifying cleanup...");
    const remainingDocs = await table.query().toArray();
    const expectedRemaining = allDocs.length - totalDuplicates;
    console.log(` Remaining documents: ${remainingDocs.length}`);
    console.log(` Expected: ${expectedRemaining}`);

    if (remainingDocs.length === expectedRemaining) {
      console.log("\n✓ Cleanup successful!\n");
    } else {
      console.log("\n⚠ Warning: Document count mismatch\n");
    }

    // 7. Optimize the table
    console.log("7. Optimizing database...");
    await store.optimize();
    console.log(" Done\n");

    console.log("=== Cleanup Complete ===");
  } catch (error) {
    console.error("Error during cleanup:", error);
    process.exit(1);
  }
}
|
|
141
|
+
|
|
142
|
+
cleanupDuplicates();
|
package/data/flower.jpg
ADDED
|
Binary file
|
package/generative.ts
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import "dotenv/config";
|
|
2
|
+
|
|
3
|
+
import { ClaudeAgent } from "../lib/agents/anthropic/ClaudeAgent";
|
|
4
|
+
import { OpenAiAgent } from "../lib/agents/openai/OpenAiAgent";
|
|
5
|
+
import { Tool, ToolEvent } from "../lib/tools/Tool";
|
|
6
|
+
import { readFile, readdir, writeFile } from "fs/promises";
|
|
7
|
+
|
|
8
|
+
import { createInterface } from "node:readline/promises";
|
|
9
|
+
// Promise-based readline interface on the terminal; used below to ask the
// user for confirmation before a file is written.
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
|
13
|
+
|
|
14
|
+
// Tool that lists, recursively, the entries of a directory given relative to
// the parent of this file's directory.
// NOTE(review): `input.path` is concatenated as-is, so a path containing ".."
// is not confined to the project — confirm whether that is acceptable.
const directoryReadTool = new Tool({
  name: "directoryReadTool",
  description: "This tool can list the files in a local directory.",
  inputSchema: {
    type: "object",
    properties: {
      path: {
        type: "string",
        description: "relative path, the default should be .",
      },
    },
    required: ["path"],
  },
  execute: async (input): Promise<any> => {
    console.log("directoryReadTool", input);
    const targetDirectory = `${__dirname}/../${input.path}`;
    const entries = await readdir(targetDirectory, { recursive: true });
    return entries;
  },
});
|
|
34
|
+
|
|
35
|
+
// Tool that returns the UTF-8 contents of a file given relative to the parent
// of this file's directory.
// The description asks the model not to read private files such as .env; that
// rule is also enforced in `execute`, which refuses any file whose name starts
// with ".env" instead of relying on the model to comply.
// NOTE(review): `input.path` is otherwise concatenated as-is, so ".." segments
// are not confined to the project — confirm whether that is acceptable.
const fileReadTool = new Tool({
  name: "fileReadTool",
  description: `This tool read a file in a local directory, you should not ask to read file that can contain private info like .env files.`,
  inputSchema: {
    type: "object",
    properties: {
      path: {
        type: "string",
        description: "relative path of a file",
      },
    },
    required: ["path"],
  },
  execute: async (input): Promise<any> => {
    console.log("fileReadTool", input);

    // Last path segment, accepting both "/" and "\" as separators.
    const fileName = String(input.path).split(/[\\/]/).pop() ?? "";
    if (fileName.startsWith(".env")) {
      // Answer with a message rather than throwing so the agent can continue.
      return "Refused to read " + input.path + ": it may contain private info";
    }

    return await readFile(__dirname + "/../" + input.path, {
      encoding: "utf8",
    });
  },
});
|
|
55
|
+
// Tool that writes (creating or overwriting) a file given relative to the
// parent of this file's directory, after the user confirms on the terminal.
// The schema property is named `content` so that it matches both the
// `required` list and the field `execute` reads; it was previously declared as
// `data`, which left the required `content` field undescribed to the model.
// NOTE(review): `input.path` is concatenated as-is, so ".." segments are not
// confined to the project; the confirmation prompt is the only safeguard.
const fileWriteTool = new Tool({
  name: "fileWriteTool",
  description: `This tool can be used to write to files. Be careful, it will overwrite existing files with new content. Can be used to create new files as well.`,
  inputSchema: {
    type: "object",
    properties: {
      path: {
        type: "string",
        description: "relative path of a file",
      },
      content: {
        type: "string",
        description: "Content to write to the file",
      },
    },
    required: ["path", "content"],
  },
  execute: async (input: {
    path: string;
    content: string;
  }): Promise<string> => {
    console.log("fileWriteTool", input.path, input.content);

    const answer = await rl.question("Can I write this file? Y/N");
    // Accept "y", "Y" and surrounding whitespace; anything else declines.
    const confirmed = answer.trim().toUpperCase() === "Y";
    if (!confirmed) {
      return "Could not write to " + input.path;
    }

    await writeFile(__dirname + "/../" + input.path, input.content);
    return "Success writing to " + input.path;
  },
});
|
|
86
|
+
|
|
87
|
+
/**
 * Runs the coding-agent example.
 *
 * Builds a ClaudeAgent equipped with the directory-read, file-read and
 * file-write tools, asks it to write jest unit tests for the OpenAiAgent
 * source file, and prints the final result. An OpenAiAgent is constructed as
 * well but is not executed.
 *
 * Failures are logged and reflected in the process exit code. When the run is
 * over, stdin is paused so the terminal handle opened by the file-level
 * readline interface no longer keeps the process alive.
 *
 * @returns A promise that resolves when the run has finished.
 */
async function example() {
  const agent2 = new ClaudeAgent({
    id: "1",
    description:
      "You are a world class software engineer who helps with software projects, you have access to files and directories and can create and write to files.",
    name: "Powerfull AI coder",
    model: "claude-3-5-haiku-latest",
    tools: [directoryReadTool, fileReadTool, fileWriteTool],
    apiKey: process.env.ANTHROPIC_API_KEY as string,
    disableParallelToolUse: false,
    maxTokens: 8000,
  });

  const openaiAgent = new OpenAiAgent({
    id: "1",
    description:
      "This helps with software projects, it takes commands, reads files and suggests improvements",
    name: "Powerfull AI coder",
    model: "gpt-4o-mini",
    // tools: [directoryReadTool, fileReadTool],
    apiKey: process.env.OPENAI_API_KEY as string,
    disableParallelToolUse: false,
  });
  // Constructed for illustration only; `void` marks it as intentionally unused.
  void openaiAgent;

  try {
    directoryReadTool.on(ToolEvent.EXECUTE, () => {
      // console.log("directoryReadTool.on(ToolEvent.EXECUTE", args);
      // args.event.preventDefault();
    });

    const result = await agent2.execute(
      "write jest unit tests for ./lib/agents/openai/OpenAiAgent.ts"
    );

    console.log("final result:", result);
  } catch (error) {
    console.error("Error:", error);
    // Make the failure visible to the calling shell.
    process.exitCode = 1;
  } finally {
    // Stop reading from the terminal so the event loop can drain and exit.
    process.stdin.pause();
  }
}
|
|
127
|
+
|
|
128
|
+
example();
|