@ragieai/skills 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +8 -0
- package/.mcp.json +11 -0
- package/LICENSE +21 -0
- package/README.md +88 -0
- package/dist/index.cjs +55 -0
- package/dist/index.d.cts +16 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.js +26 -0
- package/package.json +43 -0
- package/skills/ragie/SKILL.md +50 -0
- package/skills/ragie/references/api-reference.md +203 -0
- package/skills/ragie/references/ingestion.md +127 -0
- package/skills/ragie/references/mcp.md +84 -0
- package/skills/ragie/references/metadata-filtering.md +149 -0
- package/skills/ragie/references/partitions.md +85 -0
- package/skills/ragie/references/python.md +232 -0
- package/skills/ragie/references/quickstart.md +69 -0
- package/skills/ragie/references/rag-patterns.md +160 -0
- package/skills/ragie/references/retrieval.md +77 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# RAG Patterns with Ragie
|
|
2
|
+
|
|
3
|
+
> Python user? See `references/python.md` for Python equivalents.
|
|
4
|
+
|
|
5
|
+
## Basic RAG Response
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
9
|
+
import { Ragie } from "ragie";
|
|
10
|
+
|
|
11
|
+
const ragie = new Ragie({ auth: process.env.RAGIE_API_KEY });
|
|
12
|
+
const anthropic = new Anthropic();
|
|
13
|
+
|
|
14
|
+
async function answer(question: string): Promise<string> {
|
|
15
|
+
const results = await ragie.retrievals.retrieve({
|
|
16
|
+
query: question,
|
|
17
|
+
rerank: true,
|
|
18
|
+
topK: 6,
|
|
19
|
+
});
|
|
20
|
+
const context = results.scoredChunks.map((c) => c.text).join("\n\n");
|
|
21
|
+
|
|
22
|
+
const msg = await anthropic.messages.create({
|
|
23
|
+
model: "claude-sonnet-4-6",
|
|
24
|
+
max_tokens: 1024,
|
|
25
|
+
messages: [{ role: "user", content: `Context:\n${context}\n\nQuestion: ${question}` }],
|
|
26
|
+
});
|
|
27
|
+
return (msg.content[0] as { text: string }).text;
|
|
28
|
+
}
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Streaming
|
|
32
|
+
|
|
33
|
+
```typescript
|
|
34
|
+
async function streamAnswer(question: string): Promise<void> {
|
|
35
|
+
const results = await ragie.retrievals.retrieve({
|
|
36
|
+
query: question,
|
|
37
|
+
rerank: true,
|
|
38
|
+
topK: 6,
|
|
39
|
+
});
|
|
40
|
+
const context = results.scoredChunks.map((c) => c.text).join("\n\n");
|
|
41
|
+
|
|
42
|
+
const stream = await anthropic.messages.stream({
|
|
43
|
+
model: "claude-sonnet-4-6",
|
|
44
|
+
max_tokens: 1024,
|
|
45
|
+
messages: [{ role: "user", content: `Context:\n${context}\n\nQuestion: ${question}` }],
|
|
46
|
+
});
|
|
47
|
+
for await (const chunk of stream) {
|
|
48
|
+
if (chunk.type === "content_block_delta" && chunk.delta.type === "text_delta") {
|
|
49
|
+
process.stdout.write(chunk.delta.text);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Citations
|
|
56
|
+
|
|
57
|
+
```typescript
|
|
58
|
+
async function answerWithCitations(question: string) {
|
|
59
|
+
const results = await ragie.retrievals.retrieve({
|
|
60
|
+
query: question,
|
|
61
|
+
rerank: true,
|
|
62
|
+
topK: 6,
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
const sources = results.scoredChunks.map((c) => ({
|
|
66
|
+
name: c.documentName,
|
|
67
|
+
id: c.documentId,
|
|
68
|
+
}));
|
|
69
|
+
const context = results.scoredChunks
|
|
70
|
+
.map((c) => `[Source: ${c.documentName}]\n${c.text}`)
|
|
71
|
+
.join("\n\n");
|
|
72
|
+
|
|
73
|
+
const msg = await anthropic.messages.create({
|
|
74
|
+
model: "claude-sonnet-4-6",
|
|
75
|
+
max_tokens: 1024,
|
|
76
|
+
messages: [{
|
|
77
|
+
role: "user",
|
|
78
|
+
content: `Answer using only the context below. Reference sources by name.\n\nContext:\n${context}\n\nQuestion: ${question}`,
|
|
79
|
+
}],
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
return { answer: (msg.content[0] as { text: string }).text, sources };
|
|
83
|
+
}
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Tool Use Integration
|
|
87
|
+
|
|
88
|
+
Expose Ragie as a tool Claude can call during an agentic loop:
|
|
89
|
+
|
|
90
|
+
```typescript
|
|
91
|
+
const tools: Anthropic.Tool[] = [{
|
|
92
|
+
name: "search_knowledge_base",
|
|
93
|
+
description: "Search the internal knowledge base for relevant information.",
|
|
94
|
+
input_schema: {
|
|
95
|
+
type: "object",
|
|
96
|
+
properties: { query: { type: "string", description: "The search query" } },
|
|
97
|
+
required: ["query"],
|
|
98
|
+
},
|
|
99
|
+
}];
|
|
100
|
+
|
|
101
|
+
async function runAgent(userMessage: string): Promise<string> {
|
|
102
|
+
const messages: Anthropic.MessageParam[] = [{ role: "user", content: userMessage }];
|
|
103
|
+
|
|
104
|
+
while (true) {
|
|
105
|
+
const response = await anthropic.messages.create({
|
|
106
|
+
model: "claude-sonnet-4-6",
|
|
107
|
+
max_tokens: 2048,
|
|
108
|
+
tools,
|
|
109
|
+
messages,
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
if (response.stop_reason === "tool_use") {
|
|
113
|
+
const toolUse = response.content.find((b) => b.type === "tool_use") as Anthropic.ToolUseBlock;
|
|
114
|
+
const { query } = toolUse.input as { query: string };
|
|
115
|
+
const results = await ragie.retrievals.retrieve({ query, rerank: true });
|
|
116
|
+
const context = results.scoredChunks.map((c) => c.text).join("\n\n");
|
|
117
|
+
|
|
118
|
+
messages.push(
|
|
119
|
+
{ role: "assistant", content: response.content },
|
|
120
|
+
{ role: "user", content: [{ type: "tool_result", tool_use_id: toolUse.id, content: context }] }
|
|
121
|
+
);
|
|
122
|
+
} else {
|
|
123
|
+
const text = response.content.find((b) => b.type === "text") as Anthropic.TextBlock;
|
|
124
|
+
return text.text;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Multi-Tenant RAG
|
|
131
|
+
|
|
132
|
+
```typescript
|
|
133
|
+
async function answerForTenant(tenantId: string, question: string): Promise<string> {
|
|
134
|
+
const results = await ragie.retrievals.retrieve({
|
|
135
|
+
query: question,
|
|
136
|
+
partition: `tenant-${tenantId}`,
|
|
137
|
+
rerank: true,
|
|
138
|
+
});
|
|
139
|
+
const context = results.scoredChunks.map((c) => c.text).join("\n\n");
|
|
140
|
+
|
|
141
|
+
const msg = await anthropic.messages.create({
|
|
142
|
+
model: "claude-sonnet-4-6",
|
|
143
|
+
max_tokens: 1024,
|
|
144
|
+
messages: [{ role: "user", content: `Context:\n${context}\n\nQuestion: ${question}` }],
|
|
145
|
+
});
|
|
146
|
+
return (msg.content[0] as { text: string }).text;
|
|
147
|
+
}
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Production Checklist
|
|
151
|
+
|
|
152
|
+
- [ ] API key in environment variable, not source code
|
|
153
|
+
- [ ] `rerank: true` for generation use cases
|
|
154
|
+
- [ ] Document readiness verified before querying (polling or webhook)
|
|
155
|
+
- [ ] Partitions used for multi-tenant isolation
|
|
156
|
+
- [ ] Metadata attached for future filtering
|
|
157
|
+
- [ ] Retry on 429 / 5xx with exponential back-off
|
|
158
|
+
- [ ] Citations surfaced in answers
|
|
159
|
+
- [ ] `topK` tuned to use case (6–10 is typical)
|
|
160
|
+
- [ ] Chunk scores logged for retrieval quality monitoring
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Ragie Retrieval
|
|
2
|
+
|
|
3
|
+
> Python user? See `references/python.md` for Python equivalents.
|
|
4
|
+
|
|
5
|
+
## Basic Retrieval
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
const results = await client.retrievals.retrieve({
|
|
9
|
+
query: "What are the rate limits?",
|
|
10
|
+
});
|
|
11
|
+
for (const chunk of results.scoredChunks) {
|
|
12
|
+
console.log(chunk.text, chunk.score);
|
|
13
|
+
}
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Full Options
|
|
17
|
+
|
|
18
|
+
```typescript
|
|
19
|
+
const results = await client.retrievals.retrieve({
|
|
20
|
+
query: "your question",
|
|
21
|
+
topK: 8, // number of chunks returned (default 8)
|
|
22
|
+
rerank: true, // cross-encoder rerank — improves quality significantly
|
|
23
|
+
partition: "tenant-42", // scope to a partition (optional)
|
|
24
|
+
filter: { // metadata filter (optional) — see metadata-filtering.md
|
|
25
|
+
product: "api",
|
|
26
|
+
version: "v3",
|
|
27
|
+
},
|
|
28
|
+
maxChunksPerDocument: 2, // limit chunks per source doc — improves source diversity (optional)
|
|
29
|
+
recencyBias: false, // favor more recently ingested documents (default false) (optional)
|
|
30
|
+
});
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Scored Chunk Fields
|
|
34
|
+
|
|
35
|
+
Each item in `scoredChunks` has:
|
|
36
|
+
|
|
37
|
+
| Field | Description |
|
|
38
|
+
|-------|-------------|
|
|
39
|
+
| `text` | The chunk text |
|
|
40
|
+
| `score` | Relevance score (higher = more relevant) |
|
|
41
|
+
| `documentId` | ID of the source document |
|
|
42
|
+
| `documentName` | Display name of the source document |
|
|
43
|
+
| `documentMetadata` | Metadata attached to the source document |
|
|
44
|
+
| `id` | The chunk's own ID |
|
|
45
|
+
| `index` | The chunk's position index within the document |
|
|
46
|
+
| `metadata` | Chunk-level metadata (distinct from `documentMetadata`) |
|
|
47
|
+
|
|
48
|
+
## Hybrid Search
|
|
49
|
+
|
|
50
|
+
Ragie automatically blends semantic (vector) and keyword (BM25) search on every query. No configuration needed.
|
|
51
|
+
|
|
52
|
+
`rerank: true` applies a cross-encoder on top of the hybrid results. Always enable for generation use cases:
|
|
53
|
+
|
|
54
|
+
```typescript
|
|
55
|
+
// Fast path — lower latency
|
|
56
|
+
const fast = await client.retrievals.retrieve({ query: q, rerank: false });
|
|
57
|
+
|
|
58
|
+
// Quality path — recommended for RAG
|
|
59
|
+
const best = await client.retrievals.retrieve({ query: q, rerank: true });
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Tuning topK
|
|
63
|
+
|
|
64
|
+
| Use case | Recommended topK |
|
|
65
|
+
|----------|------------------|
|
|
66
|
+
| Short-answer generation | 4–6 |
|
|
67
|
+
| Long-form generation | 8–12 |
|
|
68
|
+
| Exploratory / debugging | 20+ |
|
|
69
|
+
|
|
70
|
+
Retrieving too many chunks dilutes the context and increases token cost; retrieving too few risks missing relevant content.
|
|
71
|
+
|
|
72
|
+
## Gotchas
|
|
73
|
+
|
|
74
|
+
- `rerank` adds ~100–300ms latency but meaningfully improves result quality. Use it by default.
|
|
75
|
+
- Metadata `filter` supports rich operators (`$eq`, `$ne`, `$gt`, `$gte`, `$lt`, `$lte`, `$in`, `$nin`, `$and`, `$or`). Simple object shorthand `{ key: value }` is equality. See `metadata-filtering.md`.
|
|
76
|
+
- Scores are not normalized across queries; use them for ranking within a single result set, not across queries.
|
|
77
|
+
- Query documents only after `status === "ready"`. See `ingestion.md`.
|