@ragieai/skills 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,160 @@
1
+ # RAG Patterns with Ragie
2
+
3
+ > Python user? See `references/python.md` for Python equivalents.
4
+
5
+ ## Basic RAG Response
6
+
7
+ ```typescript
8
+ import Anthropic from "@anthropic-ai/sdk";
9
+ import { Ragie } from "ragie";
10
+
11
+ const ragie = new Ragie({ auth: process.env.RAGIE_API_KEY });
12
+ const anthropic = new Anthropic();
13
+
14
+ async function answer(question: string): Promise<string> {
15
+ const results = await ragie.retrievals.retrieve({
16
+ query: question,
17
+ rerank: true,
18
+ topK: 6,
19
+ });
20
+ const context = results.scoredChunks.map((c) => c.text).join("\n\n");
21
+
22
+ const msg = await anthropic.messages.create({
23
+ model: "claude-sonnet-4-6",
24
+ max_tokens: 1024,
25
+ messages: [{ role: "user", content: `Context:\n${context}\n\nQuestion: ${question}` }],
26
+ });
27
+ return (msg.content[0] as { text: string }).text;
28
+ }
29
+ ```
30
+
31
+ ## Streaming
32
+
33
+ ```typescript
34
+ async function streamAnswer(question: string): Promise<void> {
35
+ const results = await ragie.retrievals.retrieve({
36
+ query: question,
37
+ rerank: true,
38
+ topK: 6,
39
+ });
40
+ const context = results.scoredChunks.map((c) => c.text).join("\n\n");
41
+
42
+ const stream = await anthropic.messages.stream({
43
+ model: "claude-sonnet-4-6",
44
+ max_tokens: 1024,
45
+ messages: [{ role: "user", content: `Context:\n${context}\n\nQuestion: ${question}` }],
46
+ });
47
+ for await (const chunk of stream) {
48
+ if (chunk.type === "content_block_delta" && chunk.delta.type === "text_delta") {
49
+ process.stdout.write(chunk.delta.text);
50
+ }
51
+ }
52
+ }
53
+ ```
54
+
55
+ ## Citations
56
+
57
+ ```typescript
58
+ async function answerWithCitations(question: string) {
59
+ const results = await ragie.retrievals.retrieve({
60
+ query: question,
61
+ rerank: true,
62
+ topK: 6,
63
+ });
64
+
65
+ const sources = results.scoredChunks.map((c) => ({
66
+ name: c.documentName,
67
+ id: c.documentId,
68
+ }));
69
+ const context = results.scoredChunks
70
+ .map((c) => `[Source: ${c.documentName}]\n${c.text}`)
71
+ .join("\n\n");
72
+
73
+ const msg = await anthropic.messages.create({
74
+ model: "claude-sonnet-4-6",
75
+ max_tokens: 1024,
76
+ messages: [{
77
+ role: "user",
78
+ content: `Answer using only the context below. Reference sources by name.\n\nContext:\n${context}\n\nQuestion: ${question}`,
79
+ }],
80
+ });
81
+
82
+ return { answer: (msg.content[0] as { text: string }).text, sources };
83
+ }
84
+ ```
85
+
86
+ ## Tool Use Integration
87
+
88
+ Expose Ragie as a tool Claude can call during an agentic loop:
89
+
90
+ ```typescript
91
+ const tools: Anthropic.Tool[] = [{
92
+ name: "search_knowledge_base",
93
+ description: "Search the internal knowledge base for relevant information.",
94
+ input_schema: {
95
+ type: "object",
96
+ properties: { query: { type: "string", description: "The search query" } },
97
+ required: ["query"],
98
+ },
99
+ }];
100
+
101
+ async function runAgent(userMessage: string): Promise<string> {
102
+ const messages: Anthropic.MessageParam[] = [{ role: "user", content: userMessage }];
103
+
104
+ while (true) {
105
+ const response = await anthropic.messages.create({
106
+ model: "claude-sonnet-4-6",
107
+ max_tokens: 2048,
108
+ tools,
109
+ messages,
110
+ });
111
+
112
+ if (response.stop_reason === "tool_use") {
113
+ const toolUse = response.content.find((b) => b.type === "tool_use") as Anthropic.ToolUseBlock;
114
+ const { query } = toolUse.input as { query: string };
115
+ const results = await ragie.retrievals.retrieve({ query, rerank: true });
116
+ const context = results.scoredChunks.map((c) => c.text).join("\n\n");
117
+
118
+ messages.push(
119
+ { role: "assistant", content: response.content },
120
+ { role: "user", content: [{ type: "tool_result", tool_use_id: toolUse.id, content: context }] }
121
+ );
122
+ } else {
123
+ const text = response.content.find((b) => b.type === "text") as Anthropic.TextBlock;
124
+ return text.text;
125
+ }
126
+ }
127
+ }
128
+ ```
129
+
130
+ ## Multi-Tenant RAG
131
+
132
+ ```typescript
133
+ async function answerForTenant(tenantId: string, question: string): Promise<string> {
134
+ const results = await ragie.retrievals.retrieve({
135
+ query: question,
136
+ partition: `tenant-${tenantId}`,
137
+ rerank: true,
138
+ });
139
+ const context = results.scoredChunks.map((c) => c.text).join("\n\n");
140
+
141
+ const msg = await anthropic.messages.create({
142
+ model: "claude-sonnet-4-6",
143
+ max_tokens: 1024,
144
+ messages: [{ role: "user", content: `Context:\n${context}\n\nQuestion: ${question}` }],
145
+ });
146
+ return (msg.content[0] as { text: string }).text;
147
+ }
148
+ ```
149
+
150
+ ## Production Checklist
151
+
152
+ - [ ] API key in environment variable, not source code
153
+ - [ ] `rerank: true` for generation use cases
154
+ - [ ] Document readiness verified before querying (polling or webhook)
155
+ - [ ] Partitions used for multi-tenant isolation
156
+ - [ ] Metadata attached for future filtering
157
+ - [ ] Retry on 429 / 5xx with exponential back-off
158
+ - [ ] Citations surfaced in answers
159
+ - [ ] `topK` tuned to use case (6–10 is typical)
160
+ - [ ] Chunk scores logged for retrieval quality monitoring
@@ -0,0 +1,77 @@
1
+ # Ragie Retrieval
2
+
3
+ > Python user? See `references/python.md` for Python equivalents.
4
+
5
+ ## Basic Retrieval
6
+
7
+ ```typescript
8
+ const results = await client.retrievals.retrieve({
9
+ query: "What are the rate limits?",
10
+ });
11
+ for (const chunk of results.scoredChunks) {
12
+ console.log(chunk.text, chunk.score);
13
+ }
14
+ ```
15
+
16
+ ## Full Options
17
+
18
+ ```typescript
19
+ const results = await client.retrievals.retrieve({
20
+ query: "your question",
21
+ topK: 8, // number of chunks returned (default 8)
22
+ rerank: true, // cross-encoder rerank — improves quality significantly
23
+ partition: "tenant-42", // scope to a partition (optional)
24
+ filter: { // metadata filter (optional) — see metadata-filtering.md
25
+ product: "api",
26
+ version: "v3",
27
+ },
28
+ maxChunksPerDocument: 2, // limit chunks per source doc — improves source diversity (optional)
29
+ recencyBias: false, // favor more recently ingested documents (default false) (optional)
30
+ });
31
+ ```
32
+
33
+ ## Scored Chunk Fields
34
+
35
+ Each item in `scoredChunks` has:
36
+
37
+ | Field | Description |
38
+ |-------|-------------|
39
+ | `text` | The chunk text |
40
+ | `score` | Relevance score (higher = more relevant) |
41
+ | `documentId` | ID of the source document |
42
+ | `documentName` | Display name of the source document |
43
+ | `documentMetadata` | Metadata attached to the source document |
44
+ | `id` | The chunk's own ID |
45
+ | `index` | The chunk's position index within the document |
46
+ | `metadata` | Chunk-level metadata (distinct from `documentMetadata`) |
47
+
48
+ ## Hybrid Search
49
+
50
+ Ragie automatically blends semantic (vector) and keyword (BM25) search on every query. No configuration needed.
51
+
52
+ `rerank: true` applies a cross-encoder on top of the hybrid results. Always enable for generation use cases:
53
+
54
+ ```typescript
55
+ // Fast path — lower latency
56
+ const fast = await client.retrievals.retrieve({ query: q, rerank: false });
57
+
58
+ // Quality path — recommended for RAG
59
+ const best = await client.retrievals.retrieve({ query: q, rerank: true });
60
+ ```
61
+
62
+ ## Tuning topK
63
+
64
+ | Use case | Recommended topK |
65
+ |----------|------------------|
66
+ | Short-answer generation | 4–6 |
67
+ | Long-form generation | 8–12 |
68
+ | Exploratory / debugging | 20+ |
69
+
70
+ Too many chunks dilute the context and increase token cost. Too few risk missing relevant content.
71
+
72
+ ## Gotchas
73
+
74
+ - `rerank` adds ~100–300ms latency but meaningfully improves result quality. Use it by default.
75
+ - Metadata `filter` supports rich operators (`$eq`, `$ne`, `$gt`, `$gte`, `$lt`, `$lte`, `$in`, `$nin`, `$and`, `$or`). Simple object shorthand `{ key: value }` is equality. See `metadata-filtering.md`.
76
+ - Scores are not normalized across queries; use them only to rank within a single result set.
77
+ - Query documents only after `status === "ready"`. See `ingestion.md`.