spine-framework-cortex 0.2.19 → 0.2.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/custom_case_analysis.ts +507 -0
- package/functions/custom_cortex-chunks.ts +52 -0
- package/functions/custom_cortex-handler.ts +35 -0
- package/functions/custom_kb-chunker-test.ts +364 -0
- package/functions/custom_kb-chunker.ts +576 -0
- package/functions/custom_kb-embeddings.ts +472 -0
- package/functions/custom_kb-ingestion.ts +447 -0
- package/functions/custom_tag_management.ts +314 -0
- package/manifest.json +1 -0
- package/package.json +1 -1
- package/pages/courses/CoursesPage.tsx +1 -1
- package/pages/kb/KBEditorPage.tsx +1 -1
- package/pages/support/RedactionReview.tsx +1 -1
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test script for the adaptive article chunker.
|
|
3
|
+
*
|
|
4
|
+
* Run: npx tsx custom/functions/custom_kb-chunker-test.ts
|
|
5
|
+
*/
|
|
6
|
+
import { readFileSync } from 'fs'
|
|
7
|
+
import { chunkArticle, estimateTokens, htmlToPlainText } from './custom_kb-chunker'
|
|
8
|
+
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Helpers
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
let passed = 0
|
|
14
|
+
let failed = 0
|
|
15
|
+
|
|
16
|
+
/**
 * Records the outcome of one check and prints a single result line.
 *
 * Never throws: a failed check only increments the module-level `failed`
 * counter, so the remaining checks in the script still run.
 *
 * @param condition - Result of the check; `true` counts as a pass.
 * @param message - Human-readable description printed next to the ✅ / ❌ marker.
 */
function assert(condition: boolean, message: string) {
  if (!condition) {
    failed++
    console.log(` ❌ ${message}`)
    return
  }

  passed++
  console.log(` ✅ ${message}`)
}
|
|
25
|
+
|
|
26
|
+
/**
 * Prints a banner line, preceded by a blank line, that introduces a group of checks.
 *
 * @param name - Title shown between the `──` rules.
 */
function section(name: string) {
  const banner = `── ${name} ──`
  console.log('\n' + banner)
}
|
|
29
|
+
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Test 1: Short article — single chunk, no splitting
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
section('Test 1: Short article (< 600 tokens)')

// Fixture: a handful of short plain-text paragraphs with no headings.
const shortArticle = `This is a short support article about resetting passwords.

If you forgot your password, click the "Forgot Password" link on the login page.
You will receive an email with a reset link. The link expires in 24 hours.

If you don't receive the email, check your spam folder or contact support.`

const shortChunks = chunkArticle(shortArticle, {
  articleTitle: 'How to Reset Your Password',
})

// Read the first chunk once and go through optional chaining below: if the
// chunker returns an empty array, each check is recorded as a failure instead
// of a TypeError aborting the script before the summary is printed.
const firstShortChunk = shortChunks[0]

assert(shortChunks.length === 1, `Single chunk produced (got ${shortChunks.length})`)
assert(
  firstShortChunk?.content.startsWith('How to Reset Your Password') === true,
  'Prefixed with article title',
)
assert(firstShortChunk?.sectionPath === null, 'No section path for short article')
assert(firstShortChunk?.chunkIndex === 0, 'chunkIndex is 0')
assert(firstShortChunk?.chunkTotal === 1, 'chunkTotal is 1')
|
|
52
|
+
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// Test 2: Structured markdown with headings — heading-based split
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
section('Test 2: Structured markdown with headings')

// Fixture: a markdown guide with one H1, several H2 sections, nested H3
// subsections, bullet lists and fenced code blocks.
const structuredArticle = `# Getting Started Guide

Welcome to our platform. This guide will walk you through the complete process of setting up, configuring, and using the Spine SDK in your application. By the end, you will have a fully working integration.

## Installation

Run the following command to install the SDK and its peer dependencies:

\`\`\`bash
npm install @spine/sdk @spine/auth @spine/utils
\`\`\`

Then configure your environment variables. Make sure you have Node.js 18 or later installed. The SDK uses native fetch and ES modules, so older Node versions are not supported.

After installation, verify the package is available by running \`npx spine-check\`. This will confirm the SDK is correctly installed and your environment meets all requirements.

## Configuration

Create a \`.env\` file in your project root with the following variables:

\`\`\`
SPINE_API_KEY=your-key-here
SPINE_URL=https://api.spine.dev
SPINE_ACCOUNT_ID=your-account-uuid
\`\`\`

### Required Variables

- \`SPINE_API_KEY\` — your API key from the admin dashboard. Navigate to Settings > API Keys to generate one. Each key is scoped to a specific account and set of permissions.
- \`SPINE_URL\` — the API endpoint for your region. Use \`https://api.spine.dev\` for US, \`https://api.eu.spine.dev\` for EU.
- \`SPINE_ACCOUNT_ID\` — your account UUID, found on the Settings page.

### Optional Variables

- \`SPINE_TIMEOUT\` — request timeout in milliseconds (default: 30000). Increase this for large batch operations.
- \`SPINE_RETRY\` — number of automatic retries for transient failures (default: 3). Set to 0 to disable.
- \`SPINE_LOG_LEVEL\` — logging verbosity: \`debug\`, \`info\`, \`warn\`, \`error\` (default: \`info\`).
- \`SPINE_PROXY\` — HTTP proxy URL for corporate network environments.

## Usage

Import the SDK and create a client instance. The client handles authentication, retries, and connection pooling automatically:

\`\`\`typescript
import { SpineClient } from '@spine/sdk'

const client = new SpineClient({
apiKey: process.env.SPINE_API_KEY,
url: process.env.SPINE_URL,
accountId: process.env.SPINE_ACCOUNT_ID,
})

// List all accounts
const accounts = await client.accounts.list()

// Get a specific item
const item = await client.items.get('uuid-here')

// Create a new item
const newItem = await client.items.create({
title: 'My New Item',
type_slug: 'task',
data: { priority: 'high' },
})
\`\`\`

The client is thread-safe and can be shared across your application. Create one instance at startup and reuse it.

## Error Handling

The SDK throws typed errors for different failure modes. Always wrap API calls in try-catch blocks:

\`\`\`typescript
try {
const result = await client.items.get('invalid-uuid')
} catch (err) {
if (err instanceof SpineAuthError) {
console.error('Authentication failed:', err.message)
} else if (err instanceof SpineNotFoundError) {
console.error('Item not found')
} else if (err instanceof SpineRateLimitError) {
console.error('Rate limited, retry after:', err.retryAfter)
}
}
\`\`\`

## Troubleshooting

### Error: Invalid API Key

Make sure your API key is correct and has not expired. API keys can be rotated from the admin dashboard under Settings > API Keys. If you recently rotated your key, update your \`.env\` file with the new value.

### Error: Connection Timeout

Check that your \`SPINE_URL\` is correct and the server is reachable. Common causes include firewall rules blocking outbound HTTPS traffic, incorrect proxy configuration, or DNS resolution failures. Try running \`curl -v https://api.spine.dev/health\` to verify connectivity.

### Error: Rate Limited

The API enforces rate limits per API key. Default limits are 100 requests per second for read operations and 20 per second for write operations. If you need higher limits, contact support to discuss your use case.`

const structuredChunks = chunkArticle(structuredArticle, {
  articleTitle: 'Getting Started Guide',
})

assert(structuredChunks.length > 1, `Multiple chunks produced (got ${structuredChunks.length})`)

// Substring check only: the title must appear somewhere in every chunk.
const everyChunkHasTitle = structuredChunks.every(c => c.content.includes('Getting Started Guide'))
assert(everyChunkHasTitle, 'All chunks prefixed with article title')

// Each of these H2 headings must show up in the sectionPath of at least one chunk.
const expectedHeadings = ['Installation', 'Configuration', 'Usage', 'Troubleshooting', 'Error Handling']
for (const heading of expectedHeadings) {
  const seenInSomeChunk = structuredChunks.some(c => c.sectionPath?.includes(heading))
  assert(seenInSomeChunk, `Has ${heading} section`)
}

console.log('\n Chunk breakdown:')
for (const [i, c] of structuredChunks.entries()) {
  const tokenCount = estimateTokens(c.content)
  const sectionLabel = c.sectionPath || '(preamble)'
  console.log(` [${i}] ~${tokenCount} tokens | section: ${sectionLabel}`)
}
|
|
181
|
+
|
|
182
|
+
// ---------------------------------------------------------------------------
|
|
183
|
+
// Test 3: Unstructured narrative — paragraph grouping
|
|
184
|
+
// ---------------------------------------------------------------------------
|
|
185
|
+
|
|
186
|
+
section('Test 3: Unstructured narrative (no headings)')

// Fixture: 20 numbered paragraphs of heading-free prose, separated by blank lines.
const narrativeParagraphs: string[] = []
for (let n = 1; n <= 20; n++) {
  narrativeParagraphs.push(
    `Paragraph ${n}: This is a block of narrative content that represents a success story or editorial piece. It describes how the customer implemented the solution and achieved measurable results in their business operations. The team worked closely with the vendor to ensure a smooth rollout across all departments.`,
  )
}
const narrative = narrativeParagraphs.join('\n\n')

const narrativeChunks = chunkArticle(narrative, {
  articleTitle: 'Acme Corp Success Story',
})

const noSectionPaths = narrativeChunks.every(c => c.sectionPath === null)
const allTitled = narrativeChunks.every(c => c.content.includes('Acme Corp Success Story'))
// Only checks for the literal word "chunk" in the content, not the exact "N of M" wording.
const allMentionChunk = narrativeChunks.every(c => c.content.includes('chunk'))

assert(narrativeChunks.length > 1, `Multiple chunks produced (got ${narrativeChunks.length})`)
assert(noSectionPaths, 'No section paths for unstructured content')
assert(allTitled, 'All prefixed with title')
assert(allMentionChunk, 'All have chunk N of M context')

console.log('\n Chunk breakdown:')
for (const [i, c] of narrativeChunks.entries()) {
  const tokenCount = estimateTokens(c.content)
  console.log(` [${i}] ~${tokenCount} tokens | ${c.chunkIndex + 1}/${c.chunkTotal}`)
}
|
|
205
|
+
|
|
206
|
+
// ---------------------------------------------------------------------------
|
|
207
|
+
// Test 4: Code blocks stay atomic
|
|
208
|
+
// ---------------------------------------------------------------------------
|
|
209
|
+
|
|
210
|
+
section('Test 4: Code blocks are atomic')

// 80 generated source lines that form the body of one fenced code block.
const generatedCodeLines = Array.from(
  { length: 80 },
  (_, i) => ` const line${i} = 'this is line ${i} of the code block'`,
).join('\n')

const codeArticle = `## Overview

Short intro.

## The Code

Here is a large code block:

\`\`\`typescript
${generatedCodeLines}
\`\`\`

## After the Code

Some text after.`

const codeChunks = chunkArticle(codeArticle, {
  articleTitle: 'Code Example Article',
  maxTokens: 400, // Force sub-splitting
})

// The chunk holding the first generated line must also hold the last one;
// otherwise the fenced block was cut somewhere in the middle.
const codeBlockChunk = codeChunks
  .map(c => c.content)
  .find(text => text.includes("const line0 = 'this is line 0"))

assert(!!codeBlockChunk, 'Found chunk containing start of code block')
if (codeBlockChunk) {
  const holdsLastLine = codeBlockChunk.includes("const line79 = 'this is line 79")
  assert(holdsLastLine, 'Same chunk contains end of code block (atomic)')
}

console.log('\n Chunk breakdown:')
for (const [i, c] of codeChunks.entries()) {
  const tokenCount = estimateTokens(c.content)
  const sectionLabel = c.sectionPath || '(none)'
  console.log(` [${i}] ~${tokenCount} tokens | section: ${sectionLabel}`)
}
|
|
245
|
+
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
// Test 5: Tiny chunks get merged
|
|
248
|
+
// ---------------------------------------------------------------------------
|
|
249
|
+
|
|
250
|
+
section('Test 5: Tiny chunks get merged')

// Fixture: two one-line sections followed by one section with a full paragraph.
const tinyArticle = `## Section A

Tiny.

## Section B

Also tiny.

## Section C

This section has enough content to be meaningful. It contains several sentences that describe the topic in detail. The reader should come away with a clear understanding of the concepts presented here. Additional context is provided to ensure the chunk meets the minimum token threshold.`

const tinyChunks = chunkArticle(tinyArticle, {
  articleTitle: 'Merge Test',
  minTokens: 100,
})

// Three headings went in; with Sections A and B this small, fewer than
// three chunks should come out if merging happened.
const fewerThanThree = tinyChunks.length < 3
assert(fewerThanThree, `Tiny sections merged (got ${tinyChunks.length} chunks, expected < 3)`)

console.log('\n Chunk breakdown:')
for (const [i, c] of tinyChunks.entries()) {
  const tokenCount = estimateTokens(c.content)
  const sectionLabel = c.sectionPath || '(none)'
  console.log(` [${i}] ~${tokenCount} tokens | section: ${sectionLabel}`)
}
|
|
276
|
+
|
|
277
|
+
// ---------------------------------------------------------------------------
|
|
278
|
+
// Test 6: HTML content
|
|
279
|
+
// ---------------------------------------------------------------------------
|
|
280
|
+
|
|
281
|
+
section('Test 6: HTML content')

// Fixture: HTML markup with three <h2> sections, inline formatting and a list.
const htmlArticle = `<h2>Introduction</h2>
<p>This is an HTML article stored by the rich text editor.</p>
<p>It contains multiple paragraphs of formatted content.</p>

<h2>Details</h2>
<p>Here are the details with <strong>bold</strong> and <em>italic</em> text.</p>
<ul>
<li>Item one</li>
<li>Item two</li>
<li>Item three</li>
</ul>

<h2>Conclusion</h2>
<p>Wrapping up the article with a summary of key points.</p>`

const htmlChunks = chunkArticle(htmlArticle, {
  articleTitle: 'HTML Article Test',
})

// No chunk may still contain a literal opening <p> or <h2> tag.
const paragraphTagsGone = htmlChunks.every(c => !c.content.includes('<p>'))
const headingTagsGone = htmlChunks.every(c => !c.content.includes('<h2>'))

assert(htmlChunks.length >= 1, `Chunks produced from HTML (got ${htmlChunks.length})`)
assert(paragraphTagsGone, 'HTML tags stripped from chunk content')
assert(headingTagsGone, 'Heading tags stripped from chunk content')

console.log('\n Chunk breakdown:')
for (const [i, c] of htmlChunks.entries()) {
  const tokenCount = estimateTokens(c.content)
  const sectionLabel = c.sectionPath || '(none)'
  console.log(` [${i}] ~${tokenCount} tokens | section: ${sectionLabel}`)
}
|
|
310
|
+
|
|
311
|
+
// ---------------------------------------------------------------------------
|
|
312
|
+
// Test 7: Real file — design-schema-spec.md
|
|
313
|
+
// ---------------------------------------------------------------------------
|
|
314
|
+
|
|
315
|
+
section('Test 7: Real file — design-schema-spec.md')

// Only the file read is guarded. Previously the whole test body sat inside the
// try block, so an exception thrown while chunking or checking was reported as
// "could not read" and the test was silently skipped. Now a missing or
// unreadable file is the only thing that skips; any other error surfaces.
const specContent = (() => {
  try {
    return readFileSync(
      new URL('../docs/design-schema-spec.md', import.meta.url),
      'utf-8'
    )
  } catch (err: unknown) {
    // Catch variables are `unknown`; narrow before reading `.message`.
    const reason = err instanceof Error ? err.message : String(err)
    console.log(` ⚠️ Skipped — could not read design-schema-spec.md: ${reason}`)
    return null
  }
})()

if (specContent !== null) {
  const specChunks = chunkArticle(specContent, {
    articleTitle: 'Design Schema & Validation Schema Specification',
  })

  // Estimate each chunk once; the size checks and the total below reuse it.
  const chunkTokenCounts = specChunks.map(c => estimateTokens(c.content))

  assert(specChunks.length > 5, `Multiple chunks from spec (got ${specChunks.length})`)
  assert(specChunks.length < 40, `Reasonable chunk count (got ${specChunks.length}, expected < 40)`)
  assert(chunkTokenCounts.every(tokens => tokens >= 50),
    `All chunks have meaningful content (min ${Math.min(...chunkTokenCounts)} tokens)`)

  const maxChunkTokens = Math.max(...chunkTokenCounts)
  assert(maxChunkTokens < 2000, `No chunk is excessively large (max ${maxChunkTokens} tokens)`)

  console.log('\n Chunk breakdown:')
  specChunks.forEach((c, i) => {
    console.log(` [${i}] ~${chunkTokenCounts[i]} tokens | section: ${c.sectionPath || '(preamble)'}`)
  })

  console.log(`\n Total tokens across all chunks: ~${chunkTokenCounts.reduce((sum, tokens) => sum + tokens, 0)}`)
  console.log(` Original doc tokens: ~${estimateTokens(specContent)}`)
}
|
|
345
|
+
|
|
346
|
+
// ---------------------------------------------------------------------------
|
|
347
|
+
// Test 8: Empty content
|
|
348
|
+
// ---------------------------------------------------------------------------
|
|
349
|
+
|
|
350
|
+
section('Test 8: Empty content')

const emptyChunks = chunkArticle('', { articleTitle: 'Empty Article' })

assert(emptyChunks.length === 1, `Single chunk for empty content (got ${emptyChunks.length})`)
// Optional chaining: if no chunk comes back at all, this records a failed
// check rather than throwing a TypeError that would skip the summary below.
assert(emptyChunks[0]?.content === 'Empty Article', 'Content is just the title')
|
|
355
|
+
|
|
356
|
+
// ---------------------------------------------------------------------------
|
|
357
|
+
// Summary
|
|
358
|
+
// ---------------------------------------------------------------------------
|
|
359
|
+
|
|
360
|
+
// Print the totals between two 50-character rules, then signal failure to the
// caller through the exit status when any check failed.
const summaryRule = '═'.repeat(50)

console.log('\n' + summaryRule)
console.log(`Results: ${passed} passed, ${failed} failed`)
console.log(summaryRule)

if (failed > 0) {
  process.exit(1)
}
|