spine-framework-cortex 0.2.19 → 0.2.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,364 @@
1
+ /**
2
+ * Test script for the adaptive article chunker.
3
+ *
4
+ * Run: npx tsx custom/functions/custom_kb-chunker-test.ts
5
+ */
6
+ import { readFileSync } from 'fs'
7
+ import { chunkArticle, estimateTokens, htmlToPlainText } from './custom_kb-chunker'
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // Helpers
11
+ // ---------------------------------------------------------------------------
12
+
13
+ let passed = 0
14
+ let failed = 0
15
+
16
/**
 * Records the outcome of one check and prints a marked line for it.
 *
 * A truthy `condition` increments the module-level `passed` counter and logs
 * a ✅ line; anything else increments `failed` and logs a ❌ line. The function
 * never throws, so the script keeps running after a failed check.
 *
 * @param condition - Result of the check being recorded.
 * @param message - Description printed next to the pass/fail marker.
 */
function assert(condition: boolean, message: string) {
  if (!condition) {
    failed += 1
    console.log(` ❌ ${message}`)
    return
  }

  passed += 1
  console.log(` ✅ ${message}`)
}
25
+
26
/**
 * Prints a banner line that introduces a group of checks.
 *
 * @param name - Label shown between the rule characters.
 */
function section(name: string) {
  const banner = `\n── ${name} ──`
  console.log(banner)
}
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Test 1: Short article — single chunk, no splitting
32
+ // ---------------------------------------------------------------------------
33
+
34
section('Test 1: Short article (< 600 tokens)')

// A three-paragraph article, expected to come back as a single chunk.
const shortArticle = `This is a short support article about resetting passwords.

If you forgot your password, click the "Forgot Password" link on the login page.
You will receive an email with a reset link. The link expires in 24 hours.

If you don't receive the email, check your spam folder or contact support.`

const shortChunks = chunkArticle(shortArticle, {
  articleTitle: 'How to Reset Your Password',
})

// Read the first chunk once and go through optional chaining below: if the
// chunker returns an empty array, each check is recorded as a failure instead
// of a TypeError aborting the run before the summary is printed.
const firstShortChunk = shortChunks[0]

assert(shortChunks.length === 1, `Single chunk produced (got ${shortChunks.length})`)
assert(firstShortChunk?.content.startsWith('How to Reset Your Password') === true, 'Prefixed with article title')
assert(firstShortChunk?.sectionPath === null, 'No section path for short article')
assert(firstShortChunk?.chunkIndex === 0, 'chunkIndex is 0')
assert(firstShortChunk?.chunkTotal === 1, 'chunkTotal is 1')
52
+
53
+ // ---------------------------------------------------------------------------
54
+ // Test 2: Structured markdown with headings — heading-based split
55
+ // ---------------------------------------------------------------------------
56
+
57
+ section('Test 2: Structured markdown with headings')
58
+
59
+ const structuredArticle = `# Getting Started Guide
60
+
61
+ Welcome to our platform. This guide will walk you through the complete process of setting up, configuring, and using the Spine SDK in your application. By the end, you will have a fully working integration.
62
+
63
+ ## Installation
64
+
65
+ Run the following command to install the SDK and its peer dependencies:
66
+
67
+ \`\`\`bash
68
+ npm install @spine/sdk @spine/auth @spine/utils
69
+ \`\`\`
70
+
71
+ Then configure your environment variables. Make sure you have Node.js 18 or later installed. The SDK uses native fetch and ES modules, so older Node versions are not supported.
72
+
73
+ After installation, verify the package is available by running \`npx spine-check\`. This will confirm the SDK is correctly installed and your environment meets all requirements.
74
+
75
+ ## Configuration
76
+
77
+ Create a \`.env\` file in your project root with the following variables:
78
+
79
+ \`\`\`
80
+ SPINE_API_KEY=your-key-here
81
+ SPINE_URL=https://api.spine.dev
82
+ SPINE_ACCOUNT_ID=your-account-uuid
83
+ \`\`\`
84
+
85
+ ### Required Variables
86
+
87
+ - \`SPINE_API_KEY\` — your API key from the admin dashboard. Navigate to Settings > API Keys to generate one. Each key is scoped to a specific account and set of permissions.
88
+ - \`SPINE_URL\` — the API endpoint for your region. Use \`https://api.spine.dev\` for US, \`https://api.eu.spine.dev\` for EU.
89
+ - \`SPINE_ACCOUNT_ID\` — your account UUID, found on the Settings page.
90
+
91
+ ### Optional Variables
92
+
93
+ - \`SPINE_TIMEOUT\` — request timeout in milliseconds (default: 30000). Increase this for large batch operations.
94
+ - \`SPINE_RETRY\` — number of automatic retries for transient failures (default: 3). Set to 0 to disable.
95
+ - \`SPINE_LOG_LEVEL\` — logging verbosity: \`debug\`, \`info\`, \`warn\`, \`error\` (default: \`info\`).
96
+ - \`SPINE_PROXY\` — HTTP proxy URL for corporate network environments.
97
+
98
+ ## Usage
99
+
100
+ Import the SDK and create a client instance. The client handles authentication, retries, and connection pooling automatically:
101
+
102
+ \`\`\`typescript
103
+ import { SpineClient } from '@spine/sdk'
104
+
105
+ const client = new SpineClient({
106
+ apiKey: process.env.SPINE_API_KEY,
107
+ url: process.env.SPINE_URL,
108
+ accountId: process.env.SPINE_ACCOUNT_ID,
109
+ })
110
+
111
+ // List all accounts
112
+ const accounts = await client.accounts.list()
113
+
114
+ // Get a specific item
115
+ const item = await client.items.get('uuid-here')
116
+
117
+ // Create a new item
118
+ const newItem = await client.items.create({
119
+ title: 'My New Item',
120
+ type_slug: 'task',
121
+ data: { priority: 'high' },
122
+ })
123
+ \`\`\`
124
+
125
+ The client is thread-safe and can be shared across your application. Create one instance at startup and reuse it.
126
+
127
+ ## Error Handling
128
+
129
+ The SDK throws typed errors for different failure modes. Always wrap API calls in try-catch blocks:
130
+
131
+ \`\`\`typescript
132
+ try {
133
+ const result = await client.items.get('invalid-uuid')
134
+ } catch (err) {
135
+ if (err instanceof SpineAuthError) {
136
+ console.error('Authentication failed:', err.message)
137
+ } else if (err instanceof SpineNotFoundError) {
138
+ console.error('Item not found')
139
+ } else if (err instanceof SpineRateLimitError) {
140
+ console.error('Rate limited, retry after:', err.retryAfter)
141
+ }
142
+ }
143
+ \`\`\`
144
+
145
+ ## Troubleshooting
146
+
147
+ ### Error: Invalid API Key
148
+
149
+ Make sure your API key is correct and has not expired. API keys can be rotated from the admin dashboard under Settings > API Keys. If you recently rotated your key, update your \`.env\` file with the new value.
150
+
151
+ ### Error: Connection Timeout
152
+
153
+ Check that your \`SPINE_URL\` is correct and the server is reachable. Common causes include firewall rules blocking outbound HTTPS traffic, incorrect proxy configuration, or DNS resolution failures. Try running \`curl -v https://api.spine.dev/health\` to verify connectivity.
154
+
155
+ ### Error: Rate Limited
156
+
157
+ The API enforces rate limits per API key. Default limits are 100 requests per second for read operations and 20 per second for write operations. If you need higher limits, contact support to discuss your use case.`
158
+
159
// Chunk the structured markdown fixture and check the per-section output.
const structuredChunks = chunkArticle(structuredArticle, {
  articleTitle: 'Getting Started Guide',
})

assert(structuredChunks.length > 1, `Multiple chunks produced (got ${structuredChunks.length})`)
assert(
  structuredChunks.every(chunk => chunk.content.includes('Getting Started Guide')),
  'All chunks prefixed with article title'
)

// Each top-level heading of the fixture must show up in at least one chunk's
// section path. The order matches the order in which results are reported.
for (const heading of ['Installation', 'Configuration', 'Usage', 'Troubleshooting', 'Error Handling']) {
  assert(
    structuredChunks.some(chunk => chunk.sectionPath?.includes(heading)),
    `Has ${heading} section`
  )
}

// Diagnostic listing: estimated size and section of every chunk.
console.log('\n Chunk breakdown:')
for (const [i, chunk] of structuredChunks.entries()) {
  console.log(` [${i}] ~${estimateTokens(chunk.content)} tokens | section: ${chunk.sectionPath || '(preamble)'}`)
}
181
+
182
+ // ---------------------------------------------------------------------------
183
+ // Test 3: Unstructured narrative — paragraph grouping
184
+ // ---------------------------------------------------------------------------
185
+
186
section('Test 3: Unstructured narrative (no headings)')

// Twenty numbered paragraphs with no headings, separated by blank lines.
const narrativeParagraphs: string[] = []
for (let n = 1; n <= 20; n++) {
  narrativeParagraphs.push(
    `Paragraph ${n}: This is a block of narrative content that represents a success story or editorial piece. It describes how the customer implemented the solution and achieved measurable results in their business operations. The team worked closely with the vendor to ensure a smooth rollout across all departments.`
  )
}
const narrative = narrativeParagraphs.join('\n\n')

const narrativeChunks = chunkArticle(narrative, {
  articleTitle: 'Acme Corp Success Story',
})

assert(narrativeChunks.length > 1, `Multiple chunks produced (got ${narrativeChunks.length})`)
assert(
  narrativeChunks.every(chunk => chunk.sectionPath === null),
  'No section paths for unstructured content'
)
assert(
  narrativeChunks.every(chunk => chunk.content.includes('Acme Corp Success Story')),
  'All prefixed with title'
)
assert(
  narrativeChunks.every(chunk => chunk.content.includes('chunk')),
  'All have chunk N of M context'
)

// Diagnostic listing: estimated size and position of every chunk.
console.log('\n Chunk breakdown:')
for (const [i, chunk] of narrativeChunks.entries()) {
  console.log(` [${i}] ~${estimateTokens(chunk.content)} tokens | ${chunk.chunkIndex + 1}/${chunk.chunkTotal}`)
}
205
+
206
+ // ---------------------------------------------------------------------------
207
+ // Test 4: Code blocks stay atomic
208
+ // ---------------------------------------------------------------------------
209
+
210
section('Test 4: Code blocks are atomic')

// Eighty generated source lines that form the body of one fenced code block.
const generatedCodeLines = Array.from(
  { length: 80 },
  (_, lineNo) => ` const line${lineNo} = 'this is line ${lineNo} of the code block'`
).join('\n')

// Three sections; the middle one holds the oversized fenced block.
const codeArticle = [
  '## Overview',
  '',
  'Short intro.',
  '',
  '## The Code',
  '',
  'Here is a large code block:',
  '',
  '```typescript',
  generatedCodeLines,
  '```',
  '',
  '## After the Code',
  '',
  'Some text after.',
].join('\n')

const codeChunks = chunkArticle(codeArticle, {
  articleTitle: 'Code Example Article',
  maxTokens: 400, // Force sub-splitting
})

// The chunk that holds the first generated line must also hold the last one;
// otherwise the fenced block was cut somewhere in the middle.
const chunkWithCodeStart = codeChunks
  .map(chunk => chunk.content)
  .find(text => text.includes("const line0 = 'this is line 0"))
assert(!!chunkWithCodeStart, 'Found chunk containing start of code block')
if (chunkWithCodeStart) {
  const holdsCodeEnd = chunkWithCodeStart.includes("const line79 = 'this is line 79")
  assert(holdsCodeEnd, 'Same chunk contains end of code block (atomic)')
}

// Diagnostic listing: estimated size and section of every chunk.
console.log('\n Chunk breakdown:')
for (const [i, chunk] of codeChunks.entries()) {
  console.log(` [${i}] ~${estimateTokens(chunk.content)} tokens | section: ${chunk.sectionPath || '(none)'}`)
}
245
+
246
+ // ---------------------------------------------------------------------------
247
+ // Test 5: Tiny chunks get merged
248
+ // ---------------------------------------------------------------------------
249
+
250
section('Test 5: Tiny chunks get merged')

// Two near-empty sections followed by one section with real content.
const tinyArticle = [
  '## Section A',
  '',
  'Tiny.',
  '',
  '## Section B',
  '',
  'Also tiny.',
  '',
  '## Section C',
  '',
  'This section has enough content to be meaningful. It contains several sentences that describe the topic in detail. The reader should come away with a clear understanding of the concepts presented here. Additional context is provided to ensure the chunk meets the minimum token threshold.',
].join('\n')

const tinyChunks = chunkArticle(tinyArticle, {
  articleTitle: 'Merge Test',
  minTokens: 100,
})

// Sections A and B hold only a word or two each, so with minTokens set to 100
// the check expects fewer chunks than there are sections.
assert(tinyChunks.length < 3, `Tiny sections merged (got ${tinyChunks.length} chunks, expected < 3)`)

// Diagnostic listing: estimated size and section of every chunk.
console.log('\n Chunk breakdown:')
for (const [i, chunk] of tinyChunks.entries()) {
  console.log(` [${i}] ~${estimateTokens(chunk.content)} tokens | section: ${chunk.sectionPath || '(none)'}`)
}
276
+
277
+ // ---------------------------------------------------------------------------
278
+ // Test 6: HTML content
279
+ // ---------------------------------------------------------------------------
280
+
281
section('Test 6: HTML content')

// Rich-text style HTML with three <h2> sections, paragraphs and a list.
const htmlArticle = [
  '<h2>Introduction</h2>',
  '<p>This is an HTML article stored by the rich text editor.</p>',
  '<p>It contains multiple paragraphs of formatted content.</p>',
  '',
  '<h2>Details</h2>',
  '<p>Here are the details with <strong>bold</strong> and <em>italic</em> text.</p>',
  '<ul>',
  '<li>Item one</li>',
  '<li>Item two</li>',
  '<li>Item three</li>',
  '</ul>',
  '',
  '<h2>Conclusion</h2>',
  '<p>Wrapping up the article with a summary of key points.</p>',
].join('\n')

const htmlChunks = chunkArticle(htmlArticle, {
  articleTitle: 'HTML Article Test',
})

assert(htmlChunks.length >= 1, `Chunks produced from HTML (got ${htmlChunks.length})`)
// No chunk may still carry the literal opening tags of the source markup.
assert(
  htmlChunks.every(chunk => !chunk.content.includes('<p>')),
  'HTML tags stripped from chunk content'
)
assert(
  htmlChunks.every(chunk => !chunk.content.includes('<h2>')),
  'Heading tags stripped from chunk content'
)

// Diagnostic listing: estimated size and section of every chunk.
console.log('\n Chunk breakdown:')
for (const [i, chunk] of htmlChunks.entries()) {
  console.log(` [${i}] ~${estimateTokens(chunk.content)} tokens | section: ${chunk.sectionPath || '(none)'}`)
}
310
+
311
+ // ---------------------------------------------------------------------------
312
+ // Test 7: Real file — design-schema-spec.md
313
+ // ---------------------------------------------------------------------------
314
+
315
section('Test 7: Real file — design-schema-spec.md')

// Only a failed file read may skip this test. Chunking and the checks run
// outside the try block, so an exception thrown while chunking is no longer
// misreported as "could not read" and silently skipped.
let specContent: string | null = null
try {
  specContent = readFileSync(
    new URL('../docs/design-schema-spec.md', import.meta.url),
    'utf-8'
  )
} catch (err: unknown) {
  // A thrown value is not guaranteed to be an Error, so narrow before reading `.message`.
  const reason = err instanceof Error ? err.message : String(err)
  console.log(` ⚠️ Skipped — could not read design-schema-spec.md: ${reason}`)
}

if (specContent !== null) {
  const specChunks = chunkArticle(specContent, {
    articleTitle: 'Design Schema & Validation Schema Specification',
  })

  // Estimate each chunk once and reuse the numbers for every check and log line.
  const specTokenCounts = specChunks.map(c => estimateTokens(c.content))
  // `every` is vacuously true and Math.min()/Math.max() return ±Infinity for an
  // empty list, so the size checks additionally require at least one chunk.
  const hasChunks = specTokenCounts.length > 0

  assert(specChunks.length > 5, `Multiple chunks from spec (got ${specChunks.length})`)
  assert(specChunks.length < 40, `Reasonable chunk count (got ${specChunks.length}, expected < 40)`)
  assert(hasChunks && specTokenCounts.every(tokens => tokens >= 50),
    `All chunks have meaningful content (min ${Math.min(...specTokenCounts)} tokens)`)

  const maxChunkTokens = Math.max(...specTokenCounts)
  assert(hasChunks && maxChunkTokens < 2000, `No chunk is excessively large (max ${maxChunkTokens} tokens)`)

  // Diagnostic listing: estimated size and section of every chunk.
  console.log('\n Chunk breakdown:')
  specChunks.forEach((c, i) => {
    console.log(` [${i}] ~${specTokenCounts[i]} tokens | section: ${c.sectionPath || '(preamble)'}`)
  })

  console.log(`\n Total tokens across all chunks: ~${specTokenCounts.reduce((sum, tokens) => sum + tokens, 0)}`)
  console.log(` Original doc tokens: ~${estimateTokens(specContent)}`)
}
345
+
346
+ // ---------------------------------------------------------------------------
347
+ // Test 8: Empty content
348
+ // ---------------------------------------------------------------------------
349
+
350
section('Test 8: Empty content')

// An empty body is expected to yield exactly one chunk holding only the title.
const emptyChunks = chunkArticle('', { articleTitle: 'Empty Article' })
assert(emptyChunks.length === 1, `Single chunk for empty content (got ${emptyChunks.length})`)
// Optional chaining: if no chunk comes back, this is recorded as a failed
// check instead of a TypeError that aborts the run before the summary.
assert(emptyChunks[0]?.content === 'Empty Article', 'Content is just the title')
355
+
356
+ // ---------------------------------------------------------------------------
357
+ // Summary
358
+ // ---------------------------------------------------------------------------
359
+
360
// Print the totals between two rules of fifty '═' characters.
console.log(`\n${'═'.repeat(50)}`)
console.log(`Results: ${passed} passed, ${failed} failed`)
console.log(`${'═'.repeat(50)}`)

// Report failure through process.exitCode rather than process.exit(1):
// process.exit() ends the process immediately and can cut off output still
// being written to a piped stdout, while exitCode lets the script finish
// normally and still exit with a non-zero status.
if (failed > 0) process.exitCode = 1