k0ntext 3.6.0 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/analyzer/intelligent-analyzer.d.ts +7 -0
  2. package/dist/analyzer/intelligent-analyzer.d.ts.map +1 -1
  3. package/dist/analyzer/intelligent-analyzer.js +46 -1
  4. package/dist/analyzer/intelligent-analyzer.js.map +1 -1
  5. package/dist/cli/commands/embeddings-refresh.d.ts.map +1 -1
  6. package/dist/cli/commands/embeddings-refresh.js +4 -1
  7. package/dist/cli/commands/embeddings-refresh.js.map +1 -1
  8. package/dist/cli/commands/migrate.d.ts.map +1 -1
  9. package/dist/cli/commands/migrate.js +8 -0
  10. package/dist/cli/commands/migrate.js.map +1 -1
  11. package/dist/cli/repl/init/wizard.d.ts.map +1 -1
  12. package/dist/cli/repl/init/wizard.js +12 -4
  13. package/dist/cli/repl/init/wizard.js.map +1 -1
  14. package/dist/db/schema.d.ts +1 -1
  15. package/dist/db/schema.js +1 -1
  16. package/dist/embeddings/openrouter.d.ts.map +1 -1
  17. package/dist/embeddings/openrouter.js +8 -3
  18. package/dist/embeddings/openrouter.js.map +1 -1
  19. package/dist/utils/chunking.d.ts +38 -0
  20. package/dist/utils/chunking.d.ts.map +1 -0
  21. package/dist/utils/chunking.js +133 -0
  22. package/dist/utils/chunking.js.map +1 -0
  23. package/dist/utils/encoding.d.ts +24 -0
  24. package/dist/utils/encoding.d.ts.map +1 -0
  25. package/dist/utils/encoding.js +32 -0
  26. package/dist/utils/encoding.js.map +1 -0
  27. package/dist/utils/index.d.ts +8 -0
  28. package/dist/utils/index.d.ts.map +1 -0
  29. package/dist/utils/index.js +8 -0
  30. package/dist/utils/index.js.map +1 -0
  31. package/docs/plans/2026-02-09-v3.7.0-database-fixes-and-improvements.md +900 -0
  32. package/package.json +1 -1
  33. package/src/analyzer/intelligent-analyzer.ts +58 -1
  34. package/src/cli/commands/embeddings-refresh.ts +4 -1
  35. package/src/cli/commands/migrate.ts +8 -0
  36. package/src/cli/repl/init/wizard.ts +12 -4
  37. package/src/db/migrations/files/0015_add_sync_state_version_tracking.sql +18 -0
  38. package/src/db/schema.ts +1 -1
  39. package/src/embeddings/openrouter.ts +10 -4
  40. package/src/utils/chunking.ts +152 -0
  41. package/src/utils/encoding.ts +33 -0
  42. package/src/utils/index.ts +8 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "k0ntext",
3
- "version": "3.6.0",
3
+ "version": "3.7.0",
4
4
  "description": "Unified AI Context Engineering - Intelligent context for Claude, Copilot, Cline, and more with OpenRouter-powered initialization",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -10,6 +10,7 @@ import path from 'path';
10
10
  import { glob } from 'glob';
11
11
  import { OpenRouterClient, createOpenRouterClient, hasOpenRouterKey } from '../embeddings/openrouter.js';
12
12
  import { AI_TOOLS, AI_TOOL_FOLDERS, type AITool } from '../db/schema.js';
13
+ import { estimateTokens, chunkForEmbedding } from '../utils/chunking.js';
13
14
 
14
15
  /**
15
16
  * Discovery result for a file
@@ -588,12 +589,68 @@ Return ONLY valid JSON, no markdown formatting.
588
589
 
/**
 * Produce an embedding vector for one piece of text (e.g., a search query).
 *
 * Text estimated at 8000 tokens or fewer is sent to the client unchanged.
 * Larger text is split with `chunkForEmbedding`; the chunks are embedded one
 * after another and the element-wise average of their vectors is returned.
 *
 * @param text - Text to embed
 * @returns Embedding vector for the text
 * @throws Error when no OpenRouter client is configured
 */
async embedText(text: string): Promise<number[]> {
  if (!this.client) {
    throw new Error('OpenRouter client not available for embeddings');
  }

  // Fast path: the estimate says the whole text fits in a single request.
  const fitsInOneRequest = estimateTokens(text) <= 8000;
  if (fitsInOneRequest) {
    return this.client.embed(text);
  }

  // Slow path: split the text, then embed the pieces sequentially.
  const pieces = chunkForEmbedding(text);
  if (pieces.length === 1) {
    return this.client.embed(pieces[0]);
  }

  const vectors: number[][] = [];
  for (const piece of pieces) {
    vectors.push(await this.client.embed(piece));
  }

  // Collapse the per-chunk vectors into a single vector.
  return this.averageEmbeddings(vectors);
}
626
+
/**
 * Average multiple embeddings into a single vector (element-wise mean).
 *
 * Every input vector contributes equally, regardless of how much text it
 * was produced from. A single input vector is returned as-is (same array
 * reference, not a copy).
 *
 * @param embeddings - Vectors to average; all must have the same length
 * @returns Vector whose i-th entry is the mean of the inputs' i-th entries
 * @throws Error when `embeddings` is empty
 * @throws Error when the vectors do not all share the first vector's length
 */
private averageEmbeddings(embeddings: number[][]): number[] {
  const first = embeddings[0];
  if (embeddings.length === 0 || first === undefined) {
    throw new Error('Cannot average empty embeddings array');
  }

  if (embeddings.length === 1) {
    return first;
  }

  // Reject mismatched vectors up front: summing them would silently yield
  // NaN entries (shorter vector) or drop trailing values (longer vector).
  const dimension = first.length;
  embeddings.forEach((embedding, index) => {
    if (embedding.length !== dimension) {
      throw new Error(
        `Cannot average embeddings with different dimensions: expected ${dimension}, got ${embedding.length} at index ${index}`
      );
    }
  });

  return first.map((_, i) => {
    let sum = 0;
    for (const embedding of embeddings) {
      // Lengths were validated above, so the fallback is never taken for
      // well-formed vectors; it only keeps the expression typed as number.
      sum += embedding[i] ?? 0;
    }
    return sum / embeddings.length;
  });
}
598
655
 
599
656
  /**
@@ -11,6 +11,7 @@ import { confirm } from '@inquirer/prompts';
11
11
  import { createIntelligentAnalyzer } from '../../analyzer/intelligent-analyzer.js';
12
12
  import { hasOpenRouterKey } from '../../embeddings/openrouter.js';
13
13
  import { DatabaseClient } from '../../db/client.js';
14
+ import { estimateTokens } from '../../utils/chunking.js';
14
15
 
15
16
  /**
16
17
  * Embeddings refresh command
@@ -105,7 +106,9 @@ export const embeddingsRefreshCommand = new Command('embeddings:refresh')
105
106
  for (const item of batch) {
106
107
  if (options.verbose) {
107
108
  spinner.stop();
108
- console.log(chalk.dim(` Embedding: ${item.name}`));
109
+ const tokenEstimate = estimateTokens(item.content);
110
+ const chunkInfo = tokenEstimate > 8000 ? chalk.yellow(` (${Math.ceil(tokenEstimate / 8000)} chunks)`) : '';
111
+ console.log(chalk.dim(` Embedding: ${item.name}${chunkInfo}`));
109
112
  spinner.start();
110
113
  }
111
114
 
@@ -18,6 +18,14 @@ import { MigrationRunner } from '../../db/migrations/index.js';
18
18
  */
19
19
  export const migrateCommand = new Command('migrate')
20
20
  .description('Manage database schema migrations')
21
+ .action(() => {
22
+ // Default action: show help if no subcommand specified
23
+ console.log('\nAvailable subcommands:\n');
24
+ console.log(' k0ntext migrate status Show migration status');
25
+ console.log(' k0ntext migrate up Apply pending migrations');
26
+ console.log(' k0ntext migrate rollback Rollback to a previous backup\n');
27
+ console.log('Run "k0ntext migrate <subcommand> --help" for more information.\n');
28
+ })
21
29
 
22
30
  // Status subcommand
23
31
  .command('status')
@@ -8,6 +8,7 @@ import { input, confirm, select, checkbox } from '@inquirer/prompts';
8
8
  import chalk from 'chalk';
9
9
  import { ProjectType } from '../core/session.js';
10
10
  import { K0NTEXT_THEME } from '../tui/theme.js';
11
+ import { stripBOM } from '../../../utils/encoding.js';
11
12
 
12
13
  /**
13
14
  * Wizard configuration result
@@ -66,7 +67,9 @@ export class InitWizard {
66
67
 
67
68
  constructor(projectRoot: string) {
68
69
  this.projectRoot = projectRoot;
69
- this.hasExistingKey = !!process.env.OPENROUTER_API_KEY;
70
+ // Strip UTF-8 BOM from env var if present (Windows editors sometimes add this)
71
+ const cleanKey = process.env.OPENROUTER_API_KEY ? stripBOM(process.env.OPENROUTER_API_KEY) : '';
72
+ this.hasExistingKey = cleanKey.length > 0;
70
73
  }
71
74
 
72
75
  /**
@@ -146,7 +149,9 @@ for your specific needs.
146
149
  });
147
150
 
148
151
  if (useExisting) {
149
- return process.env.OPENROUTER_API_KEY!;
152
+ // Strip UTF-8 BOM from env var if present (Windows editors sometimes add this)
153
+ const envKey = process.env.OPENROUTER_API_KEY || '';
154
+ return stripBOM(envKey);
150
155
  }
151
156
  }
152
157
 
@@ -158,7 +163,9 @@ for your specific needs.
158
163
  message: 'Enter your OpenRouter API key (or press Enter to skip):',
159
164
  validate: (value: string) => {
160
165
  if (!value) return true; // Allow skipping
161
- if (value.startsWith('sk-or-v1-')) return true;
166
+ // Strip BOM before validation
167
+ const cleanValue = stripBOM(value);
168
+ if (cleanValue.startsWith('sk-or-v1-')) return true;
162
169
  return 'Invalid API key format. Should start with "sk-or-v1-"';
163
170
  }
164
171
  });
@@ -174,7 +181,8 @@ for your specific needs.
174
181
  }
175
182
  }
176
183
 
177
- return apiKey || '';
184
+ // Strip BOM from user input before returning
185
+ return stripBOM(apiKey || '');
178
186
  }
179
187
 
180
188
  /**
@@ -0,0 +1,18 @@
1
+ -- Migration: 1.5.0
2
+ -- Description: Add version tracking columns to sync_state table
3
+ -- Breaks: false
4
+ -- Dependencies: 1.4.0
5
+
6
+ -- Add k0ntext_version column to track package version when sync occurred
7
+ -- Note: the column has no DEFAULT, so existing rows will have NULL for it
8
+ ALTER TABLE sync_state ADD COLUMN k0ntext_version TEXT;
9
+
10
+ -- Add user_modified flag (INTEGER, DEFAULT 0) to track if the user manually edited the synced file
11
+ ALTER TABLE sync_state ADD COLUMN user_modified INTEGER DEFAULT 0;
12
+
13
+ -- Add last_checked timestamp for version checking (ISO 8601 format, stored as TEXT; no DEFAULT)
14
+ ALTER TABLE sync_state ADD COLUMN last_checked TEXT;
15
+
16
+ -- Index the k0ntext_version and user_modified columns; IF NOT EXISTS guards only the index creation, not the ALTER TABLE statements above
17
+ CREATE INDEX IF NOT EXISTS idx_sync_state_version ON sync_state(k0ntext_version);
18
+ CREATE INDEX IF NOT EXISTS idx_sync_state_user_modified ON sync_state(user_modified);
package/src/db/schema.ts CHANGED
@@ -5,7 +5,7 @@
5
5
  * Supports vector embeddings, knowledge graph, and sync state.
6
6
  */
7
7
 
8
- export const SCHEMA_VERSION = '1.4.0';
8
+ export const SCHEMA_VERSION = '1.5.0';
9
9
 
10
10
  /**
11
11
  * Core database schema SQL
@@ -1,12 +1,13 @@
1
1
  /**
2
2
  * OpenRouter Client
3
- *
3
+ *
4
4
  * Client for OpenRouter API supporting both embeddings and chat completions.
5
5
  * Used for intelligent initialization and context understanding.
6
6
  */
7
7
 
8
8
  import { createHash } from 'crypto';
9
9
  import { K0NTEXT_MODELS, MODEL_CONFIG, getPrimaryChatModel, getEmbeddingModel } from '../config/models.js';
10
+ import { stripBOM } from '../utils/encoding.js';
10
11
 
11
12
  /**
12
13
  * OpenRouter API endpoints
@@ -185,7 +186,8 @@ export class OpenRouterClient {
185
186
  throw new Error('OPENROUTER_API_KEY is required');
186
187
  }
187
188
 
188
- this.apiKey = config.apiKey;
189
+ // Strip UTF-8 BOM if present (Windows editors sometimes add this to .env files)
190
+ this.apiKey = stripBOM(config.apiKey);
189
191
  this.embeddingModel = config.embeddingModel || DEFAULT_EMBEDDING_MODEL;
190
192
  this.chatModel = config.chatModel || DEFAULT_CHAT_MODEL;
191
193
  this.siteUrl = config.siteUrl || 'https://github.com/SireJeff/claude-context-engineering-template';
@@ -582,8 +584,11 @@ export function createOpenRouterClient(): OpenRouterClient {
582
584
  );
583
585
  }
584
586
 
587
+ // Strip UTF-8 BOM from API key (Windows editors sometimes add this to .env files)
588
+ const cleanApiKey = stripBOM(apiKey);
589
+
585
590
  return new OpenRouterClient({
586
- apiKey,
591
+ apiKey: cleanApiKey,
587
592
  // Use centralized models from config, allow env override for testing
588
593
  embeddingModel: process.env.OPENROUTER_EMBEDDING_MODEL || getEmbeddingModel(),
589
594
  chatModel: process.env.OPENROUTER_CHAT_MODEL || getPrimaryChatModel()
@@ -594,7 +599,8 @@ export function createOpenRouterClient(): OpenRouterClient {
594
599
  * Check if OpenRouter API key is available
595
600
  */
596
601
  export function hasOpenRouterKey(): boolean {
597
- return !!process.env.OPENROUTER_API_KEY;
602
+ const apiKey = process.env.OPENROUTER_API_KEY;
603
+ return !!apiKey && stripBOM(apiKey).length > 0;
598
604
  }
599
605
 
600
606
  /**
@@ -0,0 +1,152 @@
1
+ /**
2
+ * Text Chunking Utility
3
+ *
4
+ * Splits large texts into chunks suitable for embedding generation.
5
+ * Handles token limits, word boundaries, and overlap for context preservation.
6
+ */
7
+
/**
 * Token cost is tracked in quarter-tokens so the "~4 ASCII characters per
 * token" heuristic can be applied with integer arithmetic.
 */
const QUARTERS_PER_TOKEN = 4;

/** How far back from a chunk's budget limit to look for a newline or space. */
const BOUNDARY_WINDOW_CHARS = 200;

/** Per-request token limit used for embeddings. */
const EMBEDDING_MAX_TOKENS = 8000;

/** Tokens repeated at the start of the next chunk to preserve context. */
const EMBEDDING_OVERLAP_TOKENS = 100;

/**
 * Estimated cost of one UTF-16 code unit, in quarter-tokens.
 *
 * ASCII costs a quarter token (4 characters per token). Anything else is
 * charged a full token, because non-English text tokenizes far less
 * compactly and under-estimating it produces oversized requests.
 */
function quarterTokenCost(codeUnit: number): number {
  return codeUnit < 0x80 ? 1 : QUARTERS_PER_TOKEN;
}

/** True when cutting `text` at `index` would separate a surrogate pair. */
function splitsSurrogatePair(text: string, index: number): boolean {
  if (index <= 0 || index >= text.length) return false;
  const before = text.charCodeAt(index - 1);
  const after = text.charCodeAt(index);
  return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
}

/**
 * Furthest end index such that `text.slice(start, end)` fits in `budget`
 * quarter-tokens. Always advances by at least one code unit so callers
 * make progress, and never ends between the halves of a surrogate pair.
 */
function findBudgetEnd(text: string, start: number, budget: number): number {
  let spent = 0;
  let end = start;
  while (end < text.length) {
    const cost = quarterTokenCost(text.charCodeAt(end));
    if (spent + cost > budget) break;
    spent += cost;
    end++;
  }

  if (end === start) {
    end = start + 1;
  }
  if (splitsSurrogatePair(text, end)) {
    // Prefer dropping the dangling half; if that would empty the chunk,
    // take the whole pair instead.
    end = end - 1 > start ? end - 1 : end + 1;
  }
  return end;
}

/**
 * Move `end` back to just after the last newline (preferred) or space found
 * within the final `BOUNDARY_WINDOW_CHARS` characters of the chunk. Returns
 * `end` unchanged when the window contains neither.
 */
function preferWordBoundary(text: string, start: number, end: number): number {
  const windowStart = Math.max(start, end - BOUNDARY_WINDOW_CHARS);
  const window = text.slice(windowStart, end);

  let position = window.lastIndexOf('\n');
  if (position === -1) {
    position = window.lastIndexOf(' ');
  }
  return position === -1 ? end : windowStart + position + 1;
}

/**
 * Start index of the next chunk when the tail of the current chunk is to be
 * repeated. Walks back from `chunkEnd` while the repeated text fits in
 * `overlapBudget` quarter-tokens, but never repeats more than half of the
 * current chunk, which guarantees the start index strictly increases.
 */
function findOverlapStart(
  text: string,
  chunkStart: number,
  chunkEnd: number,
  overlapBudget: number
): number {
  const earliest = chunkStart + Math.ceil((chunkEnd - chunkStart) / 2);
  let spent = 0;
  let next = chunkEnd;
  while (next > earliest) {
    const cost = quarterTokenCost(text.charCodeAt(next - 1));
    if (spent + cost > overlapBudget) break;
    spent += cost;
    next--;
  }
  return splitsSurrogatePair(text, next) ? next + 1 : next;
}

/**
 * Estimate token count for text.
 *
 * Heuristic: after trimming leading/trailing whitespace, ASCII characters
 * count as a quarter token each (~4 characters per token for English text)
 * and every non-ASCII UTF-16 code unit counts as one token. The result is
 * approximate; for pure-ASCII input it equals `ceil(length / 4)`.
 *
 * @param text - Text to estimate tokens for
 * @returns Estimated token count (0 for empty or whitespace-only text)
 */
export function estimateTokens(text: string): number {
  if (!text) return 0;

  const trimmed = text.trim();
  if (trimmed.length === 0) return 0;

  let quarters = 0;
  for (let i = 0; i < trimmed.length; i++) {
    quarters += quarterTokenCost(trimmed.charCodeAt(i));
  }
  return Math.ceil(quarters / QUARTERS_PER_TOKEN);
}

/**
 * Split text into chunks that fit within max tokens.
 *
 * The text is trimmed first. Each chunk is sized so that its
 * `estimateTokens` value does not exceed `maxTokens` (except when a single
 * character alone exceeds the budget). Chunks end just after a newline or
 * space when one occurs in the last 200 characters, and otherwise at the
 * budget limit. Chunks are contiguous slices of the trimmed text and are not
 * trimmed individually; the whole text is always covered, however many
 * chunks that takes.
 *
 * @param text - Text to chunk
 * @param maxTokens - Maximum tokens per chunk (default: 8000 for OpenRouter)
 * @param overlapTokens - Number of tokens to overlap between chunks
 *   (default: 0); capped at half of each chunk, non-positive means none
 * @returns Array of text chunks; `['']` for empty or whitespace-only text
 * @throws RangeError when the text needs splitting and `maxTokens` is not a
 *   positive finite number
 */
export function chunkText(
  text: string,
  maxTokens: number = 8000,
  overlapTokens: number = 0
): string[] {
  // Handle empty or whitespace-only text
  if (!text || text.trim().length === 0) {
    return [''];
  }

  const trimmedText = text.trim();

  // If text is under the limit, return as-is
  if (estimateTokens(trimmedText) <= maxTokens) {
    return [trimmedText];
  }

  // A non-positive or NaN limit can never be satisfied; fail loudly instead
  // of emitting empty chunks.
  if (!Number.isFinite(maxTokens) || maxTokens <= 0) {
    throw new RangeError(`maxTokens must be a positive finite number, got ${maxTokens}`);
  }

  const chunkBudget = Math.max(1, Math.floor(maxTokens * QUARTERS_PER_TOKEN));
  const overlapBudget = overlapTokens > 0 ? Math.floor(overlapTokens * QUARTERS_PER_TOKEN) : 0;

  const chunks: string[] = [];
  let startIndex = 0;

  // Terminates without a loop cap: endIndex > startIndex on every pass and
  // the next startIndex is always greater than the current one.
  while (startIndex < trimmedText.length) {
    let endIndex = findBudgetEnd(trimmedText, startIndex, chunkBudget);

    // If not the last chunk, try to break at a line or word boundary
    if (endIndex < trimmedText.length) {
      endIndex = preferWordBoundary(trimmedText, startIndex, endIndex);
    }

    chunks.push(trimmedText.slice(startIndex, endIndex));

    if (endIndex >= trimmedText.length) {
      break;
    }

    startIndex =
      overlapBudget > 0
        ? findOverlapStart(trimmedText, startIndex, endIndex, overlapBudget)
        : endIndex;
  }

  return chunks;
}

/**
 * Chunk text specifically for embedding generation.
 *
 * Uses an 8000 token limit per chunk and a 100 token overlap between
 * consecutive chunks for context.
 *
 * @param text - Text to chunk
 * @returns Array of text chunks suitable for embeddings
 */
export function chunkForEmbedding(text: string): string[] {
  return chunkText(text, EMBEDDING_MAX_TOKENS, EMBEDDING_OVERLAP_TOKENS);
}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Encoding Utilities
3
+ *
4
+ * Handles text encoding issues across different platforms.
5
+ */
6
+
/**
 * U+FEFF, the code unit a byte order mark becomes once text has been decoded
 * into a JavaScript string (on disk, in UTF-8, it is the bytes EF BB BF).
 */
const BOM_CODE_UNIT = 0xfeff;

/**
 * Strip a leading BOM (Byte Order Mark) from a string.
 *
 * Some Windows editors add a BOM to the start of files, which can break
 * environment variable parsing. Only a single BOM at position 0 is removed;
 * any later U+FEFF characters are left untouched.
 *
 * @param str - String that may contain a BOM
 * @returns String with the leading BOM removed if present, otherwise `str`
 */
export function stripBOM(str: string): string {
  return hasBOM(str) ? str.slice(1) : str;
}

/**
 * Detect whether a string starts with a BOM (U+FEFF).
 *
 * @param str - String to check
 * @returns true if the first code unit is U+FEFF; false otherwise,
 *   including for the empty string
 */
export function hasBOM(str: string): boolean {
  return str.charCodeAt(0) === BOM_CODE_UNIT;
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Shared Utilities Module
3
+ *
4
+ * Exports all shared utility functions.
5
+ */
6
+
7
+ export * from './encoding.js';
8
+ export * from './chunking.js';