lynkr 7.2.1 → 7.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lynkr",
3
- "version": "7.2.1",
3
+ "version": "7.2.2",
4
4
  "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -47,7 +47,6 @@
47
47
  "@azure/openai": "^2.0.0",
48
48
  "@babel/parser": "^7.29.0",
49
49
  "@babel/traverse": "^7.29.0",
50
- "better-sqlite3": "^12.6.2",
51
50
  "compression": "^1.7.4",
52
51
  "diff": "^5.2.0",
53
52
  "dockerode": "^4.0.2",
@@ -62,6 +61,7 @@
62
61
  "undici": "^6.22.0"
63
62
  },
64
63
  "optionalDependencies": {
64
+ "better-sqlite3": "^12.6.2",
65
65
  "tree-sitter": "^0.21.1",
66
66
  "tree-sitter-javascript": "^0.21.0",
67
67
  "tree-sitter-python": "^0.21.0",
@@ -142,9 +142,9 @@ class SemanticCache {
142
142
 
143
143
  /**
144
144
  * Extract cacheable text from messages
145
- * IMPORTANT: Only extracts user message content, NOT system prompt.
146
- * System prompt is handled separately via hash for exact matching.
147
- * This prevents false cache hits when system prompts are large and similar.
145
+ * IMPORTANT: Only extracts the ACTUAL user query, NOT system-like content.
146
+ * When messages are merged (e.g., Codex sends AGENTS.md as user role),
147
+ * we need to extract only the real user query from the end.
148
148
  *
149
149
  * @param {Array} messages - Chat messages
150
150
  * @returns {string|null} - Extracted user prompt or null
@@ -169,12 +169,121 @@ class SemanticCache {
169
169
  .join('\n');
170
170
  }
171
171
 
172
- // REMOVED: System prompt inclusion was causing false cache hits
173
- // The system prompt is now handled via hash matching instead
172
+ // Extract ONLY the actual user query when content contains merged system-like prefixes
173
+ // Codex and other clients send AGENTS.md, environment_context, etc. as user role messages
174
+ // which get merged into one large user message. We need to extract just the real query.
175
+ const originalLength = content.length;
176
+ content = this._extractActualUserQuery(content);
177
+
178
+ // Log extraction for debugging
179
+ if (originalLength !== content.length) {
180
+ logger.info({
181
+ originalLength,
182
+ extractedLength: content.length,
183
+ extracted: content.substring(0, 100),
184
+ }, '[SemanticCache] Extracted user query from merged content');
185
+ }
174
186
 
175
187
  return content.trim() || null;
176
188
  }
177
189
 
190
+ /**
191
+ * Extract the actual user query from potentially merged content.
192
+ * Codex and other clients merge system instructions with user queries.
193
+ * We need to find the ACTUAL user query, which is usually short and at the end.
194
+ *
195
+ * @param {string} content - Potentially merged user content
196
+ * @returns {string} - The actual user query
197
+ */
198
+ _extractActualUserQuery(content) {
199
+ if (!content) return content;
200
+
201
+ // Short content is likely the actual query - no extraction needed
202
+ if (content.length < 100) {
203
+ return content;
204
+ }
205
+
206
+ // Patterns that indicate SYSTEM/INSTRUCTION content (NOT user queries)
207
+ const systemPatterns = [
208
+ /^#\s*(AGENTS|CLAUDE|README)/i, // Markdown doc headers
209
+ /^<[a-z_-]+[\s>]/i, // XML-like tags
210
+ /^```/, // Code blocks
211
+ /^---\s*$/m, // YAML/markdown separators
212
+ /^IMPORTANT:/i, // Instruction markers
213
+ /^(permissions|environment|collaboration|context|instructions|Focus on)/i,
214
+ /sandboxing|workspace|cwd|shell/i, // Environment info
215
+ /Do not summarize|respond ONLY/i, // Instruction text
216
+ ];
217
+
218
+ // Split content by double newlines or single newlines
219
+ const segments = content.split(/\n\n+|\n(?=[A-Z#<])/);
220
+
221
+ // Strategy 1: Find the LAST SHORT segment that looks like a real query
222
+ // Real user queries are usually short (< 200 chars) and don't match system patterns
223
+ for (let i = segments.length - 1; i >= 0; i--) {
224
+ const segment = segments[i].trim();
225
+
226
+ // Skip empty or very short segments (< 2 chars)
227
+ if (!segment || segment.length < 2) continue;
228
+
229
+ // Skip if too long (system content tends to be verbose)
230
+ if (segment.length > 300) continue;
231
+
232
+ // Check if this looks like system content
233
+ const isSystemContent = systemPatterns.some(pattern => pattern.test(segment));
234
+
235
+ if (!isSystemContent) {
236
+ // Found a non-system segment - likely the real query
237
+ logger.debug({
238
+ originalLength: content.length,
239
+ extractedLength: segment.length,
240
+ extracted: segment.substring(0, 100),
241
+ }, '[SemanticCache] Extracted actual user query');
242
+ return segment;
243
+ }
244
+ }
245
+
246
+ // Strategy 2: Look for content after the last XML closing tag
247
+ const afterXmlMatch = content.match(/<\/[^>]+>\s*\n*([^<\n]{2,200})$/);
248
+ if (afterXmlMatch) {
249
+ const extracted = afterXmlMatch[1].trim();
250
+ if (extracted.length >= 2) {
251
+ logger.debug({
252
+ extractedLength: extracted.length,
253
+ extracted: extracted.substring(0, 100),
254
+ }, '[SemanticCache] Extracted query after XML tag');
255
+ return extracted;
256
+ }
257
+ }
258
+
259
+ // Strategy 3: Take the very last line if it's short
260
+ const lines = content.split('\n').filter(l => l.trim());
261
+ const lastLine = lines[lines.length - 1]?.trim();
262
+ if (lastLine && lastLine.length >= 2 && lastLine.length <= 200) {
263
+ const isSystem = systemPatterns.some(p => p.test(lastLine));
264
+ if (!isSystem) {
265
+ logger.debug({
266
+ extractedLength: lastLine.length,
267
+ extracted: lastLine.substring(0, 100),
268
+ }, '[SemanticCache] Extracted last line as query');
269
+ return lastLine;
270
+ }
271
+ }
272
+
273
+ // Strategy 4: If all else fails, return last 150 chars
274
+ // This ensures we don't cache based on system prompt prefix
275
+ if (content.length > 500) {
276
+ const tail = content.slice(-150).trim();
277
+ logger.debug({
278
+ originalLength: content.length,
279
+ extractedLength: tail.length,
280
+ }, '[SemanticCache] Using tail extraction fallback');
281
+ return tail;
282
+ }
283
+
284
+ return content;
285
+ }
286
+
178
287
  /**
179
288
  * Find the most similar cached response
180
289
  * IMPORTANT: Only matches entries with the same system prompt hash.