@askalf/dario 2.8.7 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/proxy.js +104 -47
  2. package/package.json +1 -1
package/dist/proxy.js CHANGED
@@ -236,6 +236,88 @@ function sanitizeMessages(body) {
236
236
  }
237
237
  }
238
238
  }
239
/**
 * Strip thinking blocks from prior assistant messages.
 *
 * Rationale (from the original author): the API's context_management
 * clear_thinking edit does NOT reduce input token billing, because tokens are
 * counted before server-side edits. Client-side stripping is the only way to
 * avoid spending the usage window on stale thinking traces.
 *
 * Only prior turns are stripped: the most recent assistant message is left
 * intact, so a turn that is still in progress (for example an assistant
 * tool_use that the next user message answers) keeps its thinking block.
 *
 * Mutates `body.messages` in place. For each affected assistant message whose
 * `content` is an array, `content` is replaced by a filtered copy; string
 * content and non-assistant messages are never touched.
 *
 * @param {{ messages?: Array<{ role?: string, content?: unknown }> }} body
 *   Parsed request body. A missing or non-array `messages` is a no-op.
 * @returns {void}
 */
function stripThinkingFromHistory(body) {
    const messages = body.messages;
    if (!Array.isArray(messages))
        return;
    // Locate the most recent assistant message; it is exempt from stripping.
    let latestAssistant = -1;
    for (let i = messages.length - 1; i >= 0; i--) {
        const candidate = messages[i];
        if (candidate && candidate.role === 'assistant') {
            latestAssistant = i;
            break;
        }
    }
    for (let i = 0; i < latestAssistant; i++) {
        const msg = messages[i];
        if (!msg || msg.role !== 'assistant' || !Array.isArray(msg.content))
            continue;
        // Entries that are not objects are kept as-is rather than crashing on `.type`.
        msg.content = msg.content.filter(block => !(block && block.type === 'thinking'));
    }
}
263
/**
 * Scrub non-Claude-Code fields and normalize field ordering.
 *
 * Per the original author: real Claude Code never sends the fields listed in
 * NON_CC_FIELDS, and it always sends its fields in a specific order, so both
 * presence and ordering are detectable. This module rebuilds the request
 * object to match.
 */
const NON_CC_FIELDS = new Set(['service_tier', 'top_p', 'top_k', 'stop_sequences', 'temperature']);
// Claude Code's field order (from MITM capture). Fields not in this list are appended at end.
const CC_FIELD_ORDER = [
    'model', 'messages', 'system', 'max_tokens', 'thinking', 'output_config',
    'context_management', 'metadata', 'stream', 'tools', 'tool_choice',
];
/**
 * Build a copy of `body` without the NON_CC_FIELDS keys and with the known
 * keys placed in CC_FIELD_ORDER, followed by every other own enumerable key
 * in its original relative order.
 *
 * The input object is NOT modified; values are copied by reference (shallow).
 * Only own properties are considered, so keys inherited through the prototype
 * chain never leak into the result.
 *
 * @param {Record<string, unknown>} body Parsed request body.
 * @returns {Record<string, unknown>} New object with scrubbed, reordered keys.
 */
function scrubAndReorderFields(body) {
    const hasOwn = (key) => Object.prototype.hasOwnProperty.call(body, key);
    const knownKeys = new Set(CC_FIELD_ORDER);
    // Known fields first, in Claude Code order.
    const entries = CC_FIELD_ORDER
        .filter(hasOwn)
        .map(key => [key, body[key]]);
    // Then any remaining fields (custom client fields we don't recognize),
    // minus the ones that must be scrubbed.
    for (const [key, value] of Object.entries(body)) {
        if (!knownKeys.has(key) && !NON_CC_FIELDS.has(key)) {
            entries.push([key, value]);
        }
    }
    return Object.fromEntries(entries);
}
294
/**
 * Normalize system prompt to exactly 3 blocks:
 *   [0] billing tag (no cache_control)
 *   [1] agent identity (cache_control = cache1h)
 *   [2] system prompt text (cache_control = cache1h)
 *
 * How block [2] is derived from `system`:
 *   - string: used verbatim;
 *   - array:  the string `text` of every block is joined with a blank line,
 *             skipping blocks that already carry a billing tag and skipping
 *             entries without a non-empty string `text` (null entries,
 *             non-text blocks, non-string `text` values);
 *   - anything else: treated as empty.
 * An empty result falls back to 'You are a helpful assistant.'.
 *
 * @param {string | Array<{ text?: unknown }> | undefined | null} system
 *   System prompt as sent by the client.
 * @param {string} billingTag Text for block [0].
 * @param {string} agentIdentity Text for block [1].
 * @param {object} cache1h cache_control value attached to blocks [1] and [2]
 *   (the same object reference is used for both).
 * @returns {Array<{ type: 'text', text: string, cache_control?: object }>}
 *   A new 3-element array; `system` is not modified.
 */
function normalizeSystemTo3Blocks(system, billingTag, agentIdentity, cache1h) {
    const BILLING_MARKER = 'x-anthropic-billing-header:';
    let systemText = '';
    if (typeof system === 'string') {
        systemText = system;
    }
    else if (Array.isArray(system)) {
        // Merge all text blocks into one, skip any existing billing tags.
        // The typeof guard keeps malformed entries from throwing on `.includes`.
        systemText = system
            .filter(block => block != null
                && typeof block.text === 'string'
                && block.text.length > 0
                && !block.text.includes(BILLING_MARKER))
            .map(block => block.text)
            .join('\n\n');
    }
    return [
        { type: 'text', text: billingTag },
        { type: 'text', text: agentIdentity, cache_control: cache1h },
        { type: 'text', text: systemText || 'You are a helpful assistant.', cache_control: cache1h },
    ];
}
239
321
  // OpenAI model names → Anthropic (fallback if client sends GPT names)
240
322
  const OPENAI_MODEL_MAP = {
241
323
  'gpt-5.4': 'claude-opus-4-6',
@@ -669,7 +751,18 @@ export async function startProxy(opts = {}) {
669
751
  const r = result;
670
752
  // In passthrough mode, skip all Claude-specific injection — OAuth swap only
671
753
  if (!passthrough) {
672
- // Inject device identity metadata for session tracking
754
+ // ── Stealth layer: make request indistinguishable from real Claude Code ──
755
+ // 1. Strip thinking blocks from prior assistant turns (client-side).
756
+ // context_management: clear_thinking does NOT reduce input token billing.
757
+ // Real Claude Code strips thinking before building the next request.
758
+ stripThinkingFromHistory(r);
759
+ // 2. Scrub non-CC fields and normalize field ordering
760
+ const reordered = scrubAndReorderFields(r);
761
+ // Copy reordered keys back (r is a reference to result)
762
+ for (const key of Object.keys(r))
763
+ delete r[key];
764
+ Object.assign(r, reordered);
765
+ // 3. Inject device identity metadata for session tracking
673
766
  if (identity.deviceId) {
674
767
  r.metadata = {
675
768
  user_id: JSON.stringify({
@@ -679,71 +772,31 @@ export async function startProxy(opts = {}) {
679
772
  }),
680
773
  };
681
774
  }
682
- // Enable adaptive thinking for models that support it (Opus/Sonnet 4.6+)
683
- // Haiku 4.5 does not support thinking at all
775
+ // 4. Model-aware defaults matching Claude Code behavior
684
776
  const modelName = (r.model || '').toLowerCase();
685
777
  const supportsThinking = !modelName.includes('haiku');
686
778
  if (supportsThinking && !r.thinking) {
687
779
  r.thinking = { type: 'adaptive' };
688
780
  }
689
- // Match Claude Code's default max_tokens (64000) when client sends low values
690
781
  if (!r.max_tokens || r.max_tokens < 16000) {
691
782
  r.max_tokens = 64000;
692
783
  }
693
- // Set reasoning effort (pass through client value or default to 'medium' matching Claude Code)
694
- // Haiku does not support the effort parameter
695
784
  if (supportsThinking && !r.output_config) {
696
785
  r.output_config = { effort: 'medium' };
697
786
  }
698
- // Enable context management (matches Claude Code default)
699
- // Requires thinking to be enabled — skip for models without thinking support (e.g. Haiku)
700
787
  if (supportsThinking && !r.context_management) {
701
788
  r.context_management = { edits: [{ type: 'clear_thinking_20251015', keep: 'all' }] };
702
789
  }
703
- // Inject Claude Code billing header into system prompt.
704
- // Anthropic uses this to route requests through priority rate limiting
705
- // instead of the general API quota. Without it, Opus/Sonnet get 429
706
- // when overall utilization is high, even though model-specific limits
707
- // have headroom. The CLI binary embeds this in its system prompt.
708
- //
709
- // Build tag and cch are computed per-request using the same algorithm
710
- // as the real Claude Code binary (Oz$ function):
711
- // - build tag = SHA-256(seed + msg_chars[4,7,20] + version).slice(0,3)
712
- // - cch = SHA-256(seed + version + msg_chars[4,7,20]).slice(0,5)
713
- // Build per-request billing tag matching Claude Code binary
790
+ // 5. Build per-request billing tag matching Claude Code binary (Oz$ algorithm)
714
791
  const userMsg = extractFirstUserMessage(r);
715
792
  const buildTag = computeBuildTag(userMsg, cliVersion);
716
793
  const cch = computeCch();
717
794
  const fullVersion = `${cliVersion}.${buildTag}`;
718
795
  const billingTag = `x-anthropic-billing-header: cc_version=${fullVersion}; cc_entrypoint=cli; cch=${cch};`;
719
- // Structure system prompt as 3 blocks matching real Claude Code:
720
- // [0] billing tag (no cache_control)
721
- // [1] agent identity string (cache 1h)
722
- // [2] actual system prompt (cache 1h)
796
+ // 6. Normalize system prompt to exactly 3 blocks (real Claude Code always sends 3)
723
797
  const AGENT_IDENTITY = 'You are a Claude agent, built on Anthropic\'s Claude Agent SDK.';
724
798
  const CACHE_1H = { type: 'ephemeral', ttl: '1h' };
725
- if (typeof r.system === 'string') {
726
- if (!r.system.includes('x-anthropic-billing-header:')) {
727
- r.system = [
728
- { type: 'text', text: billingTag },
729
- { type: 'text', text: AGENT_IDENTITY, cache_control: CACHE_1H },
730
- { type: 'text', text: r.system, cache_control: CACHE_1H },
731
- ];
732
- }
733
- }
734
- else if (Array.isArray(r.system)) {
735
- const hasTag = r.system.some(b => typeof b.text === 'string' && b.text.includes('x-anthropic-billing-header:'));
736
- if (!hasTag) {
737
- // Prepend billing tag and agent identity before existing blocks
738
- r.system.unshift({ type: 'text', text: billingTag }, { type: 'text', text: AGENT_IDENTITY, cache_control: CACHE_1H });
739
- }
740
- }
741
- else {
742
- r.system = [
743
- { type: 'text', text: billingTag },
744
- { type: 'text', text: AGENT_IDENTITY, cache_control: CACHE_1H },
745
- ];
746
- }
799
+ r.system = normalizeSystemTo3Blocks(r.system, billingTag, AGENT_IDENTITY, CACHE_1H);
747
800
  }
748
801
  finalBody = Buffer.from(JSON.stringify(r));
749
802
  }
@@ -764,9 +817,11 @@ export async function startProxy(opts = {}) {
764
817
  }
765
818
  else {
766
819
  // Claude-optimized: full beta set matching real Claude Code (exact order from MITM capture)
767
- beta = 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24';
820
+ beta = 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,effort-2025-11-24,fast-mode-2026-02-01';
768
821
  if (clientBeta) {
769
- const filtered = filterBillableBetas(clientBeta);
822
+ const baseSet = new Set(beta.split(','));
823
+ const filtered = filterBillableBetas(clientBeta)
824
+ .split(',').filter(b => b.length > 0 && !baseSet.has(b)).join(',');
770
825
  if (filtered)
771
826
  beta += ',' + filtered;
772
827
  }
@@ -776,6 +831,8 @@ export async function startProxy(opts = {}) {
776
831
  'Authorization': `Bearer ${accessToken}`,
777
832
  'anthropic-version': req.headers['anthropic-version'] || '2023-06-01',
778
833
  'anthropic-beta': beta,
834
+ // Real Claude Code adds x-client-request-id for firstParty + api.anthropic.com
835
+ 'x-client-request-id': randomUUID(),
779
836
  // Real Claude Code sends 600 on first request, 300 on subsequent
780
837
  'x-stainless-timeout': requestCount <= 1 ? '600' : '300',
781
838
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askalf/dario",
3
- "version": "2.8.7",
3
+ "version": "2.9.0",
4
4
  "description": "Use your Claude subscription as an API. No API key needed. Local proxy for Claude Max/Pro subscriptions.",
5
5
  "type": "module",
6
6
  "bin": {